#!/usr/bin/perl

#This script converts lines of the form <<word> <index>> (word and index
#separated by a single space) into lines of the form <<integer>\t<index>>,
#in which each word is represented by a different integer. The converted
#lines are written to a .int file and the conversion table to a .tt file.
# \t = tab character

use strict;
use warnings;

# Process every file named on the command line independently. For an input
# file "<stem>.<ext>" two files are written next to it:
#   <stem>.int  - one "<integer>\t<index>" line per input line
#   <stem>.tt   - the conversion table, one "<word> <integer>" line per
#                 distinct word, in order of first appearance
# Dies with a message on any failure to open an input or output file, or to
# flush an output file on close.
for my $filename (@ARGV) {

    # Three-argument open with an explicit mode, so a file name can never be
    # interpreted as an open mode or a pipe command.
    open(my $in, '<', $filename)
        or die(" Can't open input file $filename: $!\n");

    # Output stem: keep the directory part untouched and cut only the file
    # name at its first dot. Splitting the whole path on dots would reduce a
    # path such as "./data/words.txt" to an empty stem.
    my ($dir, $base) = $filename =~ m{\A(.*/)?([^/]*)\z}s;
    $dir = '' unless defined $dir;
    my ($head) = $base =~ m{\A([^.]*)};
    my $stem = $dir . $head;

    open(my $out, '>', "$stem.int")
        or die(" Can't open output file $stem.int: $!\n");
    open(my $tt, '>', "$stem.tt")
        or die(" Can't open output file $stem.tt: $!\n");

    # The table and the counter are both per input file. Sharing the table
    # across files while restarting the counter would hand out integers that
    # are already taken by words from an earlier file, and would leave those
    # words out of this file's .tt table.
    my %int_for;
    my $next_int = 1;

    while (my $line = <$in>) {
        chomp($line);
        my ($word, $index) = split(/ /, $line);

        # Lines with fewer than two fields are still converted; the missing
        # fields are treated as empty strings.
        $word  = '' unless defined $word;
        $index = '' unless defined $index;

        # First occurrence of a word: assign the next integer and record the
        # pair in the conversion table.
        if (!defined $int_for{$word}) {
            $int_for{$word} = $next_int++;
            print {$tt} "$word $int_for{$word}\n";
        }

        print {$out} "$int_for{$word}\t$index\n";
    }

    close($in);

    # Buffered write errors (e.g. disk full) only surface at close time.
    close($out) or die(" Can't close output file $stem.int: $!\n");
    close($tt)  or die(" Can't close output file $stem.tt: $!\n");
}

