/*
 * Converts GIZA++ snt-format into plain text (per the usage message below).
 *
 * NOTE(review): this file appears to have lost every angle-bracketed span
 * (the #include directives name no headers; `map&voc`, `mapvoc1,voc2` and
 * `vectorl1,l2` have no template arguments) and it is collapsed onto a single
 * line that ends in the middle of a `for` statement. It will not compile as
 * shown; restore it from the upstream original before changing any logic.
 *
 * readVoc(in, voc)
 *   Fills `voc` from the stream `in`.
 *   - Always pre-seeds the entry voc["1"] = "UNK".
 *   - If `in` is not in a good state, writes "Vocabulary does not exist." to
 *     cerr and continues (the read loop then simply does not run).
 *   - For each line, extracts two whitespace-separated tokens s1 and s2 and
 *     stores voc[s1] = s2. Anything after the second token is ignored.
 *   - When extraction fails, an error is written to cerr, but because the `if`
 *     has no braces the assignment voc[s1] = s2 is still executed for that
 *     line. NOTE(review): probably unintended -- confirm.
 *   - NOTE(review): istrstream belongs to the deprecated strstream facility.
 *
 * main(argc, argv)
 *   Arguments: vcb1 vcb2 snt12 output_prefix [ -counts ].
 *   - Exits with status 1 after printing usage unless argc is 5 or 6.
 *   - With a sixth argument, an error is printed if it is not "-counts", but
 *     `counts` is set to 1 in either case (the `if` only guards the message).
 *   - Opens the two vocabulary files and the snt file for reading, and
 *     <output_prefix>1.txt / <output_prefix>2.txt for writing; none of the
 *     open results are checked here (readVoc reports an unreadable
 *     vocabulary stream).
 *   - Reads the snt file three lines at a time: the first line is parsed as a
 *     double `count`, the second and third are split on whitespace into the
 *     token lists l1 and l2. When `counts` is set, `count` is written to cout
 *     followed by a newline.
 *   - `source`, `target` and `printed` are declared, but their use lies in the
 *     part of the file that is not visible here.
 */
#include #include #include #include #include #include using namespace std; void readVoc(istream&in,map&voc) { string line,s1,s2; voc["1"]="UNK"; if( !in )cerr <<"Vocabulary does not exist.\n"; while(getline(in,line)) { istrstream eingabe(line.c_str()); if( !(eingabe>>s1>>s2)) cerr << "ERROR in vocabulary '" << line << "'\n"; voc[s1]=s2; } } int main(int argc,char **argv) { if( argc!=5&&argc!=6 ) { cerr << "Usage: " << argv[0] << " vcb1 vcb2 snt12 output_prefix [ -counts ]\n"; cerr << "Converts GIZA++ snt-format into plain text.\n"; exit(1); } bool counts=0; if( argc==6 ) { if(string(argv[5])!="-counts") cerr << "ERROR: wrong option " << argv[5] << endl; counts=1; } ifstream v1(argv[1]),v2(argv[2]),t(argv[3]); string prefix(argv[4]); string outfil1=prefix+"1.txt"; string outfil2=prefix+"2.txt"; ofstream out1(outfil1.c_str()); ofstream out2(outfil2.c_str()); mapvoc1,voc2; readVoc(v1,voc1); readVoc(v2,voc2); int source=0,target=0; string line1,line2,line3; int printed=0; while(getline(t,line1)&&getline(t,line2)&&getline(t,line3)) { istrstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str()); double count; string word; eingabe1>>count; vectorl1,l2; while(eingabe2>>word) l1.push_back(word); while(eingabe3>>word) l2.push_back(word); if( counts ) cout << count << '\n'; for(unsigned int p=0;p