#include #include #include #include #include #include using namespace std; int main(int argc,char**argv) { vectorweights; vectorfilenames; for(int i=1;i v1,v2; map id1,id2; vector iid1(2),iid2(2); string w1(filenames[0]); string w2(filenames[1]); if( w1.length()>4&&w2.length()>4&&((w1.substr(w1.length()-4,w1.length())==".tok" && w2.substr(w2.length()-4,w2.length())==".tok" )|| (w1.substr(w1.length()-4,w1.length())==".txt" && w2.substr(w2.length()-4,w2.length())==".txt" ) )) { w1=w1.substr(0,w1.length()-4); w2=w2.substr(0,w2.length()-4); cerr << "w1:"<< w1 << " w2:" << w2 << endl; } string vocab1(w1),vocab2(w2),snt1,snt2; unsigned int slashpos=vocab1.rfind('/')+1; if( slashpos>=vocab1.length() ) slashpos=0; string vocab1x(vocab1.substr(slashpos,vocab1.length())); cout << vocab1 << " -> " << vocab1x << endl; slashpos=vocab2.rfind('/')+1; if( slashpos>=vocab2.length() ) slashpos=0; string vocab2x(vocab2.substr(slashpos,vocab2.length())); cout << vocab2 << " -> " << vocab2x << endl; snt1=vocab1+"_"+vocab2x+string(".snt"); snt2=vocab2+"_"+vocab1x+string(".snt"); vocab1+=string(".vcb"); vocab2+=string(".vcb"); ofstream ovocab1(vocab1.c_str()),ovocab2(vocab2.c_str()),osnt1(snt1.c_str()),osnt2(snt2.c_str()); for(unsigned int i=0;i t1,t2; istrstream ii1(line1.c_str()); while(ii1>>word) { t1.push_back(word); v1[word]++; if( id1.find(word)==id1.end() ) { iid1.push_back(word); id1[word]=iid1.size()-1; } } istrstream ii2(line2.c_str()); while(ii2>>word) { t2.push_back(word); v2[word]++; if( id2.find(word)==id2.end() ) { iid2.push_back(word); id2[word]=iid2.size()-1; } } double w=1.0; if( i/2