// NOTE(review): this copy of the header looks damaged by an extraction step and
// will not compile as-is: the whole file is collapsed onto five physical lines,
// the first #include has no header name, and template argument lists are missing
// (e.g. "typedef Vector > Vpff;", "map D1;", "map::iterator"). Several bodies are
// cut where a '<' once stood: compare1::operator() stops at "a.l" and runs into
// the body of a function that fills `out` from `in`; the loops from
// normalizeTable() up to readProbTable() and the key parsing inside
// readProbTable() are truncated as well. Restore from a pristine copy before
// building. The code text below is left exactly as found.
// NOTE(review): because lines are collapsed, the two "//" comments in the text
// ("// if(x.deps&DEP_MODEL_pj)..." and "//assert(j_cp>=0);") now comment out the
// remainder of their physical lines.
//
// What the surviving text shows:
//  * m4_key    - plain record with fields deps, l, m, F, E, prevj, vacancies1,
//                vacancies2, all set by the constructor. The friend printers
//                print1/printb1 emit l, m, F, E only when the matching
//                DEP_MODEL_* / DEP_MODELb_* bit is set in deps, and v1/v2 only
//                when they differ from -1; print1_m5/printb1_m5 emit the class
//                strings (or "0") followed by both vacancy values.
//  * compare1  - functor holding a deps mask; only the start of operator() is
//                visible. compareb1 is used by d4model but its definition is
//                not visible here.
//  * d4model   - holds the maps D1 and Db1 (constructed with
//                compare1(M4_Dependencies) / compareb1(M4_Dependencies)), msl,
//                and two WordClasses objects (ewordclasses, fwordclasses).
//                makeWordClasses() opens efile/ffile and either prints
//                "ERROR: can not read ..." to cerr or calls read() on the
//                corresponding WordClasses with m1 / m2.
//                getCountRef_first() looks the key up in D1, inserts a vector of
//                msl*2+1 (0.0,0.0) pairs when absent, and returns a reference to
//                the .first member at index j-j_cp+msl.
/* Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI) This file is part of GIZA++ ( extension of GIZA ). This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifndef _d4tables_h_define #define _d4tables_h_define #include #include "WordClasses.h" #include "Globals.h" #include "myassert.h" extern float d4modelsmooth_factor; class m4_key { public: int deps; int l; int m; int F; int E; int prevj; int vacancies1,vacancies2; m4_key(int _deps,int _l,int _m,int _F,int _E,int _prevj,int _v1,int _v2) : deps(_deps),l(_l),m(_m),F(_F),E(_E),prevj(_prevj),vacancies1(_v1),vacancies2(_v2) {} friend ostream&print1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf) { if(x.deps&DEP_MODEL_l)out << "l: " << x.l<<' '; if(x.deps&DEP_MODEL_m)out << "m: " << x.m<<' '; if(x.deps&DEP_MODEL_F)out << "F: " << wcf.classString(x.F)<< ' '; if(x.deps&DEP_MODEL_E)out << "E: " << wce.classString(x.E)<< ' '; // if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' '; if(x.vacancies1!=-1)out << "v1: " << x.vacancies1 << ' '; if(x.vacancies2!=-1)out << "v2: " << x.vacancies2 << ' '; return out << '\n'; } friend ostream&print1_m5(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf) { out << ((x.deps&DEP_MODEL_E)?wce.classString(x.E):string("0"))<< ' '; out << ((x.deps&DEP_MODEL_F)?wcf.classString(x.F):string("0"))<< 
// (print1_m5 continues on the next physical line; the rest of m4_key, compare1,
// the token-splitting helper and the first part of d4model follow.)
' '; out << x.vacancies1 << ' '; out << x.vacancies2 << ' '; return out; } friend ostream&printb1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf) { if(x.deps&DEP_MODELb_l)out << "l: " << x.l<<' '; if(x.deps&DEP_MODELb_m)out << "m: " << x.m<<' '; if(x.deps&DEP_MODELb_F)out << "F: " << wcf.classString(x.F) << ' '; if(x.deps&DEP_MODELb_E)out << "E: " << wce.classString(x.E) << ' '; if(x.vacancies1!=-1)out << "v1: " << x.vacancies1 << ' '; if(x.vacancies2!=-1)out << "v2: " << x.vacancies2 << ' '; return out << '\n'; } friend ostream&printb1_m5(ostream&out,const m4_key&x,const WordClasses&wcf) { out << "-1 " << ((x.deps&DEP_MODEL_F)?wcf.classString(x.F):string("0"))<< ' '; out << x.vacancies1 << ' '; out << x.vacancies2 << ' '; return out; } }; class compare1 { private: int deps; public: compare1(int _deps) : deps(_deps) {} bool operator()(const m4_key&a,const m4_key&b)const { if(deps&DEP_MODEL_l){if( a.l&out) { string s; istrstream l(in.c_str()); while(l>>s) out.push_back(s); } class d4model { public: typedef Vector > Vpff; map D1; map Db1; PositionIndex msl; WordClasses ewordclasses; WordClasses fwordclasses; template void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile) { ifstream estrm(efile.c_str()),fstrm(ffile.c_str()); if( !estrm ) { cerr << "ERROR: can not read " << efile << endl; } else ewordclasses.read(estrm,m1); if( !fstrm ) cerr << "ERROR: can not read " << ffile << endl; else fwordclasses.read(fstrm,m2); } d4model(PositionIndex _msl) : D1(compare1(M4_Dependencies)),Db1(compareb1(M4_Dependencies)),msl(_msl) {} COUNT&getCountRef_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m) { assert(j>=1); m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1); map::iterator p=D1.find(key); if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair(0.0,0.0)))).first; assert(p!=D1.end()); return (p->second)[j-j_cp+msl].first; } COUNT&getCountRef_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m) { assert(j>=1); 
// getCountRef_bigger continues: same lookup-or-insert as getCountRef_first, but on
// Db1 and indexed by j-j_prev+msl.
// The getProb_* functions that follow return PROB_SMOOTH when the key is absent;
// otherwise max(PROB_SMOOTH, d4modelsmooth_factor/D + (1-d4modelsmooth_factor)*stored),
// where stored is the .second member at the offset index and D is (2*m-1) for the
// "first" variants and (m-1) for the "bigger" variants. The *_iterator helpers
// perform the map lookup once (with prevj passed as 0) so the *_withiterator
// variants can reuse the resulting iterator.
assert(j_prev>=1); m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1); map::iterator p=Db1.find(key); if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair(0.0,0.0)))).first; assert(p!=Db1.end()); return (p->second)[j-j_prev+msl].first; } map::const_iterator getProb_first_iterator(int E,int F,int l,int m)const {return D1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1));} PROB getProb_first_withiterator(WordIndex j,WordIndex j_cp,int m,const map::const_iterator& p)const { assert(j>=1);//assert(j_cp>=0); assert(j<=msl);assert(j_cp<=msl); if(p==D1.end()) { return PROB_SMOOTH; } else { massert((p->second)[j-j_cp+msl].second<=1.0); return max(PROB_SMOOTH,d4modelsmooth_factor/(2*m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second); } } PROB getProb_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m)const { assert(j>=1);//assert(j_cp>=0); assert(j<=msl);assert(j_cp<=msl); m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1); map::const_iterator p=D1.find(key); if(p==D1.end()) { return PROB_SMOOTH; } else { massert((p->second)[j-j_cp+msl].second<=1.0); return max(PROB_SMOOTH,d4modelsmooth_factor/(2*m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second); } } map::const_iterator getProb_bigger_iterator(int E,int F,int l,int m)const { return Db1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1)); } PROB getProb_bigger_withiterator(WordIndex j,WordIndex j_prev,int m,const map::const_iterator&p)const { massert(j>=1);massert(j_prev>=1); massert(j>j_prev); massert(j<=msl);massert(j_prev<=msl); if(p==Db1.end()) { return PROB_SMOOTH; } else { massert((p->second)[j-j_prev+msl].second<=1.0 ); return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second); } } PROB getProb_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m)const { massert(j>=1);massert(j_prev>=1); massert(j>j_prev); massert(j<=msl);massert(j_prev<=msl); m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1); map::const_iterator p=Db1.find(key); 
// getProb_bigger continues with the same absent-key / smoothing logic as above.
// NOTE(review): normalizeTable() and the table clearing/printing code after it are
// truncated in this copy (loop conditions and bodies are missing), so their
// behaviour cannot be documented from this text.
// readProbTable(fname) starts near the end of this physical line: it announces the
// file name on cerr, skips leading lines that begin with '#', tokenizes a key line,
// and expects a "SUM:" marker followed by the sum value.
if(p==Db1.end()) { return PROB_SMOOTH; } else { massert((p->second)[j-j_prev+msl].second<=1.0 ); return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second); } } void normalizeTable() { int nParams=0; for(map::iterator i=D1.begin();i!=D1.end();++i) { Vpff&d1=i->second; double sum=0.0; for(PositionIndex i=0;i::iterator i=Db1.begin();i!=Db1.end();++i) { Vpff&db1=i->second; double sum=0.0; for(PositionIndex i=0;i::iterator i=D1.begin();i!=D1.end();++i) { Vpff&d1=i->second; for(PositionIndex i=0;i::iterator i=Db1.begin();i!=Db1.end();++i) { Vpff&db1=i->second; for(PositionIndex i=0;i::const_iterator i=D1.begin();i!=D1.end();++i) { const Vpff&d1=i->second; double sum=0.0; for(PositionIndex ii=0;iifirst,ewordclasses,fwordclasses); out << "SUM: " << sum << ' '<< '\n'; for(unsigned ii=0;ii::const_iterator i=Db1.begin();i!=Db1.end();++i) { const Vpff&db1=i->second; double sum=0.0; for(PositionIndex ii=0;iifirst,ewordclasses,fwordclasses); out << "SUM: " << sum << ' '<<'\n'; for(unsigned ii=0;ii::const_iterator i=D1.begin();i!=D1.end();++i) { const Vpff&d1=i->second; for(unsigned ii=0;iifirst.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n'; } for(map::const_iterator i=Db1.begin();i!=Db1.end();++i) { const Vpff&db1=i->second; for(unsigned ii=0;iifirst.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n'; } } } bool readProbTable(const char *fname) { cerr << "Reading D4Tables from " << fname << endl; ifstream file(fname); string line; do { getline(file,line); } while(line.length()&&line[0]=='#'); do { while(line.length()==0) getline(file,line); if( line[0]=='#') break; Vector linestr; tokenize(line,linestr); m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1); for(unsigned int i=0;i> str >> sum; iassert(str=="SUM:"); if( str!="SUM:") cerr << "ERROR: string is " << str << " and not sum " << endl; do { int value; double count; getline(file,line); istrstream 
// readProbTable continues: every following line that parses as "value count" is
// stored at index value+msl as the pair (count, count/sum), creating the vector of
// msl*2+1 (0.0,0.0) pairs for key k first if it is absent. The first pass fills D1;
// the second pass repeats the procedure for Db1.
// NOTE(review): value+msl is not range-checked in the visible code, and `sum` is
// used as a divisor without a zero check - confirm the input format guarantees both.
// The only return statement visible is `return 1`, so I/O failures are not
// reported through the return value.
twonumbers(line.c_str()); if(twonumbers >> value >> count) { if( D1.count(k)==0 ) D1.insert(make_pair(k,Vpff(msl*2+1,pair(0.0,0.0)))); D1[k][value+msl]=make_pair(count,count/sum); } }while(line.length()); }while(file); do { getline(file,line); } while(line.length()&&line[0]=='#'); do { while(line.length()==0) getline(file,line); if( line[0]=='#') break; Vector linestr; tokenize(line,linestr); m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1); bool sumRead=0; for(unsigned int i=0;i> str >> sum; else { str=linestr[0]; sum=atof(linestr[1].c_str()); } if( str!="SUM:" ) cerr << "ERROR: should read SUM but read " << str << endl; do { int value; double count; getline(file,line); istrstream twonumbers(line.c_str()); if(twonumbers >> value >> count) { if( Db1.count(k)==0 ) Db1.insert(make_pair(k,Vpff(msl*2+1,pair(0.0,0.0)))); Db1[k][value+msl]=make_pair(count,count/sum); } }while(file&&line.length()); }while(file); return 1; } }; #endif