/*

Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)

This file is part of GIZA++ ( extension of GIZA ).

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.

*/
#ifndef _d5tables_h_define
#define _d5tables_h_define
#include <cmath>
#include "D4Tables.h"

extern float d5modelsmooth_countoffset;
extern float d5modelsmooth_factor;

// Fallback probability for table entries that have never been observed:
// uniform over the number of vacant target positions.
#define UNSEENPROB (1.0/vacancies_total)

// Distortion tables for IBM Model 5: placement probabilities are conditioned
// on the number of vacant positions ("vacancies") in the target sentence.
class d5model
{
private:
  typedef Vector< pair<COUNT,PROB> > Vpff;
  map< m4_key,Vpff,compare1 > D1;    // table for the head word of a cept
  map< m4_key,Vpff,compareb1 > Db1;  // table for the non-head words of a cept
public:
  d4model&d4m;
  WordClasses ewordclasses,fwordclasses;

  template<class MAPPER>
  void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile)
  {
    ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
    if( !estrm )
      cerr << "ERROR: can not read classes from " << efile << endl;
    else
      ewordclasses.read(estrm,m1);
    if( !fstrm )
      cerr << "ERROR: can not read classes from " << ffile << endl;
    else
      fwordclasses.read(fstrm,m2);
  }

  d5model (d4model&_d4m)
    : D1(compare1(M5_Dependencies)), Db1(compareb1(M5_Dependencies)), d4m(_d4m)
  {}

  // Count for placing the head word of a cept: the table entry is indexed by
  // vacancies_j and keyed on (word class F, vacancies_jp, vacancies_total).
  COUNT &getCountRef_first (PositionIndex vacancies_j,
                            PositionIndex vacancies_jp, int F,
                            PositionIndex l, PositionIndex m,
                            PositionIndex vacancies_total)
  {
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    //massert(vacancies_jp<=vacancies_total);
    massert(vacancies_j<=vacancies_total);
    massert(vacancies_total<=m);
    m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
    map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
    if( p==D1.end() )
      p=D1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
    massert(p!=D1.end());
    return (p->second)[vacancies_j].first;
  }

  // Count for placing a non-head word of a cept: the table entry is indexed
  // by the vacancy distance vacancies_j - vacancies_jp.
  COUNT &getCountRef_bigger (PositionIndex vacancies_j,
                             PositionIndex vacancies_jp, int F,
                             PositionIndex l, PositionIndex m,
                             PositionIndex vacancies_total)
  {
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    massert(vacancies_jp<=vacancies_j);
    massert(vacancies_j-vacancies_jp<=vacancies_total);
    m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
    map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
    if( p==Db1.end() )
      p=Db1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
    massert(p!=Db1.end());
    return (p->second)[vacancies_j-vacancies_jp].first;
  }

  // Smoothed probability for placing the head of a cept: the table entry is
  // interpolated with a uniform distribution over the vacancies and floored
  // at PROB_SMOOTH.
  PROB getProb_first (PositionIndex vacancies_j, PositionIndex vacancies_jp,
                      int F, PositionIndex l, PositionIndex m,
                      PositionIndex vacancies_total) const
  {
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    //massert(vacancies_jp<=vacancies_total);
    massert(vacancies_j<=vacancies_total);
    massert(vacancies_total<=m);
    m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
    map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
    if( p==D1.end() )
      return UNSEENPROB;
    else
      return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j].second);
  }

  // Smoothed probability for placing a non-head word of a cept.
  PROB getProb_bigger (PositionIndex vacancies_j, PositionIndex vacancies_jp,
                       int F, PositionIndex l, PositionIndex m,
                       PositionIndex vacancies_total) const
  {
    massert(vacancies_j>0);
    massert(vacancies_total>0);
    massert(vacancies_jp<=vacancies_j);
    massert(vacancies_j-vacancies_jp<=vacancies_total);
    m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
    map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
    if( p==Db1.end() )
      return UNSEENPROB;
    else
      return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j-vacancies_jp].second);
  }

  // Turn the accumulated counts into smoothed probabilities.
  void normalizeTable ()
  {
    int nParams=0;
    for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
      {
        Vpff&d1=i->second;
        COUNT sum=0.0;
        for(PositionIndex ii=0;ii<d1.size();ii++)
          sum+=d1[ii].first;
        for(PositionIndex ii=0;ii<d1.size();ii++)
          {
            d1[ii].second=sum?((d1[ii].first+d5modelsmooth_countoffset)/(sum+d1.size()*d5modelsmooth_countoffset)):(1.0/d1.size());
            nParams++;
          }
      }
    for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
      {
        Vpff&db1=i->second;
        double sum=0.0;
        for(PositionIndex ii=0;ii<db1.size();ii++)
          sum+=db1[ii].first;
        for(PositionIndex ii=0;ii<db1.size();ii++)
          {
            db1[ii].second=sum?((db1[ii].first+d5modelsmooth_countoffset)/(sum+db1.size()*d5modelsmooth_countoffset)):(1.0/db1.size());
            nParams++;
          }
      }
    cerr << "D5 table contains " << nParams << " parameters.\n";
  }

  // Dump both tables in a plain-text, human-readable format.
  friend ostream&operator<<(ostream&out,d5model&d5m)
  {
    out << "# Table for head of cept.\n";
    for(map<m4_key,Vpff,compare1 >::const_iterator i=d5m.D1.begin();i!=d5m.D1.end();++i)
      {
        const Vpff&d1=i->second;
        COUNT sum=0.0;
        for(PositionIndex ii=0;ii<d1.size();++ii)
          sum+=d1[ii].first;
        if( sum )
          {
            print1_m5(out,i->first,d5m.ewordclasses,d5m.fwordclasses);
            for(PositionIndex ii=0;ii<d1.size();++ii)
              {
                if( d1[ii].first )
                  out << (int)(ii) << ' ' << d1[ii].second << ' ' << d1[ii].first << '\n';
              }
            out << endl;
          }
      }
    out << "# Table for non-head of cept.\n";
    for(map<m4_key,Vpff,compareb1 >::const_iterator i=d5m.Db1.begin();i!=d5m.Db1.end();++i)
      {
        const Vpff&db1=i->second;
        double sum=0.0;
        for(PositionIndex ii=0;ii<db1.size();++ii)
          sum+=db1[ii].first;
        if( sum )
          {
            printb1_m5(out,i->first,d5m.fwordclasses);
            for(PositionIndex ii=0;ii<db1.size();++ii)
              {
                if( db1[ii].first )
                  out << (int)(ii) << ' ' << db1[ii].second << ' ' << db1[ii].first << '\n';
              }
            out << endl;
          }
      }
    return out;
  }

  // Load counts from a text file (fields per line: E F v1 v2 ii prob count),
  // renormalize them, and dump the resulting model to "M5FILE".
  void readProbTable(const char*x)
  {
    ifstream f(x);
    string l;
    while(getline(f,l))
      {
        if(l.length()&&l[0]=='#')
          continue;                      // skip comment lines
        istrstream is(l.c_str());
        string E,F;
        int v1,v2,ii;
        double prob,count;
        if(is>>E>>F>>v1>>v2>>ii>>prob>>count)
          {
            //cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob << endl;
            if( count>0 )
              {
                if( E=="-1" )
                  getCountRef_bigger(ii,0,fwordclasses(F),1000,1000,v2)+=count;
                else
                  getCountRef_first(ii,v1,fwordclasses(F),1000,1000,v2)+=count;
              }
          }
      }
    normalizeTable();
    ofstream of("M5FILE");
    of << (*this);
  }

  // Reset all counts (the probability components are left untouched).
  void clear()
  {
    for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
      {
        Vpff&d1=i->second;
        for(PositionIndex ii=0;ii<d1.size();ii++)
          d1[ii].first=0.0;
      }
    for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
      {
        Vpff&db1=i->second;
        for(PositionIndex ii=0;ii<db1.size();ii++)
          db1[ii].first=0.0;
      }
  }
};

#endif
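/*
  Illustrative sketch, not part of the original GIZA++ sources: getProb_first()
  and getProb_bigger() above smooth the stored relative frequency by
  interpolating it with a uniform distribution over the vacancies_total
  possible positions and flooring the result at PROB_SMOOTH. The hypothetical
  helper below restates that formula in isolation.

    #include <algorithm>

    // relFreq        : probability stored in the table entry
    // vacanciesTotal : number of vacant target positions (must be > 0)
    // smoothFactor   : plays the role of d5modelsmooth_factor
    // probFloor      : plays the role of PROB_SMOOTH
    inline double smoothedVacancyProb(double relFreq, unsigned vacanciesTotal,
                                      double smoothFactor, double probFloor)
    {
      // mix with the uniform distribution 1/vacanciesTotal, then apply the floor
      double p = smoothFactor / vacanciesTotal + (1.0 - smoothFactor) * relFreq;
      return std::max(probFloor, p);
    }
*/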