/* Copyright (C) 1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)

This file is part of GIZA++ ( extension of GIZA ).

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.

*/
#include "hmm.h"
#include "Globals.h"
#include "utility.h"
#include "HMMTables.h"
#include "ForwardBackward.h"
#include "Parameter.h"

// CLASSIFY maps a state index i in [0,2l) to its real position ianf in [0,l)
// and records in `empty' whether i refers to the empty-word copy of that position.
#define CLASSIFY(i,empty,ianf) bool empty=(i>=l); unsigned int ianf=(i%l);
#define CLASSIFY2(i,ianf) unsigned int ianf=(i%l);

short PredictionInAlignments=0;
short UniformEntryExit=3;
short HMMTrainingSpecialFlags=0;

GLOBAL_PARAMETER2(int,ModelH_Dump_Freq,"HMM DUMP FREQUENCY","th","dump frequency of HMM",PARLEV_OUTPUT,0);

GLOBAL_PARAMETER(short,CompareAlDeps,"emAlignmentDependencies",
                 "lextrain: dependencies in the HMM alignment model. "
                 " &1: sentence length; &2: previous class; &4: previous position; "
                 " &8: French position; &16: French class"
                 ,PARLEV_MODELS,2);
GLOBAL_PARAMETER(double,GLOBALProbabilityForEmpty,"emProbForEmpty",
                 "f-b-trn: probability for empty word",PARLEV_MODELS,0.4);
GLOBAL_PARAMETER(short,SmoothHMM,"emSmoothHMM",
                 "f-b-trn: smooth HMM model &1: modified counts; &2: perform smoothing with -emAlSmooth",PARLEV_SPECIAL,2);
GLOBAL_PARAMETER(double,HMMAlignmentModelSmoothFactor,"emAlSmooth",
                 "f-b-trn: smoothing factor for HMM alignment model (can be ignored by -emSmoothHMM)",PARLEV_SMOOTH,0.2);

/*template<class T>
void smooth_standard(T*a,T*b,double p)
{
  int n=b-a;
  if( n==0 )
    return;
  double pp=p/n;
  for(T*i=a;i!=b;++i)
    *i = (1.0-p)*(*i)+pp;
}*/

hmm::hmm(model2& m)
  : model2(m),counts(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses),
    probs(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses)
{
}

void hmm::initialize_table_uniformly(sentenceHandler&){}

int hmm::em_with_tricks(int noIterations)
{
  double minErrors=1.0;int minIter=0;
  string modelName="Hmm",shortModelName="hmm";
  int dumpFreq=ModelH_Dump_Freq;
  time_t it_st, st, it_fn, fn;
  string tfile, afile, afileh, number, alignfile, test_alignfile;
  int pair_no = 0;
  bool dump_files = false ;
  ofstream of2 ;
  st = time(NULL) ;
  sHandler1.rewind();
  cout << "\n==========================================================\n";
  cout << modelName << " Training Started at: " << ctime(&st);
  for(int it=1; it <= noIterations ; it++){
    pair_no = 0;
    it_st = time(NULL) ;
    cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
    dump_files = (dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
    // build the iteration number as a decimal string (used in dump file names)
    number = "";
    int n = it;
    do{
      number.insert((size_t)0, 1, (char)(n % 10 + '0'));
    } while((n /= 10) > 0);
    tfile = Prefix + ".t" + shortModelName + "." + number ;
    afile = Prefix + ".a" + shortModelName + "." + number ;
    afileh = Prefix + ".h" + shortModelName + "." + number ;
    alignfile = Prefix + ".A" + shortModelName + "." + number ;
    test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
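    // One EM iteration: reset the count tables, run the E-step over the
    // corpus (em_loop accumulates lexicon, alignment and jump counts via
    // forward-backward), then normalize those counts into the probability
    // tables used by the next iteration (the M-step further below).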
    counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
    aCountTable.clear();
    initAL();
    em_loop(perp, sHandler1, dump_files, alignfile.c_str(), trainViterbiPerp, false, it==1, it);
    if( errorsAL()<minErrors )
      {
        minErrors=errorsAL();
        minIter=it;
      }
    if (testPerp && testHandler)
      em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true, it==1, it);
    if (dump_files && OutputInAachenFormat==1)
      tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
    // M-step: turn the accumulated counts into the model of the next iteration
    tTable.normalizeTable(Elist, Flist);
    aCountTable.normalize(aTable);
    probs=counts;
    cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
         << " PERPLEXITY " << perp.perplexity() << '\n';
    if (testPerp && testHandler)
      cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << testPerp->cross_entropy()
           << " PERPLEXITY " << testPerp->perplexity() << '\n';
    cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
         << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
    if (testPerp && testHandler)
      cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
           << " PERPLEXITY " << testViterbiPerp->perplexity() << '\n';
    if (dump_files){
      if( OutputInAachenFormat==0 )
        tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
      ofstream afilestream(afileh.c_str());
      probs.writeJumps(afilestream);
      aCountTable.printTable(afile.c_str());
    }
    it_fn = time(NULL) ;
    cout << "\n" << modelName << " Iteration: " << it << " took: "
         << difftime(it_fn, it_st) << " seconds\n";
  } // end of iterations
  fn = time(NULL) ;
  cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
  //cout << "tTable contains " << tTable.getHash().bucket_count()
  //     << " buckets and " << tTable.getHash().size() << " entries." ;
  cout << "==========================================================\n";
  return minIter;
}

/*template<class T>
T normalize_if_possible_with_increment(T*a,T*b,int increment)
{
  T sum=0;
  for(T*i=a;i!=b;i+=increment)
    sum+=*i;
  if( sum )
    for(T*i=a;i!=b;i+=increment)
      *i/=sum;
  else
    {
      T factor=increment/(b-a);
      for(T*i=a;i!=b;i+=increment)
        *i=factor;
    }
  return sum;
}*/

void hmm::load_table(const char* aname){
  cout << "Hmm: loading a table not implemented.\n";
  abort();
  ifstream anamefile(aname);
  probs.readJumps(anamefile);
}

HMMNetwork *hmm::makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>& fs,bool doInit)const
{
  unsigned int i,j;
  unsigned int l = es.size() - 1;
  unsigned int m = fs.size() - 1;
  // the HMM has I=2*l states: the l English positions plus one
  // empty-word state per position (second half of the state range)
  unsigned int I=2*l, J=m;
  int IJ=I*J;
  bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
  bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
  HMMNetwork *net = new HMMNetwork(I,J);
  fill(net->alphainit.begin(),net->alphainit.end(),0.0);
  fill(net->betainit.begin(),net->betainit.end(),0.0);
  // emission probabilities: lexicon probabilities t(f_j|e_i), with the
  // empty-word contribution replicated over the second half of the states
  for(j=1;j<=m;j++)
    {
      for(i=1;i<=l;i++)
        net->n(i-1,j-1)=tTable.getProb(es[i], fs[j]) ;
      double emptyContribution=tTable.getProb(es[0],fs[j]) ;
      for(i=1;i<=l;i++)
        net->n(i+l-1,j-1)=emptyContribution;
      net->finalMultiply*=max(normalize_if_possible_with_increment(&net->n(0,j-1),&net->n(0,j-1)+IJ,J),double(1e-12));
    }
  // transition probabilities (jump model); one matrix per position if the
  // model depends on j, otherwise a single shared matrix
  if( DependencyOfJ )
    net->e.resize(m-1);
  else
    net->e.resize(J>1);
  for(j=0;j<net->e.size();j++)
    {
      int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(j)+1)]);
      net->e[j].resize(I,I,0);
      for(unsigned int i1=0;i1<I;++i1) {
        Array<double> al(l);
        CLASSIFY2(i1,i1real);
        for(unsigned int i2=0;i2<l;i2++)
          al[i2]=probs.getAlProb(i1real,i2,l,m,ewordclasses.getClass(es[1+i1real]),frenchClass,j+1);
        normalize_if_possible(conv<double>(al.begin()),conv<double>(al.end()));
        if( SmoothHMM&2 )
          smooth_standard(conv<double>(al.begin()),conv<double>(al.end()),HMMAlignmentModelSmoothFactor);
        for(unsigned int i2=0;i2<I;i2++) {
          CLASSIFY(i2,empty_i2,i2real);
          net->e[j](i1,i2) = al[i2real];
          if( empty_i2 )
            {
              if( i1real!=i2real )
                {
                  net->e[j](i1,i2)=0;
                }
              else
                {
                  net->e[j](i1,i2)=doInit?al[0]:(probs.getProbabilityForEmpty());
                  // make first HMM iteration like IBM-1
                }
            }
        }
        normalize_if_possible(&net->e[j](i1,0),&net->e[j](i1,0)+I);
      }
    }
  if( doInit )
    {
      for(unsigned int i=0;i<I;++i)
        {
          net->alphainit[i]=net->betainit[i]=(i<I/2)?(1.0/I):(2.0/I/I);
          net->betainit[i]=1.0;
        }
    }
  else
    {
      if( DependencyOfPrevAJ==0 )
        {
          for(i=0;i<I;i++)
            {
              CLASSIFY2(i,ireal);
              net->alphainit[i]=probs.getAlProb(-1,ireal,l,m,0,fwordclasses.getClass(fs[1+0]),0);
            }
        }
      else
        {
          if( UniformEntryExit&2 ) probs.getBetaInit(I,net->betainit);
          if( UniformEntryExit&1 ) probs.getAlphaInit(I,net->alphainit);
        }
    }
  massert( net->alphainit.size()==I ); massert( net->betainit.size()==I );
  normalize_if_possible(conv<double>(net->alphainit.begin()),conv<double>(net->alphainit.end()));
  normalize_if_possible(conv<double>(net->betainit.begin()),conv<double>(net->betainit.end()));
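  // betainit has just been normalized to sum to 1; the transform below
  // rescales it by the number of states 2*l, so that a uniform
  // initialization ends up as 1.0 per state, presumably matching the
  // convention that the backward recursion starts from beta=1.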
  transform(net->betainit.begin(),net->betainit.end(),net->betainit.begin(),
            bind1st(multiplies<double>(),2*l));
  return net;
}

extern float MINCOUNTINCREASE;

void hmm::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
                  bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
                  bool test, bool doInit, int /*iter*/)
{
  WordIndex i, j, l, m ;
  double cross_entropy;
  int pair_no=0 ;
  perp.clear();
  viterbi_perp.clear();
  ofstream of2;
  // for each sentence pair in the corpus
  if (dump_alignment||FEWDUMPS )
    of2.open(alignfile);
  sentPair sent ;
  sHandler1.rewind();
  while(sHandler1.getNextSentence(sent)){
    const Vector<WordIndex>& es = sent.get_eSent();
    const Vector<WordIndex>& fs = sent.get_fSent();
    const float so = sent.getCount();
    l = es.size() - 1;
    m = fs.size() - 1;
    cross_entropy = log(1.0);
    Vector<WordIndex> viterbi_alignment(fs.size());
    unsigned int I=2*l, J=m;
    bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
    bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
    HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
    Array<double> gamma;
    Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
    double trainProb;
    trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
    if( !test )
      {
        // collect lexicon and alignment counts from the state posteriors gamma
        double *gp=conv<double>(gamma.begin());
        for(unsigned int i2=0;i2<J;i2++)
          for(unsigned int i1=0;i1<I;++i1,++gp)
            if( *gp>MINCOUNTINCREASE )
              {
                COUNT add= *gp*so;
                if( i1>=l )
                  {
                    tTable.incCount(es[0],fs[1+i2],add);
                    aCountTable.getRef(0,i2+1,l,m)+=add;
                  }
                else
                  {
                    tTable.incCount(es[1+i1],fs[1+i2],add);
                    aCountTable.getRef(1+i1,1+i2,l,m)+=add;
                  }
              }
        // collect jump counts from the transition posteriors epsilon
        double p0c=0.0,np0c=0.0;
        for(unsigned int jj=0;jj<epsilon.size();jj++)
          {
            int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
            double *ep=epsilon[jj].begin();
            if( ep )
              {
                double mult=1.0;
                mult*=l;
                if( DependencyOfJ && J-1 )
                  mult/=(J-1);
                for(i=0;i<I;i++)
                  for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++)
                    {
                      CLASSIFY(i,i_empty,ireal);
                      CLASSIFY2(i_bef,i_befreal);
                      if( i_empty )
                        p0c+=*ep * mult;
                      else
                        {
                          counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
                                            frenchClass,jj+1,*ep * mult,0.0);
                          np0c+=*ep * mult;
                        }
                    }
              }
          }
        // collect counts for the initial alignment distribution
        double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
        Array<double>&ai=counts.doGetAlphaInit(I);
        Array<double>&bi=counts.doGetBetaInit(I);
        int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
        for(i=0;i<I;i++,gp1++,gp2++)
          {
            CLASSIFY(i,i_empty,ireal);
            ai[i]+= *gp1;
            bi[i]+= *gp2;
            if( DependencyOfPrevAJ==0 )
              {
                if( i_empty )
                  p0c+=*gp1;
                else
                  {
                    counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
                    np0c+=*gp1;
                  }
              }
          }
        if( Verbose )
          cout << "l: " << l << " m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
      }
    cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
    Array<int> vit;
    double viterbi_score=1.0;
    if( (HMMTrainingSpecialFlags&1) )
      HMMViterbi(*net,gamma,vit);
    else
      viterbi_score=HMMRealViterbi(*net,vit);
    for(j=1;j<=m;j++)
      {
        viterbi_alignment[j]=vit[j-1]+1;
        if( viterbi_alignment[j]>l )
          viterbi_alignment[j]=0; // an empty-word state maps to "unaligned"
      }
    sHandler1.setProbOfSentence(sent,cross_entropy);
    perp.addFactor(cross_entropy, so, l, m,1);
    viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
    if( Verbose )
      cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100))
           << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
    delete net;net=0;
    if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
      printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
    addAL(viterbi_alignment,sent.getSentenceNo(),l);
    pair_no++;
  } /* of while */
  sHandler1.rewind();
  perp.record("HMM");
  viterbi_perp.record("HMM");
  errorReportAL(cout,"HMM");
}

#include "HMMTables.cc"
template class HMMTables<int,WordClasses>;
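// Note: HMMTables.cc (the template definitions) is #included above so that
// the explicit instantiation of HMMTables<int,WordClasses> can be generated
// in this translation unit.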