/* Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och mkcls - a program for making word classes . This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifndef KATEG_OPT_H #define KATEG_OPT_H #include #include #include "Problem.h" extern double rhoLo; typedef int Kategory; typedef int Word; #ifdef FREQTYPE_DOUBLE typedef double FreqType; #else typedef int FreqType; #endif #include "KategProblemWBC.h" #include "KategProblemKBC.h" enum { INIT_RAN=1, INIT_AIO=2, INIT_LWRW=3, INIT_FREQ=4, INIT_OTHER=5 }; enum { W_RAN=(8|16), W_DET_DECR=(16), W_DET_INCR =(32) }; #define CHOOSE_WORD (8|16|32) enum { K_DET=(64), K_RAN=(128), K_BEST=(64|128) }; #define CHOOSE_KAT (64|128) enum { CRITERION_ML=0, CRITERION_LO=1, CRITERION_MY=2 }; class NWG { private: Array freq; Array timeOfFreq; int curTime; public: NWG(int n); void init(); int anzNot0; Array not0; int word; inline void addFreq(int C,FreqType n); void sort(); FreqType getFreq(int i) { if( timeOfFreq[i]==curTime ) return freq[i]; else return 0; }; }; inline void NWG::addFreq(int g,FreqType n) { if(timeOfFreq[g]==curTime) freq[g]+=n; else { timeOfFreq[g]=curTime; freq[g]=n; not0[anzNot0++]=g; } } struct KategProblemChange : public ProblemChange { void *operator new(size_t size); void operator delete(void *ptr,size_t size); int word; int toKat; int fromKat; }; class KategProblem : public Problem { private: double kat_h_full(int n); double kat_h_full(double n); double kat_h_part(int n); double kat_h_part(double n); double sigmaVerfaelschung; short katWasEmpty; int nwgWord; NWG nwg; NWG ngw; FreqType nww; int ursprung,ziel; Array _katOfWord; int _maxComp,_maxCompVal; double nmo_my(int i,int j); double nmo(int i,int j); double nmo_lo(int i,int j,int &e0,int &e1); void putWord(int word,int to); void fastPutWord(int word,int to); void setKatOfWord(int w,int k) { if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) ) { cout << "mkcls::setKatOfWord::ERROR: " << w << " " << k << " " << wordFreq.fixedWord[w] << " " << (*words)[w] << endl; } _katOfWord[w]=k; nwgWord=-1; }; void fillNWG(int w); inline FreqType nstrich(int i,int j); void vnstrich(int i,int j); protected: virtual int _change(ProblemChange **p); virtual void _doChange(ProblemChange &c); virtual void _undoChange(ProblemChange &c); virtual double _value(); double _valueChange(KategProblemChange &k); virtual void incrementDirection(); virtual int maxDimensionVal(void) ; virtual int maxDimension(void) ; public: leda_array *words; typedef leda_set intSet; leda_array *kats; KategProblemWBC wordFreq; KategProblemKBC katFreq; Array initLike; KategProblem(int aw,int mak,int _initialisierung,int _auswertung, int _nachbarschaft,int minw=0); virtual ~KategProblem(); virtual void _initialize(int initTyp); virtual void _initialize(int initTyp,int specialFixedWord); virtual double valueChange(ProblemChange&c); virtual Problem *makeEqualProblem(); virtual double nicevalue(double value=1e100); void makeKats(); virtual void dumpOn(ostream &strm); virtual void dumpInfos(ostream &strm); inline void katwahl(int k); inline void wortwahl(int w); inline int katOfWord(int w); inline short wortwahl(); inline short katwahl() ; virtual int maxNonBetterIterations(); virtual int expectedNumberOfIterations(); const char *getString(int i); string getTheString(int i); void makeTitle(char x[512]); void fixInitLike(); }; inline int KategProblem::katOfWord(int w){return _katOfWord[w];}; inline short KategProblem::wortwahl(){return nachbarschaft&CHOOSE_WORD;}; inline short KategProblem::katwahl() {return nachbarschaft&CHOOSE_KAT;}; inline void KategProblem::katwahl(int k) { nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k; if(k==K_BEST) _maxCompVal=1; else _maxCompVal=katFreq.nKats-2; }; inline void KategProblem::wortwahl(int w) { nachbarschaft = (nachbarschaft&(~CHOOSE_WORD)) | w; }; inline FreqType KategProblem::nstrich(int i,int j) { FreqType n=0; if( i==ursprung ) n-=nwg.getFreq(j); if( i==ziel ) n+=nwg.getFreq(j); if( j==ursprung ) n-=ngw.getFreq(i); if( j==ziel ) n+=ngw.getFreq(i); if( i==ursprung && j==ursprung ) n+=nww; if( i==ziel && j==ziel ) n+=nww; if( i==ursprung && j==ziel ) n-=nww; if( i==ziel && j==ursprung ) n-=nww; return n; } #define MAX_H_TABLE 4000 extern double h_table[],l_table[],hmy_table[],hmy_sigma; inline double kat_mlog(double x) { if(x<=1e-9) return 0; else return log(x); } inline double kat_mlog(int s) { if(s<=0) return 0; else if( s=-1); if(n<=0) return 0; else if(n1) return e1*log( (ePlus-1.0)/(e0+1.0)*rhoLo ); else return 0; } double mkat_h_full(int n,double tf); double mkat_h_part(int n,double cf); int Hash(const string& s); #endif