/*

Copyright (C) 1997,1998,1999,2000,2001  Franz Josef Och

mkcls - a program for making word classes .

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
USA.

*/

// NOTE(review): this copy of the file has lost text in several places
// (the header name of the first include, template arguments, and parts of
// several function bodies). Affected regions are flagged below; restore them
// from a clean copy before changing any logic.
#include
#include "KategProblem.h"

// qsort comparator that orders OneFreq records by their field n in
// ascending order ("steigend" is German for ascending).
// With FREQTYPE_DOUBLE defined the result is -1 / +1 / 0 from explicit
// comparisons; otherwise the difference of the two n values is returned.
// NOTE(review): the difference form relies on the subtraction not
// overflowing and on the result fitting into int - confirm against the
// definition of the n field, which is not visible here.
static int oneFreqCompareSteigend(const void *p,const void *j)
{
#ifdef FREQTYPE_DOUBLE
  if( (((OneFreq *)p)->n < ((OneFreq *)j)->n) )
    return -1;
  if( (((OneFreq *)p)->n > ((OneFreq *)j)->n) )
    return +1;
  else
    return 0;
#else
  return ((OneFreq *)p)->n - ((OneFreq *)j)->n;
#endif
}

// qsort comparator that orders OneFreq records by their field n in
// descending order ("fallend" is German for descending). It mirrors
// oneFreqCompareSteigend with the sign of the result inverted, and the same
// review note about the difference form applies.
static int oneFreqCompareFallend(const void *p,const void *j)
{
#ifdef FREQTYPE_DOUBLE
  if( (((OneFreq *)p)->n > ((OneFreq *)j)->n) )
    return -1;
  if( (((OneFreq *)p)->n < ((OneFreq *)j)->n) )
    return +1;
  else
    return 0;
#else
  return -((OneFreq *)p)->n + ((OneFreq *)j)->n;
#endif
}

// Constructor.
//   n    - stored in nWords and passed as first argument to every per-word
//          container member.
//   minw - stored in mindestAnzahl.
// _n1, _n2, afterFilled and beforeFilled receive 0 as second argument;
// fixedWord, minIndex and maxIndex receive -1 (presumably an initial fill
// value - the container type is declared elsewhere, TODO confirm).
// The flags with_h_of_words and filled, the pointer absteigend and the
// counter nTranspWords all start at 0.
KategProblemWBC::KategProblemWBC(int n,int minw)
: _n1(n,0),_n2(n,0),with_h_of_words(0),afterFilled(n,0),beforeFilled(n,0),filled(0),fixedWord(n,-1),absteigend(0),nWords(n),nTranspWords(0),
  mindestAnzahl(minw),after(n),before(n),minIndex(n,-1),maxIndex(n,-1)
{
}

// Destructor: checks that after still has nWords entries and deletes the
// object behind absteigend when the pointer is non-null.
KategProblemWBC::~KategProblemWBC()
{
  massert( after.size()==nWords);
  if( absteigend )
    delete absteigend;
}

// init(specialFixedWord): resets nTranspWords to 0 and starts a loop over
// all entries of _n1.
// NOTE(review): from the first comparison inside the loop onwards the text
// is truncated. What follows up to the statement returning ret is the tail
// of one or more other functions whose headers are lost: it prints
// statistics in verbose mode, tracks a flag named symmetrisch and warns when
// that flag is 0. The braces in this region do not balance; the code is
// reproduced token for token and must not be edited without a clean copy.
void KategProblemWBC::init(int specialFixedWord)
{
  nTranspWords=0;
  int i;
  for(i=0;i<_n1.size();i++)
    {
      if( (_n1[i]1 )
    {
      cout << "MEAN(|L(w)|+|R(w)|)=" << (beforeFilledSum/(float)nWords) +(afterFilledSum/(float)nWords) << endl;
      cout << "Hapaslegomena: " <<
enaNom << endl;
    }
  int symmetrisch=1;
  for(i=0;i1 )
	    cout << "Asymmetrie: " << i << " " << _n1[i] << " " << _n2[i] << endl;
	}
    }
  if(verboseMode && symmetrisch==0)
    cout << "Warning: word bigram statistic is not symmetric "
      "(this is possibly an error)\n";
  return ret;
}

// getSortedList(steigend): builds a list of (w, n) records with w set to the
// word index and n set to _n1[w], sorts the first anzFree records with qsort
// (ascending when steigend is non-zero, descending otherwise) and returns
// sortedList.
// The returned array is allocated with new and handed out by reference, so
// releasing it is left to the caller.
// Requires the member filled to be set (asserted on entry).
// NOTE(review): the template arguments of Array, both loop headers, the
// declaration of anzFree and the code that copies into sortedList are
// missing from this copy; the braces do not balance as a result.
Array &KategProblemWBC::getSortedList(int steigend)
{
  int siz=_n2.size(),i;
  massert(filled);
  Array &sortedList =*new Array(siz);
  Array list(siz);
  int pos=0;
  for(i=0;i=0 )
	{
	  list[pos].w=i;
	  list[pos].n=_n1[i];
	  pos++;
	}
    }
  massert(pos==siz);
  if(steigend )
    qsort(list.getPointerToData(),anzFree,sizeof(OneFreq),oneFreqCompareSteigend);
  else
    qsort(list.getPointerToData(),anzFree,sizeof(OneFreq),oneFreqCompareFallend);
  massert( anzFree<=list.size() );
  // The surviving asserts check that neighbouring records within the first
  // anzFree entries respect the requested order.
  for(i=0;i=anzFree || list[i-1].n>=list[i].n );
      massert((!steigend) || i==0 || i>=anzFree || list[i-1].n<=list[i].n );
    }
  return sortedList;
}

// numberOfWords(): returns the sum of all entries of _n1. The sum of _n2 is
// computed alongside and, unless FREQTYPE_DOUBLE is defined, asserted to be
// equal to it.
FreqType KategProblemWBC::numberOfWords()
{
  FreqType n1=0,n2=0;
  for(int i=0;i<_n1.size();i++)
    {
      n1+=_n1[i];
      n2+=_n2[i];
    }
#ifndef FREQTYPE_DOUBLE
  massert(n1==n2);
#endif
  return n1;
}

// setDollar(n): sets fixedWord[n] to 0. If the previous value was negative,
// nTranspWords is decremented first, so the word no longer counts towards
// that total.
void KategProblemWBC::setDollar(int n)
{
  if( fixedWord[n]<0 )
    nTranspWords--;
  fixedWord[n]=0;
}

// initializeIndex(words, firstChar, unten, oben, noHapas): for every word
// that passes the selection condition, sets minIndex to unten and maxIndex
// to oben ("unten"/"oben" are German for lower/upper), counts those words
// and, in verbose mode, prints the count.
// NOTE(review): the template argument of leda_array, the first massert, the
// loop header and the beginning of the selection condition are missing from
// this copy, so how words, firstChar and noHapas are used cannot be read
// from here. The surviving part of the condition compares a rounded n2(i)
// against mindestAnzahl.
void KategProblemWBC::initializeIndex(const leda_array&words,char firstChar,int unten,int oben,bool noHapas)
{
  int n=0;
  int i;
  massert(-1=mindestAnzahl || ((short)(n2(i)+0.0001))>=mindestAnzahl) )
	{
	  minIndex[i]=unten;
	  maxIndex[i]=oben;
	  n++;
	}
    }
  if( verboseMode )
    cout << "InitializeIndex gefunden fuer " << n << " Woerter.\n";
}