/* EGYPT Toolkit for Statistical Machine Translation Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* Noah A. Smith Dictionary object for dictionary filter in Model 1 training Dictionary file must be in order (sorted) by Foreign vocab id, but English vocab ids may be in any order. 9 August 1999 */ #include "Dictionary.h" Dictionary::Dictionary(const char *filename){ if(!strcmp(filename, "")){ dead = true; return; } dead = false; cout << "Reading dictionary from: " << filename << '\n'; ifstream dFile(filename); if(!dFile){ cerr << "ERROR: Can't open dictionary: " << filename << '\n'; exit(1); } currindexmin = 0; currindexmax = 0; currval = 0; int p, q; while((dFile >> p >> q)){ pairs[0].push_back(p); pairs[1].push_back(q); } cout << "Dictionary read; " << pairs[0].size() << " pairs loaded." << '\n'; dFile.close(); } bool Dictionary::indict(int p, int q){ if(dead) return false; if(p == 0 && q == 0) return false; if(currval == p){ for(int i = currindexmin; i <= currindexmax; i++) if(pairs[1][i] == q) return true; return false; } else{ int begin = 0, end = pairs[0].size() - 1, middle = 0; unsigned int t; bool ret = false; while(begin <= end){ middle = begin + ((end - begin) >> 1); if(p < pairs[0][middle]) end = middle - 1; else if(p > pairs[0][middle]) begin = middle + 1; else{ break; } } t = middle; while(pairs[0][t] == p ) if(pairs[1][t--] == q) ret = true; currindexmin = t + 1; t = middle + 1; while(pairs[0][t] == p && t < pairs[0].size()) if(pairs[1][t++] == q) ret = true; currindexmax = t - 1; currval = p; return ret; } }