CNS // Phase 7: Modify AllUnigrams to let CNSLM work.
This commit is contained in:
parent
50cdb6be6c
commit
2320c2657b
|
@ -19,6 +19,7 @@ vChewingLM::vChewingLM()
|
|||
vChewingLM::~vChewingLM()
|
||||
{
|
||||
m_languageModel.close();
|
||||
m_cnsModel.close();
|
||||
m_userPhrases.close();
|
||||
m_excludedPhrases.close();
|
||||
m_phraseReplacement.close();
|
||||
|
@ -35,8 +36,8 @@ void vChewingLM::loadLanguageModel(const char* languageModelDataPath)
|
|||
// Loads CNS data from the file at cnsDataPath.
// A null cnsDataPath is ignored: nothing is closed or opened.
// NOTE(review): both m_cnsData and m_cnsModel are closed and re-opened with
// the same path below. This looks like the removed and added sides of a diff
// shown together (m_cnsData -> m_cnsModel rename) — confirm which member is
// the intended one before relying on this text.
void vChewingLM::loadCNSData(const char* cnsDataPath)
|
||||
{
|
||||
if (cnsDataPath) {
|
||||
// Close before opening, so a repeated call starts from a closed model.
m_cnsData.close();
|
||||
m_cnsData.open(cnsDataPath);
|
||||
m_cnsModel.close();
|
||||
m_cnsModel.open(cnsDataPath);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -70,7 +71,8 @@ const vector<Unigram> vChewingLM::unigramsForKey(const string& key)
|
|||
{
|
||||
vector<Unigram> allUnigrams;
|
||||
vector<Unigram> userUnigrams;
|
||||
|
||||
vector<Unigram> cnsUnigrams;
|
||||
|
||||
unordered_set<string> excludedValues;
|
||||
unordered_set<string> insertedValues;
|
||||
|
||||
|
@ -90,8 +92,14 @@ const vector<Unigram> vChewingLM::unigramsForKey(const string& key)
|
|||
vector<Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
||||
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
|
||||
if (m_cnsModel.hasUnigramsForKey(key)) {
|
||||
vector<Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
|
||||
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end());
|
||||
allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end());
|
||||
return allUnigrams;
|
||||
}
|
||||
|
||||
|
@ -114,6 +122,16 @@ bool vChewingLM::phraseReplacementEnabled()
|
|||
return m_phraseReplacementEnabled;
|
||||
}
|
||||
|
||||
// Stores the given value in the m_CNSEnabled flag.
// NOTE(review): the visible part of unigramsForKey queries m_cnsModel without
// checking this flag — confirm where m_CNSEnabled is actually consulted.
void vChewingLM::setCNSEnabled(bool enabled)
|
||||
{
|
||||
m_CNSEnabled = enabled;
|
||||
}
|
||||
|
||||
// Returns the current value of the m_CNSEnabled flag.
// NOTE(review): no initializer for m_CNSEnabled is visible in this view —
// confirm the constructor sets it before this getter can be called.
bool vChewingLM::CNSEnabled()
|
||||
{
|
||||
return m_CNSEnabled;
|
||||
}
|
||||
|
||||
const vector<Unigram> vChewingLM::filterAndTransformUnigrams(vector<Unigram> unigrams, const unordered_set<string>& excludedValues, unordered_set<string>& insertedValues)
|
||||
{
|
||||
vector<Unigram> results;
|
||||
|
|
|
@ -24,31 +24,34 @@ class vChewingLM : public LanguageModel {
|
|||
public:
|
||||
vChewingLM();
|
||||
~vChewingLM();
|
||||
|
||||
|
||||
void loadLanguageModel(const char* languageModelPath);
|
||||
void loadCNSData(const char* cnsDataPath);
|
||||
void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath);
|
||||
|
||||
void loadPhraseReplacementMap(const char* phraseReplacementPath);
|
||||
|
||||
|
||||
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
|
||||
const vector<Unigram> unigramsForKey(const string& key);
|
||||
bool hasUnigramsForKey(const string& key);
|
||||
|
||||
|
||||
void setPhraseReplacementEnabled(bool enabled);
|
||||
bool phraseReplacementEnabled();
|
||||
|
||||
|
||||
void setCNSEnabled(bool enabled);
|
||||
bool CNSEnabled();
|
||||
|
||||
protected:
|
||||
const vector<Unigram> filterAndTransformUnigrams(vector<Unigram> unigrams,
|
||||
const std::unordered_set<string>& excludedValues,
|
||||
std::unordered_set<string>& insertedValues);
|
||||
|
||||
const std::unordered_set<string>& excludedValues,
|
||||
std::unordered_set<string>& insertedValues);
|
||||
|
||||
FastLM m_languageModel;
|
||||
CNSLM m_cnsData;
|
||||
CNSLM m_cnsModel;
|
||||
UserPhrasesLM m_userPhrases;
|
||||
UserPhrasesLM m_excludedPhrases;
|
||||
PhraseReplacementMap m_phraseReplacement;
|
||||
bool m_phraseReplacementEnabled;
|
||||
bool m_CNSEnabled;
|
||||
};
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue