CNS // Phase 7: Modify AllUnigrams to let CNSLM work.

This commit is contained in:
ShikiSuen 2022-01-23 18:47:38 +08:00
parent 50cdb6be6c
commit 2320c2657b
2 changed files with 34 additions and 13 deletions

View File

@ -19,6 +19,7 @@ vChewingLM::vChewingLM()
vChewingLM::~vChewingLM() vChewingLM::~vChewingLM()
{ {
m_languageModel.close(); m_languageModel.close();
m_cnsModel.close();
m_userPhrases.close(); m_userPhrases.close();
m_excludedPhrases.close(); m_excludedPhrases.close();
m_phraseReplacement.close(); m_phraseReplacement.close();
@ -35,8 +36,8 @@ void vChewingLM::loadLanguageModel(const char* languageModelDataPath)
void vChewingLM::loadCNSData(const char* cnsDataPath) void vChewingLM::loadCNSData(const char* cnsDataPath)
{ {
if (cnsDataPath) { if (cnsDataPath) {
m_cnsData.close(); m_cnsModel.close();
m_cnsData.open(cnsDataPath); m_cnsModel.open(cnsDataPath);
} }
} }
@ -70,6 +71,7 @@ const vector<Unigram> vChewingLM::unigramsForKey(const string& key)
{ {
vector<Unigram> allUnigrams; vector<Unigram> allUnigrams;
vector<Unigram> userUnigrams; vector<Unigram> userUnigrams;
vector<Unigram> cnsUnigrams;
unordered_set<string> excludedValues; unordered_set<string> excludedValues;
unordered_set<string> insertedValues; unordered_set<string> insertedValues;
@ -91,7 +93,13 @@ const vector<Unigram> vChewingLM::unigramsForKey(const string& key)
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues); allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
} }
if (m_cnsModel.hasUnigramsForKey(key)) {
vector<Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
}
allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end()); allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end());
allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end());
return allUnigrams; return allUnigrams;
} }
@ -114,6 +122,16 @@ bool vChewingLM::phraseReplacementEnabled()
return m_phraseReplacementEnabled; return m_phraseReplacementEnabled;
} }
void vChewingLM::setCNSEnabled(bool enabled)
{
m_CNSEnabled = enabled;
}
bool vChewingLM::CNSEnabled()
{
return m_CNSEnabled;
}
const vector<Unigram> vChewingLM::filterAndTransformUnigrams(vector<Unigram> unigrams, const unordered_set<string>& excludedValues, unordered_set<string>& insertedValues) const vector<Unigram> vChewingLM::filterAndTransformUnigrams(vector<Unigram> unigrams, const unordered_set<string>& excludedValues, unordered_set<string>& insertedValues)
{ {
vector<Unigram> results; vector<Unigram> results;

View File

@ -28,7 +28,6 @@ public:
void loadLanguageModel(const char* languageModelPath); void loadLanguageModel(const char* languageModelPath);
void loadCNSData(const char* cnsDataPath); void loadCNSData(const char* cnsDataPath);
void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath); void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath);
void loadPhraseReplacementMap(const char* phraseReplacementPath); void loadPhraseReplacementMap(const char* phraseReplacementPath);
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key); const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
@ -38,17 +37,21 @@ public:
void setPhraseReplacementEnabled(bool enabled); void setPhraseReplacementEnabled(bool enabled);
bool phraseReplacementEnabled(); bool phraseReplacementEnabled();
void setCNSEnabled(bool enabled);
bool CNSEnabled();
protected: protected:
const vector<Unigram> filterAndTransformUnigrams(vector<Unigram> unigrams, const vector<Unigram> filterAndTransformUnigrams(vector<Unigram> unigrams,
const std::unordered_set<string>& excludedValues, const std::unordered_set<string>& excludedValues,
std::unordered_set<string>& insertedValues); std::unordered_set<string>& insertedValues);
FastLM m_languageModel; FastLM m_languageModel;
CNSLM m_cnsData; CNSLM m_cnsModel;
UserPhrasesLM m_userPhrases; UserPhrasesLM m_userPhrases;
UserPhrasesLM m_excludedPhrases; UserPhrasesLM m_excludedPhrases;
PhraseReplacementMap m_phraseReplacement; PhraseReplacementMap m_phraseReplacement;
bool m_phraseReplacementEnabled; bool m_phraseReplacementEnabled;
bool m_CNSEnabled;
}; };
}; };