CNS // Phase 7: Modify AllUnigrams to let CNSLM work.

This commit is contained in:
ShikiSuen 2022-01-23 18:47:38 +08:00
parent 50cdb6be6c
commit 2320c2657b
2 changed files with 34 additions and 13 deletions

View File

@ -19,6 +19,7 @@ vChewingLM::vChewingLM()
vChewingLM::~vChewingLM()
{
m_languageModel.close();
m_cnsModel.close();
m_userPhrases.close();
m_excludedPhrases.close();
m_phraseReplacement.close();
@ -35,8 +36,8 @@ void vChewingLM::loadLanguageModel(const char* languageModelDataPath)
// Reloads the CNS data from the file at `cnsDataPath`.
// A null `cnsDataPath` is ignored: nothing is closed or opened in that case.
void vChewingLM::loadCNSData(const char* cnsDataPath)
{
if (cnsDataPath) {
// NOTE(review): both m_cnsData and m_cnsModel are closed and reopened here
// with the same path. This looks like the before/after lines of a member
// rename shown together -- confirm which member is the live one.
m_cnsData.close();
m_cnsData.open(cnsDataPath);
// close() is called before open(), presumably to drop whatever was loaded
// by an earlier call -- verify against CNSLM.
m_cnsModel.close();
m_cnsModel.open(cnsDataPath);
}
}
@ -70,7 +71,8 @@ const vector<Unigram> vChewingLM::unigramsForKey(const string& key)
{
vector<Unigram> allUnigrams;
vector<Unigram> userUnigrams;
vector<Unigram> cnsUnigrams;
unordered_set<string> excludedValues;
unordered_set<string> insertedValues;
@ -90,8 +92,14 @@ const vector<Unigram> vChewingLM::unigramsForKey(const string& key)
vector<Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
}
if (m_cnsModel.hasUnigramsForKey(key)) {
vector<Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
}
allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end());
allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end());
return allUnigrams;
}
@ -114,6 +122,16 @@ bool vChewingLM::phraseReplacementEnabled()
return m_phraseReplacementEnabled;
}
void vChewingLM::setCNSEnabled(bool enabled)
{
m_CNSEnabled = enabled;
}
bool vChewingLM::CNSEnabled()
{
return m_CNSEnabled;
}
const vector<Unigram> vChewingLM::filterAndTransformUnigrams(vector<Unigram> unigrams, const unordered_set<string>& excludedValues, unordered_set<string>& insertedValues)
{
vector<Unigram> results;

View File

@ -24,31 +24,34 @@ class vChewingLM : public LanguageModel {
// Language model that combines several underlying sources: the main
// language model, a CNS model, user phrases, excluded phrases and a
// phrase-replacement map (see the member list below).
public:
vChewingLM();
// Closes every underlying model/map held by this object.
~vChewingLM();
void loadLanguageModel(const char* languageModelPath);
// Closes and reopens the CNS model from `cnsDataPath`; a null path is ignored.
void loadCNSData(const char* cnsDataPath);
void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath);
void loadPhraseReplacementMap(const char* phraseReplacementPath);
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
// Returns the unigrams for `key`. In the visible implementation the user
// unigrams are placed first, followed by those from the main language model,
// with the CNS unigrams appended last.
const vector<Unigram> unigramsForKey(const string& key);
bool hasUnigramsForKey(const string& key);
// Setter/getter pair for the phrase-replacement flag; the getter returns
// m_phraseReplacementEnabled.
void setPhraseReplacementEnabled(bool enabled);
bool phraseReplacementEnabled();
// Setter/getter pair for the CNS flag stored in m_CNSEnabled.
void setCNSEnabled(bool enabled);
bool CNSEnabled();
protected:
// NOTE(review): the two trailing parameter lines of this declaration appear
// twice below; this looks like the before/after of a whitespace-only change
// shown together -- confirm against the real header.
const vector<Unigram> filterAndTransformUnigrams(vector<Unigram> unigrams,
const std::unordered_set<string>& excludedValues,
std::unordered_set<string>& insertedValues);
const std::unordered_set<string>& excludedValues,
std::unordered_set<string>& insertedValues);
// Main language model queried by unigramsForKey.
FastLM m_languageModel;
// NOTE(review): m_cnsData and m_cnsModel are both listed; this looks like the
// old and new name of a single CNS model member -- confirm which one is live.
CNSLM m_cnsData;
CNSLM m_cnsModel;
UserPhrasesLM m_userPhrases;
UserPhrasesLM m_excludedPhrases;
PhraseReplacementMap m_phraseReplacement;
// Flag returned by phraseReplacementEnabled().
bool m_phraseReplacementEnabled;
// Flag written by setCNSEnabled() and returned by CNSEnabled().
bool m_CNSEnabled;
};
};