From 2320c2657be8a7e6b97cfaeb27a24c326ddc0aa4 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sun, 23 Jan 2022 18:47:38 +0800 Subject: [PATCH] CNS // Phase 7: Modify AllUnigrams to let CNSLM work. --- Source/Engine/LanguageModel/vChewingLM.cpp | 26 ++++++++++++++++++---- Source/Engine/LanguageModel/vChewingLM.h | 21 +++++++++-------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/Source/Engine/LanguageModel/vChewingLM.cpp b/Source/Engine/LanguageModel/vChewingLM.cpp index d6c77be3..31e32617 100644 --- a/Source/Engine/LanguageModel/vChewingLM.cpp +++ b/Source/Engine/LanguageModel/vChewingLM.cpp @@ -19,6 +19,7 @@ vChewingLM::vChewingLM() vChewingLM::~vChewingLM() { m_languageModel.close(); + m_cnsModel.close(); m_userPhrases.close(); m_excludedPhrases.close(); m_phraseReplacement.close(); @@ -35,8 +36,8 @@ void vChewingLM::loadLanguageModel(const char* languageModelDataPath) void vChewingLM::loadCNSData(const char* cnsDataPath) { if (cnsDataPath) { - m_cnsData.close(); - m_cnsData.open(cnsDataPath); + m_cnsModel.close(); + m_cnsModel.open(cnsDataPath); } } @@ -70,7 +71,8 @@ const vector vChewingLM::unigramsForKey(const string& key) { vector allUnigrams; vector userUnigrams; - + vector cnsUnigrams; + unordered_set excludedValues; unordered_set insertedValues; @@ -90,8 +92,14 @@ const vector vChewingLM::unigramsForKey(const string& key) vector rawGlobalUnigrams = m_languageModel.unigramsForKey(key); allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues); } - + + if (m_cnsModel.hasUnigramsForKey(key)) { + vector rawCNSUnigrams = m_cnsModel.unigramsForKey(key); + cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues); + } + allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end()); + allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end()); return allUnigrams; } @@ -114,6 +122,16 @@ bool vChewingLM::phraseReplacementEnabled() return m_phraseReplacementEnabled; } +void vChewingLM::setCNSEnabled(bool enabled) +{ + m_CNSEnabled = enabled; +} + +bool vChewingLM::CNSEnabled() +{ + return m_CNSEnabled; +} + const vector vChewingLM::filterAndTransformUnigrams(vector unigrams, const unordered_set& excludedValues, unordered_set& insertedValues) { vector results; diff --git a/Source/Engine/LanguageModel/vChewingLM.h b/Source/Engine/LanguageModel/vChewingLM.h index 06feb42b..3378e5de 100644 --- a/Source/Engine/LanguageModel/vChewingLM.h +++ b/Source/Engine/LanguageModel/vChewingLM.h @@ -24,31 +24,34 @@ class vChewingLM : public LanguageModel { public: vChewingLM(); ~vChewingLM(); - + void loadLanguageModel(const char* languageModelPath); void loadCNSData(const char* cnsDataPath); void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath); - void loadPhraseReplacementMap(const char* phraseReplacementPath); - + const vector bigramsForKeys(const string& preceedingKey, const string& key); const vector unigramsForKey(const string& key); bool hasUnigramsForKey(const string& key); - + void setPhraseReplacementEnabled(bool enabled); bool phraseReplacementEnabled(); - + + void setCNSEnabled(bool enabled); + bool CNSEnabled(); + protected: const vector filterAndTransformUnigrams(vector unigrams, - const std::unordered_set& excludedValues, - std::unordered_set& insertedValues); - + const std::unordered_set& excludedValues, + std::unordered_set& insertedValues); + FastLM m_languageModel; - CNSLM m_cnsData; + CNSLM m_cnsModel; UserPhrasesLM m_userPhrases; UserPhrasesLM m_excludedPhrases; PhraseReplacementMap m_phraseReplacement; bool m_phraseReplacementEnabled; + bool m_CNSEnabled; }; };