From b348a057359e3bd13825f973a567f3fd04f3f70e Mon Sep 17 00:00:00 2001 From: zonble Date: Sat, 15 Jan 2022 18:23:52 +0800 Subject: [PATCH 1/5] Filters duplicated unigram values properly. --- Source/Engine/McBopomofoLM.cpp | 84 +++++++++++++--------------------- Source/Engine/McBopomofoLM.h | 10 ++-- 2 files changed, 40 insertions(+), 54 deletions(-) diff --git a/Source/Engine/McBopomofoLM.cpp b/Source/Engine/McBopomofoLM.cpp index ea85c2dc..3fba9fc9 100644 --- a/Source/Engine/McBopomofoLM.cpp +++ b/Source/Engine/McBopomofoLM.cpp @@ -24,7 +24,6 @@ #include "McBopomofoLM.h" #include #include -#include using namespace McBopomofo; @@ -49,7 +48,7 @@ void McBopomofoLM::loadLanguageModel(const char* languageModelDataPath) } void McBopomofoLM::loadUserPhrases(const char* userPhrasesDataPath, - const char* excludedPhrasesDataPath) + const char* excludedPhrasesDataPath) { if (userPhrasesDataPath) { m_userPhrases.close(); @@ -61,7 +60,8 @@ void McBopomofoLM::loadUserPhrases(const char* userPhrasesDataPath, } } -void McBopomofoLM::loadPhraseReplacementMap(const char* phraseReplacementPath) { +void McBopomofoLM::loadPhraseReplacementMap(const char* phraseReplacementPath) +{ if (phraseReplacementPath) { m_phraseReplacement.close(); m_phraseReplacement.open(phraseReplacementPath); @@ -75,75 +75,37 @@ const vector McBopomofoLM::bigramsForKeys(const string& preceedingKey, c const vector McBopomofoLM::unigramsForKey(const string& key) { - vector unigrams; + vector allUnigrams; vector userUnigrams; - // Use unordered_set so that you don't have to do O(n*m) unordered_set excludedValues; - unordered_set userValues; + unordered_set insertedValues; if (m_excludedPhrases.hasUnigramsForKey(key)) { vector excludedUnigrams = m_excludedPhrases.unigramsForKey(key); transform(excludedUnigrams.begin(), excludedUnigrams.end(), - inserter(excludedValues, excludedValues.end()), - [](const Unigram &u) { return u.keyValue.value; }); + inserter(excludedValues, excludedValues.end()), + [](const 
Unigram& u) { return u.keyValue.value; }); } if (m_userPhrases.hasUnigramsForKey(key)) { vector rawUserUnigrams = m_userPhrases.unigramsForKey(key); - vector filterredUserUnigrams = m_userPhrases.unigramsForKey(key); - - for (auto&& unigram : rawUserUnigrams) { - if (excludedValues.find(unigram.keyValue.value) == excludedValues.end()) { - filterredUserUnigrams.push_back(unigram); - } - } - - transform(filterredUserUnigrams.begin(), filterredUserUnigrams.end(), - inserter(userValues, userValues.end()), - [](const Unigram &u) { return u.keyValue.value; }); - - if (m_phraseReplacementEnabled) { - for (auto&& unigram : filterredUserUnigrams) { - string value = unigram.keyValue.value; - string replacement = m_phraseReplacement.valueForKey(value); - if (replacement != "") { - unigram.keyValue.value = replacement; - } - unigrams.push_back(unigram); - } - } else { - unigrams = filterredUserUnigrams; - } + userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues); } if (m_languageModel.hasUnigramsForKey(key)) { - vector globalUnigrams = m_languageModel.unigramsForKey(key); - - for (auto&& unigram : globalUnigrams) { - string value = unigram.keyValue.value; - if (excludedValues.find(value) == excludedValues.end() && - userValues.find(value) == userValues.end()) { - if (m_phraseReplacementEnabled) { - string replacement = m_phraseReplacement.valueForKey(value); - if (replacement != "") { - unigram.keyValue.value = replacement; - } - } - unigrams.push_back(unigram); - } - } + vector rawGlobalUnigrams = m_languageModel.unigramsForKey(key); + allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues); } - unigrams.insert(unigrams.begin(), userUnigrams.begin(), userUnigrams.end()); - return unigrams; + allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end()); + return allUnigrams; } bool McBopomofoLM::hasUnigramsForKey(const string& key) { if (!m_excludedPhrases.hasUnigramsForKey(key)) { - 
return m_userPhrases.hasUnigramsForKey(key) || - m_languageModel.hasUnigramsForKey(key); + return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key); } return unigramsForKey(key).size() > 0; @@ -159,3 +121,23 @@ bool McBopomofoLM::phraseReplacementEnabled() return m_phraseReplacementEnabled; } +const vector McBopomofoLM::filterAndTransformUnigrams(vector unigrams, const unordered_set& excludedValues, unordered_set& insertedValues) +{ + vector results; + + for (auto&& unigram : unigrams) { + string value = unigram.keyValue.value; + if (m_phraseReplacementEnabled) { + string replacement = m_phraseReplacement.valueForKey(value); + if (replacement != "") { + value = replacement; + unigram.keyValue.value = value; + } + } + if (excludedValues.find(value) == excludedValues.end() && insertedValues.find(value) == insertedValues.end()) { + results.push_back(unigram); + insertedValues.insert(value); + } + } + return results; +} diff --git a/Source/Engine/McBopomofoLM.h b/Source/Engine/McBopomofoLM.h index 00babc01..00dbc360 100644 --- a/Source/Engine/McBopomofoLM.h +++ b/Source/Engine/McBopomofoLM.h @@ -28,6 +28,7 @@ #include "UserPhrasesLM.h" #include "ParselessLM.h" #include "PhraseReplacementMap.h" +#include namespace McBopomofo { @@ -38,9 +39,8 @@ public: McBopomofoLM(); ~McBopomofoLM(); - void loadLanguageModel(const char* languageModelDataPath); - void loadUserPhrases(const char* userPhrasesDataPath, - const char* excludedPhrasesDataPath); + void loadLanguageModel(const char* languageModelPath); + void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath); void loadPhraseReplacementMap(const char* phraseReplacementPath); const vector bigramsForKeys(const string& preceedingKey, const string& key); @@ -51,6 +51,10 @@ public: bool phraseReplacementEnabled(); protected: + const vector filterAndTransformUnigrams(vector unigrams, + const std::unordered_set& excludedValues, + std::unordered_set& insertedValues); + ParselessLM 
m_languageModel; UserPhrasesLM m_userPhrases; UserPhrasesLM m_excludedPhrases; From b627e8e3b67d692f6bcc1ec5d7dd4a9e9eb4d519 Mon Sep 17 00:00:00 2001 From: zonble Date: Sat, 15 Jan 2022 20:12:13 +0800 Subject: [PATCH 2/5] Adds an option to let users to choose Chinse conversion style. Option 0: converts the output. Option 1: converts the models. --- Source/AppDelegate.swift | 1 + Source/Base.lproj/preferences.xib | 83 ++++++++++++++++++++-------- Source/Engine/McBopomofoLM.cpp | 32 ++++++++++- Source/Engine/McBopomofoLM.h | 6 ++ Source/InputMethodController.mm | 25 +++++---- Source/LanguageModelManager.h | 1 + Source/LanguageModelManager.mm | 29 ++++++++++ Source/McBopomofo-Bridging-Header.h | 1 + Source/Preferences.swift | 46 ++++++++++++--- Source/zh-Hant.lproj/preferences.xib | 83 ++++++++++++++++++++-------- 10 files changed, 238 insertions(+), 69 deletions(-) diff --git a/Source/AppDelegate.swift b/Source/AppDelegate.swift index f93bf8b2..f8d348fb 100644 --- a/Source/AppDelegate.swift +++ b/Source/AppDelegate.swift @@ -51,6 +51,7 @@ class AppDelegate: NSObject, NSApplicationDelegate, NonModalAlertWindowControlle private var updateNextStepURL: URL? 
func applicationDidFinishLaunching(_ notification: Notification) { + LanguageModelManager.setupDataModelValueConverter() LanguageModelManager.loadDataModels() LanguageModelManager.loadUserPhrases() LanguageModelManager.loadUserPhraseReplacement() diff --git a/Source/Base.lproj/preferences.xib b/Source/Base.lproj/preferences.xib index 451d8d39..25ee9a38 100644 --- a/Source/Base.lproj/preferences.xib +++ b/Source/Base.lproj/preferences.xib @@ -19,14 +19,14 @@ - + - + - + @@ -35,7 +35,7 @@ - + @@ -47,7 +47,7 @@ - + @@ -56,7 +56,7 @@ - + @@ -84,7 +84,7 @@ - + @@ -93,7 +93,7 @@ - + @@ -114,7 +114,7 @@ - + @@ -134,7 +134,7 @@ - + @@ -143,7 +143,7 @@ - + @@ -152,7 +152,7 @@ - + @@ -161,7 +161,7 @@ - + @@ -187,7 +187,7 @@ - + @@ -212,8 +212,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -239,7 +274,7 @@ - + @@ -261,12 +296,8 @@ - - - - + + + + - + diff --git a/Source/Engine/McBopomofoLM.cpp b/Source/Engine/McBopomofoLM.cpp index 3fba9fc9..1e235447 100644 --- a/Source/Engine/McBopomofoLM.cpp +++ b/Source/Engine/McBopomofoLM.cpp @@ -121,20 +121,46 @@ bool McBopomofoLM::phraseReplacementEnabled() return m_phraseReplacementEnabled; } +void McBopomofoLM::setExternalConverterEnabled(bool enabled) +{ + m_externalConverterEnabled = enabled; +} + +bool McBopomofoLM::externalConverterEnabled() +{ + return m_externalConverterEnabled; +} + +void McBopomofoLM::setExternalConvrter(std::function externalConverter) +{ + m_externalConverter = externalConverter; +} + const vector McBopomofoLM::filterAndTransformUnigrams(vector unigrams, const unordered_set& excludedValues, unordered_set& insertedValues) { vector results; for (auto&& unigram : unigrams) { - string value = unigram.keyValue.value; + // excludedValues filters out the unigrams with the original value. 
+ // insertedValues filters out the ones with the converted value + string originalValue = unigram.keyValue.value; + if (excludedValues.find(originalValue) != excludedValues.end()) { + continue; + } + + string value = originalValue; if (m_phraseReplacementEnabled) { string replacement = m_phraseReplacement.valueForKey(value); if (replacement != "") { value = replacement; - unigram.keyValue.value = value; } } - if (excludedValues.find(value) == excludedValues.end() && insertedValues.find(value) == insertedValues.end()) { + if (m_externalConverterEnabled && m_externalConverter) { + string replacement = m_externalConverter(value); + value = replacement; + } + unigram.keyValue.value = value; + if (insertedValues.find(value) == insertedValues.end()) { results.push_back(unigram); insertedValues.insert(value); } diff --git a/Source/Engine/McBopomofoLM.h b/Source/Engine/McBopomofoLM.h index 00dbc360..63ae7361 100644 --- a/Source/Engine/McBopomofoLM.h +++ b/Source/Engine/McBopomofoLM.h @@ -50,6 +50,10 @@ public: void setPhraseReplacementEnabled(bool enabled); bool phraseReplacementEnabled(); + void setExternalConverterEnabled(bool enabled); + bool externalConverterEnabled(); + void setExternalConvrter(std::function externalConverter); + protected: const vector filterAndTransformUnigrams(vector unigrams, const std::unordered_set& excludedValues, @@ -60,6 +64,8 @@ protected: UserPhrasesLM m_excludedPhrases; PhraseReplacementMap m_phraseReplacement; bool m_phraseReplacementEnabled; + bool m_externalConverterEnabled; + std::function m_externalConverter; }; }; diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 762e7665..16963ebc 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -243,6 +243,8 @@ static double FindHighestScore(const vector& nodes, double epsilon) Preferences.keyboardLayout = KeyboardLayoutStandard; } + _languageModel->setExternalConverterEnabled(Preferences.chineseConversionStyle == 1); + 
[(AppDelegate *)[NSApp delegate] checkForUpdate]; } @@ -275,12 +277,14 @@ static double FindHighestScore(const vector& nodes, double epsilon) if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) { newInputMode = kPlainBopomofoModeIdentifier; newLanguageModel = [LanguageModelManager languageModelPlainBopomofo]; + newLanguageModel->setPhraseReplacementEnabled(false); } else { newInputMode = kBopomofoModeIdentifier; newLanguageModel = [LanguageModelManager languageModelMcBopomofo]; newLanguageModel->setPhraseReplacementEnabled(Preferences.phraseReplacementEnabled); } + newLanguageModel->setExternalConverterEnabled(Preferences.chineseConversionStyle == 1); // Only apply the changes if the value is changed if (![_inputMode isEqualToString:newInputMode]) { @@ -312,8 +316,16 @@ static double FindHighestScore(const vector& nodes, double epsilon) #pragma mark - IMKServerInput protocol methods -- (NSString *)_convertToSimplifiedChinese:(NSString *)text +- (NSString *)_convertToSimplifiedChineseIfRequired:(NSString *)text { + if (!Preferences.chineseConversionEnabled) { + return text; + } + + if (Preferences.chineseConversionStyle == 1) { + return text; + } + if (Preferences.chineneConversionEngine == 1) { return [VXHanConvert convertToSimplifiedFrom:text]; } @@ -333,11 +345,7 @@ static double FindHighestScore(const vector& nodes, double epsilon) } // Chinese conversion. - NSString *buffer = _composingBuffer; - - if (Preferences.chineseConversionEnabled) { - buffer = [self _convertToSimplifiedChinese:_composingBuffer]; - } + NSString *buffer = [self _convertToSimplifiedChineseIfRequired:_composingBuffer]; // commit the text, clear the state [client insertText:buffer replacementRange:NSMakeRange(NSNotFound, NSNotFound)]; @@ -483,10 +491,7 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? 
a : b; } NodeAnchor &anchor = _walkedNodes[0]; NSString *popedText = [NSString stringWithUTF8String:anchor.node->currentKeyValue().value.c_str()]; // Chinese conversion. - BOOL chineseConversionEnabled = Preferences.chineseConversionEnabled; - if (chineseConversionEnabled) { - popedText = [self _convertToSimplifiedChinese:popedText]; - } + popedText = [self _convertToSimplifiedChineseIfRequired:popedText]; [client insertText:popedText replacementRange:NSMakeRange(NSNotFound, NSNotFound)]; _builder->removeHeadReadings(anchor.spanningLength); } diff --git a/Source/LanguageModelManager.h b/Source/LanguageModelManager.h index 6a82c47a..ce28eaf5 100644 --- a/Source/LanguageModelManager.h +++ b/Source/LanguageModelManager.h @@ -9,6 +9,7 @@ NS_ASSUME_NONNULL_BEGIN + (void)loadDataModels; + (void)loadUserPhrases; + (void)loadUserPhraseReplacement; ++ (void)setupDataModelValueConverter; + (BOOL)checkIfUserLanguageModelFilesExist; + (BOOL)writeUserPhrase:(NSString *)userPhrase; diff --git a/Source/LanguageModelManager.mm b/Source/LanguageModelManager.mm index 189e2eb6..37fa2897 100644 --- a/Source/LanguageModelManager.mm +++ b/Source/LanguageModelManager.mm @@ -4,6 +4,10 @@ #import #import "OVStringHelper.h" #import "OVUTF8Helper.h" +#import "McBopomofo-Swift.h" + +@import VXHanConvert; +@import OpenCCBridge; using namespace std; using namespace Formosa::Gramambular; @@ -43,6 +47,31 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, McBopomo gLanguageModelMcBopomofo.loadPhraseReplacementMap([[self phraseReplacementDataPathMcBopomofo] UTF8String]); } ++ (void)setupDataModelValueConverter +{ + auto converter = [] (string input) { + if (!Preferences.chineseConversionEnabled) { + return input; + } + + if (Preferences.chineseConversionStyle == 0) { + return input; + } + + NSString *text = [NSString stringWithUTF8String:input.c_str()]; + if (Preferences.chineneConversionEngine == 1) { + text = [VXHanConvert convertToSimplifiedFrom:text]; + } + else { + text 
= [OpenCCBridge convertToSimplified:text]; + } + return string(text.UTF8String); + }; + + gLanguageModelMcBopomofo.setExternalConvrter(converter); + gLanguageModelPlainBopomofo.setExternalConvrter(converter); +} + + (BOOL)checkIfUserDataFolderExists { NSString *folderPath = [self dataFolderPath]; diff --git a/Source/McBopomofo-Bridging-Header.h b/Source/McBopomofo-Bridging-Header.h index 8310cc67..69a7fc4f 100644 --- a/Source/McBopomofo-Bridging-Header.h +++ b/Source/McBopomofo-Bridging-Header.h @@ -8,4 +8,5 @@ + (void)loadDataModels; + (void)loadUserPhrases; + (void)loadUserPhraseReplacement; ++ (void)setupDataModelValueConverter; @end diff --git a/Source/Preferences.swift b/Source/Preferences.swift index e1f2303c..61ecead0 100644 --- a/Source/Preferences.swift +++ b/Source/Preferences.swift @@ -50,8 +50,9 @@ private let kEscToCleanInputBufferKey = "EscToCleanInputBuffer" private let kCandidateTextFontName = "CandidateTextFontName" private let kCandidateKeyLabelFontName = "CandidateKeyLabelFontName" private let kCandidateKeys = "CandidateKeys" -private let kChineseConversionEngineKey = "ChineseConversionEngine" private let kPhraseReplacementEnabledKey = "PhraseReplacementEnabled" +private let kChineseConversionEngineKey = "ChineseConversionEngine" +private let kChineseConversionStyle = "ChineseConversionStyle" private let kDefaultCandidateListTextSize: CGFloat = 16 private let kMinKeyLabelSize: CGFloat = 10 @@ -217,6 +218,20 @@ struct ComposingKeys { } } +@objc enum ChineseConversionStyle: Int { + case output + case model + + var name: String { + switch (self) { + case .output: + return "output" + case .model: + return "model" + } + } +} + // MARK: - class Preferences: NSObject { @@ -285,13 +300,6 @@ class Preferences: NSObject { kDefaultKeys } - @UserDefault(key: kChineseConversionEngineKey, defaultValue: 0) - @objc static var chineneConversionEngine: Int - - @objc static var chineneConversionEngineName: String? 
{ - return ChineseConversionEngine(rawValue: chineneConversionEngine)?.name - } - @UserDefault(key: kPhraseReplacementEnabledKey, defaultValue: false) @objc static var phraseReplacementEnabled: Bool @@ -300,4 +308,26 @@ class Preferences: NSObject { return phraseReplacementEnabled; } + /// The conversion engine. + /// + /// - 0: OpenCC + /// - 1: VXHanConvert + @UserDefault(key: kChineseConversionEngineKey, defaultValue: 0) + @objc static var chineneConversionEngine: Int + + @objc static var chineneConversionEngineName: String? { + return ChineseConversionEngine(rawValue: chineneConversionEngine)?.name + } + + /// The conversion style. + /// + /// - 0: convert the output + /// - 1: convert the phrase models. + @UserDefault(key: kChineseConversionStyle, defaultValue: 0) + @objc static var chineseConversionStyle: Int + + @objc static var chineseConversionStyleName: String? { + return ChineseConversionStyle(rawValue: chineseConversionStyle)?.name + } + } diff --git a/Source/zh-Hant.lproj/preferences.xib b/Source/zh-Hant.lproj/preferences.xib index 6cd9981c..3aad70ae 100644 --- a/Source/zh-Hant.lproj/preferences.xib +++ b/Source/zh-Hant.lproj/preferences.xib @@ -19,14 +19,14 @@ - + - + - + @@ -49,7 +49,7 @@ - + @@ -58,7 +58,7 @@ - + @@ -70,7 +70,7 @@ - + @@ -79,7 +79,7 @@ - + @@ -88,7 +88,7 @@ - + @@ -97,7 +97,7 @@ - + @@ -123,7 +123,7 @@ - + @@ -149,7 +149,7 @@ - + @@ -172,7 +172,7 @@ - + @@ -200,7 +200,7 @@ - + @@ -209,7 +209,7 @@ - + @@ -229,16 +229,16 @@ - + - + - + @@ -263,12 +263,43 @@ - - + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + From 5c0a14deeb5d745d669ec85fbe01561987bfd21e Mon Sep 17 00:00:00 2001 From: zonble Date: Sat, 15 Jan 2022 20:34:02 +0800 Subject: [PATCH 3/5] Refactors the function to filter and transform unigrams in McBopomofoLM. 
--- Source/Engine/McBopomofoLM.cpp | 9 ++++++--- Source/Engine/McBopomofoLM.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Source/Engine/McBopomofoLM.cpp b/Source/Engine/McBopomofoLM.cpp index 1e235447..c7b51c49 100644 --- a/Source/Engine/McBopomofoLM.cpp +++ b/Source/Engine/McBopomofoLM.cpp @@ -136,7 +136,7 @@ void McBopomofoLM::setExternalConvrter(std::function externalCon m_externalConverter = externalConverter; } -const vector McBopomofoLM::filterAndTransformUnigrams(vector unigrams, const unordered_set& excludedValues, unordered_set& insertedValues) +const vector McBopomofoLM::filterAndTransformUnigrams(const vector unigrams, const unordered_set& excludedValues, unordered_set& insertedValues) { vector results; @@ -159,9 +159,12 @@ const vector McBopomofoLM::filterAndTransformUnigrams(vector u string replacement = m_externalConverter(value); value = replacement; } - unigram.keyValue.value = value; if (insertedValues.find(value) == insertedValues.end()) { - results.push_back(unigram); + Unigram g; + g.keyValue.value = value; + g.keyValue.key = unigram.keyValue.key; + g.score = unigram.score; + results.push_back(g); insertedValues.insert(value); } } diff --git a/Source/Engine/McBopomofoLM.h b/Source/Engine/McBopomofoLM.h index 63ae7361..3b8a5109 100644 --- a/Source/Engine/McBopomofoLM.h +++ b/Source/Engine/McBopomofoLM.h @@ -55,7 +55,7 @@ public: void setExternalConvrter(std::function externalConverter); protected: - const vector filterAndTransformUnigrams(vector unigrams, + const vector filterAndTransformUnigrams(const vector unigrams, const std::unordered_set& excludedValues, std::unordered_set& insertedValues); From 4e56f350e81295617952ea079d7413bf539cd06b Mon Sep 17 00:00:00 2001 From: zonble Date: Sat, 15 Jan 2022 20:50:12 +0800 Subject: [PATCH 4/5] Fixes a wrong API call. 
--- Source/InputMethodController.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 16963ebc..a1e30e27 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -1509,7 +1509,7 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; } { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-result" - [Preferences tooglePhraseReplacementEnabled]; + [Preferences toogleHalfWidthPunctuationEnabled]; #pragma GCC diagnostic pop } From c4259c4c4ea7f0f20a4ad3f182bf687716f30ca4 Mon Sep 17 00:00:00 2001 From: zonble Date: Sun, 16 Jan 2022 15:02:32 +0800 Subject: [PATCH 5/5] Updates comments and fixes a typo. --- Source/Engine/McBopomofoLM.cpp | 2 +- Source/Engine/McBopomofoLM.h | 49 +++++++++++++++++++++++++++++++++- Source/LanguageModelManager.mm | 4 +-- 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/Source/Engine/McBopomofoLM.cpp b/Source/Engine/McBopomofoLM.cpp index c7b51c49..9c73bcef 100644 --- a/Source/Engine/McBopomofoLM.cpp +++ b/Source/Engine/McBopomofoLM.cpp @@ -131,7 +131,7 @@ bool McBopomofoLM::externalConverterEnabled() return m_externalConverterEnabled; } -void McBopomofoLM::setExternalConvrter(std::function externalConverter) +void McBopomofoLM::setExternalConverter(std::function externalConverter) { m_externalConverter = externalConverter; } diff --git a/Source/Engine/McBopomofoLM.h b/Source/Engine/McBopomofoLM.h index 3b8a5109..de90a4a3 100644 --- a/Source/Engine/McBopomofoLM.h +++ b/Source/Engine/McBopomofoLM.h @@ -34,27 +34,74 @@ namespace McBopomofo { using namespace Formosa::Gramambular; +/// McBopomofoLM is a facade for managing a set of models including +/// the input method language model, user phrases and excluded phrases. +/// +/// It is the primary model class that the input controller and grammar builder +/// of McBopomofo talk to. 
When the grammar builder starts to build a sentence +/// from a series of BPMF readings, it passes the readings to the model to see +/// if there are valid unigrams, and uses the returned unigrams to produce the final +/// results. +/// +/// McBopomofoLM combines and transforms the unigrams from the primary language +/// model and user phrases. The process is: +/// +/// 1) Get the original unigrams. +/// 2) Drop the unigrams whose value is contained in the exclusion map. +/// 3) Replace the values of the unigrams using the phrase replacement map. +/// 4) Replace the values of the unigrams using an external converter lambda. +/// 5) Drop the duplicated phrases. +/// +/// The controller can ask the model to load the primary input method language +/// model while launching and to load the user phrases anytime if the custom +/// files are modified. It does not keep references to the data paths, so +/// you have to pass the paths when you ask it to do loading. class McBopomofoLM : public LanguageModel { public: McBopomofoLM(); ~McBopomofoLM(); + /// Asks to load the primary language model at the given path. + /// @param languageModelPath The path of the language model. void loadLanguageModel(const char* languageModelPath); + /// Asks to load the user phrases and excluded phrases at the given paths. + /// @param userPhrasesPath The path of user phrases. + /// @param excludedPhrasesPath The path of excluded phrases. void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath); + /// Asks to load the phrase replacement table at the given path. + /// @param phraseReplacementPath The path of the phrase replacement table. void loadPhraseReplacementMap(const char* phraseReplacementPath); + /// Not implemented since we do not have data to provide bigram function. const vector bigramsForKeys(const string& preceedingKey, const string& key); + /// Returns a list of available unigrams for the given key. 
+ /// @param key A string that represents the BPMF reading or a symbol key. For + /// example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on. const vector unigramsForKey(const string& key); + /// If the model has unigrams for the given key. + /// @param key The key. bool hasUnigramsForKey(const string& key); + /// Enables or disables phrase replacement. void setPhraseReplacementEnabled(bool enabled); + /// If phrase replacement is enabled or not. bool phraseReplacementEnabled(); + /// Enables or disables the external converter. void setExternalConverterEnabled(bool enabled); + /// If the external converter is enabled or not. bool externalConverterEnabled(); - void setExternalConvrter(std::function externalConverter); + /// Sets a lambda that is used to convert the values of unigrams. + void setExternalConverter(std::function externalConverter); protected: + /// Filters and converts the input unigrams and returns a new list of unigrams. + /// + /// @param unigrams The unigrams to be processed. + /// @param excludedValues The values of excluded unigrams. + /// @param insertedValues The values for unigrams already in the results. + /// It helps to prevent duplicated unigrams. Please note that the method + /// has a side effect that it inserts values to `insertedValues`. 
const vector filterAndTransformUnigrams(const vector unigrams, const std::unordered_set& excludedValues, std::unordered_set& insertedValues); diff --git a/Source/LanguageModelManager.mm b/Source/LanguageModelManager.mm index 37fa2897..bdf0ac9f 100644 --- a/Source/LanguageModelManager.mm +++ b/Source/LanguageModelManager.mm @@ -68,8 +68,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, McBopomo return string(text.UTF8String); }; - gLanguageModelMcBopomofo.setExternalConvrter(converter); - gLanguageModelPlainBopomofo.setExternalConvrter(converter); + gLanguageModelMcBopomofo.setExternalConverter(converter); + gLanguageModelPlainBopomofo.setExternalConverter(converter); } + (BOOL)checkIfUserDataFolderExists