From e909dc20b5fa429bb29f75df171d1f743d9fab8c Mon Sep 17 00:00:00 2001 From: zonble Date: Sun, 9 Jan 2022 19:41:36 +0800 Subject: [PATCH] Uses user phrases in the block builder. --- .../Engine/Gramambular/BlockReadingBuilder.h | 13 +++++++++++-- Source/InputMethodController.h | 2 +- Source/InputMethodController.mm | 19 +++++++++++-------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index 9ab510be..516ec69a 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -38,7 +38,7 @@ namespace Formosa { class BlockReadingBuilder { public: - BlockReadingBuilder(LanguageModel *inLM); + BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM); void clear(); size_t length() const; @@ -73,11 +73,13 @@ namespace Formosa { Grid m_grid; LanguageModel *m_LM; + LanguageModel *m_UserPhraseLM; string m_joinSeparator; }; - inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM) + inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM) : m_LM(inLM) + , m_UserPhraseLM(inUserPhraseLM) , m_cursorIndex(0) , m_markerCursorIndex(SIZE_MAX) { @@ -219,6 +221,13 @@ namespace Formosa { for (size_t p = begin ; p < end ; p++) { for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) { string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); + if (m_UserPhraseLM != NULL) { + if (m_UserPhraseLM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { + Node n(combinedReading, m_UserPhraseLM->unigramsForKeys(combinedReading), vector<Bigram>()); + m_grid.insertNode(n, p, q); + continue; + } + } if (m_LM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { Node n(combinedReading, 
m_LM->unigramsForKeys(combinedReading), vector<Bigram>()); diff --git a/Source/InputMethodController.h b/Source/InputMethodController.h index 8edc389c..3fe1fdcc 100644 --- a/Source/InputMethodController.h +++ b/Source/InputMethodController.h @@ -47,7 +47,7 @@ // language model Formosa::Gramambular::FastLM *_languageModel; - Formosa::Gramambular::FastLM *_userPhrases; + Formosa::Gramambular::FastLM *_userPhrasesModel; // the grid (lattice) builder for the unigrams (and bigrams) Formosa::Gramambular::BlockReadingBuilder* _builder; diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index da504505..03541288 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -116,6 +116,10 @@ FastLM gLanguageModel; FastLM gLanguageModelPlainBopomofo; FastLM gUserPhraseLanguageModel; +static const int kUserOverrideModelCapacity = 500; +static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr. +McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife); + static NSString *userDataFolderPath() { NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES); @@ -129,12 +133,6 @@ static NSString *userPhrasesDataPath() return [userDataFolderPath() stringByAppendingPathComponent:@"data.txt"]; } - - -static const int kUserOverrideModelCapacity = 500; -static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr. 
-McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife); - // https://clang-analyzer.llvm.org/faq.html __attribute__((annotate("returns_localized_nsstring"))) static inline NSString *LocalizationNotNeeded(NSString *s) { @@ -206,7 +204,8 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon) // create the lattice builder _languageModel = &gLanguageModel; - _builder = new BlockReadingBuilder(_languageModel); + _userPhrasesModel = &gUserPhraseLanguageModel; + _builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel); _uom = &gUserOverrideModel; // each Mandarin syllable is separated by a hyphen @@ -338,14 +337,17 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon) { NSString *newInputMode; Formosa::Gramambular::FastLM *newLanguageModel; + Formosa::Gramambular::FastLM *userPhraseModel; if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) { newInputMode = kPlainBopomofoModeIdentifier; newLanguageModel = &gLanguageModelPlainBopomofo; + userPhraseModel = NULL; } else { newInputMode = kBopomofoModeIdentifier; newLanguageModel = &gLanguageModel; + userPhraseModel = &gUserPhraseLanguageModel; } // Only apply the changes if the value is changed @@ -361,6 +363,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon) _inputMode = newInputMode; _languageModel = newLanguageModel; + _userPhrasesModel = userPhraseModel; if (!_bpmfReadingBuffer->isEmpty()) { _bpmfReadingBuffer->clear(); @@ -373,7 +376,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon) if (_builder) { delete _builder; - _builder = new BlockReadingBuilder(_languageModel); + _builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel); _builder->setJoinSeparator("-"); } }