From 9b485b799ca1c66fe69c4f66f19e63267e400db9 Mon Sep 17 00:00:00 2001 From: zonble Date: Wed, 12 Jan 2022 00:16:55 +0800 Subject: [PATCH] Implements excluding phrases. --- .../Engine/Gramambular/BlockReadingBuilder.h | 52 +++++++++--- Source/InputMethodController.h | 1 + Source/InputMethodController.mm | 82 +++++++++++++------ Source/LanguageModelManager.h | 9 +- Source/LanguageModelManager.mm | 76 ++++++++++++++--- Source/McBopomofo-Bridging-Header.h | 2 +- 6 files changed, 171 insertions(+), 51 deletions(-) diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index 08508b55..bd3dc2d0 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -38,7 +38,7 @@ namespace Formosa { class BlockReadingBuilder { public: - BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM); + BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM, LanguageModel *inExcludedPhrasesLM); void clear(); size_t length() const; @@ -58,6 +58,8 @@ namespace Formosa { vector readingsAtRange(size_t begin, size_t end) const; Grid& grid(); + + bool checkIfUnigramExistInVector(Unigram& unigram, vectorvector); /* True when some element of the vector has the same keyValue.value as unigram. */ protected: void build(); @@ -73,13 +75,17 @@ namespace Formosa { Grid m_grid; LanguageModel *m_LM; - LanguageModel *m_UserPhraseLM; + LanguageModel *m_userPhraseLM; + LanguageModel *m_excludedPhrasesLM; /* May be NULL; when set, build() removes every unigram whose value this model lists for the same reading. */ string m_joinSeparator; }; - inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM) + inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM, + LanguageModel *inUserPhraseLM, + LanguageModel *inExcludedPhrasesLM) : m_LM(inLM) - , m_UserPhraseLM(inUserPhraseLM) + , m_userPhraseLM(inUserPhraseLM) + , m_excludedPhrasesLM(inExcludedPhrasesLM) , m_cursorIndex(0) , m_markerCursorIndex(SIZE_MAX) { @@ -197,7 +203,17 @@ namespace Formosa { { return m_grid; } - + + inline bool 
BlockReadingBuilder::checkIfUnigramExistInVector(Unigram& unigram, vectorvector) /* Linear scan that compares only keyValue.value. */ + { + for (std::vector::iterator it=vector.begin(); it!=vector.end(); ++it) { + if (it->keyValue.value == unigram.keyValue.value) { + return true; + } + } + return false; + } + inline void BlockReadingBuilder::build() { if (!m_LM) { @@ -223,17 +239,31 @@ namespace Formosa { string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { vector unigrams; + vector userUnigrams; - if (m_UserPhraseLM != NULL) { - if (m_UserPhraseLM->hasUnigramsForKey(combinedReading)) { - vector userUnigrams = m_UserPhraseLM->unigramsForKeys(combinedReading); - unigrams.insert(unigrams.end(), userUnigrams.begin(), userUnigrams.end()); - } + if (m_userPhraseLM != NULL && m_userPhraseLM->hasUnigramsForKey(combinedReading)) { + userUnigrams = m_userPhraseLM->unigramsForKeys(combinedReading); } if (m_LM->hasUnigramsForKey(combinedReading)) { vector globalUnigrams = m_LM->unigramsForKeys(combinedReading); - unigrams.insert(unigrams.end(), globalUnigrams.begin(), globalUnigrams.end()); + for (std::vector::iterator it=globalUnigrams.begin(); it!=globalUnigrams.end(); ++it) { /* Keep a global unigram only if neither the user phrases nor the globals kept so far already supply its value; the user entries are prepended below and must not be listed twice. */ + if (!checkIfUnigramExistInVector(*it, userUnigrams) && !checkIfUnigramExistInVector(*it, unigrams)) { + unigrams.push_back(*it); + } + } + } + unigrams.insert(unigrams.begin(), userUnigrams.begin(), userUnigrams.end()); + + if (m_excludedPhrasesLM != NULL && m_excludedPhrasesLM->hasUnigramsForKey(combinedReading)) { /* Drop every candidate whose value the excluded-phrases model lists for this reading. */ + vector excludedUnigrams = m_excludedPhrasesLM->unigramsForKeys(combinedReading); + vector newUnigram; + for (std::vector::iterator it=unigrams.begin(); it!=unigrams.end(); ++it) { + if (!checkIfUnigramExistInVector(*it, excludedUnigrams)) { + newUnigram.push_back(*it); + } + } + unigrams = newUnigram; } if (unigrams.size() > 0) { diff --git a/Source/InputMethodController.h b/Source/InputMethodController.h index a9813def..a99e248e 100644 --- 
a/Source/InputMethodController.h +++ b/Source/InputMethodController.h @@ -48,6 +48,7 @@ // language model Formosa::Gramambular::FastLM *_languageModel; Formosa::Gramambular::FastLM *_userPhrasesModel; + Formosa::Gramambular::FastLM *_excludedPhraseModel; /* Excluded-phrases model of the current input mode; handed to BlockReadingBuilder as its third argument. */ // user override model McBopomofo::UserOverrideModel *_userOverrideModel; diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 2111b963..2a36ad49 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -178,8 +178,9 @@ static double FindHighestScore(const vector& nodes, double epsilon) _languageModel = [LanguageModelManager languageModelMcBopomofo]; _userPhrasesModel = [LanguageModelManager userPhraseLanguageModel]; _userOverrideModel = [LanguageModelManager userOverrideModel]; + _excludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelMcBopomofo]; - _builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel); + _builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel, _excludedPhraseModel); // each Mandarin syllable is separated by a hyphen _builder->setJoinSeparator("-"); @@ -206,18 +207,23 @@ static double FindHighestScore(const vector& nodes, double epsilon) chineseConversionMenuItem.state = _chineseConversionEnabled ? 
NSControlStateValueOn : NSControlStateValueOff; [menu addItem:chineseConversionMenuItem]; - if (_inputMode != kPlainBopomofoModeIdentifier) { - [menu addItem:[NSMenuItem separatorItem]]; - [menu addItemWithTitle:NSLocalizedString(@"User Phrases", @"") action:NULL keyEquivalent:@""]; - NSMenuItem *editUserPheaseItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit User Phrases", @"") action:@selector(openUserPhrases:) keyEquivalent:@""]; - [editUserPheaseItem setIndentationLevel:2]; - [menu addItem:editUserPheaseItem]; - - NSMenuItem *reloadUserPheaseItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Reload User Phrases", @"") action:@selector(reloadUserPhrases:) keyEquivalent:@""]; - [reloadUserPheaseItem setIndentationLevel:2]; - [menu addItem:reloadUserPheaseItem]; - [menu addItem:[NSMenuItem separatorItem]]; + [menu addItem:[NSMenuItem separatorItem]]; + [menu addItemWithTitle:NSLocalizedString(@"User Phrases", @"") action:NULL keyEquivalent:@""]; + if (_inputMode == kPlainBopomofoModeIdentifier) { /* Plain Bopomofo mode offers only the excluded-phrases editor; the other mode offers the user-phrases editor as well. */ + NSMenuItem *editExcludedPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit Excluded Phrases", @"") action:@selector(openExcludedPhrasesPlainBopomofo:) keyEquivalent:@""]; + [menu addItem:editExcludedPhrasesItem]; } + else { + NSMenuItem *editUserPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit User Phrases", @"") action:@selector(openUserPhrases:) keyEquivalent:@""]; + [menu addItem:editUserPhrasesItem]; + + NSMenuItem *editExcludedPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit Excluded Phrases", @"") action:@selector(openExcludedPhrasesMcBopomofo:) keyEquivalent:@""]; + [menu addItem:editExcludedPhrasesItem]; + } + + NSMenuItem *reloadUserPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Reload User Phrases", @"") action:@selector(reloadUserPhrases:) keyEquivalent:@""]; + [menu addItem:reloadUserPhrasesItem]; + [menu addItem:[NSMenuItem separatorItem]]; 
NSMenuItem *updateCheckItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Check for Updates…", @"") action:@selector(checkForUpdate:) keyEquivalent:@""]; [menu addItem:updateCheckItem]; @@ -318,18 +324,21 @@ static double FindHighestScore(const vector& nodes, double epsilon) - (void)setValue:(id)value forTag:(long)tag client:(id)sender { NSString *newInputMode; - Formosa::Gramambular::FastLM *newLanguageModel; - Formosa::Gramambular::FastLM *newUserPhraseModel; + FastLM *newLanguageModel; + FastLM *newUserPhrasesModel; + FastLM *newExcludedPhraseModel; if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) { newInputMode = kPlainBopomofoModeIdentifier; newLanguageModel = [LanguageModelManager languageModelPlainBopomofo]; - newUserPhraseModel = NULL; + newUserPhrasesModel = NULL; + newExcludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelPlainBopomofo]; /* Each input mode selects its own excluded-phrases model. */ } else { newInputMode = kBopomofoModeIdentifier; newLanguageModel = [LanguageModelManager languageModelMcBopomofo]; - newUserPhraseModel = [LanguageModelManager userPhraseLanguageModel]; + newUserPhrasesModel = [LanguageModelManager userPhraseLanguageModel]; + newExcludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelMcBopomofo]; } // Only apply the changes if the value is changed @@ -345,7 +354,8 @@ static double FindHighestScore(const vector& nodes, double epsilon) _inputMode = newInputMode; _languageModel = newLanguageModel; - _userPhrasesModel = newUserPhraseModel; + _userPhrasesModel = newUserPhrasesModel; + _excludedPhraseModel = newExcludedPhraseModel; if (!_bpmfReadingBuffer->isEmpty()) { _bpmfReadingBuffer->clear(); @@ -358,7 +368,7 @@ static double FindHighestScore(const vector& nodes, double epsilon) if (_builder) { delete _builder; - _builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel); + _builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel, _excludedPhraseModel); /* The builder is recreated so that it uses the models chosen for the new mode. */ 
_builder->setJoinSeparator("-"); } } @@ -1489,24 +1499,44 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; } [(AppDelegate *)[[NSApplication sharedApplication] delegate] checkForUpdateForced:YES]; } -- (void)openUserPhrases:(id)sender +- (BOOL)_checkUserFiles /* Returns YES when LanguageModelManager reports the user files as available; otherwise shows an alert that names the data folder and returns NO. */ { - NSLog(@"openUserPhrases called"); - if (![LanguageModelManager checkIfUserLanguageModelFileExists] ) { + if (![LanguageModelManager checkIfUserLanguageModelFilesExist] ) { NSString *content = [NSString stringWithFormat:NSLocalizedString(@"Please check the permission of at \"%@\".", @""), [LanguageModelManager dataFolderPath]]; [[NonModalAlertWindowController sharedInstance] showWithTitle:NSLocalizedString(@"Unable to create the user phrase file.", @"") content:content confirmButtonTitle:NSLocalizedString(@"OK", @"") cancelButtonTitle:nil cancelAsDefault:NO delegate:nil]; - return; + return NO; } - NSString *path = [LanguageModelManager userPhrasesDataPath]; - NSLog(@"Open %@", path); - if (![[NSFileManager defaultManager] fileExistsAtPath:path]) { - [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:path atomically:YES]; + return YES; +} + +- (void)_openUserFile:(NSString *)path /* Opens the file at path through NSWorkspace, but only after _checkUserFiles succeeds. */ +{ + if (![self _checkUserFiles]) { + return; } NSURL *url = [NSURL fileURLWithPath:path]; [[NSWorkspace sharedWorkspace] openURL:url]; } +- (void)openUserPhrases:(id)sender +{ + NSLog(@"openUserPhrases called"); + [self _openUserFile:[LanguageModelManager userPhrasesDataPathMcBopomofo]]; +} + +- (void)openExcludedPhrasesPlainBopomofo:(id)sender +{ + NSLog(@"openExcludedPhrasesPlainBopomofo called"); + [self _openUserFile:[LanguageModelManager excludedPhrasesDataPathPlainBopomofo]]; +} + +- (void)openExcludedPhrasesMcBopomofo:(id)sender +{ + NSLog(@"openExcludedPhrasesMcBopomofo called"); + [self _openUserFile:[LanguageModelManager excludedPhrasesDataPathMcBopomofo]]; +} + - (void)reloadUserPhrases:(id)sender { NSLog(@"reloadUserPhrases called"); diff --git a/Source/LanguageModelManager.h 
b/Source/LanguageModelManager.h index 0c199cc0..9ba88bb0 100644 --- a/Source/LanguageModelManager.h +++ b/Source/LanguageModelManager.h @@ -8,16 +8,19 @@ NS_ASSUME_NONNULL_BEGIN + (void)loadDataModels; + (void)loadUserPhrasesModel; -+ (BOOL)checkIfUserLanguageModelFileExists; ++ (BOOL)checkIfUserLanguageModelFilesExist; /* Returns NO as soon as the data folder check or any of the three user file checks fails, YES otherwise. */ + (BOOL)writeUserPhrase:(NSString *)userPhrase; @property (class, readonly, nonatomic) NSString *dataFolderPath; -@property (class, readonly, nonatomic) NSString *userPhrasesDataPath; +@property (class, readonly, nonatomic) NSString *userPhrasesDataPathMcBopomofo; /* data.txt inside dataFolderPath */ +@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathMcBopomofo; /* exclude-phrases.txt inside dataFolderPath */ +@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathPlainBopomofo; /* exclude-phrases-plain-bpmf.txt inside dataFolderPath */ @property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelMcBopomofo; @property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelPlainBopomofo; @property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *userPhraseLanguageModel; +@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *excludedPhrasesLanguageModelMcBopomofo; /* Opened from excludedPhrasesDataPathMcBopomofo by loadUserPhrasesModel. */ +@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *excludedPhrasesLanguageModelPlainBopomofo; /* Opened from excludedPhrasesDataPathPlainBopomofo by loadUserPhrasesModel. */ @property (class, readonly, nonatomic) McBopomofo::UserOverrideModel *userOverrideModel; - @end NS_ASSUME_NONNULL_END diff --git a/Source/LanguageModelManager.mm b/Source/LanguageModelManager.mm index f2ec7493..68924f54 100644 --- a/Source/LanguageModelManager.mm +++ b/Source/LanguageModelManager.mm @@ -15,6 +15,8 @@ static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr. 
FastLM globalLanguageModel; FastLM globalLanguageModelPlainBopomofo; FastLM globalUserPhraseLanguageModel; +FastLM globalUserExcludedPhrasesMcBopomofo; +FastLM globalUserExcludedPhrasesPlainBopomofo; /* One excluded-phrases model per input mode. */ McBopomofo::UserOverrideModel globalUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife); @implementation LanguageModelManager @@ -42,13 +44,27 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM & + (void)loadUserPhrasesModel { globalUserPhraseLanguageModel.close(); - bool result = globalUserPhraseLanguageModel.open([[self userPhrasesDataPath] UTF8String]); + globalUserExcludedPhrasesMcBopomofo.close(); + globalUserExcludedPhrasesPlainBopomofo.close(); /* All three user models are closed first and then reopened from their files; a failed open is only logged. */ + + bool result = false; + + result = globalUserPhraseLanguageModel.open([[self userPhrasesDataPathMcBopomofo] UTF8String]); if (!result) { - NSLog(@"Failed to open user phrases."); + NSLog(@"Failed to open user phrases. %@", [self userPhrasesDataPathMcBopomofo]); + } + result = globalUserExcludedPhrasesMcBopomofo.open([[self excludedPhrasesDataPathMcBopomofo] UTF8String]); + if (!result) { + NSLog(@"Failed to open excluded phrases McBopomofo. %@", [self excludedPhrasesDataPathMcBopomofo]); + } + + result = globalUserExcludedPhrasesPlainBopomofo.open([[self excludedPhrasesDataPathPlainBopomofo] UTF8String]); + if (!result) { + NSLog(@"Failed to open excluded phrases Plain Bopomofo. 
%@", [self excludedPhrasesDataPathPlainBopomofo]); } } -+ (BOOL)checkIfUserLanguageModelFileExists ++ (BOOL)checkIfUserDataFolderExists { NSString *folderPath = [self dataFolderPath]; BOOL isFolder = NO; @@ -70,8 +86,11 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM & return NO; } } + return YES; +} - NSString *filePath = [self userPhrasesDataPath]; ++ (BOOL)checkIfFileExist:(NSString *)filePath /* Despite the name, this also tries to write an empty file when nothing exists at filePath. */ +{ if (![[NSFileManager defaultManager] fileExistsAtPath:filePath]) { BOOL result = [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:filePath atomically:YES]; if (!result) { @@ -82,15 +101,32 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM & return YES; } ++ (BOOL)checkIfUserLanguageModelFilesExist /* Checks the data folder, then the user-phrases file and both excluded-phrases files; returns NO on the first failure. */ +{ + if (![self checkIfUserDataFolderExists]) { + return NO; + } + if (![self checkIfFileExist:[self userPhrasesDataPathMcBopomofo]]) { + return NO; + } + if (![self checkIfFileExist:[self excludedPhrasesDataPathMcBopomofo]]) { + return NO; + } + if (![self checkIfFileExist:[self excludedPhrasesDataPathPlainBopomofo]]) { + return NO; + } + return YES; +} + + (BOOL)writeUserPhrase:(NSString *)userPhrase { - if (![self checkIfUserLanguageModelFileExists]) { + if (![self checkIfUserLanguageModelFilesExist]) { return NO; } NSString *currentMarkedPhrase = [userPhrase stringByAppendingString:@"\n"]; - NSString *path = [self userPhrasesDataPath]; + NSString *path = [self userPhrasesDataPathMcBopomofo]; NSFileHandle *file = [NSFileHandle fileHandleForUpdatingAtPath:path]; if (!file) { return NO; @@ -112,26 +148,46 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM & return userDictPath; } -+ (NSString *)userPhrasesDataPath ++ (NSString *)userPhrasesDataPathMcBopomofo { return [[self dataFolderPath] stringByAppendingPathComponent:@"data.txt"]; } - + (Formosa::Gramambular::FastLM *)languageModelMcBopomofo ++ (NSString *)excludedPhrasesDataPathMcBopomofo +{ + return [[self 
dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases.txt"]; +} + ++ (NSString *)excludedPhrasesDataPathPlainBopomofo +{ + return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"]; +} + + + (FastLM *)languageModelMcBopomofo { return &globalLanguageModel; } -+ (Formosa::Gramambular::FastLM *)languageModelPlainBopomofo ++ (FastLM *)languageModelPlainBopomofo { return &globalLanguageModelPlainBopomofo; } -+ (Formosa::Gramambular::FastLM *)userPhraseLanguageModel ++ (FastLM *)userPhraseLanguageModel { return &globalUserPhraseLanguageModel; } ++ (FastLM *)excludedPhrasesLanguageModelMcBopomofo /* Returns the address of the file-level model object, not a new instance. */ +{ + return &globalUserExcludedPhrasesMcBopomofo; +} + ++ (FastLM *)excludedPhrasesLanguageModelPlainBopomofo /* Returns the address of the file-level model object, not a new instance. */ +{ + return &globalUserExcludedPhrasesPlainBopomofo; +} + + (McBopomofo::UserOverrideModel *)userOverrideModel { return &globalUserOverrideModel; diff --git a/Source/McBopomofo-Bridging-Header.h b/Source/McBopomofo-Bridging-Header.h index 0b8d8d44..6e2a1a13 100644 --- a/Source/McBopomofo-Bridging-Header.h +++ b/Source/McBopomofo-Bridging-Header.h @@ -10,5 +10,5 @@ @interface LanguageModelManager : NSObject + (void)loadDataModels; + (void)loadUserPhrasesModel; -+ (BOOL)checkIfUserLanguageModelFileExists; ++ (BOOL)checkIfUserLanguageModelFilesExist; @end