From 3760d24350fb3de9d1550aa370d9d2bbd141270f Mon Sep 17 00:00:00 2001 From: ovadmin Date: Fri, 29 Sep 2017 11:33:27 +0800 Subject: [PATCH 1/8] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E6=97=A9=E6=9C=9F?= =?UTF-8?q?=E7=9A=84=E5=80=99=E9=81=B8=E6=AD=B7=E5=8F=B2=E8=A8=98=E6=86=B6?= =?UTF-8?q?=E6=A9=9F=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 這個機制從未正式發布,設計本身也有很多缺陷,因此決定移除。 --- Source/InputMethodController.mm | 197 -------------------------------- 1 file changed, 197 deletions(-) diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 1a76847a..3b4ad001 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -76,7 +76,6 @@ static NSString *const kCandidateListTextSizeKey = @"CandidateListTextSize"; static NSString *const kSelectPhraseAfterCursorAsCandidatePreferenceKey = @"SelectPhraseAfterCursorAsCandidate"; static NSString *const kUseHorizontalCandidateListPreferenceKey = @"UseHorizontalCandidateList"; static NSString *const kComposingBufferSizePreferenceKey = @"ComposingBufferSize"; -static NSString *const kDisableUserCandidateSelectionLearning = @"DisableUserCandidateSelectionLearning"; static NSString *const kChooseCandidateUsingSpaceKey = @"ChooseCandidateUsingSpaceKey"; static NSString *const kChineseConversionEnabledKey = @"ChineseConversionEnabledKey"; static NSString *const kEscToCleanInputBufferKey = @"EscToCleanInputBufferKey"; @@ -104,9 +103,6 @@ enum { kDeleteKeyCode = 117 }; -// a global object for saving the "learned" user candidate selections -NSMutableDictionary *gCandidateLearningDictionary = nil; -NSString *gUserCandidatesDictionaryPath = nil; VTCandidateController *gCurrentCandidateController = nil; // if DEBUG is defined, a DOT file (GraphViz format) will be written to the @@ -133,10 +129,7 @@ static inline NSString *LocalizationNotNeeded(NSString *s) { - (void)collectCandidates; - (size_t)actualCandidateCursorIndex; -- (NSString *)neighborTrigramString; -- (void)_performDeferredSaveUserCandidatesDictionary; -- (void)saveUserCandidatesDictionary; - (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client; - (void)beep; @@ -190,11 +183,6 @@ public: // create the composing buffer _composingBuffer = [[NSMutableString alloc] init]; - // populate the settings, by default, DISABLE user candidate learning - if (![[NSUserDefaults standardUserDefaults] objectForKey:kDisableUserCandidateSelectionLearning]) { - [[NSUserDefaults standardUserDefaults] setObject:(id)kCFBooleanTrue forKey:kDisableUserCandidateSelectionLearning]; - } - _inputMode = kBopomofoModeIdentifier; _chineseConversionEnabled = [[NSUserDefaults standardUserDefaults] boolForKey:kChineseConversionEnabledKey]; } @@ -209,30 +197,6 @@ public: NSMenuItem *preferenceMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"McBopomofo Preferences", @"") action:@selector(showPreferences:) keyEquivalent:@""]; [menu addItem:preferenceMenuItem]; - // If Option key is pressed, show the learning-related menu - - #if DEBUG - //I think the following line is 10.6+ specific - if ([[NSEvent class] respondsToSelector:@selector(modifierFlags)] && ([NSEvent modifierFlags] & NSAlternateKeyMask)) { - - BOOL learningEnabled = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]; - - NSMenuItem *learnMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Enable Selection Learning", @"") action:@selector(toggleLearning:) keyEquivalent:@""]; - learnMenuItem.state = learningEnabled ? NSControlStateValueOn : NSControlStateValueOff; - [menu addItem:learnMenuItem]; - - if (learningEnabled) { - NSString *clearMenuItemTitle = [NSString stringWithFormat:NSLocalizedString(@"Clear Learning Dictionary (%ju Items)", @""), (uintmax_t)[gCandidateLearningDictionary count]]; - NSMenuItem *clearMenuItem = [[NSMenuItem alloc] initWithTitle:clearMenuItemTitle action:@selector(clearLearningDictionary:) keyEquivalent:@""]; - [menu addItem:clearMenuItem]; - - - NSMenuItem *dumpMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Dump Learning Data to Console", @"") action:@selector(dumpLearningDictionary:) keyEquivalent:@""]; - [menu addItem:dumpMenuItem]; - } - } - #endif //DEBUG - NSMenuItem *chineseConversionMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Chinese Conversion", @"") action:@selector(toggleChineseConverter:) keyEquivalent:@"G"]; chineseConversionMenuItem.keyEquivalentModifierMask = NSEventModifierFlagCommand | NSEventModifierFlagControl; chineseConversionMenuItem.state = _chineseConversionEnabled ? NSControlStateValueOn : NSControlStateValueOff; @@ -695,17 +659,6 @@ public: // then walk the lattice [self popOverflowComposingTextAndWalk:client]; - // see if we need to override the selection if a learned one exists - if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) { - NSString *trigram = [self neighborTrigramString]; - - // Lookup from the user dict to see if the trigram fit or not - NSString *overrideCandidateString = [gCandidateLearningDictionary objectForKey:trigram]; - if (overrideCandidateString) { - [self candidateSelected:(NSAttributedString *)overrideCandidateString]; - } - } - // then update the text _bpmfReadingBuffer->clear(); [self updateClientComposingBuffer:client]; @@ -1292,78 +1245,6 @@ public: return cursorIndex; } -- (NSString *)neighborTrigramString -{ - // gather the "trigram" for user candidate selection learning - - NSMutableArray *termArray = [NSMutableArray array]; - - size_t cursorIndex = [self actualCandidateCursorIndex]; - vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); - - const Node* prev = 0; - const Node* current = 0; - const Node* next = 0; - - size_t wni = 0; - size_t wnc = _walkedNodes.size(); - size_t accuSpanningLength = 0; - for (wni = 0; wni < wnc; wni++) { - NodeAnchor& anchor = _walkedNodes[wni]; - if (!anchor.node) { - continue; - } - - accuSpanningLength += anchor.spanningLength; - if (accuSpanningLength >= cursorIndex) { - prev = current; - current = anchor.node; - break; - } - - current = anchor.node; - } - - if (wni + 1 < wnc) { - next = _walkedNodes[wni + 1].node; - } - - string term; - if (prev) { - term = prev->currentKeyValue().key; - [termArray addObject:[NSString stringWithUTF8String:term.c_str()]]; - } - - if (current) { - term = current->currentKeyValue().key; - [termArray addObject:[NSString stringWithUTF8String:term.c_str()]]; - } - - if (next) { - term = next->currentKeyValue().key; - [termArray addObject:[NSString stringWithUTF8String:term.c_str()]]; - } - - return [termArray componentsJoinedByString:@"-"]; -} - -- (void)_performDeferredSaveUserCandidatesDictionary -{ - BOOL __unused success = [gCandidateLearningDictionary writeToFile:gUserCandidatesDictionaryPath atomically:YES]; -} - -- (void)saveUserCandidatesDictionary -{ - if (!gUserCandidatesDictionaryPath) { - return; - } - - [NSObject cancelPreviousPerformRequestsWithTarget:self selector:@selector(_performDeferredSaveUserCandidatesDictionary) object:nil]; - - // TODO: Const-ize the delay - [self performSelector:@selector(_performDeferredSaveUserCandidatesDictionary) withObject:nil afterDelay:5.0]; -} - - (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client { // set the candidate panel style @@ -1467,30 +1348,12 @@ public: [[NSApplication sharedApplication] activateIgnoringOtherApps:YES]; } -- (void)toggleLearning:(id)sender -{ - BOOL toggle = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]; - - [[NSUserDefaults standardUserDefaults] setBool:toggle forKey:kDisableUserCandidateSelectionLearning]; -} - - (void)toggleChineseConverter:(id)sender { _chineseConversionEnabled = !_chineseConversionEnabled; [[NSUserDefaults standardUserDefaults] setBool:_chineseConversionEnabled forKey:kChineseConversionEnabledKey]; } -- (void)clearLearningDictionary:(id)sender -{ - [gCandidateLearningDictionary removeAllObjects]; - [self _performDeferredSaveUserCandidatesDictionary]; -} - -- (void)dumpLearningDictionary:(id)sender -{ - NSLog(@"%@", gCandidateLearningDictionary); -} - - (NSUInteger)candidateCountForController:(VTCandidateController *)controller { return [_candidates count]; @@ -1508,13 +1371,6 @@ public: // candidate selected, override the node with selection string selectedValue = [[_candidates objectAtIndex:index] UTF8String]; - if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) { - NSString *trigram = [self neighborTrigramString]; - NSString *selectedNSString = [NSString stringWithUTF8String:selectedValue.c_str()]; - [gCandidateLearningDictionary setObject:selectedNSString forKey:trigram]; - [self saveUserCandidatesDictionary]; - } - size_t cursorIndex = [self actualCandidateCursorIndex]; _builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue); @@ -1545,57 +1401,4 @@ void LTLoadLanguageModel() { LTLoadLanguageModelFile(@"data", gLanguageModel); LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo); - - - // initialize the singleton learning dictionary - // putting singleton in @synchronized is the standard way in Objective-C - // to avoid race condition - gCandidateLearningDictionary = [[NSMutableDictionary alloc] init]; - - // the first instance is also responsible for loading the dictionary - NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES); - if (![paths count]) { - NSLog(@"Fatal error: cannot find Applicaiton Support directory."); - return; - } - - NSString *appSupportPath = [paths objectAtIndex:0]; - NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"McBopomofo"]; - - BOOL isDir = NO; - BOOL exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictPath isDirectory:&isDir]; - - if (exists) { - if (!isDir) { - NSLog(@"Fatal error: Path '%@' is not a directory", userDictPath); - return; - } - } - else { - NSError *error = nil; - BOOL success = [[NSFileManager defaultManager] createDirectoryAtPath:userDictPath withIntermediateDirectories:YES attributes:nil error:&error]; - if (!success) { - NSLog(@"Failed to create directory '%@', error: %@", userDictPath, error); - return; - } - } - - // TODO: Change this - NSString *userDictFile = [userDictPath stringByAppendingPathComponent:@"UserCandidatesCache.plist"]; - gUserCandidatesDictionaryPath = userDictFile; - - exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictFile isDirectory:&isDir]; - if (exists && !isDir) { - NSData *data = [NSData dataWithContentsOfFile:userDictFile]; - if (!data) { - return; - } - - id plist = [NSPropertyListSerialization propertyListWithData:data options:NSPropertyListImmutable format:NULL error:NULL]; - if (plist && [plist isKindOfClass:[NSDictionary class]]) { - [gCandidateLearningDictionary setDictionary:(NSDictionary *)plist]; - NSLog(@"User dictionary read, item count: %ju", (uintmax_t)[gCandidateLearningDictionary count]); - } - } - } From a17438b67a3df3e991f11146a424ad0df9cf981e Mon Sep 17 00:00:00 2001 From: ovadmin Date: Sat, 30 Sep 2017 11:00:48 +0800 Subject: [PATCH 2/8] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E9=81=B8=E5=AD=97=E6=A9=9F=E5=88=B6=20C++=20=E6=AA=94=E6=A1=88?= =?UTF-8?q?=20#include=20=E4=B8=8D=E5=AE=8C=E6=95=B4=E7=9A=84=E5=95=8F?= =?UTF-8?q?=E9=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/Engine/Gramambular/Bigram.h | 2 ++ Source/Engine/Gramambular/BlockReadingBuilder.h | 2 +- Source/Engine/Gramambular/KeyValuePair.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Gramambular/Bigram.h b/Source/Engine/Gramambular/Bigram.h index 194ea755..42ac9033 100644 --- a/Source/Engine/Gramambular/Bigram.h +++ b/Source/Engine/Gramambular/Bigram.h @@ -28,6 +28,8 @@ #ifndef Bigram_h #define Bigram_h +#include + #include "KeyValuePair.h" namespace Formosa { diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index f6909b06..ed6fd173 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -199,7 +199,7 @@ namespace Formosa { } } - const string BlockReadingBuilder::Join(vector::const_iterator begin, vector::const_iterator end, const string& separator) + inline const string BlockReadingBuilder::Join(vector::const_iterator begin, vector::const_iterator end, const string& separator) { string result; for (vector::const_iterator iter = begin ; iter != end ; ) { diff --git a/Source/Engine/Gramambular/KeyValuePair.h b/Source/Engine/Gramambular/KeyValuePair.h index ea6fd33d..0abbb891 100644 --- a/Source/Engine/Gramambular/KeyValuePair.h +++ b/Source/Engine/Gramambular/KeyValuePair.h @@ -28,6 +28,7 @@ #ifndef KeyValuePair_h #define KeyValuePair_h +#include #include namespace Formosa { From d672136843bc5f887adb6ee4bd9e41b27c1aa5a0 Mon Sep 17 00:00:00 2001 From: ovadmin Date: Sat, 30 Sep 2017 11:03:04 +0800 Subject: [PATCH 3/8] =?UTF-8?q?=E5=AF=A6=E4=BD=9C=E7=B0=A1=E5=96=AE?= =?UTF-8?q?=E7=9A=84=E7=94=A8=E6=88=B6=E9=81=B8=E5=AD=97=E8=A8=98=E6=86=B6?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 這個模型基本上只是根據游標前的兩個 unigram 記憶當前的用戶選字。當有超過 一個以上的用戶選字時,則要給每個選字評分。評分標準是選字頻率乘上一個透過 半衰期遞減的最近選字經歷時間。如此一來我們在「少用但最近選過」及「常用但 最近少選」之間取得一個平衡。半衰期透過經驗法則決定。 目前這個簡易模型並不存入磁碟,因此下一次重開機後就會洗掉重來。目前這樣選 擇純粹是因為模型有半衰期,因此長時間存放後還是會遺忘。 這個模型的好處是對既有詞庫提供詞的影響很小,對於連續單字詞的 override 有 還不錯的幫助。如此對於人名、地名、公司名等專有名詞,應該可以減少選字的頻 率。這個模型應用起來的缺點是,如果用戶修改的字詞原來是個雙字詞,例如先前 的兩個 unigram 分別是 A, BB ,而用戶想改的是 BB 的第二個字,使選完後的三 個字分別是 A, B', C,這個 C 往往是記不起來的,但如果一開始用戶逐字選取, 亦即在 BB 只出現 B 時就選取 B' 然後再打 C ,則 A, B', C 這個組合往往能被 正確記憶。實際發生原因在此不討論,但跟底層所用的組字網架的架構有關。確實 要改進的話得要從底層重新架構來下手,但至少目前這個模型給的建議偏保守,不 至干擾原有的預設選字。衡諸得失,這個模型提供一些邊際上的改善,應該還是值 得採用的。 --- McBopomofo.xcodeproj/project.pbxproj | 8 ++ Source/UserOverrideModel.cpp | 202 +++++++++++++++++++++++++++ Source/UserOverrideModel.h | 81 +++++++++++ 3 files changed, 291 insertions(+) create mode 100644 Source/UserOverrideModel.cpp create mode 100644 Source/UserOverrideModel.h diff --git a/McBopomofo.xcodeproj/project.pbxproj b/McBopomofo.xcodeproj/project.pbxproj index 64f7b808..aa27af5b 100644 --- a/McBopomofo.xcodeproj/project.pbxproj +++ b/McBopomofo.xcodeproj/project.pbxproj @@ -49,6 +49,7 @@ 6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */ = {isa = PBXBuildFile; fileRef = 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */; }; D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; }; D48550A325EBE689006A204C /* OpenCC in Frameworks */ = {isa = PBXBuildFile; productRef = D48550A225EBE689006A204C /* OpenCC */; }; + 6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -211,6 +212,8 @@ 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = OVNonModalAlertWindowController.m; sourceTree = ""; }; D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = ""; }; D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = ""; }; + 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = UserOverrideModel.cpp; sourceTree = ""; }; + 6AE30A481F7F40B7008735BD /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserOverrideModel.h; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -286,6 +289,10 @@ 6A0D4ECC15FC0D6400ABF4B3 /* PreferencesWindowController.m */, D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */, D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */, + 6A0D4ECD15FC0D6400ABF4B3 /* UpdateNotificationController.h */, + 6A0D4ECE15FC0D6400ABF4B3 /* UpdateNotificationController.m */, + 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */, + 6AE30A481F7F40B7008735BD /* UserOverrideModel.h */, ); path = Source; sourceTree = ""; @@ -647,6 +654,7 @@ 6A0D4F0015FC0DA600ABF4B3 /* VTHorizontalCandidateView.m in Sources */, 6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */, 6A0D4F0115FC0DA600ABF4B3 /* VTVerticalCandidateController.m in Sources */, + 6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */, 6A0D4F0215FC0DA600ABF4B3 /* VTVerticalCandidateTableView.m in Sources */, 6A0D4F0315FC0DA600ABF4B3 /* VTVerticalKeyLabelStripView.m in Sources */, D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */, diff --git a/Source/UserOverrideModel.cpp b/Source/UserOverrideModel.cpp new file mode 100644 index 00000000..9f38cf1d --- /dev/null +++ b/Source/UserOverrideModel.cpp @@ -0,0 +1,202 @@ +// +// UserOverrideModel.cpp +// +// Copyright (c) 2017 The McBopomofo Project. +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +#include "UserOverrideModel.h" + +#include +#include +#include + +using namespace McBopomofo; + +// About 20 generations. +static const double DecayThreshould = 1.0 / 1048576.0; + +static double Score(size_t eventCount, + size_t totalCount, + double eventTimestamp, + double timestamp, + double lambda); +static string WalkedNodesToKey(const std::vector& walkedNodes, + size_t cursorIndex); + +UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) + : m_capacity(capacity) { + assert(m_capacity > 0); + m_decayExponent = log(0.5) / decayConstant; +} + +void UserOverrideModel::observe(const std::vector& walkedNodes, + size_t cursorIndex, + const string& candidate, + double timestamp) { + string key = WalkedNodesToKey(walkedNodes, cursorIndex); + auto mapIter = m_lruMap.find(key); + if (mapIter == m_lruMap.end()) { + auto keyValuePair = KeyObservationPair(key, Observation()); + Observation& observation = keyValuePair.second; + observation.update(candidate, timestamp); + + m_lruList.push_front(keyValuePair); + auto listIter = m_lruList.begin(); + auto lruKeyValue = std::pair::iterator>(key, listIter); + m_lruMap.insert(lruKeyValue); + + if (m_lruList.size() > m_capacity) { + auto lastKeyValuePair = m_lruList.end(); + --lastKeyValuePair; + m_lruMap.erase(lastKeyValuePair->first); + m_lruList.pop_back(); + } + } else { + auto listIter = mapIter->second; + m_lruList.splice(m_lruList.begin(), m_lruList, listIter); + + auto& keyValuePair = *listIter; + Observation& observation = keyValuePair.second; + observation.update(candidate, timestamp); + } +} + +string UserOverrideModel::suggest(const std::vector& walkedNodes, + size_t cursorIndex, + double timestamp) { + string key = WalkedNodesToKey(walkedNodes, cursorIndex); + auto mapIter = m_lruMap.find(key); + if (mapIter == m_lruMap.end()) { + return string(); + } + + auto listIter = mapIter->second; + auto& keyValuePair = *listIter; + const Observation& observation = keyValuePair.second; + + string candidate; + double score = 0.0; + for (auto i = observation.overrides.begin(); + i != observation.overrides.end(); + ++i) { + const Override& o = i->second; + double overrideScore = Score(o.count, + observation.count, + o.timestamp, + timestamp, + m_decayExponent); + if (overrideScore == 0.0) { + continue; + } + + if (overrideScore > score) { + candidate = i->first; + score = overrideScore; + } + } + return candidate; +} + +void UserOverrideModel::Observation::update(const string& candidate, + double timestamp) { + count++; + auto& o = overrides[candidate]; + o.timestamp = timestamp; + o.count++; +} + +static double Score(size_t eventCount, + size_t totalCount, + double eventTimestamp, + double timestamp, + double lambda) { + double decay = exp((timestamp - eventTimestamp) * lambda); + if (decay < DecayThreshould) { + return 0.0; + } + + double prob = (double)eventCount / (double)totalCount; + return prob * decay; +} + +static string WalkedNodesToKey(const std::vector& walkedNodes, + size_t cursorIndex) { + std::stringstream s; + std::vector n; + size_t ll = 0; + for (std::vector::const_iterator i = walkedNodes.begin(); + i != walkedNodes.end(); + ++i) { + const auto& nn = *i; + n.push_back(nn); + ll += nn.spanningLength; + if (ll >= cursorIndex) { + break; + } + } + + std::vector::const_reverse_iterator r = n.rbegin(); + + if (r == n.rend()) { + return ""; + } + + string current = (*r).node->currentKeyValue().key; + ++r; + + s.clear(); + s.str(std::string()); + if (r != n.rend()) { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << (*r).node->currentKeyValue().value + << ")"; + ++r; + } else { + s << "()"; + } + string prev = s.str(); + + s.clear(); + s.str(std::string()); + if (r != n.rend()) { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << (*r).node->currentKeyValue().value + << ")"; + ++r; + } else { + s << "()"; + } + string anterior = s.str(); + + s.clear(); + s.str(std::string()); + s << "(" << anterior << "," << prev << "," << current << ")"; + + return s.str(); +} diff --git a/Source/UserOverrideModel.h b/Source/UserOverrideModel.h new file mode 100644 index 00000000..0b981923 --- /dev/null +++ b/Source/UserOverrideModel.h @@ -0,0 +1,81 @@ +// +// UserOverrideModel.h +// +// Copyright (c) 2017 The McBopomofo Project. +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +#ifndef USEROVERRIDEMODEL_H +#define USEROVERRIDEMODEL_H + +#include +#include +#include + +#include "Gramambular.h" + +namespace McBopomofo { + +using namespace Formosa::Gramambular; + +class UserOverrideModel { +public: + UserOverrideModel(size_t capacity, double decayConstant); + + void observe(const std::vector& walkedNodes, + size_t cursorIndex, + const string& candidate, + double timestamp); + + string suggest(const std::vector& walkedNodes, + size_t cursorIndex, + double timestamp); + +private: + struct Override { + size_t count; + double timestamp; + + Override() : count(0), timestamp(0.0) {} + }; + + struct Observation { + size_t count; + std::map overrides; + + Observation() : count(0) {} + void update(const string& candidate, double timestamp); + }; + + typedef std::pair KeyObservationPair; + + size_t m_capacity; + double m_decayExponent; + std::list m_lruList; + std::map::iterator> m_lruMap; +}; + +}; // namespace McBopomofo + +#endif + From 3e0e859febe10de17f5de6274bb7bbee0496a9a4 Mon Sep 17 00:00:00 2001 From: ovadmin Date: Sat, 30 Sep 2017 11:22:44 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E5=B0=87=E7=94=A8=E6=88=B6=E9=81=B8?= =?UTF-8?q?=E5=AD=97=E8=A8=98=E6=86=B6=E6=A9=9F=E5=88=B6=E6=95=B4=E5=90=88?= =?UTF-8?q?=E5=85=A5=20InputMethodController?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/Engine/Gramambular/Node.h | 19 +++++++++++++++++++ Source/InputMethodController.h | 4 ++++ Source/InputMethodController.mm | 30 ++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/Source/Engine/Gramambular/Node.h b/Source/Engine/Gramambular/Node.h index 89a74813..6e2bf45e 100644 --- a/Source/Engine/Gramambular/Node.h +++ b/Source/Engine/Gramambular/Node.h @@ -47,10 +47,12 @@ namespace Formosa { const vector& candidates() const; void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); void resetCandidate(); + void selectFloatingCandidateAtIndex(size_t index, double score); const string& key() const; double score() const; const KeyValuePair currentKeyValue() const; + double highestUnigramScore() const; protected: const LanguageModel* m_LM; @@ -175,6 +177,16 @@ namespace Formosa { m_score = m_unigrams[0].score; } } + + inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { + if (index >= m_unigrams.size()) { + m_selectedUnigramIndex = 0; + } else { + m_selectedUnigramIndex = index; + } + m_candidateFixed = false; + m_score = score; + } inline const string& Node::key() const { @@ -185,6 +197,13 @@ namespace Formosa { { return m_score; } + + inline double Node::highestUnigramScore() const { + if (m_unigrams.empty()) { + return 0.0; + } + return m_unigrams[0].score; + } inline const KeyValuePair Node::currentKeyValue() const { diff --git a/Source/InputMethodController.h b/Source/InputMethodController.h index 8741cbed..71437a00 100644 --- a/Source/InputMethodController.h +++ b/Source/InputMethodController.h @@ -37,6 +37,7 @@ #import "Mandarin.h" #import "Gramambular.h" #import "FastLM.h" +#import "UserOverrideModel.h" @interface McBopomofoInputMethodController : IMKInputController { @@ -53,6 +54,9 @@ // latest walked path (trellis) using the Viterbi algorithm std::vector _walkedNodes; + // user override model + McBopomofo::UserOverrideModel *_uom; + // the latest composing buffer that is updated to the foreground app NSMutableString *_composingBuffer; NSInteger _latestReadingCursor; diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 3b4ad001..b11e4609 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -114,6 +114,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot // shared language model object that stores our phrase-term probability database FastLM gLanguageModel; FastLM gLanguageModelPlainBopomofo; +McBopomofo::UserOverrideModel gUserOverrideModel(200, 60.0); // https://clang-analyzer.llvm.org/faq.html __attribute__((annotate("returns_localized_nsstring"))) @@ -176,6 +177,7 @@ public: // create the lattice builder _languageModel = &gLanguageModel; _builder = new BlockReadingBuilder(_languageModel); + _uom = &gUserOverrideModel; // each Mandarin syllable is separated by a hyphen _builder->setJoinSeparator("-"); @@ -659,6 +661,33 @@ public: // then walk the lattice [self popOverflowComposingTextAndWalk:client]; + // get user override model suggestion + string overrideCandidate = _uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); + if (!overrideCandidate.empty()) { + size_t cursorIndex = [self actualCandidateCursorIndex]; + vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); + + double highestScore = 0.0; + for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + double score = ni->node->highestUnigramScore(); + if (score > highestScore) { + highestScore = score; + } + } + highestScore += 0.00001; + + for (vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + const vector& candidates = (*ni).node->candidates(); + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == overrideCandidate) { + // found our node + const_cast((*ni).node)->selectFloatingCandidateAtIndex(i, highestScore); + break; + } + } + } + } + // then update the text _bpmfReadingBuffer->clear(); [self updateClientComposingBuffer:client]; @@ -1373,6 +1402,7 @@ public: size_t cursorIndex = [self actualCandidateCursorIndex]; _builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue); + _uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]); [_candidates removeAllObjects]; From eef6f8c0ce33646726954f875192923228f9cd8f Mon Sep 17 00:00:00 2001 From: ovadmin Date: Mon, 2 Oct 2017 00:38:23 +0800 Subject: [PATCH 5/8] =?UTF-8?q?=E5=8A=A0=E5=A4=A7=E7=94=A8=E6=88=B6?= =?UTF-8?q?=E9=81=B8=E5=AD=97=E8=A9=9E=E6=A8=A1=E5=9E=8B=E7=9A=84=E5=AE=B9?= =?UTF-8?q?=E9=87=8F=E8=B7=9F=E5=8D=8A=E8=A1=B0=E6=9C=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/InputMethodController.mm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index b11e4609..32819493 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -114,7 +114,10 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot // shared language model object that stores our phrase-term probability database FastLM gLanguageModel; FastLM gLanguageModelPlainBopomofo; -McBopomofo::UserOverrideModel gUserOverrideModel(200, 60.0); + +static const int kUserOverrideModelCapacity = 500; +static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr. +McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife); // https://clang-analyzer.llvm.org/faq.html __attribute__((annotate("returns_localized_nsstring"))) From 2e8e78971ce8140bdf23af453fc8d002fb4c8c0a Mon Sep 17 00:00:00 2001 From: ovadmin Date: Mon, 2 Oct 2017 00:40:28 +0800 Subject: [PATCH 6/8] =?UTF-8?q?=E5=82=B3=E7=B5=B1=E6=B3=A8=E9=9F=B3?= =?UTF-8?q?=E4=B8=8D=E8=A6=81=E8=A8=98=E4=BD=8F=E7=94=A8=E6=88=B6=E9=81=B8?= =?UTF-8?q?=E5=AD=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/InputMethodController.mm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 32819493..dfc43e3d 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -665,7 +665,9 @@ public: [self popOverflowComposingTextAndWalk:client]; // get user override model suggestion - string overrideCandidate = _uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); + string overrideCandidate = + (_inputMode == kPlainBopomofoModeIdentifier) ? "" : + _uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); if (!overrideCandidate.empty()) { size_t cursorIndex = [self actualCandidateCursorIndex]; vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); @@ -1405,7 +1407,9 @@ public: size_t cursorIndex = [self actualCandidateCursorIndex]; _builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue); - _uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]); + if (_inputMode != kPlainBopomofoModeIdentifier) { + _uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]); + } [_candidates removeAllObjects]; From aeb774a8ed14d8c7c55652fa60e4758a20cdaa60 Mon Sep 17 00:00:00 2001 From: ovadmin Date: Mon, 2 Oct 2017 00:59:20 +0800 Subject: [PATCH 7/8] =?UTF-8?q?=E5=B0=8F=E5=B9=85=E9=87=8D=E6=A7=8B?= =?UTF-8?q?=E9=87=8D=E8=A4=87=E7=9A=84=E7=A8=8B=E5=BC=8F=E7=A2=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/Engine/Gramambular/Grid.h | 29 ++++++++++++++++++++++++ Source/InputMethodController.mm | 39 ++++++++++++++------------------ 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/Source/Engine/Gramambular/Grid.h b/Source/Engine/Gramambular/Grid.h index e13c8eab..d4103c99 100644 --- a/Source/Engine/Gramambular/Grid.h +++ b/Source/Engine/Gramambular/Grid.h @@ -47,7 +47,18 @@ namespace Formosa { size_t width() const; vector nodesEndingAt(size_t inLocation); vector nodesCrossingOrEndingAt(size_t inLocation); + + // "Freeze" the node with the unigram that represents the selected canditate value. + // After this, the node that contains the unigram will always be evaluated to that + // unigram, while all other overlapping nodes will be reset to their initial state + // (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.) void fixNodeSelectedCandidate(size_t location, const string& value); + + // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only + // boost the unigram that represents the value with an overriding score. This + // has the same side effect as fixNodeSelectedCandidate, which is that all other + // overlapping nodes will be reset to their initial state. + void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); const string dumpDOT(); @@ -194,6 +205,24 @@ namespace Formosa { } } } + + inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore) + { + vector nodes = nodesCrossingOrEndingAt(location); + for (auto nodeAnchor : nodes) { + auto candidates = nodeAnchor.node->candidates(); + + // Reset the candidate-fixed state of every node at the location. + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == value) { + const_cast(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); + break; + } + } + } + } inline const string Grid::dumpDOT() { diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index dfc43e3d..9e48f829 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -150,6 +150,19 @@ public: } }; +static const double kEpsilon = 0.000001; + +static double FindHighestScore(const vector& nodes, double epsilon) { + double highestScore = 0.0; + for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + double score = ni->node->highestUnigramScore(); + if (score > highestScore) { + highestScore = score; + } + } + return highestScore + epsilon; +} + @implementation McBopomofoInputMethodController - (void)dealloc { @@ -665,32 +678,14 @@ public: [self popOverflowComposingTextAndWalk:client]; // get user override model suggestion - string overrideCandidate = + string overrideValue = (_inputMode == kPlainBopomofoModeIdentifier) ? "" : _uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); - if (!overrideCandidate.empty()) { + if (!overrideValue.empty()) { size_t cursorIndex = [self actualCandidateCursorIndex]; vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); - - double highestScore = 0.0; - for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { - double score = ni->node->highestUnigramScore(); - if (score > highestScore) { - highestScore = score; - } - } - highestScore += 0.00001; - - for (vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { - const vector& candidates = (*ni).node->candidates(); - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == overrideCandidate) { - // found our node - const_cast((*ni).node)->selectFloatingCandidateAtIndex(i, highestScore); - break; - } - } - } + double highestScore = FindHighestScore(nodes, kEpsilon); + _builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, highestScore); } // then update the text From 789d2a5687f6c3194795f2e05992df9286239119 Mon Sep 17 00:00:00 2001 From: ovadmin Date: Mon, 2 Oct 2017 01:10:59 +0800 Subject: [PATCH 8/8] =?UTF-8?q?=E8=A8=88=E7=AE=97=E9=81=B8=E5=AD=97?= =?UTF-8?q?=E4=BA=8B=E4=BB=B6=E6=99=82=EF=BC=8C=E8=8B=A5=E9=81=87=E5=88=B0?= =?UTF-8?q?=E5=B8=B8=E7=94=A8=E6=A8=99=E9=BB=9E=EF=BC=8C=E5=B0=87=E6=A8=99?= =?UTF-8?q?=E9=BB=9E=E8=A6=96=E7=82=BA=E5=8F=A5=E5=B0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 如此一來標點後的單字詞,在計算時,等同於句首第一詞。 --- Source/UserOverrideModel.cpp | 41 +++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/Source/UserOverrideModel.cpp b/Source/UserOverrideModel.cpp index 9f38cf1d..8b2df522 100644 --- a/Source/UserOverrideModel.cpp +++ b/Source/UserOverrideModel.cpp @@ -41,6 +41,7 @@ static double Score(size_t eventCount, double eventTimestamp, double timestamp, double lambda); +static bool IsEndingPunctuation(const string& value); static string WalkedNodesToKey(const std::vector& walkedNodes, size_t cursorIndex); @@ -141,6 +142,10 @@ static double Score(size_t eventCount, return prob * decay; } +static bool IsEndingPunctuation(const string& value) { + return value == "," || value == "。" || value== "!" || value == "?" || + value == "」" || value == "』" || value== "”" || value == "”"; +} static string WalkedNodesToKey(const std::vector& walkedNodes, size_t cursorIndex) { std::stringstream s; @@ -169,12 +174,18 @@ static string WalkedNodesToKey(const std::vector& walkedNodes, s.clear(); s.str(std::string()); if (r != n.rend()) { - s << "(" - << (*r).node->currentKeyValue().key - << "," - << (*r).node->currentKeyValue().value - << ")"; - ++r; + string value = (*r).node->currentKeyValue().value; + if (IsEndingPunctuation(value)) { + s << "()"; + r = n.rend(); + } else { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << value + << ")"; + ++r; + } } else { s << "()"; } @@ -183,12 +194,18 @@ static string WalkedNodesToKey(const std::vector& walkedNodes, s.clear(); s.str(std::string()); if (r != n.rend()) { - s << "(" - << (*r).node->currentKeyValue().key - << "," - << (*r).node->currentKeyValue().value - << ")"; - ++r; + string value = (*r).node->currentKeyValue().value; + if (IsEndingPunctuation(value)) { + s << "()"; + r = n.rend(); + } else { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << value + << ")"; + ++r; + } } else { s << "()"; }