From c02eabe12b2a80e6ce905b8b21f3439b63871942 Mon Sep 17 00:00:00 2001 From: ovadmin Date: Fri, 29 Sep 2017 11:33:27 +0800 Subject: [PATCH 01/10] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E6=97=A9=E6=9C=9F?= =?UTF-8?q?=E7=9A=84=E5=80=99=E9=81=B8=E6=AD=B7=E5=8F=B2=E8=A8=98=E6=86=B6?= =?UTF-8?q?=E6=A9=9F=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 這個機制從未正式發布,設計本身也有很多缺陷,因此決定移除。 --- Source/InputMethodController.mm | 130 -------------------------------- 1 file changed, 130 deletions(-) diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 5e207827..30979cab 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -73,7 +73,6 @@ static NSString *const kCandidateListTextSizeKey = @"CandidateListTextSize"; static NSString *const kSelectPhraseAfterCursorAsCandidatePreferenceKey = @"SelectPhraseAfterCursorAsCandidate"; static NSString *const kUseHorizontalCandidateListPreferenceKey = @"UseHorizontalCandidateList"; static NSString *const kComposingBufferSizePreferenceKey = @"ComposingBufferSize"; -static NSString *const kDisableUserCandidateSelectionLearning = @"DisableUserCandidateSelectionLearning"; static NSString *const kChooseCandidateUsingSpaceKey = @"ChooseCandidateUsingSpaceKey"; // advanced (usually optional) settings @@ -122,10 +121,7 @@ FastLM gLanguageModelPlainBopomofo; - (void)collectCandidates; - (size_t)actualCandidateCursorIndex; -- (NSString *)neighborTrigramString; -- (void)_performDeferredSaveUserCandidatesDictionary; -- (void)saveUserCandidatesDictionary; - (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client; - (void)beep; @@ -186,11 +182,6 @@ public: // create the composing buffer _composingBuffer = [[NSMutableString alloc] init]; - // populate the settings, by default, DISABLE user candidate learning - if (![[NSUserDefaults standardUserDefaults] objectForKey:kDisableUserCandidateSelectionLearning]) { - [[NSUserDefaults standardUserDefaults] setObject:(id)kCFBooleanTrue forKey:kDisableUserCandidateSelectionLearning]; - } - _inputMode = kBopomofoModeIdentifier; } @@ -204,36 +195,6 @@ public: NSMenuItem *preferenceMenuItem = [[[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"McBopomofo Preferences", @"") action:@selector(showPreferences:) keyEquivalent:@""] autorelease]; [menu addItem:preferenceMenuItem]; - // If Option key is pressed, show the learning-related menu - - #if DEBUG - //I think the following line is 10.6+ specific - if ([[NSEvent class] respondsToSelector:@selector(modifierFlags)] && ([NSEvent modifierFlags] & NSAlternateKeyMask)) { - - BOOL learningEnabled = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]; - - NSMenuItem *learnMenuItem = [[[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Enable Selection Learning", @"") action:@selector(toggleLearning:) keyEquivalent:@""] autorelease]; - if (learningEnabled) { - [learnMenuItem setState:NSOnState]; - } - else { - [learnMenuItem setState:NSOffState]; - } - - [menu addItem:learnMenuItem]; - - if (learningEnabled) { - NSString *clearMenuItemTitle = [NSString stringWithFormat:NSLocalizedString(@"Clear Learning Dictionary (%ju Items)", @""), (uintmax_t)[gCandidateLearningDictionary count]]; - NSMenuItem *clearMenuItem = [[[NSMenuItem alloc] initWithTitle:clearMenuItemTitle action:@selector(clearLearningDictionary:) keyEquivalent:@""] autorelease]; - [menu addItem:clearMenuItem]; - - - NSMenuItem *dumpMenuItem = [[[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Dump Learning Data to Console", @"") action:@selector(dumpLearningDictionary:) keyEquivalent:@""] autorelease]; - [menu addItem:dumpMenuItem]; - } - } - #endif //DEBUG - #if DEBUG NSMenuItem *updateCheckItem = [[[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Check for Updates…", @"") action:@selector(checkForUpdate:) keyEquivalent:@""] autorelease]; [menu addItem:updateCheckItem]; @@ -649,17 +610,6 @@ public: // then walk the lattice [self popOverflowComposingTextAndWalk:client]; - // see if we need to override the selection if a learned one exists - if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) { - NSString *trigram = [self neighborTrigramString]; - - // Lookup from the user dict to see if the trigram fit or not - NSString *overrideCandidateString = [gCandidateLearningDictionary objectForKey:trigram]; - if (overrideCandidateString) { - [self candidateSelected:(NSAttributedString *)overrideCandidateString]; - } - } - // then update the text _bpmfReadingBuffer->clear(); [self updateClientComposingBuffer:client]; @@ -1229,61 +1179,6 @@ public: return cursorIndex; } -- (NSString *)neighborTrigramString -{ - // gather the "trigram" for user candidate selection learning - - NSMutableArray *termArray = [NSMutableArray array]; - - size_t cursorIndex = [self actualCandidateCursorIndex]; - vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); - - const Node* prev = 0; - const Node* current = 0; - const Node* next = 0; - - size_t wni = 0; - size_t wnc = _walkedNodes.size(); - size_t accuSpanningLength = 0; - for (wni = 0; wni < wnc; wni++) { - NodeAnchor& anchor = _walkedNodes[wni]; - if (!anchor.node) { - continue; - } - - accuSpanningLength += anchor.spanningLength; - if (accuSpanningLength >= cursorIndex) { - prev = current; - current = anchor.node; - break; - } - - current = anchor.node; - } - - if (wni + 1 < wnc) { - next = _walkedNodes[wni + 1].node; - } - - string term; - if (prev) { - term = prev->currentKeyValue().key; - [termArray addObject:[NSString stringWithUTF8String:term.c_str()]]; - } - - if (current) { - term = current->currentKeyValue().key; - [termArray addObject:[NSString stringWithUTF8String:term.c_str()]]; - } - - if (next) { - term = next->currentKeyValue().key; - [termArray addObject:[NSString stringWithUTF8String:term.c_str()]]; - } - - return [termArray componentsJoinedByString:@"-"]; -} - - (void)_performDeferredSaveUserCandidatesDictionary { BOOL __unused success = [gCandidateLearningDictionary writeToFile:gUserCandidatesDictionaryPath atomically:YES]; @@ -1404,24 +1299,6 @@ public: [[NSApplication sharedApplication] activateIgnoringOtherApps:YES]; } -- (void)toggleLearning:(id)sender -{ - BOOL toggle = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]; - - [[NSUserDefaults standardUserDefaults] setBool:toggle forKey:kDisableUserCandidateSelectionLearning]; -} - -- (void)clearLearningDictionary:(id)sender -{ - [gCandidateLearningDictionary removeAllObjects]; - [self _performDeferredSaveUserCandidatesDictionary]; -} - -- (void)dumpLearningDictionary:(id)sender -{ - NSLog(@"%@", gCandidateLearningDictionary); -} - - (NSUInteger)candidateCountForController:(VTCandidateController *)controller { return [_candidates count]; @@ -1439,13 +1316,6 @@ public: // candidate selected, override the node with selection string selectedValue = [[_candidates objectAtIndex:index] UTF8String]; - if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) { - NSString *trigram = [self neighborTrigramString]; - NSString *selectedNSString = [NSString stringWithUTF8String:selectedValue.c_str()]; - [gCandidateLearningDictionary setObject:selectedNSString forKey:trigram]; - [self saveUserCandidatesDictionary]; - } - size_t cursorIndex = [self actualCandidateCursorIndex]; vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); From b1f45f26ef9a3221f2fd3a37e2d9678be64cf7bc Mon Sep 17 00:00:00 2001 From: ovadmin Date: Sat, 30 Sep 2017 11:00:48 +0800 Subject: [PATCH 02/10] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E9=81=B8=E5=AD=97=E6=A9=9F=E5=88=B6=20C++=20=E6=AA=94=E6=A1=88?= =?UTF-8?q?=20#include=20=E4=B8=8D=E5=AE=8C=E6=95=B4=E7=9A=84=E5=95=8F?= =?UTF-8?q?=E9=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/Engine/Gramambular/Bigram.h | 2 ++ Source/Engine/Gramambular/BlockReadingBuilder.h | 2 +- Source/Engine/Gramambular/KeyValuePair.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Gramambular/Bigram.h b/Source/Engine/Gramambular/Bigram.h index 194ea755..42ac9033 100644 --- a/Source/Engine/Gramambular/Bigram.h +++ b/Source/Engine/Gramambular/Bigram.h @@ -28,6 +28,8 @@ #ifndef Bigram_h #define Bigram_h +#include + #include "KeyValuePair.h" namespace Formosa { diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index f6909b06..ed6fd173 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -199,7 +199,7 @@ namespace Formosa { } } - const string BlockReadingBuilder::Join(vector::const_iterator begin, vector::const_iterator end, const string& separator) + inline const string BlockReadingBuilder::Join(vector::const_iterator begin, vector::const_iterator end, const string& separator) { string result; for (vector::const_iterator iter = begin ; iter != end ; ) { diff --git a/Source/Engine/Gramambular/KeyValuePair.h b/Source/Engine/Gramambular/KeyValuePair.h index ea6fd33d..0abbb891 100644 --- a/Source/Engine/Gramambular/KeyValuePair.h +++ b/Source/Engine/Gramambular/KeyValuePair.h @@ -28,6 +28,7 @@ #ifndef KeyValuePair_h #define KeyValuePair_h +#include #include namespace Formosa { From 89c9a52014ec1dc8cd5fde4bdae9816fdbe458b2 Mon Sep 17 00:00:00 2001 From: ovadmin Date: Sat, 30 Sep 2017 11:03:04 +0800 Subject: [PATCH 03/10] =?UTF-8?q?=E5=AF=A6=E4=BD=9C=E7=B0=A1=E5=96=AE?= =?UTF-8?q?=E7=9A=84=E7=94=A8=E6=88=B6=E9=81=B8=E5=AD=97=E8=A8=98=E6=86=B6?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 這個模型基本上只是根據游標前的兩個 unigram 記憶當前的用戶選字。當有超過 一個以上的用戶選字時,則要給每個選字評分。評分標準是選字頻率乘上一個透過 半衰期遞減的最近選字經歷時間。如此一來我們在「少用但最近選過」及「常用但 最近少選」之間取得一個平衡。半衰期透過經驗法則決定。 目前這個簡易模型並不存入磁碟,因此下一次重開機後就會洗掉重來。目前這樣選 擇純粹是因為模型有半衰期,因此長時間存放後還是會遺忘。 這個模型的好處是對既有詞庫提供詞的影響很小,對於連續單字詞的 override 有 還不錯的幫助。如此對於人名、地名、公司名等專有名詞,應該可以減少選字的頻 率。這個模型應用起來的缺點是,如果用戶修改的字詞原來是個雙字詞,例如先前 的兩個 unigram 分別是 A, BB ,而用戶想改的是 BB 的第二個字,使選完後的三 個字分別是 A, B', C,這個 C 往往是記不起來的,但如果一開始用戶逐字選取, 亦即在 BB 只出現 B 時就選取 B' 然後再打 C ,則 A, B', C 這個組合往往能被 正確記憶。實際發生原因在此不討論,但跟底層所用的組字網架的架構有關。確實 要改進的話得要從底層重新架構來下手,但至少目前這個模型給的建議偏保守,不 至干擾原有的預設選字。衡諸得失,這個模型提供一些邊際上的改善,應該還是值 得採用的。 --- McBopomofo.xcodeproj/project.pbxproj | 6 + Source/UserOverrideModel.cpp | 202 +++++++++++++++++++++++++++ Source/UserOverrideModel.h | 81 +++++++++++ 3 files changed, 289 insertions(+) create mode 100644 Source/UserOverrideModel.cpp create mode 100644 Source/UserOverrideModel.h diff --git a/McBopomofo.xcodeproj/project.pbxproj b/McBopomofo.xcodeproj/project.pbxproj index 830ae4c3..0d2c102f 100644 --- a/McBopomofo.xcodeproj/project.pbxproj +++ b/McBopomofo.xcodeproj/project.pbxproj @@ -48,6 +48,7 @@ 6AD7CBC815FE555000691B5B /* data-plain-bpmf.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */; }; 6AE210B215FC63CC003659FE /* PlainBopomofo.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */; }; 6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; }; + 6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -209,6 +210,8 @@ 6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "data-plain-bpmf.txt"; sourceTree = ""; }; 6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = PlainBopomofo.tiff; sourceTree = ""; }; 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = "PlainBopomofo@2x.tiff"; sourceTree = ""; }; + 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = UserOverrideModel.cpp; sourceTree = ""; }; + 6AE30A481F7F40B7008735BD /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserOverrideModel.h; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -283,6 +286,8 @@ 6A0D4ECC15FC0D6400ABF4B3 /* PreferencesWindowController.m */, 6A0D4ECD15FC0D6400ABF4B3 /* UpdateNotificationController.h */, 6A0D4ECE15FC0D6400ABF4B3 /* UpdateNotificationController.m */, + 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */, + 6AE30A481F7F40B7008735BD /* UserOverrideModel.h */, ); path = Source; sourceTree = ""; @@ -612,6 +617,7 @@ 6A0D4EFF15FC0DA600ABF4B3 /* VTHorizontalCandidateController.m in Sources */, 6A0D4F0015FC0DA600ABF4B3 /* VTHorizontalCandidateView.m in Sources */, 6A0D4F0115FC0DA600ABF4B3 /* VTVerticalCandidateController.m in Sources */, + 6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */, 6A0D4F0215FC0DA600ABF4B3 /* VTVerticalCandidateTableView.m in Sources */, 6A0D4F0315FC0DA600ABF4B3 /* VTVerticalKeyLabelStripView.m in Sources */, 6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */, diff --git a/Source/UserOverrideModel.cpp b/Source/UserOverrideModel.cpp new file mode 100644 index 00000000..9f38cf1d --- /dev/null +++ b/Source/UserOverrideModel.cpp @@ -0,0 +1,202 @@ +// +// UserOverrideModel.cpp +// +// Copyright (c) 2017 The McBopomofo Project. +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +#include "UserOverrideModel.h" + +#include +#include +#include + +using namespace McBopomofo; + +// About 20 generations. +static const double DecayThreshould = 1.0 / 1048576.0; + +static double Score(size_t eventCount, + size_t totalCount, + double eventTimestamp, + double timestamp, + double lambda); +static string WalkedNodesToKey(const std::vector& walkedNodes, + size_t cursorIndex); + +UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) + : m_capacity(capacity) { + assert(m_capacity > 0); + m_decayExponent = log(0.5) / decayConstant; +} + +void UserOverrideModel::observe(const std::vector& walkedNodes, + size_t cursorIndex, + const string& candidate, + double timestamp) { + string key = WalkedNodesToKey(walkedNodes, cursorIndex); + auto mapIter = m_lruMap.find(key); + if (mapIter == m_lruMap.end()) { + auto keyValuePair = KeyObservationPair(key, Observation()); + Observation& observation = keyValuePair.second; + observation.update(candidate, timestamp); + + m_lruList.push_front(keyValuePair); + auto listIter = m_lruList.begin(); + auto lruKeyValue = std::pair::iterator>(key, listIter); + m_lruMap.insert(lruKeyValue); + + if (m_lruList.size() > m_capacity) { + auto lastKeyValuePair = m_lruList.end(); + --lastKeyValuePair; + m_lruMap.erase(lastKeyValuePair->first); + m_lruList.pop_back(); + } + } else { + auto listIter = mapIter->second; + m_lruList.splice(m_lruList.begin(), m_lruList, listIter); + + auto& keyValuePair = *listIter; + Observation& observation = keyValuePair.second; + observation.update(candidate, timestamp); + } +} + +string UserOverrideModel::suggest(const std::vector& walkedNodes, + size_t cursorIndex, + double timestamp) { + string key = WalkedNodesToKey(walkedNodes, cursorIndex); + auto mapIter = m_lruMap.find(key); + if (mapIter == m_lruMap.end()) { + return string(); + } + + auto listIter = mapIter->second; + auto& keyValuePair = *listIter; + const Observation& observation = keyValuePair.second; + + string candidate; + double score = 0.0; + for (auto i = observation.overrides.begin(); + i != observation.overrides.end(); + ++i) { + const Override& o = i->second; + double overrideScore = Score(o.count, + observation.count, + o.timestamp, + timestamp, + m_decayExponent); + if (overrideScore == 0.0) { + continue; + } + + if (overrideScore > score) { + candidate = i->first; + score = overrideScore; + } + } + return candidate; +} + +void UserOverrideModel::Observation::update(const string& candidate, + double timestamp) { + count++; + auto& o = overrides[candidate]; + o.timestamp = timestamp; + o.count++; +} + +static double Score(size_t eventCount, + size_t totalCount, + double eventTimestamp, + double timestamp, + double lambda) { + double decay = exp((timestamp - eventTimestamp) * lambda); + if (decay < DecayThreshould) { + return 0.0; + } + + double prob = (double)eventCount / (double)totalCount; + return prob * decay; +} + +static string WalkedNodesToKey(const std::vector& walkedNodes, + size_t cursorIndex) { + std::stringstream s; + std::vector n; + size_t ll = 0; + for (std::vector::const_iterator i = walkedNodes.begin(); + i != walkedNodes.end(); + ++i) { + const auto& nn = *i; + n.push_back(nn); + ll += nn.spanningLength; + if (ll >= cursorIndex) { + break; + } + } + + std::vector::const_reverse_iterator r = n.rbegin(); + + if (r == n.rend()) { + return ""; + } + + string current = (*r).node->currentKeyValue().key; + ++r; + + s.clear(); + s.str(std::string()); + if (r != n.rend()) { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << (*r).node->currentKeyValue().value + << ")"; + ++r; + } else { + s << "()"; + } + string prev = s.str(); + + s.clear(); + s.str(std::string()); + if (r != n.rend()) { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << (*r).node->currentKeyValue().value + << ")"; + ++r; + } else { + s << "()"; + } + string anterior = s.str(); + + s.clear(); + s.str(std::string()); + s << "(" << anterior << "," << prev << "," << current << ")"; + + return s.str(); +} diff --git a/Source/UserOverrideModel.h b/Source/UserOverrideModel.h new file mode 100644 index 00000000..0b981923 --- /dev/null +++ b/Source/UserOverrideModel.h @@ -0,0 +1,81 @@ +// +// UserOverrideModel.h +// +// Copyright (c) 2017 The McBopomofo Project. +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +#ifndef USEROVERRIDEMODEL_H +#define USEROVERRIDEMODEL_H + +#include +#include +#include + +#include "Gramambular.h" + +namespace McBopomofo { + +using namespace Formosa::Gramambular; + +class UserOverrideModel { +public: + UserOverrideModel(size_t capacity, double decayConstant); + + void observe(const std::vector& walkedNodes, + size_t cursorIndex, + const string& candidate, + double timestamp); + + string suggest(const std::vector& walkedNodes, + size_t cursorIndex, + double timestamp); + +private: + struct Override { + size_t count; + double timestamp; + + Override() : count(0), timestamp(0.0) {} + }; + + struct Observation { + size_t count; + std::map overrides; + + Observation() : count(0) {} + void update(const string& candidate, double timestamp); + }; + + typedef std::pair KeyObservationPair; + + size_t m_capacity; + double m_decayExponent; + std::list m_lruList; + std::map::iterator> m_lruMap; +}; + +}; // namespace McBopomofo + +#endif + From 0a88fcdb73d898c470669db4c9165f28c91f698c Mon Sep 17 00:00:00 2001 From: ovadmin Date: Sat, 30 Sep 2017 11:22:44 +0800 Subject: [PATCH 04/10] =?UTF-8?q?=E5=B0=87=E7=94=A8=E6=88=B6=E9=81=B8?= =?UTF-8?q?=E5=AD=97=E8=A8=98=E6=86=B6=E6=A9=9F=E5=88=B6=E6=95=B4=E5=90=88?= =?UTF-8?q?=E5=85=A5=20InputMethodController?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/Engine/Gramambular/Node.h | 19 ++++++++++++++++++ Source/InputMethodController.h | 4 ++++ Source/InputMethodController.mm | 34 ++++++++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Gramambular/Node.h b/Source/Engine/Gramambular/Node.h index d4b7b432..8641ccdf 100644 --- a/Source/Engine/Gramambular/Node.h +++ b/Source/Engine/Gramambular/Node.h @@ -46,10 +46,12 @@ namespace Formosa { bool isCandidateFixed() const; const vector& candidates() const; void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); + void selectFloatingCandidateAtIndex(size_t index, double score); const string& key() const; double score() const; const KeyValuePair currentKeyValue() const; + double highestUnigramScore() const; protected: const LanguageModel* m_LM; @@ -165,6 +167,16 @@ namespace Formosa { m_candidateFixed = inFix; m_score = 99; } + + inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { + if (index >= m_unigrams.size()) { + m_selectedUnigramIndex = 0; + } else { + m_selectedUnigramIndex = index; + } + m_candidateFixed = false; + m_score = score; + } inline const string& Node::key() const { @@ -175,6 +187,13 @@ namespace Formosa { { return m_score; } + + inline double Node::highestUnigramScore() const { + if (m_unigrams.empty()) { + return 0.0; + } + return m_unigrams[0].score; + } inline const KeyValuePair Node::currentKeyValue() const { diff --git a/Source/InputMethodController.h b/Source/InputMethodController.h index 00aca989..210734ef 100644 --- a/Source/InputMethodController.h +++ b/Source/InputMethodController.h @@ -37,6 +37,7 @@ #import "Mandarin.h" #import "Gramambular.h" #import "FastLM.h" +#import "UserOverrideModel.h" @interface McBopomofoInputMethodController : IMKInputController { @@ -53,6 +54,9 @@ // latest walked path (trellis) using the Viterbi algorithm std::vector _walkedNodes; + // user override model + McBopomofo::UserOverrideModel *_uom; + // the latest composing buffer that is updated to the foreground app NSMutableString *_composingBuffer; NSInteger _latestReadingCursor; diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 30979cab..8ade6876 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -112,6 +112,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot // shared language model object that stores our phrase-term probability database FastLM gLanguageModel; FastLM gLanguageModelPlainBopomofo; +McBopomofo::UserOverrideModel gUserOverrideModel(200, 60.0); // private methods @interface McBopomofoInputMethodController () @@ -175,6 +176,7 @@ public: // create the lattice builder _languageModel = &gLanguageModel; _builder = new BlockReadingBuilder(_languageModel); + _uom = &gUserOverrideModel; // each Mandarin syllable is separated by a hyphen _builder->setJoinSeparator("-"); @@ -610,6 +612,33 @@ public: // then walk the lattice [self popOverflowComposingTextAndWalk:client]; + // get user override model suggestion + string overrideCandidate = _uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); + if (!overrideCandidate.empty()) { + size_t cursorIndex = [self actualCandidateCursorIndex]; + vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); + + double highestScore = 0.0; + for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + double score = ni->node->highestUnigramScore(); + if (score > highestScore) { + highestScore = score; + } + } + highestScore += 0.00001; + + for (vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + const vector& candidates = (*ni).node->candidates(); + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == overrideCandidate) { + // found our node + const_cast((*ni).node)->selectFloatingCandidateAtIndex(i, highestScore); + break; + } + } + } + } + // then update the text _bpmfReadingBuffer->clear(); [self updateClientComposingBuffer:client]; @@ -1315,10 +1344,11 @@ public: // candidate selected, override the node with selection string selectedValue = [[_candidates objectAtIndex:index] UTF8String]; - size_t cursorIndex = [self actualCandidateCursorIndex]; - vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); + _uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]); + + vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); for (vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { const vector& candidates = (*ni).node->candidates(); From a3f84f271372c171c99e63ab96a6c9454dc9958c Mon Sep 17 00:00:00 2001 From: ovadmin Date: Sat, 30 Sep 2017 11:30:14 +0800 Subject: [PATCH 05/10] =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E8=87=B3=200.9.9.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/Installer/Installer-Info.plist | 4 ++-- Source/McBopomofo-Info.plist | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Installer/Installer-Info.plist b/Source/Installer/Installer-Info.plist index c97b3aed..f2883b9a 100644 --- a/Source/Installer/Installer-Info.plist +++ b/Source/Installer/Installer-Info.plist @@ -17,11 +17,11 @@ CFBundlePackageType APPL CFBundleShortVersionString - 0.9.9 + 0.9.9.1 CFBundleSignature MBIN CFBundleVersion - 784 + 789 LSHasLocalizedDisplayName LSMinimumSystemVersion diff --git a/Source/McBopomofo-Info.plist b/Source/McBopomofo-Info.plist index 443b9ee4..5ffedd3c 100644 --- a/Source/McBopomofo-Info.plist +++ b/Source/McBopomofo-Info.plist @@ -17,11 +17,11 @@ CFBundlePackageType APPL CFBundleShortVersionString - 0.9.9 + 0.9.9.1 CFBundleSignature BPMF CFBundleVersion - 784 + 789 ComponentInputModeDict tsInputModeListKey From 5cfdc5c23f145c48c72774d2103c619197fbfca7 Mon Sep 17 00:00:00 2001 From: ovadmin Date: Mon, 2 Oct 2017 00:38:23 +0800 Subject: [PATCH 06/10] =?UTF-8?q?=E5=8A=A0=E5=A4=A7=E7=94=A8=E6=88=B6?= =?UTF-8?q?=E9=81=B8=E5=AD=97=E8=A9=9E=E6=A8=A1=E5=9E=8B=E7=9A=84=E5=AE=B9?= =?UTF-8?q?=E9=87=8F=E8=B7=9F=E5=8D=8A=E8=A1=B0=E6=9C=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/InputMethodController.mm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 8ade6876..2a26f017 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -112,7 +112,10 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot // shared language model object that stores our phrase-term probability database FastLM gLanguageModel; FastLM gLanguageModelPlainBopomofo; -McBopomofo::UserOverrideModel gUserOverrideModel(200, 60.0); + +static const int kUserOverrideModelCapacity = 500; +static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr. +McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife); // private methods @interface McBopomofoInputMethodController () From e205656e69fc2face09477b9d9324ebc105dd1af Mon Sep 17 00:00:00 2001 From: ovadmin Date: Mon, 2 Oct 2017 00:40:28 +0800 Subject: [PATCH 07/10] =?UTF-8?q?=E5=82=B3=E7=B5=B1=E6=B3=A8=E9=9F=B3?= =?UTF-8?q?=E4=B8=8D=E8=A6=81=E8=A8=98=E4=BD=8F=E7=94=A8=E6=88=B6=E9=81=B8?= =?UTF-8?q?=E5=AD=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/InputMethodController.mm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 2a26f017..1084192e 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -616,7 +616,9 @@ public: [self popOverflowComposingTextAndWalk:client]; // get user override model suggestion - string overrideCandidate = _uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); + string overrideCandidate = + (_inputMode == kPlainBopomofoModeIdentifier) ? "" : + _uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); if (!overrideCandidate.empty()) { size_t cursorIndex = [self actualCandidateCursorIndex]; vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); @@ -1349,7 +1351,9 @@ public: string selectedValue = [[_candidates objectAtIndex:index] UTF8String]; size_t cursorIndex = [self actualCandidateCursorIndex]; - _uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]); + if (_inputMode != kPlainBopomofoModeIdentifier) { + _uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]); + } vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); for (vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { From 5920ab3ce88b98a33ecdddc04bda736c255391f7 Mon Sep 17 00:00:00 2001 From: ovadmin Date: Mon, 2 Oct 2017 00:59:20 +0800 Subject: [PATCH 08/10] =?UTF-8?q?=E5=B0=8F=E5=B9=85=E9=87=8D=E6=A7=8B?= =?UTF-8?q?=E9=87=8D=E8=A4=87=E7=9A=84=E7=A8=8B=E5=BC=8F=E7=A2=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/InputMethodController.mm | 69 +++++++++++++++++---------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 1084192e..7fadd3c6 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -142,6 +142,36 @@ public: } }; +static const double kEpsilon = 0.000001; + +static double FindHighestScore(const vector& nodes, double epsilon) { + double highestScore = 0.0; + for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + double score = ni->node->highestUnigramScore(); + if (score > highestScore) { + highestScore = score; + } + } + return highestScore + epsilon; +} + +static void OverrideCandidate(const vector& nodes, const string& candidateValue, bool fixed, double floatingNodeOverrideScore) { + for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + const vector& candidates = (*ni).node->candidates(); + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == candidateValue) { + // found our node + if (fixed) { + const_cast((*ni).node)->selectCandidateAtIndex(i); + } else { + const_cast((*ni).node)->selectFloatingCandidateAtIndex(i, floatingNodeOverrideScore); + } + return; + } + } + } +} + @implementation McBopomofoInputMethodController - (void)dealloc { @@ -616,32 +646,14 @@ public: [self popOverflowComposingTextAndWalk:client]; // get user override model suggestion - string overrideCandidate = + string overrideValue = (_inputMode == kPlainBopomofoModeIdentifier) ? "" : _uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); - if (!overrideCandidate.empty()) { + if (!overrideValue.empty()) { size_t cursorIndex = [self actualCandidateCursorIndex]; vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); - - double highestScore = 0.0; - for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { - double score = ni->node->highestUnigramScore(); - if (score > highestScore) { - highestScore = score; - } - } - highestScore += 0.00001; - - for (vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { - const vector& candidates = (*ni).node->candidates(); - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == overrideCandidate) { - // found our node - const_cast((*ni).node)->selectFloatingCandidateAtIndex(i, highestScore); - break; - } - } - } + double highestScore = FindHighestScore(nodes, kEpsilon); + OverrideCandidate(nodes, overrideValue, false, highestScore); } // then update the text @@ -1356,18 +1368,7 @@ public: } vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); - for (vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { - const vector& candidates = (*ni).node->candidates(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == selectedValue) { - // found our node - const_cast((*ni).node)->selectCandidateAtIndex(i); - break; - } - } - } - + OverrideCandidate(nodes, selectedValue, true, 0.0); [_candidates removeAllObjects]; [self walk]; From c86a378863e8234ec9a42b943c391bd86cf998ea Mon Sep 17 00:00:00 2001 From: ovadmin Date: Mon, 2 Oct 2017 01:10:59 +0800 Subject: [PATCH 09/10] =?UTF-8?q?=E8=A8=88=E7=AE=97=E9=81=B8=E5=AD=97?= =?UTF-8?q?=E4=BA=8B=E4=BB=B6=E6=99=82=EF=BC=8C=E8=8B=A5=E9=81=87=E5=88=B0?= =?UTF-8?q?=E5=B8=B8=E7=94=A8=E6=A8=99=E9=BB=9E=EF=BC=8C=E5=B0=87=E6=A8=99?= =?UTF-8?q?=E9=BB=9E=E8=A6=96=E7=82=BA=E5=8F=A5=E5=B0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 如此一來標點後的單字詞,在計算時,等同於句首第一詞。 --- Source/UserOverrideModel.cpp | 41 +++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/Source/UserOverrideModel.cpp b/Source/UserOverrideModel.cpp index 9f38cf1d..8b2df522 100644 --- a/Source/UserOverrideModel.cpp +++ b/Source/UserOverrideModel.cpp @@ -41,6 +41,7 @@ static double Score(size_t eventCount, double eventTimestamp, double timestamp, double lambda); +static bool IsEndingPunctuation(const string& value); static string WalkedNodesToKey(const std::vector& walkedNodes, size_t cursorIndex); @@ -141,6 +142,10 @@ static double Score(size_t eventCount, return prob * decay; } +static bool IsEndingPunctuation(const string& value) { + return value == "," || value == "。" || value== "!" || value == "?" || + value == "」" || value == "』" || value== "”" || value == "”"; +} static string WalkedNodesToKey(const std::vector& walkedNodes, size_t cursorIndex) { std::stringstream s; @@ -169,12 +174,18 @@ static string WalkedNodesToKey(const std::vector& walkedNodes, s.clear(); s.str(std::string()); if (r != n.rend()) { - s << "(" - << (*r).node->currentKeyValue().key - << "," - << (*r).node->currentKeyValue().value - << ")"; - ++r; + string value = (*r).node->currentKeyValue().value; + if (IsEndingPunctuation(value)) { + s << "()"; + r = n.rend(); + } else { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << value + << ")"; + ++r; + } } else { s << "()"; } @@ -183,12 +194,18 @@ static string WalkedNodesToKey(const std::vector& walkedNodes, s.clear(); s.str(std::string()); if (r != n.rend()) { - s << "(" - << (*r).node->currentKeyValue().key - << "," - << (*r).node->currentKeyValue().value - << ")"; - ++r; + string value = (*r).node->currentKeyValue().value; + if (IsEndingPunctuation(value)) { + s << "()"; + r = n.rend(); + } else { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << value + << ")"; + ++r; + } } else { s << "()"; } From 93d1cabb3633fc7c9ccb881c020791d31c1a18b4 Mon Sep 17 00:00:00 2001 From: ovadmin Date: Mon, 2 Oct 2017 01:35:29 +0800 Subject: [PATCH 10/10] =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E8=87=B3=200.9.9.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Source/Installer/Installer-Info.plist | 4 ++-- Source/McBopomofo-Info.plist | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Installer/Installer-Info.plist b/Source/Installer/Installer-Info.plist index f2883b9a..c18c62c9 100644 --- a/Source/Installer/Installer-Info.plist +++ b/Source/Installer/Installer-Info.plist @@ -17,11 +17,11 @@ CFBundlePackageType APPL CFBundleShortVersionString - 0.9.9.1 + 0.9.9.2 CFBundleSignature MBIN CFBundleVersion - 789 + 794 LSHasLocalizedDisplayName LSMinimumSystemVersion diff --git a/Source/McBopomofo-Info.plist b/Source/McBopomofo-Info.plist index 5ffedd3c..f47b1692 100644 --- a/Source/McBopomofo-Info.plist +++ b/Source/McBopomofo-Info.plist @@ -17,11 +17,11 @@ CFBundlePackageType APPL CFBundleShortVersionString - 0.9.9.1 + 0.9.9.2 CFBundleSignature BPMF CFBundleVersion - 789 + 794 ComponentInputModeDict tsInputModeListKey