diff --git a/McBopomofo.xcodeproj/project.pbxproj b/McBopomofo.xcodeproj/project.pbxproj index 64f7b808..aa27af5b 100644 --- a/McBopomofo.xcodeproj/project.pbxproj +++ b/McBopomofo.xcodeproj/project.pbxproj @@ -49,6 +49,7 @@ 6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */ = {isa = PBXBuildFile; fileRef = 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */; }; D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; }; D48550A325EBE689006A204C /* OpenCC in Frameworks */ = {isa = PBXBuildFile; productRef = D48550A225EBE689006A204C /* OpenCC */; }; + 6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -211,6 +212,8 @@ 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = OVNonModalAlertWindowController.m; sourceTree = ""; }; D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = ""; }; D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = ""; }; + 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = UserOverrideModel.cpp; sourceTree = ""; }; + 6AE30A481F7F40B7008735BD /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserOverrideModel.h; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -286,6 +289,10 @@ 6A0D4ECC15FC0D6400ABF4B3 /* PreferencesWindowController.m */, D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */, D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */, + 6A0D4ECD15FC0D6400ABF4B3 /* UpdateNotificationController.h */, + 6A0D4ECE15FC0D6400ABF4B3 /* UpdateNotificationController.m */, + 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */, + 6AE30A481F7F40B7008735BD /* UserOverrideModel.h */, ); path = Source; sourceTree = ""; @@ -647,6 +654,7 @@ 6A0D4F0015FC0DA600ABF4B3 /* VTHorizontalCandidateView.m in Sources */, 6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */, 6A0D4F0115FC0DA600ABF4B3 /* VTVerticalCandidateController.m in Sources */, + 6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */, 6A0D4F0215FC0DA600ABF4B3 /* VTVerticalCandidateTableView.m in Sources */, 6A0D4F0315FC0DA600ABF4B3 /* VTVerticalKeyLabelStripView.m in Sources */, D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */, diff --git a/Source/Engine/Gramambular/Bigram.h b/Source/Engine/Gramambular/Bigram.h index 194ea755..42ac9033 100644 --- a/Source/Engine/Gramambular/Bigram.h +++ b/Source/Engine/Gramambular/Bigram.h @@ -28,6 +28,8 @@ #ifndef Bigram_h #define Bigram_h +#include + #include "KeyValuePair.h" namespace Formosa { diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index f6909b06..ed6fd173 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -199,7 +199,7 @@ namespace Formosa { } } - const string BlockReadingBuilder::Join(vector::const_iterator begin, vector::const_iterator end, const string& separator) + inline const string BlockReadingBuilder::Join(vector::const_iterator begin, vector::const_iterator end, const string& separator) { string result; for (vector::const_iterator iter = begin ; iter != end ; ) { diff --git a/Source/Engine/Gramambular/Grid.h b/Source/Engine/Gramambular/Grid.h index e13c8eab..d4103c99 100644 --- a/Source/Engine/Gramambular/Grid.h +++ b/Source/Engine/Gramambular/Grid.h @@ -47,7 +47,18 @@ namespace Formosa { size_t width() const; vector nodesEndingAt(size_t inLocation); vector nodesCrossingOrEndingAt(size_t inLocation); + + // "Freeze" the node with the unigram that represents the selected canditate value. + // After this, the node that contains the unigram will always be evaluated to that + // unigram, while all other overlapping nodes will be reset to their initial state + // (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.) void fixNodeSelectedCandidate(size_t location, const string& value); + + // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only + // boost the unigram that represents the value with an overriding score. This + // has the same side effect as fixNodeSelectedCandidate, which is that all other + // overlapping nodes will be reset to their initial state. + void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); const string dumpDOT(); @@ -194,6 +205,24 @@ namespace Formosa { } } } + + inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore) + { + vector nodes = nodesCrossingOrEndingAt(location); + for (auto nodeAnchor : nodes) { + auto candidates = nodeAnchor.node->candidates(); + + // Reset the candidate-fixed state of every node at the location. + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == value) { + const_cast(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); + break; + } + } + } + } inline const string Grid::dumpDOT() { diff --git a/Source/Engine/Gramambular/KeyValuePair.h b/Source/Engine/Gramambular/KeyValuePair.h index ea6fd33d..0abbb891 100644 --- a/Source/Engine/Gramambular/KeyValuePair.h +++ b/Source/Engine/Gramambular/KeyValuePair.h @@ -28,6 +28,7 @@ #ifndef KeyValuePair_h #define KeyValuePair_h +#include #include namespace Formosa { diff --git a/Source/Engine/Gramambular/Node.h b/Source/Engine/Gramambular/Node.h index 89a74813..6e2bf45e 100644 --- a/Source/Engine/Gramambular/Node.h +++ b/Source/Engine/Gramambular/Node.h @@ -47,10 +47,12 @@ namespace Formosa { const vector& candidates() const; void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); void resetCandidate(); + void selectFloatingCandidateAtIndex(size_t index, double score); const string& key() const; double score() const; const KeyValuePair currentKeyValue() const; + double highestUnigramScore() const; protected: const LanguageModel* m_LM; @@ -175,6 +177,16 @@ namespace Formosa { m_score = m_unigrams[0].score; } } + + inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { + if (index >= m_unigrams.size()) { + m_selectedUnigramIndex = 0; + } else { + m_selectedUnigramIndex = index; + } + m_candidateFixed = false; + m_score = score; + } inline const string& Node::key() const { @@ -185,6 +197,13 @@ namespace Formosa { { return m_score; } + + inline double Node::highestUnigramScore() const { + if (m_unigrams.empty()) { + return 0.0; + } + return m_unigrams[0].score; + } inline const KeyValuePair Node::currentKeyValue() const { diff --git a/Source/InputMethodController.h b/Source/InputMethodController.h index 8741cbed..71437a00 100644 --- a/Source/InputMethodController.h +++ b/Source/InputMethodController.h @@ -37,6 +37,7 @@ #import "Mandarin.h" #import "Gramambular.h" #import "FastLM.h" +#import "UserOverrideModel.h" @interface McBopomofoInputMethodController : IMKInputController { @@ -53,6 +54,9 @@ // latest walked path (trellis) using the Viterbi algorithm std::vector _walkedNodes; + // user override model + McBopomofo::UserOverrideModel *_uom; + // the latest composing buffer that is updated to the foreground app NSMutableString *_composingBuffer; NSInteger _latestReadingCursor; diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 1a76847a..9e48f829 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -76,7 +76,6 @@ static NSString *const kCandidateListTextSizeKey = @"CandidateListTextSize"; static NSString *const kSelectPhraseAfterCursorAsCandidatePreferenceKey = @"SelectPhraseAfterCursorAsCandidate"; static NSString *const kUseHorizontalCandidateListPreferenceKey = @"UseHorizontalCandidateList"; static NSString *const kComposingBufferSizePreferenceKey = @"ComposingBufferSize"; -static NSString *const kDisableUserCandidateSelectionLearning = @"DisableUserCandidateSelectionLearning"; static NSString *const kChooseCandidateUsingSpaceKey = @"ChooseCandidateUsingSpaceKey"; static NSString *const kChineseConversionEnabledKey = @"ChineseConversionEnabledKey"; static NSString *const kEscToCleanInputBufferKey = @"EscToCleanInputBufferKey"; @@ -104,9 +103,6 @@ enum { kDeleteKeyCode = 117 }; -// a global object for saving the "learned" user candidate selections -NSMutableDictionary *gCandidateLearningDictionary = nil; -NSString *gUserCandidatesDictionaryPath = nil; VTCandidateController *gCurrentCandidateController = nil; // if DEBUG is defined, a DOT file (GraphViz format) will be written to the @@ -119,6 +115,10 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot FastLM gLanguageModel; FastLM gLanguageModelPlainBopomofo; +static const int kUserOverrideModelCapacity = 500; +static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr. +McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife); + // https://clang-analyzer.llvm.org/faq.html __attribute__((annotate("returns_localized_nsstring"))) static inline NSString *LocalizationNotNeeded(NSString *s) { @@ -133,10 +133,7 @@ static inline NSString *LocalizationNotNeeded(NSString *s) { - (void)collectCandidates; - (size_t)actualCandidateCursorIndex; -- (NSString *)neighborTrigramString; -- (void)_performDeferredSaveUserCandidatesDictionary; -- (void)saveUserCandidatesDictionary; - (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client; - (void)beep; @@ -153,6 +150,19 @@ public: } }; +static const double kEpsilon = 0.000001; + +static double FindHighestScore(const vector& nodes, double epsilon) { + double highestScore = 0.0; + for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + double score = ni->node->highestUnigramScore(); + if (score > highestScore) { + highestScore = score; + } + } + return highestScore + epsilon; +} + @implementation McBopomofoInputMethodController - (void)dealloc { @@ -183,6 +193,7 @@ public: // create the lattice builder _languageModel = &gLanguageModel; _builder = new BlockReadingBuilder(_languageModel); + _uom = &gUserOverrideModel; // each Mandarin syllable is separated by a hyphen _builder->setJoinSeparator("-"); @@ -190,11 +201,6 @@ public: // create the composing buffer _composingBuffer = [[NSMutableString alloc] init]; - // populate the settings, by default, DISABLE user candidate learning - if (![[NSUserDefaults standardUserDefaults] objectForKey:kDisableUserCandidateSelectionLearning]) { - [[NSUserDefaults standardUserDefaults] setObject:(id)kCFBooleanTrue forKey:kDisableUserCandidateSelectionLearning]; - } - _inputMode = kBopomofoModeIdentifier; _chineseConversionEnabled = [[NSUserDefaults standardUserDefaults] boolForKey:kChineseConversionEnabledKey]; } @@ -209,30 +215,6 @@ public: NSMenuItem *preferenceMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"McBopomofo Preferences", @"") action:@selector(showPreferences:) keyEquivalent:@""]; [menu addItem:preferenceMenuItem]; - // If Option key is pressed, show the learning-related menu - - #if DEBUG - //I think the following line is 10.6+ specific - if ([[NSEvent class] respondsToSelector:@selector(modifierFlags)] && ([NSEvent modifierFlags] & NSAlternateKeyMask)) { - - BOOL learningEnabled = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]; - - NSMenuItem *learnMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Enable Selection Learning", @"") action:@selector(toggleLearning:) keyEquivalent:@""]; - learnMenuItem.state = learningEnabled ? NSControlStateValueOn : NSControlStateValueOff; - [menu addItem:learnMenuItem]; - - if (learningEnabled) { - NSString *clearMenuItemTitle = [NSString stringWithFormat:NSLocalizedString(@"Clear Learning Dictionary (%ju Items)", @""), (uintmax_t)[gCandidateLearningDictionary count]]; - NSMenuItem *clearMenuItem = [[NSMenuItem alloc] initWithTitle:clearMenuItemTitle action:@selector(clearLearningDictionary:) keyEquivalent:@""]; - [menu addItem:clearMenuItem]; - - - NSMenuItem *dumpMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Dump Learning Data to Console", @"") action:@selector(dumpLearningDictionary:) keyEquivalent:@""]; - [menu addItem:dumpMenuItem]; - } - } - #endif //DEBUG - NSMenuItem *chineseConversionMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Chinese Conversion", @"") action:@selector(toggleChineseConverter:) keyEquivalent:@"G"]; chineseConversionMenuItem.keyEquivalentModifierMask = NSEventModifierFlagCommand | NSEventModifierFlagControl; chineseConversionMenuItem.state = _chineseConversionEnabled ? NSControlStateValueOn : NSControlStateValueOff; @@ -695,15 +677,15 @@ public: // then walk the lattice [self popOverflowComposingTextAndWalk:client]; - // see if we need to override the selection if a learned one exists - if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) { - NSString *trigram = [self neighborTrigramString]; - - // Lookup from the user dict to see if the trigram fit or not - NSString *overrideCandidateString = [gCandidateLearningDictionary objectForKey:trigram]; - if (overrideCandidateString) { - [self candidateSelected:(NSAttributedString *)overrideCandidateString]; - } + // get user override model suggestion + string overrideValue = + (_inputMode == kPlainBopomofoModeIdentifier) ? "" : + _uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); + if (!overrideValue.empty()) { + size_t cursorIndex = [self actualCandidateCursorIndex]; + vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); + double highestScore = FindHighestScore(nodes, kEpsilon); + _builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, highestScore); } // then update the text @@ -1292,78 +1274,6 @@ public: return cursorIndex; } -- (NSString *)neighborTrigramString -{ - // gather the "trigram" for user candidate selection learning - - NSMutableArray *termArray = [NSMutableArray array]; - - size_t cursorIndex = [self actualCandidateCursorIndex]; - vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); - - const Node* prev = 0; - const Node* current = 0; - const Node* next = 0; - - size_t wni = 0; - size_t wnc = _walkedNodes.size(); - size_t accuSpanningLength = 0; - for (wni = 0; wni < wnc; wni++) { - NodeAnchor& anchor = _walkedNodes[wni]; - if (!anchor.node) { - continue; - } - - accuSpanningLength += anchor.spanningLength; - if (accuSpanningLength >= cursorIndex) { - prev = current; - current = anchor.node; - break; - } - - current = anchor.node; - } - - if (wni + 1 < wnc) { - next = _walkedNodes[wni + 1].node; - } - - string term; - if (prev) { - term = prev->currentKeyValue().key; - [termArray addObject:[NSString stringWithUTF8String:term.c_str()]]; - } - - if (current) { - term = current->currentKeyValue().key; - [termArray addObject:[NSString stringWithUTF8String:term.c_str()]]; - } - - if (next) { - term = next->currentKeyValue().key; - [termArray addObject:[NSString stringWithUTF8String:term.c_str()]]; - } - - return [termArray componentsJoinedByString:@"-"]; -} - -- (void)_performDeferredSaveUserCandidatesDictionary -{ - BOOL __unused success = [gCandidateLearningDictionary writeToFile:gUserCandidatesDictionaryPath atomically:YES]; -} - -- (void)saveUserCandidatesDictionary -{ - if (!gUserCandidatesDictionaryPath) { - return; - } - - [NSObject cancelPreviousPerformRequestsWithTarget:self selector:@selector(_performDeferredSaveUserCandidatesDictionary) object:nil]; - - // TODO: Const-ize the delay - [self performSelector:@selector(_performDeferredSaveUserCandidatesDictionary) withObject:nil afterDelay:5.0]; -} - - (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client { // set the candidate panel style @@ -1467,30 +1377,12 @@ public: [[NSApplication sharedApplication] activateIgnoringOtherApps:YES]; } -- (void)toggleLearning:(id)sender -{ - BOOL toggle = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]; - - [[NSUserDefaults standardUserDefaults] setBool:toggle forKey:kDisableUserCandidateSelectionLearning]; -} - - (void)toggleChineseConverter:(id)sender { _chineseConversionEnabled = !_chineseConversionEnabled; [[NSUserDefaults standardUserDefaults] setBool:_chineseConversionEnabled forKey:kChineseConversionEnabledKey]; } -- (void)clearLearningDictionary:(id)sender -{ - [gCandidateLearningDictionary removeAllObjects]; - [self _performDeferredSaveUserCandidatesDictionary]; -} - -- (void)dumpLearningDictionary:(id)sender -{ - NSLog(@"%@", gCandidateLearningDictionary); -} - - (NSUInteger)candidateCountForController:(VTCandidateController *)controller { return [_candidates count]; @@ -1508,15 +1400,11 @@ public: // candidate selected, override the node with selection string selectedValue = [[_candidates objectAtIndex:index] UTF8String]; - if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) { - NSString *trigram = [self neighborTrigramString]; - NSString *selectedNSString = [NSString stringWithUTF8String:selectedValue.c_str()]; - [gCandidateLearningDictionary setObject:selectedNSString forKey:trigram]; - [self saveUserCandidatesDictionary]; - } - size_t cursorIndex = [self actualCandidateCursorIndex]; _builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue); + if (_inputMode != kPlainBopomofoModeIdentifier) { + _uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]); + } [_candidates removeAllObjects]; @@ -1545,57 +1433,4 @@ void LTLoadLanguageModel() { LTLoadLanguageModelFile(@"data", gLanguageModel); LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo); - - - // initialize the singleton learning dictionary - // putting singleton in @synchronized is the standard way in Objective-C - // to avoid race condition - gCandidateLearningDictionary = [[NSMutableDictionary alloc] init]; - - // the first instance is also responsible for loading the dictionary - NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES); - if (![paths count]) { - NSLog(@"Fatal error: cannot find Applicaiton Support directory."); - return; - } - - NSString *appSupportPath = [paths objectAtIndex:0]; - NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"McBopomofo"]; - - BOOL isDir = NO; - BOOL exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictPath isDirectory:&isDir]; - - if (exists) { - if (!isDir) { - NSLog(@"Fatal error: Path '%@' is not a directory", userDictPath); - return; - } - } - else { - NSError *error = nil; - BOOL success = [[NSFileManager defaultManager] createDirectoryAtPath:userDictPath withIntermediateDirectories:YES attributes:nil error:&error]; - if (!success) { - NSLog(@"Failed to create directory '%@', error: %@", userDictPath, error); - return; - } - } - - // TODO: Change this - NSString *userDictFile = [userDictPath stringByAppendingPathComponent:@"UserCandidatesCache.plist"]; - gUserCandidatesDictionaryPath = userDictFile; - - exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictFile isDirectory:&isDir]; - if (exists && !isDir) { - NSData *data = [NSData dataWithContentsOfFile:userDictFile]; - if (!data) { - return; - } - - id plist = [NSPropertyListSerialization propertyListWithData:data options:NSPropertyListImmutable format:NULL error:NULL]; - if (plist && [plist isKindOfClass:[NSDictionary class]]) { - [gCandidateLearningDictionary setDictionary:(NSDictionary *)plist]; - NSLog(@"User dictionary read, item count: %ju", (uintmax_t)[gCandidateLearningDictionary count]); - } - } - } diff --git a/Source/UserOverrideModel.cpp b/Source/UserOverrideModel.cpp new file mode 100644 index 00000000..8b2df522 --- /dev/null +++ b/Source/UserOverrideModel.cpp @@ -0,0 +1,219 @@ +// +// UserOverrideModel.cpp +// +// Copyright (c) 2017 The McBopomofo Project. +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +#include "UserOverrideModel.h" + +#include +#include +#include + +using namespace McBopomofo; + +// About 20 generations. +static const double DecayThreshould = 1.0 / 1048576.0; + +static double Score(size_t eventCount, + size_t totalCount, + double eventTimestamp, + double timestamp, + double lambda); +static bool IsEndingPunctuation(const string& value); +static string WalkedNodesToKey(const std::vector& walkedNodes, + size_t cursorIndex); + +UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) + : m_capacity(capacity) { + assert(m_capacity > 0); + m_decayExponent = log(0.5) / decayConstant; +} + +void UserOverrideModel::observe(const std::vector& walkedNodes, + size_t cursorIndex, + const string& candidate, + double timestamp) { + string key = WalkedNodesToKey(walkedNodes, cursorIndex); + auto mapIter = m_lruMap.find(key); + if (mapIter == m_lruMap.end()) { + auto keyValuePair = KeyObservationPair(key, Observation()); + Observation& observation = keyValuePair.second; + observation.update(candidate, timestamp); + + m_lruList.push_front(keyValuePair); + auto listIter = m_lruList.begin(); + auto lruKeyValue = std::pair::iterator>(key, listIter); + m_lruMap.insert(lruKeyValue); + + if (m_lruList.size() > m_capacity) { + auto lastKeyValuePair = m_lruList.end(); + --lastKeyValuePair; + m_lruMap.erase(lastKeyValuePair->first); + m_lruList.pop_back(); + } + } else { + auto listIter = mapIter->second; + m_lruList.splice(m_lruList.begin(), m_lruList, listIter); + + auto& keyValuePair = *listIter; + Observation& observation = keyValuePair.second; + observation.update(candidate, timestamp); + } +} + +string UserOverrideModel::suggest(const std::vector& walkedNodes, + size_t cursorIndex, + double timestamp) { + string key = WalkedNodesToKey(walkedNodes, cursorIndex); + auto mapIter = m_lruMap.find(key); + if (mapIter == m_lruMap.end()) { + return string(); + } + + auto listIter = mapIter->second; + auto& keyValuePair = *listIter; + const Observation& observation = keyValuePair.second; + + string candidate; + double score = 0.0; + for (auto i = observation.overrides.begin(); + i != observation.overrides.end(); + ++i) { + const Override& o = i->second; + double overrideScore = Score(o.count, + observation.count, + o.timestamp, + timestamp, + m_decayExponent); + if (overrideScore == 0.0) { + continue; + } + + if (overrideScore > score) { + candidate = i->first; + score = overrideScore; + } + } + return candidate; +} + +void UserOverrideModel::Observation::update(const string& candidate, + double timestamp) { + count++; + auto& o = overrides[candidate]; + o.timestamp = timestamp; + o.count++; +} + +static double Score(size_t eventCount, + size_t totalCount, + double eventTimestamp, + double timestamp, + double lambda) { + double decay = exp((timestamp - eventTimestamp) * lambda); + if (decay < DecayThreshould) { + return 0.0; + } + + double prob = (double)eventCount / (double)totalCount; + return prob * decay; +} + +static bool IsEndingPunctuation(const string& value) { + return value == "," || value == "。" || value== "!" || value == "?" || + value == "」" || value == "』" || value== "”" || value == "”"; +} +static string WalkedNodesToKey(const std::vector& walkedNodes, + size_t cursorIndex) { + std::stringstream s; + std::vector n; + size_t ll = 0; + for (std::vector::const_iterator i = walkedNodes.begin(); + i != walkedNodes.end(); + ++i) { + const auto& nn = *i; + n.push_back(nn); + ll += nn.spanningLength; + if (ll >= cursorIndex) { + break; + } + } + + std::vector::const_reverse_iterator r = n.rbegin(); + + if (r == n.rend()) { + return ""; + } + + string current = (*r).node->currentKeyValue().key; + ++r; + + s.clear(); + s.str(std::string()); + if (r != n.rend()) { + string value = (*r).node->currentKeyValue().value; + if (IsEndingPunctuation(value)) { + s << "()"; + r = n.rend(); + } else { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << value + << ")"; + ++r; + } + } else { + s << "()"; + } + string prev = s.str(); + + s.clear(); + s.str(std::string()); + if (r != n.rend()) { + string value = (*r).node->currentKeyValue().value; + if (IsEndingPunctuation(value)) { + s << "()"; + r = n.rend(); + } else { + s << "(" + << (*r).node->currentKeyValue().key + << "," + << value + << ")"; + ++r; + } + } else { + s << "()"; + } + string anterior = s.str(); + + s.clear(); + s.str(std::string()); + s << "(" << anterior << "," << prev << "," << current << ")"; + + return s.str(); +} diff --git a/Source/UserOverrideModel.h b/Source/UserOverrideModel.h new file mode 100644 index 00000000..0b981923 --- /dev/null +++ b/Source/UserOverrideModel.h @@ -0,0 +1,81 @@ +// +// UserOverrideModel.h +// +// Copyright (c) 2017 The McBopomofo Project. +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +#ifndef USEROVERRIDEMODEL_H +#define USEROVERRIDEMODEL_H + +#include +#include +#include + +#include "Gramambular.h" + +namespace McBopomofo { + +using namespace Formosa::Gramambular; + +class UserOverrideModel { +public: + UserOverrideModel(size_t capacity, double decayConstant); + + void observe(const std::vector& walkedNodes, + size_t cursorIndex, + const string& candidate, + double timestamp); + + string suggest(const std::vector& walkedNodes, + size_t cursorIndex, + double timestamp); + +private: + struct Override { + size_t count; + double timestamp; + + Override() : count(0), timestamp(0.0) {} + }; + + struct Observation { + size_t count; + std::map overrides; + + Observation() : count(0) {} + void update(const string& candidate, double timestamp); + }; + + typedef std::pair KeyObservationPair; + + size_t m_capacity; + double m_decayExponent; + std::list m_lruList; + std::map::iterator> m_lruMap; +}; + +}; // namespace McBopomofo + +#endif +