Merge pull request #207 from openvanilla/rebased-user-override-model

Implements an exponential decay-based user candidate override model, rebased from #135
This commit is contained in:
Weizhong Yang a.k.a zonble 2022-01-07 13:20:52 +08:00 committed by GitHub
commit 39cdc7d73d
10 changed files with 394 additions and 196 deletions

View File

@ -49,6 +49,7 @@
6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */ = {isa = PBXBuildFile; fileRef = 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */; }; 6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */ = {isa = PBXBuildFile; fileRef = 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */; };
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; }; D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; };
D48550A325EBE689006A204C /* OpenCC in Frameworks */ = {isa = PBXBuildFile; productRef = D48550A225EBE689006A204C /* OpenCC */; }; D48550A325EBE689006A204C /* OpenCC in Frameworks */ = {isa = PBXBuildFile; productRef = D48550A225EBE689006A204C /* OpenCC */; };
6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */ /* Begin PBXContainerItemProxy section */
@ -211,6 +212,8 @@
6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = OVNonModalAlertWindowController.m; sourceTree = "<group>"; }; 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = OVNonModalAlertWindowController.m; sourceTree = "<group>"; };
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; }; D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; };
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; }; D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; };
6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = UserOverrideModel.cpp; sourceTree = "<group>"; };
6AE30A481F7F40B7008735BD /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserOverrideModel.h; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
@ -286,6 +289,10 @@
6A0D4ECC15FC0D6400ABF4B3 /* PreferencesWindowController.m */, 6A0D4ECC15FC0D6400ABF4B3 /* PreferencesWindowController.m */,
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */, D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */,
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */, D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */,
6A0D4ECD15FC0D6400ABF4B3 /* UpdateNotificationController.h */,
6A0D4ECE15FC0D6400ABF4B3 /* UpdateNotificationController.m */,
6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */,
6AE30A481F7F40B7008735BD /* UserOverrideModel.h */,
); );
path = Source; path = Source;
sourceTree = "<group>"; sourceTree = "<group>";
@ -647,6 +654,7 @@
6A0D4F0015FC0DA600ABF4B3 /* VTHorizontalCandidateView.m in Sources */, 6A0D4F0015FC0DA600ABF4B3 /* VTHorizontalCandidateView.m in Sources */,
6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */, 6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */,
6A0D4F0115FC0DA600ABF4B3 /* VTVerticalCandidateController.m in Sources */, 6A0D4F0115FC0DA600ABF4B3 /* VTVerticalCandidateController.m in Sources */,
6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */,
6A0D4F0215FC0DA600ABF4B3 /* VTVerticalCandidateTableView.m in Sources */, 6A0D4F0215FC0DA600ABF4B3 /* VTVerticalCandidateTableView.m in Sources */,
6A0D4F0315FC0DA600ABF4B3 /* VTVerticalKeyLabelStripView.m in Sources */, 6A0D4F0315FC0DA600ABF4B3 /* VTVerticalKeyLabelStripView.m in Sources */,
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */, D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */,

View File

@ -28,6 +28,8 @@
#ifndef Bigram_h #ifndef Bigram_h
#define Bigram_h #define Bigram_h
#include <vector>
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Formosa { namespace Formosa {

View File

@ -199,7 +199,7 @@ namespace Formosa {
} }
} }
const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator) inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator)
{ {
string result; string result;
for (vector<string>::const_iterator iter = begin ; iter != end ; ) { for (vector<string>::const_iterator iter = begin ; iter != end ; ) {

View File

@ -47,8 +47,19 @@ namespace Formosa {
size_t width() const; size_t width() const;
vector<NodeAnchor> nodesEndingAt(size_t inLocation); vector<NodeAnchor> nodesEndingAt(size_t inLocation);
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation); vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
// "Freeze" the node with the unigram that represents the selected candidate value.
// After this, the node that contains the unigram will always be evaluated to that
// unigram, while all other overlapping nodes will be reset to their initial state
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.)
void fixNodeSelectedCandidate(size_t location, const string& value); void fixNodeSelectedCandidate(size_t location, const string& value);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only
// boost the unigram that represents the value with an overriding score. This
// has the same side effect as fixNodeSelectedCandidate, which is that all other
// overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore);
const string dumpDOT(); const string dumpDOT();
protected: protected:
@ -195,6 +206,24 @@ namespace Formosa {
} }
} }
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore)
{
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
inline const string Grid::dumpDOT() inline const string Grid::dumpDOT()
{ {
stringstream sst; stringstream sst;

View File

@ -28,6 +28,7 @@
#ifndef KeyValuePair_h #ifndef KeyValuePair_h
#define KeyValuePair_h #define KeyValuePair_h
#include <ostream>
#include <string> #include <string>
namespace Formosa { namespace Formosa {

View File

@ -47,10 +47,12 @@ namespace Formosa {
const vector<KeyValuePair>& candidates() const; const vector<KeyValuePair>& candidates() const;
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true);
void resetCandidate(); void resetCandidate();
void selectFloatingCandidateAtIndex(size_t index, double score);
const string& key() const; const string& key() const;
double score() const; double score() const;
const KeyValuePair currentKeyValue() const; const KeyValuePair currentKeyValue() const;
double highestUnigramScore() const;
protected: protected:
const LanguageModel* m_LM; const LanguageModel* m_LM;
@ -176,6 +178,16 @@ namespace Formosa {
} }
} }
// Selects the unigram at `index` and assigns it `score`, leaving the node
// unfixed ("floating"). An out-of-range index falls back to index 0.
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
    const bool inRange = index < m_unigrams.size();
    m_selectedUnigramIndex = inRange ? index : 0;
    m_score = score;
    m_candidateFixed = false;
}
inline const string& Node::key() const inline const string& Node::key() const
{ {
return m_key; return m_key;
@ -186,6 +198,13 @@ namespace Formosa {
return m_score; return m_score;
} }
// Returns the score of the first unigram, or 0.0 when the node has none.
// NOTE(review): presumably m_unigrams is kept sorted by descending score so
// that the first entry is the highest; confirm against the constructor.
inline double Node::highestUnigramScore() const {
    return m_unigrams.empty() ? 0.0 : m_unigrams[0].score;
}
inline const KeyValuePair Node::currentKeyValue() const inline const KeyValuePair Node::currentKeyValue() const
{ {
if(m_selectedUnigramIndex >= m_unigrams.size()) { if(m_selectedUnigramIndex >= m_unigrams.size()) {

View File

@ -37,6 +37,7 @@
#import "Mandarin.h" #import "Mandarin.h"
#import "Gramambular.h" #import "Gramambular.h"
#import "FastLM.h" #import "FastLM.h"
#import "UserOverrideModel.h"
@interface McBopomofoInputMethodController : IMKInputController @interface McBopomofoInputMethodController : IMKInputController
{ {
@ -53,6 +54,9 @@
// latest walked path (trellis) using the Viterbi algorithm // latest walked path (trellis) using the Viterbi algorithm
std::vector<Formosa::Gramambular::NodeAnchor> _walkedNodes; std::vector<Formosa::Gramambular::NodeAnchor> _walkedNodes;
// user override model
McBopomofo::UserOverrideModel *_uom;
// the latest composing buffer that is updated to the foreground app // the latest composing buffer that is updated to the foreground app
NSMutableString *_composingBuffer; NSMutableString *_composingBuffer;
NSInteger _latestReadingCursor; NSInteger _latestReadingCursor;

View File

@ -76,7 +76,6 @@ static NSString *const kCandidateListTextSizeKey = @"CandidateListTextSize";
static NSString *const kSelectPhraseAfterCursorAsCandidatePreferenceKey = @"SelectPhraseAfterCursorAsCandidate"; static NSString *const kSelectPhraseAfterCursorAsCandidatePreferenceKey = @"SelectPhraseAfterCursorAsCandidate";
static NSString *const kUseHorizontalCandidateListPreferenceKey = @"UseHorizontalCandidateList"; static NSString *const kUseHorizontalCandidateListPreferenceKey = @"UseHorizontalCandidateList";
static NSString *const kComposingBufferSizePreferenceKey = @"ComposingBufferSize"; static NSString *const kComposingBufferSizePreferenceKey = @"ComposingBufferSize";
static NSString *const kDisableUserCandidateSelectionLearning = @"DisableUserCandidateSelectionLearning";
static NSString *const kChooseCandidateUsingSpaceKey = @"ChooseCandidateUsingSpaceKey"; static NSString *const kChooseCandidateUsingSpaceKey = @"ChooseCandidateUsingSpaceKey";
static NSString *const kChineseConversionEnabledKey = @"ChineseConversionEnabledKey"; static NSString *const kChineseConversionEnabledKey = @"ChineseConversionEnabledKey";
static NSString *const kEscToCleanInputBufferKey = @"EscToCleanInputBufferKey"; static NSString *const kEscToCleanInputBufferKey = @"EscToCleanInputBufferKey";
@ -104,9 +103,6 @@ enum {
kDeleteKeyCode = 117 kDeleteKeyCode = 117
}; };
// a global object for saving the "learned" user candidate selections
NSMutableDictionary *gCandidateLearningDictionary = nil;
NSString *gUserCandidatesDictionaryPath = nil;
VTCandidateController *gCurrentCandidateController = nil; VTCandidateController *gCurrentCandidateController = nil;
// if DEBUG is defined, a DOT file (GraphViz format) will be written to the // if DEBUG is defined, a DOT file (GraphViz format) will be written to the
@ -119,6 +115,10 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
FastLM gLanguageModel; FastLM gLanguageModel;
FastLM gLanguageModelPlainBopomofo; FastLM gLanguageModelPlainBopomofo;
static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
// https://clang-analyzer.llvm.org/faq.html // https://clang-analyzer.llvm.org/faq.html
__attribute__((annotate("returns_localized_nsstring"))) __attribute__((annotate("returns_localized_nsstring")))
static inline NSString *LocalizationNotNeeded(NSString *s) { static inline NSString *LocalizationNotNeeded(NSString *s) {
@ -133,10 +133,7 @@ static inline NSString *LocalizationNotNeeded(NSString *s) {
- (void)collectCandidates; - (void)collectCandidates;
- (size_t)actualCandidateCursorIndex; - (size_t)actualCandidateCursorIndex;
- (NSString *)neighborTrigramString;
- (void)_performDeferredSaveUserCandidatesDictionary;
- (void)saveUserCandidatesDictionary;
- (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client; - (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client;
- (void)beep; - (void)beep;
@ -153,6 +150,19 @@ public:
} }
}; };
static const double kEpsilon = 0.000001;
static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon) {
double highestScore = 0.0;
for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) {
double score = ni->node->highestUnigramScore();
if (score > highestScore) {
highestScore = score;
}
}
return highestScore + epsilon;
}
@implementation McBopomofoInputMethodController @implementation McBopomofoInputMethodController
- (void)dealloc - (void)dealloc
{ {
@ -183,6 +193,7 @@ public:
// create the lattice builder // create the lattice builder
_languageModel = &gLanguageModel; _languageModel = &gLanguageModel;
_builder = new BlockReadingBuilder(_languageModel); _builder = new BlockReadingBuilder(_languageModel);
_uom = &gUserOverrideModel;
// each Mandarin syllable is separated by a hyphen // each Mandarin syllable is separated by a hyphen
_builder->setJoinSeparator("-"); _builder->setJoinSeparator("-");
@ -190,11 +201,6 @@ public:
// create the composing buffer // create the composing buffer
_composingBuffer = [[NSMutableString alloc] init]; _composingBuffer = [[NSMutableString alloc] init];
// populate the settings, by default, DISABLE user candidate learning
if (![[NSUserDefaults standardUserDefaults] objectForKey:kDisableUserCandidateSelectionLearning]) {
[[NSUserDefaults standardUserDefaults] setObject:(id)kCFBooleanTrue forKey:kDisableUserCandidateSelectionLearning];
}
_inputMode = kBopomofoModeIdentifier; _inputMode = kBopomofoModeIdentifier;
_chineseConversionEnabled = [[NSUserDefaults standardUserDefaults] boolForKey:kChineseConversionEnabledKey]; _chineseConversionEnabled = [[NSUserDefaults standardUserDefaults] boolForKey:kChineseConversionEnabledKey];
} }
@ -209,30 +215,6 @@ public:
NSMenuItem *preferenceMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"McBopomofo Preferences", @"") action:@selector(showPreferences:) keyEquivalent:@""]; NSMenuItem *preferenceMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"McBopomofo Preferences", @"") action:@selector(showPreferences:) keyEquivalent:@""];
[menu addItem:preferenceMenuItem]; [menu addItem:preferenceMenuItem];
// If Option key is pressed, show the learning-related menu
#if DEBUG
//I think the following line is 10.6+ specific
if ([[NSEvent class] respondsToSelector:@selector(modifierFlags)] && ([NSEvent modifierFlags] & NSAlternateKeyMask)) {
BOOL learningEnabled = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning];
NSMenuItem *learnMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Enable Selection Learning", @"") action:@selector(toggleLearning:) keyEquivalent:@""];
learnMenuItem.state = learningEnabled ? NSControlStateValueOn : NSControlStateValueOff;
[menu addItem:learnMenuItem];
if (learningEnabled) {
NSString *clearMenuItemTitle = [NSString stringWithFormat:NSLocalizedString(@"Clear Learning Dictionary (%ju Items)", @""), (uintmax_t)[gCandidateLearningDictionary count]];
NSMenuItem *clearMenuItem = [[NSMenuItem alloc] initWithTitle:clearMenuItemTitle action:@selector(clearLearningDictionary:) keyEquivalent:@""];
[menu addItem:clearMenuItem];
NSMenuItem *dumpMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Dump Learning Data to Console", @"") action:@selector(dumpLearningDictionary:) keyEquivalent:@""];
[menu addItem:dumpMenuItem];
}
}
#endif //DEBUG
NSMenuItem *chineseConversionMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Chinese Conversion", @"") action:@selector(toggleChineseConverter:) keyEquivalent:@"G"]; NSMenuItem *chineseConversionMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Chinese Conversion", @"") action:@selector(toggleChineseConverter:) keyEquivalent:@"G"];
chineseConversionMenuItem.keyEquivalentModifierMask = NSEventModifierFlagCommand | NSEventModifierFlagControl; chineseConversionMenuItem.keyEquivalentModifierMask = NSEventModifierFlagCommand | NSEventModifierFlagControl;
chineseConversionMenuItem.state = _chineseConversionEnabled ? NSControlStateValueOn : NSControlStateValueOff; chineseConversionMenuItem.state = _chineseConversionEnabled ? NSControlStateValueOn : NSControlStateValueOff;
@ -695,15 +677,15 @@ public:
// then walk the lattice // then walk the lattice
[self popOverflowComposingTextAndWalk:client]; [self popOverflowComposingTextAndWalk:client];
// see if we need to override the selection if a learned one exists // get user override model suggestion
if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) { string overrideValue =
NSString *trigram = [self neighborTrigramString]; (_inputMode == kPlainBopomofoModeIdentifier) ? "" :
_uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]);
// Lookup from the user dict to see if the trigram fit or not if (!overrideValue.empty()) {
NSString *overrideCandidateString = [gCandidateLearningDictionary objectForKey:trigram]; size_t cursorIndex = [self actualCandidateCursorIndex];
if (overrideCandidateString) { vector<NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
[self candidateSelected:(NSAttributedString *)overrideCandidateString]; double highestScore = FindHighestScore(nodes, kEpsilon);
} _builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, highestScore);
} }
// then update the text // then update the text
@ -1292,78 +1274,6 @@ public:
return cursorIndex; return cursorIndex;
} }
- (NSString *)neighborTrigramString
{
// gather the "trigram" for user candidate selection learning
NSMutableArray *termArray = [NSMutableArray array];
size_t cursorIndex = [self actualCandidateCursorIndex];
vector<NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
const Node* prev = 0;
const Node* current = 0;
const Node* next = 0;
size_t wni = 0;
size_t wnc = _walkedNodes.size();
size_t accuSpanningLength = 0;
for (wni = 0; wni < wnc; wni++) {
NodeAnchor& anchor = _walkedNodes[wni];
if (!anchor.node) {
continue;
}
accuSpanningLength += anchor.spanningLength;
if (accuSpanningLength >= cursorIndex) {
prev = current;
current = anchor.node;
break;
}
current = anchor.node;
}
if (wni + 1 < wnc) {
next = _walkedNodes[wni + 1].node;
}
string term;
if (prev) {
term = prev->currentKeyValue().key;
[termArray addObject:[NSString stringWithUTF8String:term.c_str()]];
}
if (current) {
term = current->currentKeyValue().key;
[termArray addObject:[NSString stringWithUTF8String:term.c_str()]];
}
if (next) {
term = next->currentKeyValue().key;
[termArray addObject:[NSString stringWithUTF8String:term.c_str()]];
}
return [termArray componentsJoinedByString:@"-"];
}
- (void)_performDeferredSaveUserCandidatesDictionary
{
BOOL __unused success = [gCandidateLearningDictionary writeToFile:gUserCandidatesDictionaryPath atomically:YES];
}
- (void)saveUserCandidatesDictionary
{
if (!gUserCandidatesDictionaryPath) {
return;
}
[NSObject cancelPreviousPerformRequestsWithTarget:self selector:@selector(_performDeferredSaveUserCandidatesDictionary) object:nil];
// TODO: Const-ize the delay
[self performSelector:@selector(_performDeferredSaveUserCandidatesDictionary) withObject:nil afterDelay:5.0];
}
- (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client - (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client
{ {
// set the candidate panel style // set the candidate panel style
@ -1467,30 +1377,12 @@ public:
[[NSApplication sharedApplication] activateIgnoringOtherApps:YES]; [[NSApplication sharedApplication] activateIgnoringOtherApps:YES];
} }
- (void)toggleLearning:(id)sender
{
BOOL toggle = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning];
[[NSUserDefaults standardUserDefaults] setBool:toggle forKey:kDisableUserCandidateSelectionLearning];
}
- (void)toggleChineseConverter:(id)sender - (void)toggleChineseConverter:(id)sender
{ {
_chineseConversionEnabled = !_chineseConversionEnabled; _chineseConversionEnabled = !_chineseConversionEnabled;
[[NSUserDefaults standardUserDefaults] setBool:_chineseConversionEnabled forKey:kChineseConversionEnabledKey]; [[NSUserDefaults standardUserDefaults] setBool:_chineseConversionEnabled forKey:kChineseConversionEnabledKey];
} }
- (void)clearLearningDictionary:(id)sender
{
[gCandidateLearningDictionary removeAllObjects];
[self _performDeferredSaveUserCandidatesDictionary];
}
- (void)dumpLearningDictionary:(id)sender
{
NSLog(@"%@", gCandidateLearningDictionary);
}
- (NSUInteger)candidateCountForController:(VTCandidateController *)controller - (NSUInteger)candidateCountForController:(VTCandidateController *)controller
{ {
return [_candidates count]; return [_candidates count];
@ -1508,15 +1400,11 @@ public:
// candidate selected, override the node with selection // candidate selected, override the node with selection
string selectedValue = [[_candidates objectAtIndex:index] UTF8String]; string selectedValue = [[_candidates objectAtIndex:index] UTF8String];
if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) {
NSString *trigram = [self neighborTrigramString];
NSString *selectedNSString = [NSString stringWithUTF8String:selectedValue.c_str()];
[gCandidateLearningDictionary setObject:selectedNSString forKey:trigram];
[self saveUserCandidatesDictionary];
}
size_t cursorIndex = [self actualCandidateCursorIndex]; size_t cursorIndex = [self actualCandidateCursorIndex];
_builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue); _builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue);
if (_inputMode != kPlainBopomofoModeIdentifier) {
_uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]);
}
[_candidates removeAllObjects]; [_candidates removeAllObjects];
@ -1545,57 +1433,4 @@ void LTLoadLanguageModel()
{ {
LTLoadLanguageModelFile(@"data", gLanguageModel); LTLoadLanguageModelFile(@"data", gLanguageModel);
LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo); LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo);
// initialize the singleton learning dictionary
// putting singleton in @synchronized is the standard way in Objective-C
// to avoid race condition
gCandidateLearningDictionary = [[NSMutableDictionary alloc] init];
// the first instance is also responsible for loading the dictionary
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
if (![paths count]) {
NSLog(@"Fatal error: cannot find Applicaiton Support directory.");
return;
}
NSString *appSupportPath = [paths objectAtIndex:0];
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"McBopomofo"];
BOOL isDir = NO;
BOOL exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictPath isDirectory:&isDir];
if (exists) {
if (!isDir) {
NSLog(@"Fatal error: Path '%@' is not a directory", userDictPath);
return;
}
}
else {
NSError *error = nil;
BOOL success = [[NSFileManager defaultManager] createDirectoryAtPath:userDictPath withIntermediateDirectories:YES attributes:nil error:&error];
if (!success) {
NSLog(@"Failed to create directory '%@', error: %@", userDictPath, error);
return;
}
}
// TODO: Change this
NSString *userDictFile = [userDictPath stringByAppendingPathComponent:@"UserCandidatesCache.plist"];
gUserCandidatesDictionaryPath = userDictFile;
exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictFile isDirectory:&isDir];
if (exists && !isDir) {
NSData *data = [NSData dataWithContentsOfFile:userDictFile];
if (!data) {
return;
}
id plist = [NSPropertyListSerialization propertyListWithData:data options:NSPropertyListImmutable format:NULL error:NULL];
if (plist && [plist isKindOfClass:[NSDictionary class]]) {
[gCandidateLearningDictionary setDictionary:(NSDictionary *)plist];
NSLog(@"User dictionary read, item count: %ju", (uintmax_t)[gCandidateLearningDictionary count]);
}
}
} }

View File

@ -0,0 +1,219 @@
//
// UserOverrideModel.cpp
//
// Copyright (c) 2017 The McBopomofo Project.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#include "UserOverrideModel.h"
#include <cassert>
#include <cmath>
#include <sstream>
using namespace McBopomofo;
// About 20 generations.
static const double DecayThreshould = 1.0 / 1048576.0;
static double Score(size_t eventCount,
size_t totalCount,
double eventTimestamp,
double timestamp,
double lambda);
static bool IsEndingPunctuation(const string& value);
static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
size_t cursorIndex);
// Creates a model that remembers at most `capacity` contexts.
//
// `decayConstant` acts as a half-life: the decay exponent is
// log(0.5) / decayConstant, so an observation that is `decayConstant` time
// units old has its score halved. The unit is whatever the caller uses for the
// timestamps passed to observe() and suggest().
//
// Both arguments must be positive. A zero half-life would produce an infinite
// exponent (and NaN scores), and a negative one would make scores grow with
// age instead of decaying.
UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant)
    : m_capacity(capacity),
      m_decayExponent(log(0.5) / decayConstant) {
    assert(m_capacity > 0);
    assert(decayConstant > 0.0);
}
// Records that the user picked `candidate` at `timestamp` in the context
// described by `walkedNodes` up to `cursorIndex`.
//
// The context becomes the most recently used entry. When adding a new context
// pushes the model past its capacity, the least recently used one is evicted.
void UserOverrideModel::observe(const std::vector<NodeAnchor>& walkedNodes,
                                size_t cursorIndex,
                                const string& candidate,
                                double timestamp) {
    const string key = WalkedNodesToKey(walkedNodes, cursorIndex);
    if (key.empty()) {
        // WalkedNodesToKey returns an empty key when there are no walked nodes.
        // There is no context to learn from, so do not spend an LRU slot on it.
        return;
    }

    auto mapIter = m_lruMap.find(key);
    if (mapIter != m_lruMap.end()) {
        // Known context: move it to the front (splice keeps iterators valid)
        // and fold in the new observation.
        auto listIter = mapIter->second;
        m_lruList.splice(m_lruList.begin(), m_lruList, listIter);
        listIter->second.update(candidate, timestamp);
        return;
    }

    // New context: construct the entry directly in the list so the
    // Observation (and its map) is not copied after being filled in.
    m_lruList.emplace_front(key, Observation());
    m_lruList.front().second.update(candidate, timestamp);
    m_lruMap.emplace(key, m_lruList.begin());

    if (m_lruList.size() > m_capacity) {
        // Evict the least recently used context from both structures.
        m_lruMap.erase(m_lruList.back().first);
        m_lruList.pop_back();
    }
}
// Returns the best-scoring previously observed candidate for the context
// described by `walkedNodes` up to `cursorIndex`, evaluated at `timestamp`.
// Returns an empty string when the context is unknown or every override has
// decayed to a zero score. The LRU order is not touched by a lookup.
string UserOverrideModel::suggest(const std::vector<NodeAnchor>& walkedNodes,
                                  size_t cursorIndex,
                                  double timestamp) {
    const string contextKey = WalkedNodesToKey(walkedNodes, cursorIndex);
    const auto found = m_lruMap.find(contextKey);
    if (found == m_lruMap.end()) {
        return string();
    }

    const Observation& seen = found->second->second;

    string best;
    double bestScore = 0.0;
    for (const auto& entry : seen.overrides) {
        const Override& candidateStats = entry.second;
        const double candidateScore = Score(candidateStats.count,
                                            seen.count,
                                            candidateStats.timestamp,
                                            timestamp,
                                            m_decayExponent);
        // bestScore never drops below 0.0, so a fully decayed (zero) score can
        // never win this comparison and needs no separate check.
        if (candidateScore > bestScore) {
            best = entry.first;
            bestScore = candidateScore;
        }
    }
    return best;
}
void UserOverrideModel::Observation::update(const string& candidate,
double timestamp) {
count++;
auto& o = overrides[candidate];
o.timestamp = timestamp;
o.count++;
}
// Scores one override: its share of all observations for the context
// (eventCount / totalCount), attenuated by exp((timestamp - eventTimestamp) *
// lambda). Once the attenuation drops below DecayThreshould the score is
// reported as exactly 0.0.
static double Score(size_t eventCount,
                    size_t totalCount,
                    double eventTimestamp,
                    double timestamp,
                    double lambda) {
    const double elapsed = timestamp - eventTimestamp;
    const double attenuation = exp(elapsed * lambda);
    if (attenuation < DecayThreshould) {
        return 0.0;
    }

    const double share =
        static_cast<double>(eventCount) / static_cast<double>(totalCount);
    return share * attenuation;
}
// Returns true when `value` is one of the full-width punctuation marks that
// close a clause or a quotation. WalkedNodesToKey uses this to stop the
// context from reaching back across such a mark.
//
// The literals are UTF-8 encoded; this file must be saved and compiled as
// UTF-8 for the comparisons to match.
static bool IsEndingPunctuation(const std::string& value) {
    return value == "，" || value == "。" || value == "！" || value == "？" ||
           value == "」" || value == "』" || value == "”" || value == "’";
}
// Builds the context key used to index observations.
//
// Only the walked nodes up to and including the first one whose accumulated
// spanning length reaches `cursorIndex` are considered. The last of those is
// the "current" node; the two before it (if any) are "prev" and "anterior".
// The key has the form "(anterior,prev,current)", where current is the node's
// key and each neighbour is rendered as "(key,value)". A neighbour is rendered
// as "()" when it is missing or when its value is an ending punctuation mark;
// in the latter case nothing further back is used either.
//
// Returns an empty string when `walkedNodes` is empty.
static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
                               size_t cursorIndex) {
    typedef std::vector<NodeAnchor>::const_reverse_iterator ReverseIter;

    // Advance `stop` to one past the anchor that first covers the cursor.
    std::vector<NodeAnchor>::const_iterator stop = walkedNodes.begin();
    size_t covered = 0;
    while (stop != walkedNodes.end()) {
        covered += stop->spanningLength;
        ++stop;
        if (covered >= cursorIndex) {
            break;
        }
    }

    // Walk backwards from the anchor under the cursor.
    ReverseIter cursor(stop);
    const ReverseIter exhausted = walkedNodes.rend();
    if (cursor == exhausted) {
        return string();
    }

    const string current = cursor->node->currentKeyValue().key;
    ++cursor;

    // Renders the neighbour at `it` and steps past it. Hitting an ending
    // punctuation mark exhausts the iterator so that nothing before the mark
    // contributes to the key.
    auto describeAndAdvance = [&exhausted](ReverseIter& it) -> string {
        if (it == exhausted) {
            return "()";
        }
        const auto keyValue = it->node->currentKeyValue();
        if (IsEndingPunctuation(keyValue.value)) {
            it = exhausted;
            return "()";
        }
        ++it;
        return "(" + keyValue.key + "," + keyValue.value + ")";
    };

    const string prev = describeAndAdvance(cursor);
    const string anterior = describeAndAdvance(cursor);

    return "(" + anterior + "," + prev + "," + current + ")";
}

View File

@ -0,0 +1,81 @@
//
// UserOverrideModel.h
//
// Copyright (c) 2017 The McBopomofo Project.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#ifndef USEROVERRIDEMODEL_H
#define USEROVERRIDEMODEL_H
#include <list>
#include <map>
#include <string>
#include "Gramambular.h"
// User override model: remembers which candidate the user picked in a given
// context and suggests it again later, with older picks counting for less.
namespace McBopomofo {
// NOTE(review): this using-directive sits in a header, so every file that
// includes it sees the Formosa::Gramambular names inside McBopomofo.
using namespace Formosa::Gramambular;
// Bounded store of per-context observations. Contexts are kept in
// most-recently-observed order and the oldest is dropped once more than
// `capacity` contexts are held.
class UserOverrideModel {
public:
// capacity: maximum number of contexts kept; must be greater than zero.
// decayConstant: half-life of an observation, in the same unit as the
// timestamps given to observe() and suggest(); the decay exponent is
// log(0.5) / decayConstant.
UserOverrideModel(size_t capacity, double decayConstant);
// Records that `candidate` was chosen at `timestamp` in the context formed
// by `walkedNodes` up to `cursorIndex`.
void observe(const std::vector<NodeAnchor>& walkedNodes,
size_t cursorIndex,
const string& candidate,
double timestamp);
// Returns the best-scoring remembered candidate for the context formed by
// `walkedNodes` up to `cursorIndex` as of `timestamp`, or an empty string
// when nothing applies.
string suggest(const std::vector<NodeAnchor>& walkedNodes,
size_t cursorIndex,
double timestamp);
private:
// Statistics for one candidate within one context.
struct Override {
// Number of times this candidate was picked.
size_t count;
// Timestamp of the most recent pick.
double timestamp;
Override() : count(0), timestamp(0.0) {}
};
// Everything observed for one context.
struct Observation {
// Total number of picks recorded for this context, over all candidates.
size_t count;
// Per-candidate statistics, keyed by candidate value.
std::map<std::string, Override> overrides;
Observation() : count(0) {}
// Adds one pick of `candidate` made at `timestamp`.
void update(const string& candidate, double timestamp);
};
// A context key together with its observation; the element type of the list.
typedef std::pair<std::string, Observation> KeyObservationPair;
// Maximum number of contexts held.
size_t m_capacity;
// log(0.5) / decayConstant; multiplied by elapsed time inside exp().
double m_decayExponent;
// Contexts ordered from most to least recently observed.
std::list<KeyObservationPair> m_lruList;
// Index from context key to its position in m_lruList.
std::map<std::string, std::list<KeyObservationPair>::iterator> m_lruMap;
};
}; // namespace McBopomofo
#endif