Merge pull request #207 from openvanilla/rebased-user-override-model
Implements an exponential decay-based user candidate override model, rebased from #135
This commit is contained in:
commit
39cdc7d73d
|
@ -49,6 +49,7 @@
|
|||
6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */ = {isa = PBXBuildFile; fileRef = 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */; };
|
||||
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; };
|
||||
D48550A325EBE689006A204C /* OpenCC in Frameworks */ = {isa = PBXBuildFile; productRef = D48550A225EBE689006A204C /* OpenCC */; };
|
||||
6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
|
@ -211,6 +212,8 @@
|
|||
6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = OVNonModalAlertWindowController.m; sourceTree = "<group>"; };
|
||||
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; };
|
||||
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; };
|
||||
6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = UserOverrideModel.cpp; sourceTree = "<group>"; };
|
||||
6AE30A481F7F40B7008735BD /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserOverrideModel.h; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
|
@ -286,6 +289,10 @@
|
|||
6A0D4ECC15FC0D6400ABF4B3 /* PreferencesWindowController.m */,
|
||||
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */,
|
||||
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */,
|
||||
6A0D4ECD15FC0D6400ABF4B3 /* UpdateNotificationController.h */,
|
||||
6A0D4ECE15FC0D6400ABF4B3 /* UpdateNotificationController.m */,
|
||||
6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */,
|
||||
6AE30A481F7F40B7008735BD /* UserOverrideModel.h */,
|
||||
);
|
||||
path = Source;
|
||||
sourceTree = "<group>";
|
||||
|
@ -647,6 +654,7 @@
|
|||
6A0D4F0015FC0DA600ABF4B3 /* VTHorizontalCandidateView.m in Sources */,
|
||||
6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */,
|
||||
6A0D4F0115FC0DA600ABF4B3 /* VTVerticalCandidateController.m in Sources */,
|
||||
6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */,
|
||||
6A0D4F0215FC0DA600ABF4B3 /* VTVerticalCandidateTableView.m in Sources */,
|
||||
6A0D4F0315FC0DA600ABF4B3 /* VTVerticalKeyLabelStripView.m in Sources */,
|
||||
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */,
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
#ifndef Bigram_h
|
||||
#define Bigram_h
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "KeyValuePair.h"
|
||||
|
||||
namespace Formosa {
|
||||
|
|
|
@ -199,7 +199,7 @@ namespace Formosa {
|
|||
}
|
||||
}
|
||||
|
||||
const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator)
|
||||
inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator)
|
||||
{
|
||||
string result;
|
||||
for (vector<string>::const_iterator iter = begin ; iter != end ; ) {
|
||||
|
|
|
@ -47,7 +47,18 @@ namespace Formosa {
|
|||
size_t width() const;
|
||||
vector<NodeAnchor> nodesEndingAt(size_t inLocation);
|
||||
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
|
||||
|
||||
// "Freeze" the node with the unigram that represents the selected candidate value.
|
||||
// After this, the node that contains the unigram will always be evaluated to that
|
||||
// unigram, while all other overlapping nodes will be reset to their initial state
|
||||
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.)
|
||||
void fixNodeSelectedCandidate(size_t location, const string& value);
|
||||
|
||||
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only
|
||||
// boost the unigram that represents the value with an overriding score. This
|
||||
// has the same side effect as fixNodeSelectedCandidate, which is that all other
|
||||
// overlapping nodes will be reset to their initial state.
|
||||
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore);
|
||||
|
||||
const string dumpDOT();
|
||||
|
||||
|
@ -194,6 +205,24 @@ namespace Formosa {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore)
|
||||
{
|
||||
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
||||
for (auto nodeAnchor : nodes) {
|
||||
auto candidates = nodeAnchor.node->candidates();
|
||||
|
||||
// Reset the candidate-fixed state of every node at the location.
|
||||
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
|
||||
|
||||
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
|
||||
if (candidates[i].value == value) {
|
||||
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline const string Grid::dumpDOT()
|
||||
{
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#ifndef KeyValuePair_h
|
||||
#define KeyValuePair_h
|
||||
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
|
||||
namespace Formosa {
|
||||
|
|
|
@ -47,10 +47,12 @@ namespace Formosa {
|
|||
const vector<KeyValuePair>& candidates() const;
|
||||
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true);
|
||||
void resetCandidate();
|
||||
void selectFloatingCandidateAtIndex(size_t index, double score);
|
||||
|
||||
const string& key() const;
|
||||
double score() const;
|
||||
const KeyValuePair currentKeyValue() const;
|
||||
double highestUnigramScore() const;
|
||||
|
||||
protected:
|
||||
const LanguageModel* m_LM;
|
||||
|
@ -175,6 +177,16 @@ namespace Formosa {
|
|||
m_score = m_unigrams[0].score;
|
||||
}
|
||||
}
|
||||
|
||||
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
|
||||
if (index >= m_unigrams.size()) {
|
||||
m_selectedUnigramIndex = 0;
|
||||
} else {
|
||||
m_selectedUnigramIndex = index;
|
||||
}
|
||||
m_candidateFixed = false;
|
||||
m_score = score;
|
||||
}
|
||||
|
||||
inline const string& Node::key() const
|
||||
{
|
||||
|
@ -185,6 +197,13 @@ namespace Formosa {
|
|||
{
|
||||
return m_score;
|
||||
}
|
||||
|
||||
inline double Node::highestUnigramScore() const {
|
||||
if (m_unigrams.empty()) {
|
||||
return 0.0;
|
||||
}
|
||||
return m_unigrams[0].score;
|
||||
}
|
||||
|
||||
inline const KeyValuePair Node::currentKeyValue() const
|
||||
{
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#import "Mandarin.h"
|
||||
#import "Gramambular.h"
|
||||
#import "FastLM.h"
|
||||
#import "UserOverrideModel.h"
|
||||
|
||||
@interface McBopomofoInputMethodController : IMKInputController
|
||||
{
|
||||
|
@ -53,6 +54,9 @@
|
|||
// latest walked path (trellis) using the Viterbi algorithm
|
||||
std::vector<Formosa::Gramambular::NodeAnchor> _walkedNodes;
|
||||
|
||||
// user override model
|
||||
McBopomofo::UserOverrideModel *_uom;
|
||||
|
||||
// the latest composing buffer that is updated to the foreground app
|
||||
NSMutableString *_composingBuffer;
|
||||
NSInteger _latestReadingCursor;
|
||||
|
|
|
@ -76,7 +76,6 @@ static NSString *const kCandidateListTextSizeKey = @"CandidateListTextSize";
|
|||
static NSString *const kSelectPhraseAfterCursorAsCandidatePreferenceKey = @"SelectPhraseAfterCursorAsCandidate";
|
||||
static NSString *const kUseHorizontalCandidateListPreferenceKey = @"UseHorizontalCandidateList";
|
||||
static NSString *const kComposingBufferSizePreferenceKey = @"ComposingBufferSize";
|
||||
static NSString *const kDisableUserCandidateSelectionLearning = @"DisableUserCandidateSelectionLearning";
|
||||
static NSString *const kChooseCandidateUsingSpaceKey = @"ChooseCandidateUsingSpaceKey";
|
||||
static NSString *const kChineseConversionEnabledKey = @"ChineseConversionEnabledKey";
|
||||
static NSString *const kEscToCleanInputBufferKey = @"EscToCleanInputBufferKey";
|
||||
|
@ -104,9 +103,6 @@ enum {
|
|||
kDeleteKeyCode = 117
|
||||
};
|
||||
|
||||
// a global object for saving the "learned" user candidate selections
|
||||
NSMutableDictionary *gCandidateLearningDictionary = nil;
|
||||
NSString *gUserCandidatesDictionaryPath = nil;
|
||||
VTCandidateController *gCurrentCandidateController = nil;
|
||||
|
||||
// if DEBUG is defined, a DOT file (GraphViz format) will be written to the
|
||||
|
@ -119,6 +115,10 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
|
|||
FastLM gLanguageModel;
|
||||
FastLM gLanguageModelPlainBopomofo;
|
||||
|
||||
static const int kUserOverrideModelCapacity = 500;
|
||||
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
|
||||
McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||
|
||||
// https://clang-analyzer.llvm.org/faq.html
|
||||
__attribute__((annotate("returns_localized_nsstring")))
|
||||
static inline NSString *LocalizationNotNeeded(NSString *s) {
|
||||
|
@ -133,10 +133,7 @@ static inline NSString *LocalizationNotNeeded(NSString *s) {
|
|||
- (void)collectCandidates;
|
||||
|
||||
- (size_t)actualCandidateCursorIndex;
|
||||
- (NSString *)neighborTrigramString;
|
||||
|
||||
- (void)_performDeferredSaveUserCandidatesDictionary;
|
||||
- (void)saveUserCandidatesDictionary;
|
||||
- (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client;
|
||||
|
||||
- (void)beep;
|
||||
|
@ -153,6 +150,19 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
static const double kEpsilon = 0.000001;
|
||||
|
||||
static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon) {
|
||||
double highestScore = 0.0;
|
||||
for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) {
|
||||
double score = ni->node->highestUnigramScore();
|
||||
if (score > highestScore) {
|
||||
highestScore = score;
|
||||
}
|
||||
}
|
||||
return highestScore + epsilon;
|
||||
}
|
||||
|
||||
@implementation McBopomofoInputMethodController
|
||||
- (void)dealloc
|
||||
{
|
||||
|
@ -183,6 +193,7 @@ public:
|
|||
// create the lattice builder
|
||||
_languageModel = &gLanguageModel;
|
||||
_builder = new BlockReadingBuilder(_languageModel);
|
||||
_uom = &gUserOverrideModel;
|
||||
|
||||
// each Mandarin syllable is separated by a hyphen
|
||||
_builder->setJoinSeparator("-");
|
||||
|
@ -190,11 +201,6 @@ public:
|
|||
// create the composing buffer
|
||||
_composingBuffer = [[NSMutableString alloc] init];
|
||||
|
||||
// populate the settings, by default, DISABLE user candidate learning
|
||||
if (![[NSUserDefaults standardUserDefaults] objectForKey:kDisableUserCandidateSelectionLearning]) {
|
||||
[[NSUserDefaults standardUserDefaults] setObject:(id)kCFBooleanTrue forKey:kDisableUserCandidateSelectionLearning];
|
||||
}
|
||||
|
||||
_inputMode = kBopomofoModeIdentifier;
|
||||
_chineseConversionEnabled = [[NSUserDefaults standardUserDefaults] boolForKey:kChineseConversionEnabledKey];
|
||||
}
|
||||
|
@ -209,30 +215,6 @@ public:
|
|||
NSMenuItem *preferenceMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"McBopomofo Preferences", @"") action:@selector(showPreferences:) keyEquivalent:@""];
|
||||
[menu addItem:preferenceMenuItem];
|
||||
|
||||
// If Option key is pressed, show the learning-related menu
|
||||
|
||||
#if DEBUG
|
||||
//I think the following line is 10.6+ specific
|
||||
if ([[NSEvent class] respondsToSelector:@selector(modifierFlags)] && ([NSEvent modifierFlags] & NSAlternateKeyMask)) {
|
||||
|
||||
BOOL learningEnabled = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning];
|
||||
|
||||
NSMenuItem *learnMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Enable Selection Learning", @"") action:@selector(toggleLearning:) keyEquivalent:@""];
|
||||
learnMenuItem.state = learningEnabled ? NSControlStateValueOn : NSControlStateValueOff;
|
||||
[menu addItem:learnMenuItem];
|
||||
|
||||
if (learningEnabled) {
|
||||
NSString *clearMenuItemTitle = [NSString stringWithFormat:NSLocalizedString(@"Clear Learning Dictionary (%ju Items)", @""), (uintmax_t)[gCandidateLearningDictionary count]];
|
||||
NSMenuItem *clearMenuItem = [[NSMenuItem alloc] initWithTitle:clearMenuItemTitle action:@selector(clearLearningDictionary:) keyEquivalent:@""];
|
||||
[menu addItem:clearMenuItem];
|
||||
|
||||
|
||||
NSMenuItem *dumpMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Dump Learning Data to Console", @"") action:@selector(dumpLearningDictionary:) keyEquivalent:@""];
|
||||
[menu addItem:dumpMenuItem];
|
||||
}
|
||||
}
|
||||
#endif //DEBUG
|
||||
|
||||
NSMenuItem *chineseConversionMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Chinese Conversion", @"") action:@selector(toggleChineseConverter:) keyEquivalent:@"G"];
|
||||
chineseConversionMenuItem.keyEquivalentModifierMask = NSEventModifierFlagCommand | NSEventModifierFlagControl;
|
||||
chineseConversionMenuItem.state = _chineseConversionEnabled ? NSControlStateValueOn : NSControlStateValueOff;
|
||||
|
@ -695,15 +677,15 @@ public:
|
|||
// then walk the lattice
|
||||
[self popOverflowComposingTextAndWalk:client];
|
||||
|
||||
// see if we need to override the selection if a learned one exists
|
||||
if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) {
|
||||
NSString *trigram = [self neighborTrigramString];
|
||||
|
||||
// Lookup from the user dict to see if the trigram fit or not
|
||||
NSString *overrideCandidateString = [gCandidateLearningDictionary objectForKey:trigram];
|
||||
if (overrideCandidateString) {
|
||||
[self candidateSelected:(NSAttributedString *)overrideCandidateString];
|
||||
}
|
||||
// get user override model suggestion
|
||||
string overrideValue =
|
||||
(_inputMode == kPlainBopomofoModeIdentifier) ? "" :
|
||||
_uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]);
|
||||
if (!overrideValue.empty()) {
|
||||
size_t cursorIndex = [self actualCandidateCursorIndex];
|
||||
vector<NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
|
||||
double highestScore = FindHighestScore(nodes, kEpsilon);
|
||||
_builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, highestScore);
|
||||
}
|
||||
|
||||
// then update the text
|
||||
|
@ -1292,78 +1274,6 @@ public:
|
|||
return cursorIndex;
|
||||
}
|
||||
|
||||
// Builds the hyphen-joined key used for user candidate-selection learning.
// The key is made of the keys of up to three walked nodes: the node walked
// just before the one covering the candidate cursor, the covering node itself,
// and the node that follows it.
- (NSString *)neighborTrigramString
{
    size_t cursorIndex = [self actualCandidateCursorIndex];

    // NOTE(review): the result of this lookup is never read; the call is kept
    // only so this method performs exactly the same calls as before.
    vector<NodeAnchor> crossingNodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);

    const Node *before = NULL;
    const Node *atCursor = NULL;
    const Node *after = NULL;

    // Walk the path, accumulating spanning lengths until the cursor is covered.
    size_t position = 0;
    const size_t walkedCount = _walkedNodes.size();
    size_t coveredLength = 0;
    for (; position < walkedCount; ++position) {
        NodeAnchor &anchor = _walkedNodes[position];
        if (anchor.node == NULL) {
            continue;
        }

        coveredLength += anchor.spanningLength;
        const BOOL reachedCursor = (coveredLength >= cursorIndex);
        if (reachedCursor) {
            // Only once the cursor is covered does the last seen node become
            // the "before" term.
            before = atCursor;
        }
        atCursor = anchor.node;
        if (reachedCursor) {
            break;
        }
    }

    // The entry right after the stopping position, if any, is the "after" term.
    if (position + 1 < walkedCount) {
        after = _walkedNodes[position + 1].node;
    }

    // Emit the keys of whichever neighbors exist, in before/at/after order.
    const Node *neighbors[] = { before, atCursor, after };
    NSMutableArray *terms = [NSMutableArray array];
    for (size_t i = 0; i < 3; ++i) {
        if (!neighbors[i]) {
            continue;
        }
        string key = neighbors[i]->currentKeyValue().key;
        [terms addObject:[NSString stringWithUTF8String:key.c_str()]];
    }

    return [terms componentsJoinedByString:@"-"];
}
|
||||
|
||||
// Writes the learning dictionary to gUserCandidatesDictionaryPath atomically.
// The outcome of the write is deliberately ignored.
- (void)_performDeferredSaveUserCandidatesDictionary
{
    (void)[gCandidateLearningDictionary writeToFile:gUserCandidatesDictionaryPath atomically:YES];
}
|
||||
|
||||
// Schedules a save of the learning dictionary five seconds from now,
// cancelling any save that was already pending on this object. Does nothing
// when no dictionary path has been set.
- (void)saveUserCandidatesDictionary
{
    if (gUserCandidatesDictionaryPath == nil) {
        return;
    }

    SEL deferredSave = @selector(_performDeferredSaveUserCandidatesDictionary);

    // Drop the pending request first so repeated calls collapse into one write.
    [NSObject cancelPreviousPerformRequestsWithTarget:self selector:deferredSave object:nil];

    // TODO: Const-ize the delay
    [self performSelector:deferredSave withObject:nil afterDelay:5.0];
}
|
||||
|
||||
- (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client
|
||||
{
|
||||
// set the candidate panel style
|
||||
|
@ -1467,30 +1377,12 @@ public:
|
|||
[[NSApplication sharedApplication] activateIgnoringOtherApps:YES];
|
||||
}
|
||||
|
||||
// Flips the stored "disable user candidate selection learning" preference.
- (void)toggleLearning:(id)sender
{
    NSUserDefaults *defaults = [NSUserDefaults standardUserDefaults];
    BOOL currentlyDisabled = [defaults boolForKey:kDisableUserCandidateSelectionLearning];
    [defaults setBool:!currentlyDisabled forKey:kDisableUserCandidateSelectionLearning];
}
|
||||
|
||||
// Flips the Chinese conversion flag and stores the new value in user defaults.
- (void)toggleChineseConverter:(id)sender
{
    BOOL nowEnabled = !_chineseConversionEnabled;
    _chineseConversionEnabled = nowEnabled;
    [[NSUserDefaults standardUserDefaults] setBool:nowEnabled forKey:kChineseConversionEnabledKey];
}
|
||||
|
||||
// Empties the learning dictionary and writes the now-empty dictionary out
// immediately (the save routine is called directly, not scheduled).
- (void)clearLearningDictionary:(id)sender
{
    [gCandidateLearningDictionary removeAllObjects];

    [self _performDeferredSaveUserCandidatesDictionary];
}
|
||||
|
||||
// Logs the full contents of the learning dictionary via NSLog.
- (void)dumpLearningDictionary:(id)sender
{
    NSMutableDictionary *learned = gCandidateLearningDictionary;
    NSLog(@"%@", learned);
}
|
||||
|
||||
- (NSUInteger)candidateCountForController:(VTCandidateController *)controller
|
||||
{
|
||||
return [_candidates count];
|
||||
|
@ -1508,15 +1400,11 @@ public:
|
|||
// candidate selected, override the node with selection
|
||||
string selectedValue = [[_candidates objectAtIndex:index] UTF8String];
|
||||
|
||||
if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) {
|
||||
NSString *trigram = [self neighborTrigramString];
|
||||
NSString *selectedNSString = [NSString stringWithUTF8String:selectedValue.c_str()];
|
||||
[gCandidateLearningDictionary setObject:selectedNSString forKey:trigram];
|
||||
[self saveUserCandidatesDictionary];
|
||||
}
|
||||
|
||||
size_t cursorIndex = [self actualCandidateCursorIndex];
|
||||
_builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue);
|
||||
if (_inputMode != kPlainBopomofoModeIdentifier) {
|
||||
_uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]);
|
||||
}
|
||||
|
||||
[_candidates removeAllObjects];
|
||||
|
||||
|
@ -1545,57 +1433,4 @@ void LTLoadLanguageModel()
|
|||
{
|
||||
LTLoadLanguageModelFile(@"data", gLanguageModel);
|
||||
LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo);
|
||||
|
||||
|
||||
// initialize the singleton learning dictionary
|
||||
// putting singleton in @synchronized is the standard way in Objective-C
|
||||
// to avoid race condition
|
||||
gCandidateLearningDictionary = [[NSMutableDictionary alloc] init];
|
||||
|
||||
// the first instance is also responsible for loading the dictionary
|
||||
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
|
||||
if (![paths count]) {
|
||||
NSLog(@"Fatal error: cannot find Applicaiton Support directory.");
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *appSupportPath = [paths objectAtIndex:0];
|
||||
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"McBopomofo"];
|
||||
|
||||
BOOL isDir = NO;
|
||||
BOOL exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictPath isDirectory:&isDir];
|
||||
|
||||
if (exists) {
|
||||
if (!isDir) {
|
||||
NSLog(@"Fatal error: Path '%@' is not a directory", userDictPath);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
NSError *error = nil;
|
||||
BOOL success = [[NSFileManager defaultManager] createDirectoryAtPath:userDictPath withIntermediateDirectories:YES attributes:nil error:&error];
|
||||
if (!success) {
|
||||
NSLog(@"Failed to create directory '%@', error: %@", userDictPath, error);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Change this
|
||||
NSString *userDictFile = [userDictPath stringByAppendingPathComponent:@"UserCandidatesCache.plist"];
|
||||
gUserCandidatesDictionaryPath = userDictFile;
|
||||
|
||||
exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictFile isDirectory:&isDir];
|
||||
if (exists && !isDir) {
|
||||
NSData *data = [NSData dataWithContentsOfFile:userDictFile];
|
||||
if (!data) {
|
||||
return;
|
||||
}
|
||||
|
||||
id plist = [NSPropertyListSerialization propertyListWithData:data options:NSPropertyListImmutable format:NULL error:NULL];
|
||||
if (plist && [plist isKindOfClass:[NSDictionary class]]) {
|
||||
[gCandidateLearningDictionary setDictionary:(NSDictionary *)plist];
|
||||
NSLog(@"User dictionary read, item count: %ju", (uintmax_t)[gCandidateLearningDictionary count]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,219 @@
|
|||
//
|
||||
// UserOverrideModel.cpp
|
||||
//
|
||||
// Copyright (c) 2017 The McBopomofo Project.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
|
||||
#include "UserOverrideModel.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <sstream>
|
||||
|
||||
using namespace McBopomofo;
|
||||
|
||||
// An override that has decayed through about 20 half-lives (2^-20) is treated as expired.
|
||||
static const double DecayThreshould = 1.0 / 1048576.0;
|
||||
|
||||
static double Score(size_t eventCount,
|
||||
size_t totalCount,
|
||||
double eventTimestamp,
|
||||
double timestamp,
|
||||
double lambda);
|
||||
static bool IsEndingPunctuation(const string& value);
|
||||
static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
|
||||
size_t cursorIndex);
|
||||
|
||||
// Creates a model that keeps observations for at most `capacity` contexts.
// `decayConstant` acts as a half-life: the stored exponent is
// ln(0.5) / decayConstant, so a score multiplied by
// exp(elapsed * exponent) halves every `decayConstant` units of elapsed time.
UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant)
    : m_capacity(capacity),
      m_decayExponent(log(0.5) / decayConstant) {
    assert(m_capacity > 0);
}
|
||||
|
||||
void UserOverrideModel::observe(const std::vector<NodeAnchor>& walkedNodes,
|
||||
size_t cursorIndex,
|
||||
const string& candidate,
|
||||
double timestamp) {
|
||||
string key = WalkedNodesToKey(walkedNodes, cursorIndex);
|
||||
auto mapIter = m_lruMap.find(key);
|
||||
if (mapIter == m_lruMap.end()) {
|
||||
auto keyValuePair = KeyObservationPair(key, Observation());
|
||||
Observation& observation = keyValuePair.second;
|
||||
observation.update(candidate, timestamp);
|
||||
|
||||
m_lruList.push_front(keyValuePair);
|
||||
auto listIter = m_lruList.begin();
|
||||
auto lruKeyValue = std::pair<std::string,
|
||||
std::list<KeyObservationPair>::iterator>(key, listIter);
|
||||
m_lruMap.insert(lruKeyValue);
|
||||
|
||||
if (m_lruList.size() > m_capacity) {
|
||||
auto lastKeyValuePair = m_lruList.end();
|
||||
--lastKeyValuePair;
|
||||
m_lruMap.erase(lastKeyValuePair->first);
|
||||
m_lruList.pop_back();
|
||||
}
|
||||
} else {
|
||||
auto listIter = mapIter->second;
|
||||
m_lruList.splice(m_lruList.begin(), m_lruList, listIter);
|
||||
|
||||
auto& keyValuePair = *listIter;
|
||||
Observation& observation = keyValuePair.second;
|
||||
observation.update(candidate, timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the best-scoring previously observed candidate for the context
// described by `walkedNodes` and `cursorIndex`, evaluated at `timestamp`.
// Returns an empty string when the context is unknown or when no override
// has a score above zero. The stored observations are not modified.
string UserOverrideModel::suggest(const std::vector<NodeAnchor>& walkedNodes,
                                  size_t cursorIndex,
                                  double timestamp) {
    const auto found = m_lruMap.find(WalkedNodesToKey(walkedNodes, cursorIndex));
    if (found == m_lruMap.end()) {
        return string();
    }

    const Observation& seen = found->second->second;

    string bestCandidate;
    double bestScore = 0.0;
    for (const auto& entry : seen.overrides) {
        const Override& recorded = entry.second;
        const double current = Score(recorded.count,
                                     seen.count,
                                     recorded.timestamp,
                                     timestamp,
                                     m_decayExponent);
        // A zero score is skipped; anything else must strictly beat the
        // running best to take over.
        if (current != 0.0 && current > bestScore) {
            bestCandidate = entry.first;
            bestScore = current;
        }
    }
    return bestCandidate;
}
|
||||
|
||||
void UserOverrideModel::Observation::update(const string& candidate,
|
||||
double timestamp) {
|
||||
count++;
|
||||
auto& o = overrides[candidate];
|
||||
o.timestamp = timestamp;
|
||||
o.count++;
|
||||
}
|
||||
|
||||
static double Score(size_t eventCount,
|
||||
size_t totalCount,
|
||||
double eventTimestamp,
|
||||
double timestamp,
|
||||
double lambda) {
|
||||
double decay = exp((timestamp - eventTimestamp) * lambda);
|
||||
if (decay < DecayThreshould) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
double prob = (double)eventCount / (double)totalCount;
|
||||
return prob * decay;
|
||||
}
|
||||
|
||||
// Returns true when `value` is exactly one of the punctuation marks that end
// a clause or close a quotation.
//
// The previous list tested the closing double quotation mark twice, so the
// closing single quotation mark was never recognized; it is now included.
// Full-width forms of the comma, exclamation mark and question mark are
// accepted alongside the ASCII forms.
// NOTE(review): the full-width forms assume candidate values use full-width
// punctuation — confirm against the language model data.
static bool IsEndingPunctuation(const string& value) {
    static const char* const kEndingMarks[] = {
        ",", "。", "!", "?",
        "，", "！", "？",
        "」", "』", "”", "’",
    };
    for (const char* mark : kEndingMarks) {
        if (value == mark) {
            return true;
        }
    }
    return false;
}
|
||||
// Renders the node at `r` as a "(key,value)" context term and advances `r`.
// Yields "()" when `r` is already at `rend`, or when the node's value is
// ending punctuation; in the latter case `r` jumps to `rend` so nothing that
// precedes the punctuation contributes any further context.
static string ConsumeContextTerm(
    std::vector<NodeAnchor>::const_reverse_iterator& r,
    const std::vector<NodeAnchor>::const_reverse_iterator& rend) {
    if (r == rend) {
        return "()";
    }

    // Fetch the pair once; it is returned by value.
    const auto keyValue = r->node->currentKeyValue();
    if (IsEndingPunctuation(keyValue.value)) {
        r = rend;
        return "()";
    }

    ++r;
    return "(" + keyValue.key + "," + keyValue.value + ")";
}

// Builds the lookup key for the context at `cursorIndex`.
//
// The walked nodes are scanned from the start until their accumulated
// spanning length reaches `cursorIndex`; the last node scanned is the
// "current" node. The key has the form "(anterior,prev,current)", where
// `current` is the current node's key and `prev` / `anterior` are the
// "(key,value)" terms of the one and two nodes before it, or "()" where no
// such node exists or an ending punctuation mark cuts the context short.
// Returns an empty string when there is no node to use.
static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
                               size_t cursorIndex) {
    std::vector<NodeAnchor> context;
    size_t coveredLength = 0;
    for (const auto& anchor : walkedNodes) {
        // Anchors without a node are skipped rather than dereferenced, as the
        // earlier trigram-based learning code did for the same walked list.
        if (!anchor.node) {
            continue;
        }
        context.push_back(anchor);
        coveredLength += anchor.spanningLength;
        if (coveredLength >= cursorIndex) {
            break;
        }
    }

    std::vector<NodeAnchor>::const_reverse_iterator r = context.rbegin();
    const std::vector<NodeAnchor>::const_reverse_iterator rend = context.rend();
    if (r == rend) {
        return "";
    }

    const string current = r->node->currentKeyValue().key;
    ++r;

    // Order matters: the nearer neighbor is consumed first.
    const string prev = ConsumeContextTerm(r, rend);
    const string anterior = ConsumeContextTerm(r, rend);

    return "(" + anterior + "," + prev + "," + current + ")";
}
|
|
@ -0,0 +1,81 @@
|
|||
//
|
||||
// UserOverrideModel.h
|
||||
//
|
||||
// Copyright (c) 2017 The McBopomofo Project.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
|
||||
#ifndef USEROVERRIDEMODEL_H
|
||||
#define USEROVERRIDEMODEL_H
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "Gramambular.h"
|
||||
|
||||
namespace McBopomofo {
|
||||
|
||||
using namespace Formosa::Gramambular;
|
||||
|
||||
class UserOverrideModel {
|
||||
public:
|
||||
UserOverrideModel(size_t capacity, double decayConstant);
|
||||
|
||||
void observe(const std::vector<NodeAnchor>& walkedNodes,
|
||||
size_t cursorIndex,
|
||||
const string& candidate,
|
||||
double timestamp);
|
||||
|
||||
string suggest(const std::vector<NodeAnchor>& walkedNodes,
|
||||
size_t cursorIndex,
|
||||
double timestamp);
|
||||
|
||||
private:
|
||||
struct Override {
|
||||
size_t count;
|
||||
double timestamp;
|
||||
|
||||
Override() : count(0), timestamp(0.0) {}
|
||||
};
|
||||
|
||||
struct Observation {
|
||||
size_t count;
|
||||
std::map<std::string, Override> overrides;
|
||||
|
||||
Observation() : count(0) {}
|
||||
void update(const string& candidate, double timestamp);
|
||||
};
|
||||
|
||||
typedef std::pair<std::string, Observation> KeyObservationPair;
|
||||
|
||||
size_t m_capacity;
|
||||
double m_decayExponent;
|
||||
std::list<KeyObservationPair> m_lruList;
|
||||
std::map<std::string, std::list<KeyObservationPair>::iterator> m_lruMap;
|
||||
};
|
||||
|
||||
}; // namespace McBopomofo
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue