Lukhnos: Tweaks to the exponential decay-based user candidate override model.

This commit is contained in:
ShikiSuen 2022-01-07 02:56:38 +08:00
parent 762f0f95af
commit 89e663ebf6
2 changed files with 61 additions and 199 deletions

View File

@ -47,8 +47,19 @@ namespace Formosa {
size_t width() const;
vector<NodeAnchor> nodesEndingAt(size_t inLocation);
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
void fixNodeSelectedCandidate(size_t location, const string& value);
// "Freeze" the node with the unigram that represents the selected candidate value.
// After this, the node that contains the unigram will always be evaluated to that
// unigram, while all other overlapping nodes will be reset to their initial state
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.)
void fixNodeSelectedCandidate(size_t location, const string& value);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only
// boost the unigram that represents the value with an overriding score. This
// has the same side effect as fixNodeSelectedCandidate, which is that all other
// overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore);
const string dumpDOT();
protected:
@ -195,7 +206,25 @@ namespace Formosa {
}
}
inline const string Grid::dumpDOT()
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore)
{
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
inline const string Grid::dumpDOT()
{
stringstream sst;
sst << "digraph {" << endl;

View File

@ -76,7 +76,6 @@ static NSString *const kCandidateListTextSizeKey = @"CandidateListTextSize";
static NSString *const kSelectPhraseAfterCursorAsCandidatePreferenceKey = @"SelectPhraseAfterCursorAsCandidate";
static NSString *const kUseHorizontalCandidateListPreferenceKey = @"UseHorizontalCandidateList";
static NSString *const kComposingBufferSizePreferenceKey = @"ComposingBufferSize";
static NSString *const kDisableUserCandidateSelectionLearning = @"DisableUserCandidateSelectionLearning";
static NSString *const kChooseCandidateUsingSpaceKey = @"ChooseCandidateUsingSpaceKey";
static NSString *const kChineseConversionEnabledKey = @"ChineseConversionEnabledKey";
static NSString *const kEscToCleanInputBufferKey = @"EscToCleanInputBufferKey";
@ -104,9 +103,6 @@ enum {
kDeleteKeyCode = 117
};
// a global object for saving the "learned" user candidate selections
NSMutableDictionary *gCandidateLearningDictionary = nil;
NSString *gUserCandidatesDictionaryPath = nil;
VTCandidateController *gCurrentCandidateController = nil;
// if DEBUG is defined, a DOT file (GraphViz format) will be written to the
@ -119,6 +115,10 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot";
FastLM gLanguageModel;
FastLM gLanguageModelSimpBopomofo;
// First constructor argument of the global user override model below.
// NOTE(review): presumably the maximum number of observations the model keeps —
// confirm against vChewing::UserOverrideModel.
static const int kUserOverrideModelCapacity = 500;
// Second constructor argument of the model. The timestamps passed to the model
// elsewhere in this file come from -timeIntervalSince1970 (seconds), so 5400.0
// corresponds to 1.5 hours.
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
// File-level model instance, built from the two constants above.
vChewing::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
// https://clang-analyzer.llvm.org/faq.html
__attribute__((annotate("returns_localized_nsstring")))
static inline NSString *LocalizationNotNeeded(NSString *s) {
@ -133,10 +133,7 @@ static inline NSString *LocalizationNotNeeded(NSString *s) {
- (void)collectCandidates;
- (size_t)actualCandidateCursorIndex;
- (NSString *)neighborTrigramString;
- (void)_performDeferredSaveUserCandidatesDictionary;
- (void)saveUserCandidatesDictionary;
- (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client;
- (void)beep;
@ -153,6 +150,19 @@ public:
}
};
static const double kEpsilon = 0.000001;
// Returns the largest highestUnigramScore() found among `nodes`, floored at
// 0.0, plus `epsilon`. An empty list therefore yields exactly `epsilon`.
static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon) {
    double best = 0.0;
    for (const auto& anchor : nodes) {
        const double candidate = anchor.node->highestUnigramScore();
        best = (candidate > best) ? candidate : best;
    }
    return best + epsilon;
}
@implementation vChewingInputMethodController
- (void)dealloc
{
@ -185,6 +195,7 @@ public:
// create the lattice builder
_languageModel = &gLanguageModel;
_builder = new BlockReadingBuilder(_languageModel);
_uom = &gUserOverrideModel;
// each Mandarin syllable is separated by a hyphen
_builder->setJoinSeparator("-");
@ -192,11 +203,6 @@ public:
// create the composing buffer
_composingBuffer = [[NSMutableString alloc] init];
// populate the settings, by default, DISABLE user candidate learning
if (![[NSUserDefaults standardUserDefaults] objectForKey:kDisableUserCandidateSelectionLearning]) {
[[NSUserDefaults standardUserDefaults] setObject:(id)kCFBooleanTrue forKey:kDisableUserCandidateSelectionLearning];
}
_inputMode = kBopomofoModeIdentifier;
_chineseConversionEnabled = [[NSUserDefaults standardUserDefaults] boolForKey:kChineseConversionEnabledKey];
_previousChineseConversionEnabledStatus = _chineseConversionEnabled;
@ -215,30 +221,6 @@ public:
NSMenuItem *preferenceMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"vChewing Preferences", @"") action:@selector(showPreferences:) keyEquivalent:@""];
[menu addItem:preferenceMenuItem];
// If Option key is pressed, show the learning-related menu
#if DEBUG
//I think the following line is 10.6+ specific
if ([[NSEvent class] respondsToSelector:@selector(modifierFlags)] && ([NSEvent modifierFlags] & NSAlternateKeyMask)) {
BOOL learningEnabled = ![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning];
NSMenuItem *learnMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Enable Selection Learning", @"") action:@selector(toggleLearning:) keyEquivalent:@""];
learnMenuItem.state = learningEnabled ? NSControlStateValueOn : NSControlStateValueOff;
[menu addItem:learnMenuItem];
if (learningEnabled) {
NSString *clearMenuItemTitle = [NSString stringWithFormat:NSLocalizedString(@"Clear Learning Dictionary (%ju Items)", @""), (uintmax_t)[gCandidateLearningDictionary count]];
NSMenuItem *clearMenuItem = [[NSMenuItem alloc] initWithTitle:clearMenuItemTitle action:@selector(clearLearningDictionary:) keyEquivalent:@""];
[menu addItem:clearMenuItem];
NSMenuItem *dumpMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Dump Learning Data to Console", @"") action:@selector(dumpLearningDictionary:) keyEquivalent:@""];
[menu addItem:dumpMenuItem];
}
}
#endif //DEBUG
NSMenuItem *chineseConversionMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Chinese Conversion", @"") action:@selector(toggleChineseConverter:) keyEquivalent:@"K"];
chineseConversionMenuItem.keyEquivalentModifierMask = NSEventModifierFlagCommand;
chineseConversionMenuItem.state = _chineseConversionEnabled ? NSControlStateValueOn : NSControlStateValueOff;
@ -700,16 +682,15 @@ public:
// then walk the lattice
[self popOverflowComposingTextAndWalk:client];
// see if we need to override the selection if a learned one exists
if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) {
NSString *trigram = [self neighborTrigramString];
// Lookup from the user dict to see if the trigram fit or not
NSString *overrideCandidateString = [gCandidateLearningDictionary objectForKey:trigram];
if (overrideCandidateString) {
[self candidateSelected:(NSAttributedString *)overrideCandidateString];
}
}
// get user override model suggestion
string overrideValue =
_uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]);
if (!overrideValue.empty()) {
size_t cursorIndex = [self actualCandidateCursorIndex];
vector<NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
double highestScore = FindHighestScore(nodes, kEpsilon);
_builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, highestScore);
}
// then update the text
_bpmfReadingBuffer->clear();
@ -1250,78 +1231,6 @@ public:
return cursorIndex;
}
// Builds the key used for user candidate selection learning: the keys of the
// walked node at the candidate cursor, the walked node before it and the
// walked node after it, joined with "-". Neighbors that do not exist are
// omitted, so the result may hold fewer than three terms (or be empty).
- (NSString *)neighborTrigramString
{
    size_t cursorIndex = [self actualCandidateCursorIndex];

    const Node* prev = 0;
    const Node* current = 0;
    const Node* next = 0;

    // Walk the nodes, accumulating their spanning lengths, until the node that
    // reaches the cursor is found. `prev` is only assigned at that moment; if
    // the loop runs to the end, `current` is the last non-null node and `prev`
    // stays null.
    size_t wni = 0;
    size_t wnc = _walkedNodes.size();
    size_t accuSpanningLength = 0;
    for (wni = 0; wni < wnc; wni++) {
        NodeAnchor& anchor = _walkedNodes[wni];
        if (!anchor.node) {
            continue;
        }

        accuSpanningLength += anchor.spanningLength;
        if (accuSpanningLength >= cursorIndex) {
            prev = current;
            current = anchor.node;
            break;
        }

        current = anchor.node;
    }

    // When the loop ran to completion wni == wnc, so this test is false and
    // `next` stays null.
    if (wni + 1 < wnc) {
        next = _walkedNodes[wni + 1].node;
    }

    NSMutableArray *termArray = [NSMutableArray array];
    const Node* neighbors[3] = { prev, current, next };
    for (size_t i = 0; i < 3; i++) {
        const Node* neighbor = neighbors[i];
        if (!neighbor) {
            continue;
        }

        const string key = neighbor->currentKeyValue().key;
        // +stringWithUTF8String: yields nil for bytes that are not valid UTF-8,
        // and -addObject: raises on nil, so such a key is skipped instead.
        NSString *term = [NSString stringWithUTF8String:key.c_str()];
        if (term) {
            [termArray addObject:term];
        }
    }

    return [termArray componentsJoinedByString:@"-"];
}
// Writes the learned-candidate dictionary to the user dictionary file.
// Does nothing when either the dictionary or the destination path has not
// been set up; this method is also invoked directly (not only through the
// deferred save), so the path cannot be assumed to be non-nil here.
- (void)_performDeferredSaveUserCandidatesDictionary
{
    if (!gCandidateLearningDictionary || !gUserCandidatesDictionaryPath) {
        return;
    }

    // Report a failed write instead of silently discarding the result.
    if (![gCandidateLearningDictionary writeToFile:gUserCandidatesDictionaryPath atomically:YES]) {
        NSLog(@"Failed to write user candidates dictionary to '%@'", gUserCandidatesDictionaryPath);
    }
}
// Schedules a save of the learned-candidate dictionary 5 seconds from now,
// replacing any save that is already pending on this object. No-op while the
// dictionary path is unset.
- (void)saveUserCandidatesDictionary
{
    if (gUserCandidatesDictionaryPath) {
        SEL deferredSave = @selector(_performDeferredSaveUserCandidatesDictionary);

        // Drop a previously scheduled save so that only the latest request fires.
        [NSObject cancelPreviousPerformRequestsWithTarget:self selector:deferredSave object:nil];

        // TODO: Const-ize the delay
        [self performSelector:deferredSave withObject:nil afterDelay:5.0];
    }
}
- (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client
{
// set the candidate panel style
@ -1420,13 +1329,6 @@ public:
[[NSApplication sharedApplication] activateIgnoringOtherApps:YES];
}
// Menu action: flips the stored "disable user candidate selection learning"
// preference. `sender` is unused.
- (void)toggleLearning:(id)sender
{
    NSUserDefaults *defaults = [NSUserDefaults standardUserDefaults];
    BOOL wasDisabled = [defaults boolForKey:kDisableUserCandidateSelectionLearning];
    [defaults setBool:!wasDisabled forKey:kDisableUserCandidateSelectionLearning];
}
- (void)toggleChineseConverter:(id)sender
{
_chineseConversionEnabled = !_chineseConversionEnabled;
@ -1434,17 +1336,6 @@ public:
[[NSNotificationCenter defaultCenter] postNotificationName:@"ChineseConversionStatusChanged" object:nil];
}
// Menu action: empties the learned-candidate dictionary and writes the result
// out right away by calling the save routine directly, rather than going
// through the delayed -saveUserCandidatesDictionary path. `sender` is unused.
- (void)clearLearningDictionary:(id)sender
{
[gCandidateLearningDictionary removeAllObjects];
[self _performDeferredSaveUserCandidatesDictionary];
}
// Menu action: logs the whole learned-candidate dictionary via NSLog.
// `sender` is unused.
- (void)dumpLearningDictionary:(id)sender
{
NSLog(@"%@", gCandidateLearningDictionary);
}
- (NSUInteger)candidateCountForController:(VTCandidateController *)controller
{
return [_candidates count];
@ -1462,15 +1353,10 @@ public:
// candidate selected, override the node with selection
string selectedValue = [[_candidates objectAtIndex:index] UTF8String];
if (![[NSUserDefaults standardUserDefaults] boolForKey:kDisableUserCandidateSelectionLearning]) {
NSString *trigram = [self neighborTrigramString];
NSString *selectedNSString = [NSString stringWithUTF8String:selectedValue.c_str()];
[gCandidateLearningDictionary setObject:selectedNSString forKey:trigram];
[self saveUserCandidatesDictionary];
}
size_t cursorIndex = [self actualCandidateCursorIndex];
_builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue);
_uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]);
[_candidates removeAllObjects];
@ -1512,57 +1398,4 @@ void LTLoadLanguageModel()
{
LTLoadLanguageModelFile(@"data", gLanguageModel);
LTLoadLanguageModelFile(@"data-chs", gLanguageModelSimpBopomofo);
// initialize the singleton learning dictionary
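// Putting singleton initialization in @synchronized is the standard way in
// Objective-C to avoid a race condition, but note that no such guard is
// actually applied here. NOTE(review): presumably this function is only
// called once at startup — confirm with the callers.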
gCandidateLearningDictionary = [[NSMutableDictionary alloc] init];
// the first instance is also responsible for loading the dictionary
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
if (![paths count]) {
NSLog(@"Fatal error: cannot find Applicaiton Support directory.");
return;
}
NSString *appSupportPath = [paths objectAtIndex:0];
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"];
BOOL isDir = NO;
BOOL exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictPath isDirectory:&isDir];
if (exists) {
if (!isDir) {
NSLog(@"Fatal error: Path '%@' is not a directory", userDictPath);
return;
}
}
else {
NSError *error = nil;
BOOL success = [[NSFileManager defaultManager] createDirectoryAtPath:userDictPath withIntermediateDirectories:YES attributes:nil error:&error];
if (!success) {
NSLog(@"Failed to create directory '%@', error: %@", userDictPath, error);
return;
}
}
// TODO: Change this
NSString *userDictFile = [userDictPath stringByAppendingPathComponent:@"UserCandidatesCache.plist"];
gUserCandidatesDictionaryPath = userDictFile;
exists = [[NSFileManager defaultManager] fileExistsAtPath:userDictFile isDirectory:&isDir];
if (exists && !isDir) {
NSData *data = [NSData dataWithContentsOfFile:userDictFile];
if (!data) {
return;
}
id plist = [NSPropertyListSerialization propertyListWithData:data options:NSPropertyListImmutable format:NULL error:NULL];
if (plist && [plist isKindOfClass:[NSDictionary class]]) {
[gCandidateLearningDictionary setDictionary:(NSDictionary *)plist];
NSLog(@"User dictionary read, item count: %ju", (uintmax_t)[gCandidateLearningDictionary count]);
}
}
}