Adds Language Model Manager.

References to the global language models were stored in the class
InputMethodController. However, the models are global and are not a
part of the input method controller, and the input method controller
only uses one of the models (McBopomofo/Plain Bopomofo). I guess this
somehow violates the Single Responsibility Principle (SRP), and there
should be a better place for the global models.
This commit is contained in:
zonble 2022-01-11 17:12:58 +08:00
parent f339948219
commit 144d133463
7 changed files with 224 additions and 147 deletions

View File

@ -36,6 +36,7 @@
6AE210B215FC63CC003659FE /* PlainBopomofo.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */; }; 6AE210B215FC63CC003659FE /* PlainBopomofo.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */; };
6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; }; 6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; };
6AFF97F2253B299E007F1C49 /* NonModalAlertWindowController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */; }; 6AFF97F2253B299E007F1C49 /* NonModalAlertWindowController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */; };
D41355D8278D74B5005E5CBD /* LanguageModelManager.mm in Sources */ = {isa = PBXBuildFile; fileRef = D41355D7278D7409005E5CBD /* LanguageModelManager.mm */; };
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; }; D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; };
D427F76A278C9E29004A2160 /* CandidateUI in Frameworks */ = {isa = PBXBuildFile; productRef = D427F769278C9E29004A2160 /* CandidateUI */; }; D427F76A278C9E29004A2160 /* CandidateUI in Frameworks */ = {isa = PBXBuildFile; productRef = D427F769278C9E29004A2160 /* CandidateUI */; };
D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427F76B278CA1BA004A2160 /* AppDelegate.swift */; }; D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427F76B278CA1BA004A2160 /* AppDelegate.swift */; };
@ -155,6 +156,8 @@
6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = PlainBopomofo.tiff; sourceTree = "<group>"; }; 6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = PlainBopomofo.tiff; sourceTree = "<group>"; };
6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = "PlainBopomofo@2x.tiff"; sourceTree = "<group>"; }; 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = "PlainBopomofo@2x.tiff"; sourceTree = "<group>"; };
6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = NonModalAlertWindowController.xib; sourceTree = "<group>"; }; 6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = NonModalAlertWindowController.xib; sourceTree = "<group>"; };
D41355D6278D7409005E5CBD /* LanguageModelManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LanguageModelManager.h; sourceTree = "<group>"; };
D41355D7278D7409005E5CBD /* LanguageModelManager.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LanguageModelManager.mm; sourceTree = "<group>"; };
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; }; D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; };
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; }; D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; };
D427F768278C9D0D004A2160 /* CandidateUI */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = CandidateUI; path = Packages/CandidateUI; sourceTree = "<group>"; }; D427F768278C9D0D004A2160 /* CandidateUI */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = CandidateUI; path = Packages/CandidateUI; sourceTree = "<group>"; };
@ -228,6 +231,8 @@
6A0D4F4715FC0EB900ABF4B3 /* Resources */, 6A0D4F4715FC0EB900ABF4B3 /* Resources */,
6A0D4EC615FC0D6400ABF4B3 /* InputMethodController.h */, 6A0D4EC615FC0D6400ABF4B3 /* InputMethodController.h */,
6A0D4EC715FC0D6400ABF4B3 /* InputMethodController.mm */, 6A0D4EC715FC0D6400ABF4B3 /* InputMethodController.mm */,
D41355D6278D7409005E5CBD /* LanguageModelManager.h */,
D41355D7278D7409005E5CBD /* LanguageModelManager.mm */,
6A0D4EC815FC0D6400ABF4B3 /* main.m */, 6A0D4EC815FC0D6400ABF4B3 /* main.m */,
D427F76B278CA1BA004A2160 /* AppDelegate.swift */, D427F76B278CA1BA004A2160 /* AppDelegate.swift */,
D47F7DD4278C25A0002F9DD7 /* InputSourceHelper.swift */, D47F7DD4278C25A0002F9DD7 /* InputSourceHelper.swift */,
@ -551,6 +556,7 @@
D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */, D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */,
6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */, 6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */,
6A0421A815FEF3F50061ED63 /* FastLM.cpp in Sources */, 6A0421A815FEF3F50061ED63 /* FastLM.cpp in Sources */,
D41355D8278D74B5005E5CBD /* LanguageModelManager.mm in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };

View File

@ -51,8 +51,10 @@ class AppDelegate: NSObject, NSApplicationDelegate, NonModalAlertWindowControlle
private var updateNextStepURL: URL? private var updateNextStepURL: URL?
func applicationDidFinishLaunching(_ notification: Notification) { func applicationDidFinishLaunching(_ notification: Notification) {
LTLoadLanguageModel() LanguageModelManager.loadDataModels()
LTLoadUserLanguageModelFile() LanguageModelManager.loadUserPhrasesModel()
// LTLoadLanguageModel()
// LTLoadUserLanguageModelFile()
if UserDefaults.standard.object(forKey: kCheckUpdateAutomatically) == nil { if UserDefaults.standard.object(forKey: kCheckUpdateAutomatically) == nil {
UserDefaults.standard.set(true, forKey: kCheckUpdateAutomatically) UserDefaults.standard.set(true, forKey: kCheckUpdateAutomatically)

View File

@ -49,15 +49,15 @@
Formosa::Gramambular::FastLM *_languageModel; Formosa::Gramambular::FastLM *_languageModel;
Formosa::Gramambular::FastLM *_userPhrasesModel; Formosa::Gramambular::FastLM *_userPhrasesModel;
// user override model
McBopomofo::UserOverrideModel *_userOverrideModel;
// the grid (lattice) builder for the unigrams (and bigrams) // the grid (lattice) builder for the unigrams (and bigrams)
Formosa::Gramambular::BlockReadingBuilder* _builder; Formosa::Gramambular::BlockReadingBuilder* _builder;
// latest walked path (trellis) using the Viterbi algorithm // latest walked path (trellis) using the Viterbi algorithm
std::vector<Formosa::Gramambular::NodeAnchor> _walkedNodes; std::vector<Formosa::Gramambular::NodeAnchor> _walkedNodes;
// user override model
McBopomofo::UserOverrideModel *_uom;
// the latest composing buffer that is updated to the foreground app // the latest composing buffer that is updated to the foreground app
NSMutableString *_composingBuffer; NSMutableString *_composingBuffer;
NSInteger _latestReadingCursor; NSInteger _latestReadingCursor;
@ -78,7 +78,3 @@
BOOL _chineseConversionEnabled; BOOL _chineseConversionEnabled;
} }
@end @end
// the shared language model object
extern "C" void LTLoadLanguageModel();
extern "C" void LTLoadUserLanguageModelFile();

View File

@ -38,13 +38,12 @@
#import <set> #import <set>
#import "OVStringHelper.h" #import "OVStringHelper.h"
#import "OVUTF8Helper.h" #import "OVUTF8Helper.h"
#import "LanguageModelManager.h"
#import "McBopomofo-Swift.h" #import "McBopomofo-Swift.h"
@import CandidateUI; @import CandidateUI;
@import OpenCC; @import OpenCC;
//@import SwiftUI;
// C++ namespace usages // C++ namespace usages
using namespace std; using namespace std;
using namespace Formosa::Mandarin; using namespace Formosa::Mandarin;
@ -111,62 +110,6 @@ VTCandidateController *gCurrentCandidateController = nil;
static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot"; static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot";
#endif #endif
// shared language model object that stores our phrase-term probability database
FastLM gLanguageModel;
FastLM gLanguageModelPlainBopomofo;
FastLM gUserPhraseLanguageModel;
static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
static NSString *LTUserDataFolderPath()
{
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
NSString *appSupportPath = [paths objectAtIndex:0];
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"McBopomofo"];
return userDictPath;
}
static NSString *LTUserPhrasesDataPath()
{
return [LTUserDataFolderPath() stringByAppendingPathComponent:@"data.txt"];
}
static BOOL LTCheckIfUserLanguageModelFileExists() {
NSString *folderPath = LTUserDataFolderPath();
BOOL isFolder = NO;
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
if (folderExist && !isFolder) {
NSError *error = nil;
[[NSFileManager defaultManager] removeItemAtPath:folderPath error:&error];
if (error) {
NSLog(@"Failed to remove folder %@", error);
return NO;
}
folderExist = NO;
}
if (!folderExist) {
NSError *error = nil;
[[NSFileManager defaultManager] createDirectoryAtPath:folderPath withIntermediateDirectories:YES attributes:nil error:&error];
if (error) {
NSLog(@"Failed to create folder %@", error);
return NO;
}
}
NSString *filePath = LTUserPhrasesDataPath();
if (![[NSFileManager defaultManager] fileExistsAtPath:filePath]) {
BOOL result = [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:filePath atomically:YES];
if (!result) {
NSLog(@"Failed to write file");
return NO;
}
}
return YES;
}
// https://clang-analyzer.llvm.org/faq.html // https://clang-analyzer.llvm.org/faq.html
__attribute__((annotate("returns_localized_nsstring"))) __attribute__((annotate("returns_localized_nsstring")))
static inline NSString *LocalizationNotNeeded(NSString *s) { static inline NSString *LocalizationNotNeeded(NSString *s) {
@ -174,17 +117,12 @@ static inline NSString *LocalizationNotNeeded(NSString *s) {
} }
// private methods // private methods
@interface McBopomofoInputMethodController () <VTCandidateControllerDelegate> @interface McBopomofoInputMethodController ()
+ (VTHorizontalCandidateController *)horizontalCandidateController; + (VTHorizontalCandidateController *)horizontalCandidateController;
+ (VTVerticalCandidateController *)verticalCandidateController; + (VTVerticalCandidateController *)verticalCandidateController;
@end
- (void)collectCandidates; @interface McBopomofoInputMethodController (VTCandidateController) <VTCandidateControllerDelegate>
- (size_t)actualCandidateCursorIndex;
- (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client;
- (void)beep;
- (BOOL)handleInputText:(NSString*)inputText key:(NSInteger)keyCode modifiers:(NSUInteger)flags client:(id)client;
- (BOOL)handleCandidateEventWithInputText:(NSString *)inputText charCode:(UniChar)charCode keyCode:(NSUInteger)keyCode;
@end @end
// sort helper // sort helper
@ -237,10 +175,11 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_bpmfReadingBuffer = new BopomofoReadingBuffer(BopomofoKeyboardLayout::StandardLayout()); _bpmfReadingBuffer = new BopomofoReadingBuffer(BopomofoKeyboardLayout::StandardLayout());
// create the lattice builder // create the lattice builder
_languageModel = &gLanguageModel; _languageModel = [LanguageModelManager languageModelMcBopomofo];
_userPhrasesModel = &gUserPhraseLanguageModel; _userPhrasesModel = [LanguageModelManager userPhraseLanguageModel];
_userOverrideModel = [LanguageModelManager userOverrideModel];
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel); _builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
_uom = &gUserOverrideModel;
// each Mandarin syllable is separated by a hyphen // each Mandarin syllable is separated by a hyphen
_builder->setJoinSeparator("-"); _builder->setJoinSeparator("-");
@ -380,17 +319,17 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
{ {
NSString *newInputMode; NSString *newInputMode;
Formosa::Gramambular::FastLM *newLanguageModel; Formosa::Gramambular::FastLM *newLanguageModel;
Formosa::Gramambular::FastLM *userPhraseModel; Formosa::Gramambular::FastLM *newUserPhraseModel;
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) { if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
newInputMode = kPlainBopomofoModeIdentifier; newInputMode = kPlainBopomofoModeIdentifier;
newLanguageModel = &gLanguageModelPlainBopomofo; newLanguageModel = [LanguageModelManager languageModelPlainBopomofo];
userPhraseModel = NULL; newUserPhraseModel = NULL;
} }
else { else {
newInputMode = kBopomofoModeIdentifier; newInputMode = kBopomofoModeIdentifier;
newLanguageModel = &gLanguageModel; newLanguageModel = [LanguageModelManager languageModelMcBopomofo];
userPhraseModel = &gUserPhraseLanguageModel; newUserPhraseModel = [LanguageModelManager userPhraseLanguageModel];
} }
// Only apply the changes if the value is changed // Only apply the changes if the value is changed
@ -406,7 +345,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_inputMode = newInputMode; _inputMode = newInputMode;
_languageModel = newLanguageModel; _languageModel = newLanguageModel;
_userPhrasesModel = userPhraseModel; _userPhrasesModel = newUserPhraseModel;
if (!_bpmfReadingBuffer->isEmpty()) { if (!_bpmfReadingBuffer->isEmpty()) {
_bpmfReadingBuffer->clear(); _bpmfReadingBuffer->clear();
@ -432,8 +371,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
// if it's Terminal, we don't commit at the first call (the client of which will not be IPMDServerClientWrapper) // if it's Terminal, we don't commit at the first call (the client of which will not be IPMDServerClientWrapper)
// then we defer the update in the next runloop round -- so that the composing buffer is not // then we defer the update in the next runloop round -- so that the composing buffer is not
// meaninglessly flushed, an annoying bug in Terminal.app since Mac OS X 10.5 // meaninglessly flushed, an annoying bug in Terminal.app since Mac OS X 10.5
if ([[client bundleIdentifier] isEqualToString:@"com.apple.Terminal"] && ![NSStringFromClass([client class]) isEqualToString:@"IPMDServerClientWrapper"]) if ([[client bundleIdentifier] isEqualToString:@"com.apple.Terminal"] && ![NSStringFromClass([client class]) isEqualToString:@"IPMDServerClientWrapper"]) {
{
if (_currentDeferredClient) { if (_currentDeferredClient) {
[self performSelector:@selector(updateClientComposingBuffer:) withObject:_currentDeferredClient afterDelay:0.0]; [self performSelector:@selector(updateClientComposingBuffer:) withObject:_currentDeferredClient afterDelay:0.0];
} }
@ -532,7 +470,8 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
// i.e. the client app needs to take care of where to put ths composing buffer // i.e. the client app needs to take care of where to put ths composing buffer
[client setMarkedText:attrString selectionRange:NSMakeRange((NSInteger)_builder->markerCursorIndex(), 0) replacementRange:NSMakeRange(NSNotFound, NSNotFound)]; [client setMarkedText:attrString selectionRange:NSMakeRange((NSInteger)_builder->markerCursorIndex(), 0) replacementRange:NSMakeRange(NSNotFound, NSNotFound)];
_latestReadingCursor = (NSInteger)_builder->markerCursorIndex(); _latestReadingCursor = (NSInteger)_builder->markerCursorIndex();
} else { }
else {
// we must use NSAttributedString so that the cursor is visible -- // we must use NSAttributedString so that the cursor is visible --
// can't just use NSString // can't just use NSString
NSDictionary *attrDict = @{NSUnderlineStyleAttributeName: @(NSUnderlineStyleSingle), NSDictionary *attrDict = @{NSUnderlineStyleAttributeName: @(NSUnderlineStyleSingle),
@ -560,13 +499,13 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
reverse(_walkedNodes.begin(), _walkedNodes.end()); reverse(_walkedNodes.begin(), _walkedNodes.end());
// if DEBUG is defined, a GraphViz file is written to kGraphVizOutputfile // if DEBUG is defined, a GraphViz file is written to kGraphVizOutputfile
#if DEBUG #if DEBUG
string dotDump = _builder->grid().dumpDOT(); string dotDump = _builder->grid().dumpDOT();
NSString *dotStr = [NSString stringWithUTF8String:dotDump.c_str()]; NSString *dotStr = [NSString stringWithUTF8String:dotDump.c_str()];
NSError *error = nil; NSError *error = nil;
BOOL __unused success = [dotStr writeToFile:kGraphVizOutputfile atomically:YES encoding:NSUTF8StringEncoding error:&error]; BOOL __unused success = [dotStr writeToFile:kGraphVizOutputfile atomically:YES encoding:NSUTF8StringEncoding error:&error];
#endif #endif
} }
- (void)popOverflowComposingTextAndWalk:(id)client - (void)popOverflowComposingTextAndWalk:(id)client
@ -681,29 +620,12 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
- (BOOL)_writeUserPhrase - (BOOL)_writeUserPhrase
{ {
if (!LTCheckIfUserLanguageModelFileExists()) {
return NO;
}
NSString *currentMarkedPhrase = [self _currentMarkedText]; NSString *currentMarkedPhrase = [self _currentMarkedText];
if (![currentMarkedPhrase length]) { if (![currentMarkedPhrase length]) {
return NO; return NO;
} }
currentMarkedPhrase = [currentMarkedPhrase stringByAppendingString:@"\n"]; return [LanguageModelManager writeUserPhrase:currentMarkedPhrase];
NSString *path = LTUserPhrasesDataPath();
NSFileHandle *file = [NSFileHandle fileHandleForUpdatingAtPath:path];
if (!file) {
return NO;
}
[file seekToEndOfFile];
NSData *data = [currentMarkedPhrase dataUsingEncoding:NSUTF8StringEncoding];
[file writeData:data];
[file closeFile];
LTLoadUserLanguageModelFile();
return YES;
} }
- (BOOL)handleInputText:(NSString*)inputText key:(NSInteger)keyCode modifiers:(NSUInteger)flags client:(id)client - (BOOL)handleInputText:(NSString*)inputText key:(NSInteger)keyCode modifiers:(NSUInteger)flags client:(id)client
@ -801,7 +723,8 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
if (charCode == 13) { if (charCode == 13) {
if ([self _writeUserPhrase]) { if ([self _writeUserPhrase]) {
_builder->setMarkerCursorIndex(SIZE_MAX); _builder->setMarkerCursorIndex(SIZE_MAX);
} else { }
else {
[self beep]; [self beep];
} }
[self updateClientComposingBuffer:client]; [self updateClientComposingBuffer:client];
@ -868,9 +791,9 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
[self popOverflowComposingTextAndWalk:client]; [self popOverflowComposingTextAndWalk:client];
// get user override model suggestion // get user override model suggestion
string overrideValue = string overrideValue = (_inputMode == kPlainBopomofoModeIdentifier) ? "" :
(_inputMode == kPlainBopomofoModeIdentifier) ? "" : _userOverrideModel->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]);
_uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]);
if (!overrideValue.empty()) { if (!overrideValue.empty()) {
size_t cursorIndex = [self actualCandidateCursorIndex]; size_t cursorIndex = [self actualCandidateCursorIndex];
vector<NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); vector<NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
@ -1188,9 +1111,9 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
- (BOOL)handleCandidateEventWithInputText:(NSString *)inputText charCode:(UniChar)charCode keyCode:(NSUInteger)keyCode - (BOOL)handleCandidateEventWithInputText:(NSString *)inputText charCode:(UniChar)charCode keyCode:(NSUInteger)keyCode
{ {
BOOL cancelCandidateKey = BOOL cancelCandidateKey =
(charCode == 27) || (charCode == 27) ||
((_inputMode == kPlainBopomofoModeIdentifier) && ((_inputMode == kPlainBopomofoModeIdentifier) &&
(charCode == 8 || keyCode == kDeleteKeyCode)); (charCode == 8 || keyCode == kDeleteKeyCode));
if (cancelCandidateKey) { if (cancelCandidateKey) {
gCurrentCandidateController.visible = NO; gCurrentCandidateController.visible = NO;
@ -1343,7 +1266,7 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
string punctuation = string("_punctuation_") + string(1, (char)charCode); string punctuation = string("_punctuation_") + string(1, (char)charCode);
BOOL shouldAutoSelectCandidate = _bpmfReadingBuffer->isValidKey((char)charCode) || _languageModel->hasUnigramsForKey(customPunctuation) || BOOL shouldAutoSelectCandidate = _bpmfReadingBuffer->isValidKey((char)charCode) || _languageModel->hasUnigramsForKey(customPunctuation) ||
_languageModel->hasUnigramsForKey(punctuation); _languageModel->hasUnigramsForKey(punctuation);
if (shouldAutoSelectCandidate) { if (shouldAutoSelectCandidate) {
NSUInteger candidateIndex = [gCurrentCandidateController candidateIndexAtKeyLabelIndex:0]; NSUInteger candidateIndex = [gCurrentCandidateController candidateIndexAtKeyLabelIndex:0];
@ -1569,13 +1492,13 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
- (void)openUserPhrases:(id)sender - (void)openUserPhrases:(id)sender
{ {
NSLog(@"openUserPhrases called"); NSLog(@"openUserPhrases called");
if (!LTCheckIfUserLanguageModelFileExists()) { if (![LanguageModelManager checkIfUserLanguageModelFileExists] ) {
NSString *content = [NSString stringWithFormat:NSLocalizedString(@"Please check the permission of at \"%@\".", @""), LTUserDataFolderPath()]; NSString *content = [NSString stringWithFormat:NSLocalizedString(@"Please check the permission of at \"%@\".", @""), [LanguageModelManager dataFolderPath]];
[[NonModalAlertWindowController sharedInstance] showWithTitle:NSLocalizedString(@"Unable to create the user phrase file.", @"") content:content confirmButtonTitle:NSLocalizedString(@"OK", @"") cancelButtonTitle:nil cancelAsDefault:NO delegate:nil]; [[NonModalAlertWindowController sharedInstance] showWithTitle:NSLocalizedString(@"Unable to create the user phrase file.", @"") content:content confirmButtonTitle:NSLocalizedString(@"OK", @"") cancelButtonTitle:nil cancelAsDefault:NO delegate:nil];
return; return;
} }
NSString *path = LTUserPhrasesDataPath(); NSString *path = [LanguageModelManager userPhrasesDataPath];
NSLog(@"Open %@", path); NSLog(@"Open %@", path);
if (![[NSFileManager defaultManager] fileExistsAtPath:path]) { if (![[NSFileManager defaultManager] fileExistsAtPath:path]) {
[[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:path atomically:YES]; [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:path atomically:YES];
@ -1587,7 +1510,7 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
- (void)reloadUserPhrases:(id)sender - (void)reloadUserPhrases:(id)sender
{ {
NSLog(@"reloadUserPhrases called"); NSLog(@"reloadUserPhrases called");
LTLoadUserLanguageModelFile(); [LanguageModelManager loadUserPhrasesModel];
} }
- (void)showAbout:(id)sender - (void)showAbout:(id)sender
@ -1602,6 +1525,12 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
[[NSUserDefaults standardUserDefaults] setBool:_chineseConversionEnabled forKey:kChineseConversionEnabledKey]; [[NSUserDefaults standardUserDefaults] setBool:_chineseConversionEnabled forKey:kChineseConversionEnabledKey];
} }
@end
#pragma mark -
@implementation McBopomofoInputMethodController (VTCandidateController)
- (NSUInteger)candidateCountForController:(VTCandidateController *)controller - (NSUInteger)candidateCountForController:(VTCandidateController *)controller
{ {
return [_candidates count]; return [_candidates count];
@ -1622,7 +1551,7 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
size_t cursorIndex = [self actualCandidateCursorIndex]; size_t cursorIndex = [self actualCandidateCursorIndex];
_builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue); _builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue);
if (_inputMode != kPlainBopomofoModeIdentifier) { if (_inputMode != kPlainBopomofoModeIdentifier) {
_uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]); _userOverrideModel->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]);
} }
[_candidates removeAllObjects]; [_candidates removeAllObjects];
@ -1637,28 +1566,3 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
} }
@end @end
static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &lm)
{
NSString *dataPath = [[NSBundle bundleForClass:[McBopomofoInputMethodController class]] pathForResource:filenameWithoutExtension ofType:@"txt"];
bool result = lm.open([dataPath UTF8String]);
if (!result) {
NSLog(@"Failed opening language model: %@", dataPath);
}
}
void LTLoadLanguageModel()
{
LTLoadLanguageModelFile(@"data", gLanguageModel);
LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo);
}
void LTLoadUserLanguageModelFile()
{
gUserPhraseLanguageModel.close();
bool result = gUserPhraseLanguageModel.open([LTUserPhrasesDataPath() UTF8String]);
if (!result) {
NSLog(@"Failed opening language model for user phrases.");
}
}

View File

@ -0,0 +1,23 @@
#import <Foundation/Foundation.h>
#import "FastLM.h"
#import "UserOverrideModel.h"
NS_ASSUME_NONNULL_BEGIN
/// Class-level facade over the language models and the user phrase file.
/// All members are class methods/properties; no instance state is declared here.
@interface LanguageModelManager : NSObject
/// Opens the bundled "data" and "data-plain-bpmf" language model files into
/// the McBopomofo and Plain Bopomofo models. Failures are only logged.
+ (void)loadDataModels;
/// Closes the user phrase model and reopens it from `userPhrasesDataPath`.
/// A failure is only logged.
+ (void)loadUserPhrasesModel;
/// Makes sure the data folder and the user phrase file exist, creating them
/// when they are missing (despite the name, this is not a pure check).
/// @return NO when the folder or the file could not be prepared.
+ (BOOL)checkIfUserLanguageModelFileExists;
/// Appends `userPhrase` plus a trailing newline to the user phrase file and
/// then reloads the user phrase model.
/// @return NO when the file could not be prepared or opened for updating.
+ (BOOL)writeUserPhrase:(NSString *)userPhrase;
/// The "McBopomofo" folder inside the Application Support directory.
@property (class, readonly, nonatomic) NSString *dataFolderPath;
/// The "data.txt" file inside `dataFolderPath`.
@property (class, readonly, nonatomic) NSString *userPhrasesDataPath;
/// Pointer to the model that `loadDataModels` fills from the "data" resource.
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelMcBopomofo;
/// Pointer to the model that `loadDataModels` fills from the "data-plain-bpmf" resource.
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelPlainBopomofo;
/// Pointer to the model that `loadUserPhrasesModel` fills from the user phrase file.
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *userPhraseLanguageModel;
/// Pointer to the user override model.
@property (class, readonly, nonatomic) McBopomofo::UserOverrideModel *userOverrideModel;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,140 @@
#import "LanguageModelManager.h"
#import <fstream>
#import <iostream>
#import <set>
#import "OVStringHelper.h"
#import "OVUTF8Helper.h"
using namespace std;
using namespace Formosa::Gramambular;
using namespace OpenVanilla;
// First constructor argument of the user override model below.
static const int kUserOverrideModelCapacity = 500;
// Second constructor argument of the user override model below.
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
// The model objects handed out (by address) through the LanguageModelManager
// class properties. They are defined at file scope without `static`.
// Opened from the bundled "data" resource by +loadDataModels.
FastLM globalLanguageModel;
// Opened from the bundled "data-plain-bpmf" resource by +loadDataModels.
FastLM globalLanguageModelPlainBopomofo;
// Closed and reopened from the user phrase file by +loadUserPhrasesModel.
FastLM globalUserPhraseLanguageModel;
// Returned by +userOverrideModel.
McBopomofo::UserOverrideModel globalUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
@implementation LanguageModelManager

/// Opens the bundled resource `<filenameWithoutExtension>.txt` into `lm`.
///
/// @param filenameWithoutExtension Resource name without the ".txt" extension.
/// @param lm The language model that should open the file.
/// @return true when the resource was found and `lm` opened it.
static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &lm)
{
    // The controller class is looked up by name, so this file does not need
    // to import the input method controller's header.
    Class cls = NSClassFromString(@"McBopomofoInputMethodController");
    NSString *dataPath = [[NSBundle bundleForClass:cls] pathForResource:filenameWithoutExtension ofType:@"txt"];
    if (!dataPath) {
        // The resource is missing from the bundle; do not hand a NULL C
        // string to FastLM::open.
        return false;
    }
    bool result = lm.open([dataPath UTF8String]);
    return result;
}

#pragma mark - Loading models

/// Opens both bundled language models. Failures are logged, not propagated.
+ (void)loadDataModels
{
    bool dataOpenResult = LTLoadLanguageModelFile(@"data", globalLanguageModel);
    if (!dataOpenResult) {
        NSLog(@"Failed to open language model.");
    }
    bool plainBpmfOpenResult = LTLoadLanguageModelFile(@"data-plain-bpmf", globalLanguageModelPlainBopomofo);
    if (!plainBpmfOpenResult) {
        NSLog(@"Failed to open language model for plain bpmf.");
    }
}

/// Closes the user phrase model and reopens it from the user phrase file, so
/// that edits made to the file take effect. A failure is logged only.
+ (void)loadUserPhrasesModel
{
    globalUserPhraseLanguageModel.close();
    bool result = globalUserPhraseLanguageModel.open([[self userPhrasesDataPath] UTF8String]);
    if (!result) {
        NSLog(@"Failed to open user phrases.");
    }
}

#pragma mark - User phrase file

/// Makes sure the data folder and the user phrase file exist, creating them
/// when needed. A regular file sitting at the folder path is removed first.
///
/// @return YES when both the folder and the file are in place afterwards.
+ (BOOL)checkIfUserLanguageModelFileExists
{
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSString *folderPath = [self dataFolderPath];
    BOOL isFolder = NO;
    BOOL folderExist = [fileManager fileExistsAtPath:folderPath isDirectory:&isFolder];

    // Success is judged by each call's return value; the NSError out-parameter
    // is only meaningful after a call has reported failure.
    if (folderExist && !isFolder) {
        NSError *error = nil;
        if (![fileManager removeItemAtPath:folderPath error:&error]) {
            NSLog(@"Failed to remove folder %@", error);
            return NO;
        }
        folderExist = NO;
    }

    if (!folderExist) {
        NSError *error = nil;
        if (![fileManager createDirectoryAtPath:folderPath withIntermediateDirectories:YES attributes:nil error:&error]) {
            NSLog(@"Failed to create folder %@", error);
            return NO;
        }
    }

    NSString *filePath = [self userPhrasesDataPath];
    if (![fileManager fileExistsAtPath:filePath]) {
        // Start with an empty file so that it can later be opened for updating.
        BOOL result = [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:filePath atomically:YES];
        if (!result) {
            NSLog(@"Failed to write file");
            return NO;
        }
    }
    return YES;
}

/// Appends `userPhrase` followed by a newline to the user phrase file and
/// reloads the user phrase model.
///
/// @param userPhrase The line to append, without a trailing newline.
/// @return NO when the file could not be prepared or opened for updating.
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
{
    if (![self checkIfUserLanguageModelFileExists]) {
        return NO;
    }

    NSString *currentMarkedPhrase = [userPhrase stringByAppendingString:@"\n"];
    NSString *path = [self userPhrasesDataPath];
    NSFileHandle *file = [NSFileHandle fileHandleForUpdatingAtPath:path];
    if (!file) {
        return NO;
    }
    [file seekToEndOfFile];
    NSData *data = [currentMarkedPhrase dataUsingEncoding:NSUTF8StringEncoding];
    [file writeData:data];
    [file closeFile];

    // Reopen the model so the new phrase is picked up.
    [self loadUserPhrasesModel];
    return YES;
}

#pragma mark - Paths

/// The "McBopomofo" folder inside the user's Application Support directory.
+ (NSString *)dataFolderPath
{
    // The second argument is a domain mask. The previous value,
    // NSUserDirectory, is a search-path *directory* constant and only worked
    // because its numeric value happens to include the user domain bit.
    NSArray<NSString *> *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDomainMask, YES);
    NSString *appSupportPath = paths[0];
    NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"McBopomofo"];
    return userDictPath;
}

/// The "data.txt" file inside `dataFolderPath`.
+ (NSString *)userPhrasesDataPath
{
    return [[self dataFolderPath] stringByAppendingPathComponent:@"data.txt"];
}

#pragma mark - Model accessors

/// Address of the global model opened from the "data" resource.
+ (Formosa::Gramambular::FastLM *)languageModelMcBopomofo
{
    return &globalLanguageModel;
}

/// Address of the global model opened from the "data-plain-bpmf" resource.
+ (Formosa::Gramambular::FastLM *)languageModelPlainBopomofo
{
    return &globalLanguageModelPlainBopomofo;
}

/// Address of the global model opened from the user phrase file.
+ (Formosa::Gramambular::FastLM *)userPhraseLanguageModel
{
    return &globalUserPhraseLanguageModel;
}

/// Address of the global user override model.
+ (McBopomofo::UserOverrideModel *)userOverrideModel
{
    return &globalUserOverrideModel;
}

@end

View File

@ -2,7 +2,13 @@
// Use this file to import your target's public headers that you would like to expose to Swift. // Use this file to import your target's public headers that you would like to expose to Swift.
// //
extern void LTLoadLanguageModel(void); //extern void LTLoadLanguageModel(void);
extern void LTLoadUserLanguageModelFile(void); //extern void LTLoadUserLanguageModelFile(void);
@import Foundation;
// Swift-visible declaration of the subset of LanguageModelManager that Swift
// code calls. This duplicates declarations from LanguageModelManager.h and
// must be kept in sync with that header by hand.
// NOTE(review): presumably redeclared here instead of importing
// LanguageModelManager.h because that header imports FastLM.h and
// UserOverrideModel.h and uses C++ types -- confirm.
@interface LanguageModelManager : NSObject
+ (void)loadDataModels;
+ (void)loadUserPhrasesModel;
+ (BOOL)checkIfUserLanguageModelFileExists;
@end