diff --git a/McBopomofo.xcodeproj/project.pbxproj b/McBopomofo.xcodeproj/project.pbxproj index 390d7a63..9aa701d5 100644 --- a/McBopomofo.xcodeproj/project.pbxproj +++ b/McBopomofo.xcodeproj/project.pbxproj @@ -45,6 +45,7 @@ 6ACA41FD15FC1D9000935EF6 /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41F015FC1D9000935EF6 /* MainMenu.xib */; }; 6ACA41FF15FC1D9000935EF6 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 6ACA41F415FC1D9000935EF6 /* main.m */; }; 6ACA420215FC1E5200935EF6 /* McBopomofo.app in Resources */ = {isa = PBXBuildFile; fileRef = 6A0D4EA215FC0D2D00ABF4B3 /* McBopomofo.app */; }; + 6AD7CBC815FE555000691B5B /* data-plain-bpmf.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */; }; 6AE210B215FC63CC003659FE /* PlainBopomofo.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */; }; 6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; }; /* End PBXBuildFile section */ @@ -205,6 +206,7 @@ 6ACA41F615FC1D9000935EF6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.rtf; name = "zh-Hant"; path = "zh-Hant.lproj/License.rtf"; sourceTree = ""; }; 6ACA41F715FC1D9000935EF6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/Localizable.strings"; sourceTree = ""; }; 6ACA41F815FC1D9000935EF6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = "zh-Hant"; path = "zh-Hant.lproj/MainMenu.xib"; sourceTree = ""; }; + 6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "data-plain-bpmf.txt"; sourceTree = ""; }; 6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = PlainBopomofo.tiff; sourceTree = ""; }; 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = "PlainBopomofo@2x.tiff"; sourceTree = ""; }; /* End PBXFileReference section */ @@ -425,6 +427,7 @@ 6A38BBF415FC117A00A8A51F /* BPMFMappings.txt */, 6A38BBF515FC117A00A8A51F /* BPMFPunctuations.txt */, 6A38BBF615FC117A00A8A51F /* data.txt */, + 6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */, 6A38BBF715FC117A00A8A51F /* heterophony1.list */, 6A38BBF815FC117A00A8A51F /* heterophony2.list */, 6A38BBF915FC117A00A8A51F /* heterophony3.list */, @@ -581,6 +584,7 @@ 6A719D0415FC5FD200C8B8E3 /* McBopomofo.iconset in Resources */, 6AE210B215FC63CC003659FE /* PlainBopomofo.tiff in Resources */, 6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */, + 6AD7CBC815FE555000691B5B /* data-plain-bpmf.txt in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Source/InputMethodController.h b/Source/InputMethodController.h index fb3b8e38..6cfd307a 100644 --- a/Source/InputMethodController.h +++ b/Source/InputMethodController.h @@ -36,13 +36,17 @@ #import #import "Mandarin.h" #import "Gramambular.h" +#import "SimpleLM.h" @interface McBopomofoInputMethodController : IMKInputController { -@private +@private // the reading buffer that takes user input Formosa::Mandarin::BopomofoReadingBuffer* _bpmfReadingBuffer; + // language model + Formosa::Gramambular::SimpleLM *_languageModel; + // the grid (lattice) builder for the unigrams (and bigrams) Formosa::Gramambular::BlockReadingBuilder* _builder; diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm index 49a1ca88..81a046f6 100644 --- a/Source/InputMethodController.mm +++ b/Source/InputMethodController.mm @@ -36,7 +36,6 @@ #import #import #import -#import "SimpleLM.h" #import "OVStringHelper.h" #import "OVUTF8Helper.h" #import "AppDelegate.h" @@ -109,6 +108,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot // shared language model object that stores our phrase-term probability database SimpleLM gLanguageModel; +SimpleLM gLanguageModelPlainBopomofo; // private methods @interface McBopomofoInputMethodController () @@ -172,7 +172,8 @@ public: _bpmfReadingBuffer = new BopomofoReadingBuffer(BopomofoKeyboardLayout::StandardLayout()); // create the lattice builder - _builder = new BlockReadingBuilder(&gLanguageModel); + _languageModel = &gLanguageModel; + _builder = new BlockReadingBuilder(_languageModel); // each Mandarin syllable is separated by a hyphen _builder->setJoinSeparator("-"); @@ -309,9 +310,11 @@ public: { if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) { _inputMode = kPlainBopomofoModeIdentifier; + _languageModel = &gLanguageModelPlainBopomofo; } else { _inputMode = kBopomofoModeIdentifier; + _languageModel = &gLanguageModel; } NSString *basisKeyboardLayoutID = [[NSUserDefaults standardUserDefaults] stringForKey:kBasisKeyboardLayoutPreferenceKey]; @@ -329,6 +332,12 @@ public: if ([_composingBuffer length] > 0) { [self commitComposition:sender]; } + + if (_builder) { + delete _builder; + _builder = new BlockReadingBuilder(_languageModel); + _builder->setJoinSeparator("-"); + } } #pragma mark - IMKServerInput protocol methods @@ -744,7 +753,7 @@ public: string reading = _bpmfReadingBuffer->syllable().composedString(); // see if we have a unigram for this - if (!gLanguageModel.hasUnigramsForKey(reading)) { + if (!_languageModel->hasUnigramsForKey(reading)) { [self beep]; [self updateClientComposingBuffer:client]; return YES; @@ -789,7 +798,7 @@ public: [self commitComposition:client]; _bpmfReadingBuffer->clear(); } - else if (gLanguageModel.hasUnigramsForKey(" ")) { + else if (_languageModel->hasUnigramsForKey(" ")) { _builder->insertReadingAtCursor(" "); [self popOverflowComposingTextAndWalk:client]; [self updateClientComposingBuffer:client]; @@ -908,7 +917,7 @@ public: // punctuation list if ((char)charCode == '`') { - if (gLanguageModel.hasUnigramsForKey(string("_punctuation_list"))) { + if (_languageModel->hasUnigramsForKey(string("_punctuation_list"))) { if (_bpmfReadingBuffer->isEmpty()) { _builder->insertReadingAtCursor(string("_punctuation_list")); [self popOverflowComposingTextAndWalk:client]; @@ -945,7 +954,7 @@ public: } string customPunctuation = string("_punctuation_") + layout + string(1, (char)charCode); - if (gLanguageModel.hasUnigramsForKey(customPunctuation)) { + if (_languageModel->hasUnigramsForKey(customPunctuation)) { if (_bpmfReadingBuffer->isEmpty()) { _builder->insertReadingAtCursor(customPunctuation); [self popOverflowComposingTextAndWalk:client]; @@ -964,7 +973,7 @@ public: // if nothing is matched, see if it's a punctuation key string punctuation = string("_punctuation_") + string(1, (char)charCode); - if (gLanguageModel.hasUnigramsForKey(punctuation)) { + if (_languageModel->hasUnigramsForKey(punctuation)) { if (_bpmfReadingBuffer->isEmpty()) { _builder->insertReadingAtCursor(punctuation); [self popOverflowComposingTextAndWalk:client]; @@ -1302,34 +1311,42 @@ public: @end - -void LTLoadLanguageModel() +static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, SimpleLM &lm) { // load the language model; the performance of this function can be greatly improved // with better loading/parsing methods - NSDate *__unused startTime = [NSDate date]; - - NSString *dataPath = [[NSBundle bundleForClass:[McBopomofoInputMethodController class]] pathForResource:@"data" ofType:@"txt"]; - + + NSString *dataPath = [[NSBundle bundleForClass:[McBopomofoInputMethodController class]] pathForResource:filenameWithoutExtension ofType:@"txt"]; + ifstream ifs; ifs.open([dataPath UTF8String]); while (ifs.good()) { string line; getline(ifs, line); - + if (!line.size() || (line.size() && line[0] == '#')) { continue; } - + vector p = OVStringHelper::SplitBySpacesOrTabs(line); - + if (p.size() == 3) { - gLanguageModel.add(p[1], p[0], atof(p[2].c_str())); + lm.add(p[1], p[0], atof(p[2].c_str())); } } ifs.close(); - gLanguageModel.add(" ", " ", 0.0); + + // insert an empty entry for BOS/EOS markers + lm.add(" ", " ", 0.0); +} + + +void LTLoadLanguageModel() +{ + LTLoadLanguageModelFile(@"data", gLanguageModel); + LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo); + // initialize the singleton learning dictionary // putting singleton in @synchronized is the standard way in Objective-C @@ -1344,7 +1361,6 @@ void LTLoadLanguageModel() } NSString *appSupportPath = [paths objectAtIndex:0]; - NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"McBopomofo"]; BOOL isDir = NO;