Load Plain Bopomofo data.

This commit is contained in:
Lukhnos Liu 2012-09-10 10:14:32 -07:00
parent 10ff94e141
commit 7b4568e152
3 changed files with 44 additions and 20 deletions

View File

@ -45,6 +45,7 @@
6ACA41FD15FC1D9000935EF6 /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41F015FC1D9000935EF6 /* MainMenu.xib */; };
6ACA41FF15FC1D9000935EF6 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 6ACA41F415FC1D9000935EF6 /* main.m */; };
6ACA420215FC1E5200935EF6 /* McBopomofo.app in Resources */ = {isa = PBXBuildFile; fileRef = 6A0D4EA215FC0D2D00ABF4B3 /* McBopomofo.app */; };
6AD7CBC815FE555000691B5B /* data-plain-bpmf.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */; };
6AE210B215FC63CC003659FE /* PlainBopomofo.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */; };
6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; };
/* End PBXBuildFile section */
@ -205,6 +206,7 @@
6ACA41F615FC1D9000935EF6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.rtf; name = "zh-Hant"; path = "zh-Hant.lproj/License.rtf"; sourceTree = "<group>"; };
6ACA41F715FC1D9000935EF6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/Localizable.strings"; sourceTree = "<group>"; };
6ACA41F815FC1D9000935EF6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = "zh-Hant"; path = "zh-Hant.lproj/MainMenu.xib"; sourceTree = "<group>"; };
6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "data-plain-bpmf.txt"; sourceTree = "<group>"; };
6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = PlainBopomofo.tiff; sourceTree = "<group>"; };
6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = "PlainBopomofo@2x.tiff"; sourceTree = "<group>"; };
/* End PBXFileReference section */
@ -425,6 +427,7 @@
6A38BBF415FC117A00A8A51F /* BPMFMappings.txt */,
6A38BBF515FC117A00A8A51F /* BPMFPunctuations.txt */,
6A38BBF615FC117A00A8A51F /* data.txt */,
6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */,
6A38BBF715FC117A00A8A51F /* heterophony1.list */,
6A38BBF815FC117A00A8A51F /* heterophony2.list */,
6A38BBF915FC117A00A8A51F /* heterophony3.list */,
@ -581,6 +584,7 @@
6A719D0415FC5FD200C8B8E3 /* McBopomofo.iconset in Resources */,
6AE210B215FC63CC003659FE /* PlainBopomofo.tiff in Resources */,
6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */,
6AD7CBC815FE555000691B5B /* data-plain-bpmf.txt in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@ -36,13 +36,17 @@
#import <InputMethodKit/InputMethodKit.h>
#import "Mandarin.h"
#import "Gramambular.h"
#import "SimpleLM.h"
@interface McBopomofoInputMethodController : IMKInputController
{
@private
@private
// the reading buffer that takes user input
Formosa::Mandarin::BopomofoReadingBuffer* _bpmfReadingBuffer;
// language model
Formosa::Gramambular::SimpleLM *_languageModel;
// the grid (lattice) builder for the unigrams (and bigrams)
Formosa::Gramambular::BlockReadingBuilder* _builder;

View File

@ -36,7 +36,6 @@
#import <fstream>
#import <iostream>
#import <set>
#import "SimpleLM.h"
#import "OVStringHelper.h"
#import "OVUTF8Helper.h"
#import "AppDelegate.h"
@ -109,6 +108,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
// shared language model object that stores our phrase-term probability database
SimpleLM gLanguageModel;
SimpleLM gLanguageModelPlainBopomofo;
// private methods
@interface McBopomofoInputMethodController () <VTCandidateControllerDelegate>
@ -172,7 +172,8 @@ public:
_bpmfReadingBuffer = new BopomofoReadingBuffer(BopomofoKeyboardLayout::StandardLayout());
// create the lattice builder
_builder = new BlockReadingBuilder(&gLanguageModel);
_languageModel = &gLanguageModel;
_builder = new BlockReadingBuilder(_languageModel);
// each Mandarin syllable is separated by a hyphen
_builder->setJoinSeparator("-");
@ -309,9 +310,11 @@ public:
{
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
_inputMode = kPlainBopomofoModeIdentifier;
_languageModel = &gLanguageModelPlainBopomofo;
}
else {
_inputMode = kBopomofoModeIdentifier;
_languageModel = &gLanguageModel;
}
NSString *basisKeyboardLayoutID = [[NSUserDefaults standardUserDefaults] stringForKey:kBasisKeyboardLayoutPreferenceKey];
@ -329,6 +332,12 @@ public:
if ([_composingBuffer length] > 0) {
[self commitComposition:sender];
}
if (_builder) {
delete _builder;
_builder = new BlockReadingBuilder(_languageModel);
_builder->setJoinSeparator("-");
}
}
#pragma mark - IMKServerInput protocol methods
@ -744,7 +753,7 @@ public:
string reading = _bpmfReadingBuffer->syllable().composedString();
// see if we have a unigram for this
if (!gLanguageModel.hasUnigramsForKey(reading)) {
if (!_languageModel->hasUnigramsForKey(reading)) {
[self beep];
[self updateClientComposingBuffer:client];
return YES;
@ -789,7 +798,7 @@ public:
[self commitComposition:client];
_bpmfReadingBuffer->clear();
}
else if (gLanguageModel.hasUnigramsForKey(" ")) {
else if (_languageModel->hasUnigramsForKey(" ")) {
_builder->insertReadingAtCursor(" ");
[self popOverflowComposingTextAndWalk:client];
[self updateClientComposingBuffer:client];
@ -908,7 +917,7 @@ public:
// punctuation list
if ((char)charCode == '`') {
if (gLanguageModel.hasUnigramsForKey(string("_punctuation_list"))) {
if (_languageModel->hasUnigramsForKey(string("_punctuation_list"))) {
if (_bpmfReadingBuffer->isEmpty()) {
_builder->insertReadingAtCursor(string("_punctuation_list"));
[self popOverflowComposingTextAndWalk:client];
@ -945,7 +954,7 @@ public:
}
string customPunctuation = string("_punctuation_") + layout + string(1, (char)charCode);
if (gLanguageModel.hasUnigramsForKey(customPunctuation)) {
if (_languageModel->hasUnigramsForKey(customPunctuation)) {
if (_bpmfReadingBuffer->isEmpty()) {
_builder->insertReadingAtCursor(customPunctuation);
[self popOverflowComposingTextAndWalk:client];
@ -964,7 +973,7 @@ public:
// if nothing is matched, see if it's a punctuation key
string punctuation = string("_punctuation_") + string(1, (char)charCode);
if (gLanguageModel.hasUnigramsForKey(punctuation)) {
if (_languageModel->hasUnigramsForKey(punctuation)) {
if (_bpmfReadingBuffer->isEmpty()) {
_builder->insertReadingAtCursor(punctuation);
[self popOverflowComposingTextAndWalk:client];
@ -1302,34 +1311,42 @@ public:
@end
void LTLoadLanguageModel()
// Reads the bundled text resource "<filenameWithoutExtension>.txt" line by line
// and feeds every well-formed three-field line into the language model `lm`.
//
// filenameWithoutExtension: resource name looked up (with type "txt") in the
//     bundle that contains McBopomofoInputMethodController.
// lm: the SimpleLM instance that receives the entries via add().
//
// NOTE(review): this listing looks like a rendered diff without +/- markers, so
// three statements below appear twice (one form using gLanguageModel / @"data",
// one form using the parameters) -- confirm which form is live before compiling.
static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, SimpleLM &lm)
{
// load the language model; the performance of this function can be greatly improved
// with better loading/parsing methods
// startTime is marked __unused; nothing in this function reads it
NSDate *__unused startTime = [NSDate date];
// NOTE(review): pathForResource:ofType: yields nil when the resource is missing,
// in which case a NULL C string reaches ifs.open() below -- confirm this is handled
NSString *dataPath = [[NSBundle bundleForClass:[McBopomofoInputMethodController class]] pathForResource:@"data" ofType:@"txt"];
NSString *dataPath = [[NSBundle bundleForClass:[McBopomofoInputMethodController class]] pathForResource:filenameWithoutExtension ofType:@"txt"];
ifstream ifs;
ifs.open([dataPath UTF8String]);
// the stream state is tested before each read, so the final failed getline leaves
// `line` empty and is skipped by the empty-line check below
while (ifs.good()) {
string line;
getline(ifs, line);
// skip empty lines and lines whose first character is '#'
if (!line.size() || (line.size() && line[0] == '#')) {
continue;
}
// presumably splits the line on runs of spaces/tabs -- helper is defined elsewhere
vector<string> p = OVStringHelper::SplitBySpacesOrTabs(line);
// only lines with exactly three fields are used; others are silently ignored.
// The second field is passed as add()'s first argument, the first field as its
// second, and the third field is converted to a number with atof()
if (p.size() == 3) {
gLanguageModel.add(p[1], p[0], atof(p[2].c_str()));
lm.add(p[1], p[0], atof(p[2].c_str()));
}
}
ifs.close();
gLanguageModel.add(" ", " ", 0.0);
// insert an empty entry for BOS/EOS markers
lm.add(" ", " ", 0.0);
}
void LTLoadLanguageModel()
{
LTLoadLanguageModelFile(@"data", gLanguageModel);
LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo);
// initialize the singleton learning dictionary
// putting singleton in @synchronized is the standard way in Objective-C
@ -1344,7 +1361,6 @@ void LTLoadLanguageModel()
}
NSString *appSupportPath = [paths objectAtIndex:0];
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"McBopomofo"];
BOOL isDir = NO;