Load Plain Bopomofo data.
This commit is contained in:
parent
10ff94e141
commit
7b4568e152
|
@ -45,6 +45,7 @@
|
|||
6ACA41FD15FC1D9000935EF6 /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41F015FC1D9000935EF6 /* MainMenu.xib */; };
|
||||
6ACA41FF15FC1D9000935EF6 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 6ACA41F415FC1D9000935EF6 /* main.m */; };
|
||||
6ACA420215FC1E5200935EF6 /* McBopomofo.app in Resources */ = {isa = PBXBuildFile; fileRef = 6A0D4EA215FC0D2D00ABF4B3 /* McBopomofo.app */; };
|
||||
6AD7CBC815FE555000691B5B /* data-plain-bpmf.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */; };
|
||||
6AE210B215FC63CC003659FE /* PlainBopomofo.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */; };
|
||||
6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
@ -205,6 +206,7 @@
|
|||
6ACA41F615FC1D9000935EF6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.rtf; name = "zh-Hant"; path = "zh-Hant.lproj/License.rtf"; sourceTree = "<group>"; };
|
||||
6ACA41F715FC1D9000935EF6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/Localizable.strings"; sourceTree = "<group>"; };
|
||||
6ACA41F815FC1D9000935EF6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = "zh-Hant"; path = "zh-Hant.lproj/MainMenu.xib"; sourceTree = "<group>"; };
|
||||
6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "data-plain-bpmf.txt"; sourceTree = "<group>"; };
|
||||
6AE210B015FC63CC003659FE /* PlainBopomofo.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = PlainBopomofo.tiff; sourceTree = "<group>"; };
|
||||
6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */ = {isa = PBXFileReference; lastKnownFileType = image.tiff; path = "PlainBopomofo@2x.tiff"; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
@ -425,6 +427,7 @@
|
|||
6A38BBF415FC117A00A8A51F /* BPMFMappings.txt */,
|
||||
6A38BBF515FC117A00A8A51F /* BPMFPunctuations.txt */,
|
||||
6A38BBF615FC117A00A8A51F /* data.txt */,
|
||||
6AD7CBC715FE555000691B5B /* data-plain-bpmf.txt */,
|
||||
6A38BBF715FC117A00A8A51F /* heterophony1.list */,
|
||||
6A38BBF815FC117A00A8A51F /* heterophony2.list */,
|
||||
6A38BBF915FC117A00A8A51F /* heterophony3.list */,
|
||||
|
@ -581,6 +584,7 @@
|
|||
6A719D0415FC5FD200C8B8E3 /* McBopomofo.iconset in Resources */,
|
||||
6AE210B215FC63CC003659FE /* PlainBopomofo.tiff in Resources */,
|
||||
6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */,
|
||||
6AD7CBC815FE555000691B5B /* data-plain-bpmf.txt in Resources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
|
|
|
@ -36,13 +36,17 @@
|
|||
#import <InputMethodKit/InputMethodKit.h>
|
||||
#import "Mandarin.h"
|
||||
#import "Gramambular.h"
|
||||
#import "SimpleLM.h"
|
||||
|
||||
@interface McBopomofoInputMethodController : IMKInputController
|
||||
{
|
||||
@private
|
||||
@private
|
||||
// the reading buffer that takes user input
|
||||
Formosa::Mandarin::BopomofoReadingBuffer* _bpmfReadingBuffer;
|
||||
|
||||
// language model
|
||||
Formosa::Gramambular::SimpleLM *_languageModel;
|
||||
|
||||
// the grid (lattice) builder for the unigrams (and bigrams)
|
||||
Formosa::Gramambular::BlockReadingBuilder* _builder;
|
||||
|
||||
|
|
|
@ -36,7 +36,6 @@
|
|||
#import <fstream>
|
||||
#import <iostream>
|
||||
#import <set>
|
||||
#import "SimpleLM.h"
|
||||
#import "OVStringHelper.h"
|
||||
#import "OVUTF8Helper.h"
|
||||
#import "AppDelegate.h"
|
||||
|
@ -109,6 +108,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
|
|||
|
||||
// shared language model object that stores our phrase-term probability database
|
||||
SimpleLM gLanguageModel;
|
||||
SimpleLM gLanguageModelPlainBopomofo;
|
||||
|
||||
// private methods
|
||||
@interface McBopomofoInputMethodController () <VTCandidateControllerDelegate>
|
||||
|
@ -172,7 +172,8 @@ public:
|
|||
_bpmfReadingBuffer = new BopomofoReadingBuffer(BopomofoKeyboardLayout::StandardLayout());
|
||||
|
||||
// create the lattice builder
|
||||
_builder = new BlockReadingBuilder(&gLanguageModel);
|
||||
_languageModel = &gLanguageModel;
|
||||
_builder = new BlockReadingBuilder(_languageModel);
|
||||
|
||||
// each Mandarin syllable is separated by a hyphen
|
||||
_builder->setJoinSeparator("-");
|
||||
|
@ -309,9 +310,11 @@ public:
|
|||
{
|
||||
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
|
||||
_inputMode = kPlainBopomofoModeIdentifier;
|
||||
_languageModel = &gLanguageModelPlainBopomofo;
|
||||
}
|
||||
else {
|
||||
_inputMode = kBopomofoModeIdentifier;
|
||||
_languageModel = &gLanguageModel;
|
||||
}
|
||||
|
||||
NSString *basisKeyboardLayoutID = [[NSUserDefaults standardUserDefaults] stringForKey:kBasisKeyboardLayoutPreferenceKey];
|
||||
|
@ -329,6 +332,12 @@ public:
|
|||
if ([_composingBuffer length] > 0) {
|
||||
[self commitComposition:sender];
|
||||
}
|
||||
|
||||
if (_builder) {
|
||||
delete _builder;
|
||||
_builder = new BlockReadingBuilder(_languageModel);
|
||||
_builder->setJoinSeparator("-");
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - IMKServerInput protocol methods
|
||||
|
@ -744,7 +753,7 @@ public:
|
|||
string reading = _bpmfReadingBuffer->syllable().composedString();
|
||||
|
||||
// see if we have a unigram for this
|
||||
if (!gLanguageModel.hasUnigramsForKey(reading)) {
|
||||
if (!_languageModel->hasUnigramsForKey(reading)) {
|
||||
[self beep];
|
||||
[self updateClientComposingBuffer:client];
|
||||
return YES;
|
||||
|
@ -789,7 +798,7 @@ public:
|
|||
[self commitComposition:client];
|
||||
_bpmfReadingBuffer->clear();
|
||||
}
|
||||
else if (gLanguageModel.hasUnigramsForKey(" ")) {
|
||||
else if (_languageModel->hasUnigramsForKey(" ")) {
|
||||
_builder->insertReadingAtCursor(" ");
|
||||
[self popOverflowComposingTextAndWalk:client];
|
||||
[self updateClientComposingBuffer:client];
|
||||
|
@ -908,7 +917,7 @@ public:
|
|||
|
||||
// punctuation list
|
||||
if ((char)charCode == '`') {
|
||||
if (gLanguageModel.hasUnigramsForKey(string("_punctuation_list"))) {
|
||||
if (_languageModel->hasUnigramsForKey(string("_punctuation_list"))) {
|
||||
if (_bpmfReadingBuffer->isEmpty()) {
|
||||
_builder->insertReadingAtCursor(string("_punctuation_list"));
|
||||
[self popOverflowComposingTextAndWalk:client];
|
||||
|
@ -945,7 +954,7 @@ public:
|
|||
}
|
||||
|
||||
string customPunctuation = string("_punctuation_") + layout + string(1, (char)charCode);
|
||||
if (gLanguageModel.hasUnigramsForKey(customPunctuation)) {
|
||||
if (_languageModel->hasUnigramsForKey(customPunctuation)) {
|
||||
if (_bpmfReadingBuffer->isEmpty()) {
|
||||
_builder->insertReadingAtCursor(customPunctuation);
|
||||
[self popOverflowComposingTextAndWalk:client];
|
||||
|
@ -964,7 +973,7 @@ public:
|
|||
|
||||
// if nothing is matched, see if it's a punctuation key
|
||||
string punctuation = string("_punctuation_") + string(1, (char)charCode);
|
||||
if (gLanguageModel.hasUnigramsForKey(punctuation)) {
|
||||
if (_languageModel->hasUnigramsForKey(punctuation)) {
|
||||
if (_bpmfReadingBuffer->isEmpty()) {
|
||||
_builder->insertReadingAtCursor(punctuation);
|
||||
[self popOverflowComposingTextAndWalk:client];
|
||||
|
@ -1302,34 +1311,42 @@ public:
|
|||
|
||||
@end
|
||||
|
||||
|
||||
void LTLoadLanguageModel()
|
||||
static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, SimpleLM &lm)
|
||||
{
|
||||
// load the language model; the performance of this function can be greatly improved
|
||||
// with better loading/parsing methods
|
||||
|
||||
NSDate *__unused startTime = [NSDate date];
|
||||
|
||||
NSString *dataPath = [[NSBundle bundleForClass:[McBopomofoInputMethodController class]] pathForResource:@"data" ofType:@"txt"];
|
||||
|
||||
|
||||
NSString *dataPath = [[NSBundle bundleForClass:[McBopomofoInputMethodController class]] pathForResource:filenameWithoutExtension ofType:@"txt"];
|
||||
|
||||
ifstream ifs;
|
||||
ifs.open([dataPath UTF8String]);
|
||||
while (ifs.good()) {
|
||||
string line;
|
||||
getline(ifs, line);
|
||||
|
||||
|
||||
if (!line.size() || (line.size() && line[0] == '#')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
vector<string> p = OVStringHelper::SplitBySpacesOrTabs(line);
|
||||
|
||||
|
||||
if (p.size() == 3) {
|
||||
gLanguageModel.add(p[1], p[0], atof(p[2].c_str()));
|
||||
lm.add(p[1], p[0], atof(p[2].c_str()));
|
||||
}
|
||||
}
|
||||
ifs.close();
|
||||
gLanguageModel.add(" ", " ", 0.0);
|
||||
|
||||
// insert an empty entry for BOS/EOS markers
|
||||
lm.add(" ", " ", 0.0);
|
||||
}
|
||||
|
||||
|
||||
void LTLoadLanguageModel()
|
||||
{
|
||||
LTLoadLanguageModelFile(@"data", gLanguageModel);
|
||||
LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo);
|
||||
|
||||
|
||||
// initialize the singleton learning dictionary
|
||||
// putting singleton in @synchronized is the standard way in Objective-C
|
||||
|
@ -1344,7 +1361,6 @@ void LTLoadLanguageModel()
|
|||
}
|
||||
|
||||
NSString *appSupportPath = [paths objectAtIndex:0];
|
||||
|
||||
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"McBopomofo"];
|
||||
|
||||
BOOL isDir = NO;
|
||||
|
|
Loading…
Reference in New Issue