Adds McBopomofoLM as the facade of three language models.

- main language model
- user phrases
- user excluded phrases
This commit is contained in:
zonble 2022-01-12 12:26:24 +08:00
parent 56896625e3
commit abdf97f652
11 changed files with 178 additions and 170 deletions

View File

@ -37,6 +37,7 @@
6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; }; 6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; };
6AFF97F2253B299E007F1C49 /* NonModalAlertWindowController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */; }; 6AFF97F2253B299E007F1C49 /* NonModalAlertWindowController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */; };
D41355D8278D74B5005E5CBD /* LanguageModelManager.mm in Sources */ = {isa = PBXBuildFile; fileRef = D41355D7278D7409005E5CBD /* LanguageModelManager.mm */; }; D41355D8278D74B5005E5CBD /* LanguageModelManager.mm in Sources */ = {isa = PBXBuildFile; fileRef = D41355D7278D7409005E5CBD /* LanguageModelManager.mm */; };
D41355DB278E6D17005E5CBD /* McBopomofoLM.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D41355D9278E6D17005E5CBD /* McBopomofoLM.cpp */; };
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; }; D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; };
D427F76A278C9E29004A2160 /* CandidateUI in Frameworks */ = {isa = PBXBuildFile; productRef = D427F769278C9E29004A2160 /* CandidateUI */; }; D427F76A278C9E29004A2160 /* CandidateUI in Frameworks */ = {isa = PBXBuildFile; productRef = D427F769278C9E29004A2160 /* CandidateUI */; };
D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427F76B278CA1BA004A2160 /* AppDelegate.swift */; }; D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427F76B278CA1BA004A2160 /* AppDelegate.swift */; };
@ -158,6 +159,8 @@
6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = NonModalAlertWindowController.xib; sourceTree = "<group>"; }; 6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = NonModalAlertWindowController.xib; sourceTree = "<group>"; };
D41355D6278D7409005E5CBD /* LanguageModelManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LanguageModelManager.h; sourceTree = "<group>"; }; D41355D6278D7409005E5CBD /* LanguageModelManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LanguageModelManager.h; sourceTree = "<group>"; };
D41355D7278D7409005E5CBD /* LanguageModelManager.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LanguageModelManager.mm; sourceTree = "<group>"; }; D41355D7278D7409005E5CBD /* LanguageModelManager.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LanguageModelManager.mm; sourceTree = "<group>"; };
D41355D9278E6D17005E5CBD /* McBopomofoLM.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = McBopomofoLM.cpp; sourceTree = "<group>"; };
D41355DA278E6D17005E5CBD /* McBopomofoLM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = McBopomofoLM.h; sourceTree = "<group>"; };
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; }; D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; };
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; }; D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; };
D427F768278C9D0D004A2160 /* CandidateUI */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = CandidateUI; path = Packages/CandidateUI; sourceTree = "<group>"; }; D427F768278C9D0D004A2160 /* CandidateUI */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = CandidateUI; path = Packages/CandidateUI; sourceTree = "<group>"; };
@ -267,6 +270,8 @@
6A0421A715FEF3F50061ED63 /* FastLM.h */, 6A0421A715FEF3F50061ED63 /* FastLM.h */,
D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */, D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */,
D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */, D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */,
D41355D9278E6D17005E5CBD /* McBopomofoLM.cpp */,
D41355DA278E6D17005E5CBD /* McBopomofoLM.h */,
); );
path = Engine; path = Engine;
sourceTree = "<group>"; sourceTree = "<group>";
@ -553,6 +558,7 @@
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */, D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */,
D47F7DCE278BFB57002F9DD7 /* PreferencesWindowController.swift in Sources */, D47F7DCE278BFB57002F9DD7 /* PreferencesWindowController.swift in Sources */,
6A0D4ED215FC0D6400ABF4B3 /* InputMethodController.mm in Sources */, 6A0D4ED215FC0D6400ABF4B3 /* InputMethodController.mm in Sources */,
D41355DB278E6D17005E5CBD /* McBopomofoLM.cpp in Sources */,
D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */, D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */,
6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */, 6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */,
6A0421A815FEF3F50061ED63 /* FastLM.cpp in Sources */, 6A0421A815FEF3F50061ED63 /* FastLM.cpp in Sources */,

View File

@ -285,7 +285,7 @@ const vector<Bigram> FastLM::bigramsForKeys(const string& preceedingKey, const s
return vector<Bigram>(); return vector<Bigram>();
} }
const vector<Unigram> FastLM::unigramsForKeys(const string& key) const vector<Unigram> FastLM::unigramsForKey(const string& key)
{ {
vector<Unigram> v; vector<Unigram> v;
map<const char *, vector<Row> >::const_iterator i = keyRowMap.find(key.c_str()); map<const char *, vector<Row> >::const_iterator i = keyRowMap.find(key.c_str());

View File

@ -50,7 +50,7 @@ namespace Formosa {
void dump(); void dump();
virtual const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key); virtual const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
virtual const vector<Unigram> unigramsForKeys(const string& key); virtual const vector<Unigram> unigramsForKey(const string& key);
virtual bool hasUnigramsForKey(const string& key); virtual bool hasUnigramsForKey(const string& key);
protected: protected:

View File

@ -38,7 +38,7 @@ namespace Formosa {
class BlockReadingBuilder { class BlockReadingBuilder {
public: public:
BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM, LanguageModel *inExcludedPhrasesLM); BlockReadingBuilder(LanguageModel *inLM);
void clear(); void clear();
size_t length() const; size_t length() const;
@ -75,17 +75,11 @@ namespace Formosa {
Grid m_grid; Grid m_grid;
LanguageModel *m_LM; LanguageModel *m_LM;
LanguageModel *m_userPhraseLM;
LanguageModel *m_excludedPhrasesLM;
string m_joinSeparator; string m_joinSeparator;
}; };
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM, inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM)
LanguageModel *inUserPhraseLM,
LanguageModel *inExcludedPhrasesLM)
: m_LM(inLM) : m_LM(inLM)
, m_userPhraseLM(inUserPhraseLM)
, m_excludedPhrasesLM(inExcludedPhrasesLM)
, m_cursorIndex(0) , m_cursorIndex(0)
, m_markerCursorIndex(SIZE_MAX) , m_markerCursorIndex(SIZE_MAX)
{ {
@ -238,33 +232,7 @@ namespace Formosa {
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) { for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
vector<Unigram> unigrams; vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
vector<Unigram> userUnigrams;
if (m_userPhraseLM != NULL && m_userPhraseLM->hasUnigramsForKey(combinedReading)) {
userUnigrams = m_userPhraseLM->unigramsForKeys(combinedReading);
}
if (m_LM->hasUnigramsForKey(combinedReading)) {
vector<Unigram> globalUnigrams = m_LM->unigramsForKeys(combinedReading);
for (std::vector<Unigram>::iterator it=globalUnigrams.begin(); it!=globalUnigrams.end(); ++it) {
if (!checkIfUnigramExistInVector(*it, unigrams)) {
unigrams.push_back(*it);
}
}
}
unigrams.insert(unigrams.begin(), userUnigrams.begin(), userUnigrams.end());
if (m_excludedPhrasesLM != NULL && m_excludedPhrasesLM->hasUnigramsForKey(combinedReading)) {
vector<Unigram> excludedUnigrams = m_excludedPhrasesLM->unigramsForKeys(combinedReading);
vector<Unigram> newUnigram;
for (std::vector<Unigram>::iterator it=unigrams.begin(); it!=unigrams.end(); ++it) {
if (!checkIfUnigramExistInVector(*it, excludedUnigrams)) {
newUnigram.push_back(*it);
}
}
unigrams = newUnigram;
}
if (unigrams.size() > 0) { if (unigrams.size() > 0) {
Node n(combinedReading, unigrams, vector<Bigram>()); Node n(combinedReading, unigrams, vector<Bigram>());

View File

@ -42,7 +42,7 @@ namespace Formosa {
virtual ~LanguageModel() {} virtual ~LanguageModel() {}
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0; virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0;
virtual const vector<Unigram> unigramsForKeys(const string &key) = 0; virtual const vector<Unigram> unigramsForKey(const string &key) = 0;
virtual bool hasUnigramsForKey(const string& key) = 0; virtual bool hasUnigramsForKey(const string& key) = 0;
}; };
} }

View File

@ -0,0 +1,92 @@
#include "McBopomofoLM.h"
#include <algorithm>
#include <iterator>
#include <unordered_set>
using namespace McBopomofo;
// Constructs the facade with its three FastLM members default-constructed.
// This constructor itself opens no data files; use loadLanguageModel() and
// loadUserPhrases() to attach data.
McBopomofoLM::McBopomofoLM() = default;
// Closes the main model, the user phrases and the excluded phrases, in that
// order, before the members themselves are destroyed.
// NOTE(review): whether FastLM's own destructor also closes its data is not
// visible from this file; the explicit close() calls are kept regardless.
McBopomofoLM::~McBopomofoLM()
{
    FastLM* const ownedModels[] = {
        &m_languageModel,
        &m_userPhrases,
        &m_excluddePhrases,
    };
    for (FastLM* model : ownedModels) {
        model->close();
    }
}
// Replaces the main language model with the data at `languageModelDataPath`.
//
// The previously loaded main model (if any) is always closed first. When the
// path is null -- which happens when the caller derives it from a missing
// bundle resource -- nothing is opened and the main model is simply left
// closed, instead of forwarding a null pointer to FastLM::open().
//
// The result of FastLM::open() is not surfaced to the caller; a failed load
// leaves the main model without data.
//
// @param languageModelDataPath  Path handed to FastLM::open(); may be null.
void McBopomofoLM::loadLanguageModel(const char* languageModelDataPath)
{
    m_languageModel.close();
    if (!languageModelDataPath) {
        return;
    }
    m_languageModel.open(languageModelDataPath);
}
// Reloads the user-phrase model and the excluded-phrase model.
//
// Both models are always closed first, so stale data never survives a reload.
// Each path is then opened independently: a null path skips the open for that
// model only (leaving it closed) rather than forwarding a null pointer to
// FastLM::open(). Non-null paths, including the empty string, are passed to
// FastLM::open() unchanged.
//
// The results of FastLM::open() are not surfaced to the caller.
//
// @param userPhrasesDataPath      Path handed to FastLM::open() for the user
//                                 phrases; may be null.
// @param excludedPhrasesDataPath  Path handed to FastLM::open() for the
//                                 excluded phrases; may be null.
void McBopomofoLM::loadUserPhrases(const char* userPhrasesDataPath,
                                   const char* excludedPhrasesDataPath)
{
    m_userPhrases.close();
    if (userPhrasesDataPath) {
        m_userPhrases.open(userPhrasesDataPath);
    }

    m_excluddePhrases.close();
    if (excludedPhrasesDataPath) {
        m_excluddePhrases.open(excludedPhrasesDataPath);
    }
}
// This facade does not provide bigrams: both keys are ignored and the result
// is always an empty vector.
const vector<Bigram> McBopomofoLM::bigramsForKeys(const string& preceedingKey, const string& key)
{
    vector<Bigram> noBigrams;
    return noBigrams;
}
// Returns the merged unigrams for `key`, combining the three models:
//
//   1. every value listed for `key` in the excluded phrases is dropped;
//   2. the remaining user-phrase unigrams come first, in their original order;
//   3. unigrams from the main model follow, skipping any value that is
//      excluded or already supplied by the user phrases.
//
// Values are tracked in hash sets so the filtering does not degrade into a
// pairwise comparison of the lists.
const vector<Unigram> McBopomofoLM::unigramsForKey(const string& key)
{
    // Values the user asked never to see for this key.
    unordered_set<string> blockedValues;
    if (m_excluddePhrases.hasUnigramsForKey(key)) {
        for (const Unigram& excluded : m_excluddePhrases.unigramsForKey(key)) {
            blockedValues.insert(excluded.keyValue.value);
        }
    }

    // The result is built front to back: user phrases first, main model after.
    vector<Unigram> merged;
    unordered_set<string> userSuppliedValues;
    if (m_userPhrases.hasUnigramsForKey(key)) {
        for (const Unigram& candidate : m_userPhrases.unigramsForKey(key)) {
            if (blockedValues.count(candidate.keyValue.value) != 0) {
                continue;
            }
            merged.push_back(candidate);
            userSuppliedValues.insert(candidate.keyValue.value);
        }
    }

    if (m_languageModel.hasUnigramsForKey(key)) {
        for (const Unigram& candidate : m_languageModel.unigramsForKey(key)) {
            const bool isBlocked = blockedValues.count(candidate.keyValue.value) != 0;
            const bool isShadowedByUser = userSuppliedValues.count(candidate.keyValue.value) != 0;
            if (!isBlocked && !isShadowedByUser) {
                merged.push_back(candidate);
            }
        }
    }

    return merged;
}
// Reports whether unigramsForKey(key) would return at least one unigram.
bool McBopomofoLM::hasUnigramsForKey(const string& key)
{
    if (m_excluddePhrases.hasUnigramsForKey(key)) {
        // Some values are excluded for this key, so the key only counts when
        // something survives the filtering done by unigramsForKey().
        return !unigramsForKey(key).empty();
    }

    // Nothing is excluded for this key: either remaining model having an
    // entry is enough, and the merged list need not be built.
    return m_userPhrases.hasUnigramsForKey(key)
        || m_languageModel.hasUnigramsForKey(key);
}

View File

@ -0,0 +1,31 @@
#ifndef MCBOPOMOFOLM_H
#define MCBOPOMOFOLM_H
#include <stdio.h>
#include "FastLM.h"
namespace McBopomofo {
using namespace Formosa::Gramambular;
class McBopomofoLM : public LanguageModel {
public:
McBopomofoLM();
~McBopomofoLM();
void loadLanguageModel(const char* languageModelDataPath);
void loadUserPhrases(const char* m_userPhrasesDataPath,
const char* m_excludedPhrasesDataPath);
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
const vector<Unigram> unigramsForKey(const string& key);
bool hasUnigramsForKey(const string& key);
protected:
FastLM m_languageModel;
FastLM m_userPhrases;
FastLM m_excluddePhrases;
};
};
#endif

View File

@ -36,7 +36,7 @@
#import <InputMethodKit/InputMethodKit.h> #import <InputMethodKit/InputMethodKit.h>
#import "Mandarin.h" #import "Mandarin.h"
#import "Gramambular.h" #import "Gramambular.h"
#import "FastLM.h" #import "McBopomofoLM.h"
#import "UserOverrideModel.h" #import "UserOverrideModel.h"
@interface McBopomofoInputMethodController : IMKInputController @interface McBopomofoInputMethodController : IMKInputController
@ -46,9 +46,7 @@
Formosa::Mandarin::BopomofoReadingBuffer* _bpmfReadingBuffer; Formosa::Mandarin::BopomofoReadingBuffer* _bpmfReadingBuffer;
// language model // language model
Formosa::Gramambular::FastLM *_languageModel; McBopomofo::McBopomofoLM *_languageModel;
Formosa::Gramambular::FastLM *_userPhrasesModel;
Formosa::Gramambular::FastLM *_excludedPhraseModel;
// user override model // user override model
McBopomofo::UserOverrideModel *_userOverrideModel; McBopomofo::UserOverrideModel *_userOverrideModel;

View File

@ -48,6 +48,7 @@
using namespace std; using namespace std;
using namespace Formosa::Mandarin; using namespace Formosa::Mandarin;
using namespace Formosa::Gramambular; using namespace Formosa::Gramambular;
using namespace McBopomofo;
using namespace OpenVanilla; using namespace OpenVanilla;
// default, min and max candidate list text size // default, min and max candidate list text size
@ -176,11 +177,9 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
// create the lattice builder // create the lattice builder
_languageModel = [LanguageModelManager languageModelMcBopomofo]; _languageModel = [LanguageModelManager languageModelMcBopomofo];
_userPhrasesModel = [LanguageModelManager userPhraseLanguageModel];
_userOverrideModel = [LanguageModelManager userOverrideModel]; _userOverrideModel = [LanguageModelManager userOverrideModel];
_excludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelMcBopomofo];
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel, _excludedPhraseModel); _builder = new BlockReadingBuilder(_languageModel);
// each Mandarin syllable is separated by a hyphen // each Mandarin syllable is separated by a hyphen
_builder->setJoinSeparator("-"); _builder->setJoinSeparator("-");
@ -325,21 +324,15 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
- (void)setValue:(id)value forTag:(long)tag client:(id)sender - (void)setValue:(id)value forTag:(long)tag client:(id)sender
{ {
NSString *newInputMode; NSString *newInputMode;
FastLM *newLanguageModel; McBopomofoLM *newLanguageModel;
FastLM *newUserPhrasesModel;
FastLM *newExcludedPhraseModel;
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) { if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
newInputMode = kPlainBopomofoModeIdentifier; newInputMode = kPlainBopomofoModeIdentifier;
newLanguageModel = [LanguageModelManager languageModelPlainBopomofo]; newLanguageModel = [LanguageModelManager languageModelPlainBopomofo];
newUserPhrasesModel = NULL;
newExcludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelPlainBopomofo];
} }
else { else {
newInputMode = kBopomofoModeIdentifier; newInputMode = kBopomofoModeIdentifier;
newLanguageModel = [LanguageModelManager languageModelMcBopomofo]; newLanguageModel = [LanguageModelManager languageModelMcBopomofo];
newUserPhrasesModel = [LanguageModelManager userPhraseLanguageModel];
newExcludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelMcBopomofo];
} }
// Only apply the changes if the value is changed // Only apply the changes if the value is changed
@ -355,8 +348,6 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_inputMode = newInputMode; _inputMode = newInputMode;
_languageModel = newLanguageModel; _languageModel = newLanguageModel;
_userPhrasesModel = newUserPhrasesModel;
_excludedPhraseModel = newExcludedPhraseModel;
if (!_bpmfReadingBuffer->isEmpty()) { if (!_bpmfReadingBuffer->isEmpty()) {
_bpmfReadingBuffer->clear(); _bpmfReadingBuffer->clear();
@ -369,7 +360,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
if (_builder) { if (_builder) {
delete _builder; delete _builder;
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel, _excludedPhraseModel); _builder = new BlockReadingBuilder(_languageModel);
_builder->setJoinSeparator("-"); _builder->setJoinSeparator("-");
} }
} }
@ -1096,46 +1087,9 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
return NO; return NO;
} }
- (vector<Unigram>)_collectUnigrams:(string)string
{
vector<Unigram> unigrams;
vector<Unigram> userUnigrams;
if (_userPhrasesModel != NULL && _userPhrasesModel->hasUnigramsForKey(string)) {
userUnigrams = _userPhrasesModel->unigramsForKeys(string);
}
if (_languageModel->hasUnigramsForKey(string)) {
vector<Unigram> globalUnigrams = _languageModel->unigramsForKeys(string);
for (std::vector<Unigram>::iterator it=globalUnigrams.begin(); it!=globalUnigrams.end(); ++it) {
if (!_builder->checkIfUnigramExistInVector(*it, unigrams)) {
unigrams.push_back(*it);
}
}
}
unigrams.insert(unigrams.begin(), userUnigrams.begin(), userUnigrams.end());
if (_excludedPhraseModel != NULL && _excludedPhraseModel->hasUnigramsForKey(string)) {
vector<Unigram> excludedUnigrams = _excludedPhraseModel->unigramsForKeys(string);
vector<Unigram> newUnigram;
for (std::vector<Unigram>::iterator it=unigrams.begin(); it!=unigrams.end(); ++it) {
if (!_builder->checkIfUnigramExistInVector(*it, excludedUnigrams)) {
newUnigram.push_back(*it);
}
}
unigrams = newUnigram;
}
return unigrams;
}
- (BOOL)handlePunctuation:(string)customPunctuation usingVerticalMode:(BOOL)useVerticalMode client:(id)client - (BOOL)handlePunctuation:(string)customPunctuation usingVerticalMode:(BOOL)useVerticalMode client:(id)client
{ {
vector<Unigram> collected = [self _collectUnigrams:customPunctuation]; if (_languageModel->hasUnigramsForKey(customPunctuation)) {
if (!collected.size()) {
return NO;
}
if (_bpmfReadingBuffer->isEmpty()) { if (_bpmfReadingBuffer->isEmpty()) {
_builder->insertReadingAtCursor(customPunctuation); _builder->insertReadingAtCursor(customPunctuation);
[self popOverflowComposingTextAndWalk:client]; [self popOverflowComposingTextAndWalk:client];
@ -1156,6 +1110,8 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
} }
return YES; return YES;
} }
return NO;
}
- (BOOL)handleCandidateEventWithInputText:(NSString *)inputText charCode:(UniChar)charCode keyCode:(NSUInteger)keyCode - (BOOL)handleCandidateEventWithInputText:(NSString *)inputText charCode:(UniChar)charCode keyCode:(NSUInteger)keyCode
{ {

View File

@ -1,6 +1,7 @@
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>
#import "FastLM.h" #import "FastLM.h"
#import "UserOverrideModel.h" #import "UserOverrideModel.h"
#import "McBopomofoLM.h"
NS_ASSUME_NONNULL_BEGIN NS_ASSUME_NONNULL_BEGIN
@ -15,11 +16,8 @@ NS_ASSUME_NONNULL_BEGIN
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathMcBopomofo; @property (class, readonly, nonatomic) NSString *userPhrasesDataPathMcBopomofo;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathMcBopomofo; @property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathMcBopomofo;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathPlainBopomofo; @property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathPlainBopomofo;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelMcBopomofo; @property (class, readonly, nonatomic) McBopomofo::McBopomofoLM *languageModelMcBopomofo;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelPlainBopomofo; @property (class, readonly, nonatomic) McBopomofo::McBopomofoLM *languageModelPlainBopomofo;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *userPhraseLanguageModel;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *excludedPhrasesLanguageModelMcBopomofo;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *excludedPhrasesLanguageModelPlainBopomofo;
@property (class, readonly, nonatomic) McBopomofo::UserOverrideModel *userOverrideModel; @property (class, readonly, nonatomic) McBopomofo::UserOverrideModel *userOverrideModel;
@end @end

View File

@ -7,61 +7,35 @@
using namespace std; using namespace std;
using namespace Formosa::Gramambular; using namespace Formosa::Gramambular;
using namespace McBopomofo;
using namespace OpenVanilla; using namespace OpenVanilla;
static const int kUserOverrideModelCapacity = 500; static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr. static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
FastLM globalLanguageModel; McBopomofoLM gLanguageModelMcBopomofo;
FastLM globalLanguageModelPlainBopomofo; McBopomofoLM gLanguageModelPlainBopomofo;
FastLM globalUserPhraseLanguageModel; UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
FastLM globalUserExcludedPhrasesMcBopomofo;
FastLM globalUserExcludedPhrasesPlainBopomofo;
McBopomofo::UserOverrideModel globalUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
@implementation LanguageModelManager @implementation LanguageModelManager
static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &lm) static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, McBopomofoLM &lm)
{ {
Class cls = NSClassFromString(@"McBopomofoInputMethodController"); Class cls = NSClassFromString(@"McBopomofoInputMethodController");
NSString *dataPath = [[NSBundle bundleForClass:cls] pathForResource:filenameWithoutExtension ofType:@"txt"]; NSString *dataPath = [[NSBundle bundleForClass:cls] pathForResource:filenameWithoutExtension ofType:@"txt"];
bool result = lm.open([dataPath UTF8String]); lm.loadLanguageModel([dataPath UTF8String]);
return (BOOL)result;
} }
+ (void)loadDataModels + (void)loadDataModels
{ {
bool dataOpenResult = LTLoadLanguageModelFile(@"data", globalLanguageModel); LTLoadLanguageModelFile(@"data", gLanguageModelMcBopomofo);
if (!dataOpenResult) { LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo);
NSLog(@"Failed to open language model.");
}
bool plainBpmfOpenResult = LTLoadLanguageModelFile(@"data-plain-bpmf", globalLanguageModelPlainBopomofo);
if (!plainBpmfOpenResult) {
NSLog(@"Failed to open language model for plain bpmf.");
}
} }
+ (void)loadUserPhrasesModel + (void)loadUserPhrasesModel
{ {
globalUserPhraseLanguageModel.close(); gLanguageModelMcBopomofo.loadUserPhrases([[self userPhrasesDataPathMcBopomofo] UTF8String], [[self excludedPhrasesDataPathMcBopomofo] UTF8String]);
globalUserExcludedPhrasesMcBopomofo.close(); gLanguageModelPlainBopomofo.loadUserPhrases("", [[self excludedPhrasesDataPathPlainBopomofo] UTF8String]);
globalUserExcludedPhrasesPlainBopomofo.close();
bool result = false;
result = globalUserPhraseLanguageModel.open([[self userPhrasesDataPathMcBopomofo] UTF8String]);
if (!result) {
NSLog(@"Failed to open user phrases. %@", [self userPhrasesDataPathMcBopomofo]);
}
result = globalUserExcludedPhrasesMcBopomofo.open([[self excludedPhrasesDataPathMcBopomofo] UTF8String]);
if (!result) {
NSLog(@"Failed to open excluded phrases McBopomofo. %@", [self excludedPhrasesDataPathMcBopomofo]);
}
result = globalUserExcludedPhrasesPlainBopomofo.open([[self excludedPhrasesDataPathPlainBopomofo] UTF8String]);
if (!result) {
NSLog(@"Failed to open excluded phrases Plain Bopomofo. %@", [self excludedPhrasesDataPathPlainBopomofo]);
}
} }
+ (BOOL)checkIfUserDataFolderExists + (BOOL)checkIfUserDataFolderExists
@ -163,34 +137,19 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"]; return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"];
} }
+ (FastLM *)languageModelMcBopomofo + (McBopomofoLM *)languageModelMcBopomofo
{ {
return &globalLanguageModel; return &gLanguageModelMcBopomofo;
} }
+ (FastLM *)languageModelPlainBopomofo + (McBopomofoLM *)languageModelPlainBopomofo
{ {
return &globalLanguageModelPlainBopomofo; return &gLanguageModelPlainBopomofo;
}
+ (FastLM *)userPhraseLanguageModel
{
return &globalUserPhraseLanguageModel;
}
+ (FastLM *)excludedPhrasesLanguageModelMcBopomofo
{
return &globalUserExcludedPhrasesMcBopomofo;
}
+ (FastLM *)excludedPhrasesLanguageModelPlainBopomofo
{
return &globalUserExcludedPhrasesPlainBopomofo;
} }
+ (McBopomofo::UserOverrideModel *)userOverrideModel + (McBopomofo::UserOverrideModel *)userOverrideModel
{ {
return &globalUserOverrideModel; return &gUserOverrideModel;
} }
@end @end