Adds McBopomofoLM as a facade over three language models:
- the main language model
- user phrases
- user-excluded phrases
This commit is contained in:
parent
56896625e3
commit
abdf97f652
|
@ -37,6 +37,7 @@
|
||||||
6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; };
|
6AE210B315FC63CC003659FE /* PlainBopomofo@2x.tiff in Resources */ = {isa = PBXBuildFile; fileRef = 6AE210B115FC63CC003659FE /* PlainBopomofo@2x.tiff */; };
|
||||||
6AFF97F2253B299E007F1C49 /* NonModalAlertWindowController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */; };
|
6AFF97F2253B299E007F1C49 /* NonModalAlertWindowController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */; };
|
||||||
D41355D8278D74B5005E5CBD /* LanguageModelManager.mm in Sources */ = {isa = PBXBuildFile; fileRef = D41355D7278D7409005E5CBD /* LanguageModelManager.mm */; };
|
D41355D8278D74B5005E5CBD /* LanguageModelManager.mm in Sources */ = {isa = PBXBuildFile; fileRef = D41355D7278D7409005E5CBD /* LanguageModelManager.mm */; };
|
||||||
|
D41355DB278E6D17005E5CBD /* McBopomofoLM.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D41355D9278E6D17005E5CBD /* McBopomofoLM.cpp */; };
|
||||||
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; };
|
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; };
|
||||||
D427F76A278C9E29004A2160 /* CandidateUI in Frameworks */ = {isa = PBXBuildFile; productRef = D427F769278C9E29004A2160 /* CandidateUI */; };
|
D427F76A278C9E29004A2160 /* CandidateUI in Frameworks */ = {isa = PBXBuildFile; productRef = D427F769278C9E29004A2160 /* CandidateUI */; };
|
||||||
D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427F76B278CA1BA004A2160 /* AppDelegate.swift */; };
|
D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427F76B278CA1BA004A2160 /* AppDelegate.swift */; };
|
||||||
|
@ -158,6 +159,8 @@
|
||||||
6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = NonModalAlertWindowController.xib; sourceTree = "<group>"; };
|
6AFF97F0253B299E007F1C49 /* NonModalAlertWindowController.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = NonModalAlertWindowController.xib; sourceTree = "<group>"; };
|
||||||
D41355D6278D7409005E5CBD /* LanguageModelManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LanguageModelManager.h; sourceTree = "<group>"; };
|
D41355D6278D7409005E5CBD /* LanguageModelManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LanguageModelManager.h; sourceTree = "<group>"; };
|
||||||
D41355D7278D7409005E5CBD /* LanguageModelManager.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LanguageModelManager.mm; sourceTree = "<group>"; };
|
D41355D7278D7409005E5CBD /* LanguageModelManager.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LanguageModelManager.mm; sourceTree = "<group>"; };
|
||||||
|
D41355D9278E6D17005E5CBD /* McBopomofoLM.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = McBopomofoLM.cpp; sourceTree = "<group>"; };
|
||||||
|
D41355DA278E6D17005E5CBD /* McBopomofoLM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = McBopomofoLM.h; sourceTree = "<group>"; };
|
||||||
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; };
|
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; };
|
||||||
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; };
|
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; };
|
||||||
D427F768278C9D0D004A2160 /* CandidateUI */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = CandidateUI; path = Packages/CandidateUI; sourceTree = "<group>"; };
|
D427F768278C9D0D004A2160 /* CandidateUI */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = CandidateUI; path = Packages/CandidateUI; sourceTree = "<group>"; };
|
||||||
|
@ -267,6 +270,8 @@
|
||||||
6A0421A715FEF3F50061ED63 /* FastLM.h */,
|
6A0421A715FEF3F50061ED63 /* FastLM.h */,
|
||||||
D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */,
|
D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */,
|
||||||
D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */,
|
D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */,
|
||||||
|
D41355D9278E6D17005E5CBD /* McBopomofoLM.cpp */,
|
||||||
|
D41355DA278E6D17005E5CBD /* McBopomofoLM.h */,
|
||||||
);
|
);
|
||||||
path = Engine;
|
path = Engine;
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
|
@ -553,6 +558,7 @@
|
||||||
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */,
|
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */,
|
||||||
D47F7DCE278BFB57002F9DD7 /* PreferencesWindowController.swift in Sources */,
|
D47F7DCE278BFB57002F9DD7 /* PreferencesWindowController.swift in Sources */,
|
||||||
6A0D4ED215FC0D6400ABF4B3 /* InputMethodController.mm in Sources */,
|
6A0D4ED215FC0D6400ABF4B3 /* InputMethodController.mm in Sources */,
|
||||||
|
D41355DB278E6D17005E5CBD /* McBopomofoLM.cpp in Sources */,
|
||||||
D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */,
|
D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */,
|
||||||
6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */,
|
6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */,
|
||||||
6A0421A815FEF3F50061ED63 /* FastLM.cpp in Sources */,
|
6A0421A815FEF3F50061ED63 /* FastLM.cpp in Sources */,
|
||||||
|
|
|
@ -285,7 +285,7 @@ const vector<Bigram> FastLM::bigramsForKeys(const string& preceedingKey, const s
|
||||||
return vector<Bigram>();
|
return vector<Bigram>();
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<Unigram> FastLM::unigramsForKeys(const string& key)
|
const vector<Unigram> FastLM::unigramsForKey(const string& key)
|
||||||
{
|
{
|
||||||
vector<Unigram> v;
|
vector<Unigram> v;
|
||||||
map<const char *, vector<Row> >::const_iterator i = keyRowMap.find(key.c_str());
|
map<const char *, vector<Row> >::const_iterator i = keyRowMap.find(key.c_str());
|
||||||
|
|
|
@ -50,7 +50,7 @@ namespace Formosa {
|
||||||
void dump();
|
void dump();
|
||||||
|
|
||||||
virtual const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
|
virtual const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
|
||||||
virtual const vector<Unigram> unigramsForKeys(const string& key);
|
virtual const vector<Unigram> unigramsForKey(const string& key);
|
||||||
virtual bool hasUnigramsForKey(const string& key);
|
virtual bool hasUnigramsForKey(const string& key);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -38,7 +38,7 @@ namespace Formosa {
|
||||||
|
|
||||||
class BlockReadingBuilder {
|
class BlockReadingBuilder {
|
||||||
public:
|
public:
|
||||||
BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM, LanguageModel *inExcludedPhrasesLM);
|
BlockReadingBuilder(LanguageModel *inLM);
|
||||||
void clear();
|
void clear();
|
||||||
|
|
||||||
size_t length() const;
|
size_t length() const;
|
||||||
|
@ -75,17 +75,11 @@ namespace Formosa {
|
||||||
|
|
||||||
Grid m_grid;
|
Grid m_grid;
|
||||||
LanguageModel *m_LM;
|
LanguageModel *m_LM;
|
||||||
LanguageModel *m_userPhraseLM;
|
|
||||||
LanguageModel *m_excludedPhrasesLM;
|
|
||||||
string m_joinSeparator;
|
string m_joinSeparator;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM,
|
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM)
|
||||||
LanguageModel *inUserPhraseLM,
|
|
||||||
LanguageModel *inExcludedPhrasesLM)
|
|
||||||
: m_LM(inLM)
|
: m_LM(inLM)
|
||||||
, m_userPhraseLM(inUserPhraseLM)
|
|
||||||
, m_excludedPhrasesLM(inExcludedPhrasesLM)
|
|
||||||
, m_cursorIndex(0)
|
, m_cursorIndex(0)
|
||||||
, m_markerCursorIndex(SIZE_MAX)
|
, m_markerCursorIndex(SIZE_MAX)
|
||||||
{
|
{
|
||||||
|
@ -238,33 +232,7 @@ namespace Formosa {
|
||||||
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
|
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
|
||||||
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
||||||
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
|
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
|
||||||
vector<Unigram> unigrams;
|
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
|
||||||
vector<Unigram> userUnigrams;
|
|
||||||
|
|
||||||
if (m_userPhraseLM != NULL && m_userPhraseLM->hasUnigramsForKey(combinedReading)) {
|
|
||||||
userUnigrams = m_userPhraseLM->unigramsForKeys(combinedReading);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_LM->hasUnigramsForKey(combinedReading)) {
|
|
||||||
vector<Unigram> globalUnigrams = m_LM->unigramsForKeys(combinedReading);
|
|
||||||
for (std::vector<Unigram>::iterator it=globalUnigrams.begin(); it!=globalUnigrams.end(); ++it) {
|
|
||||||
if (!checkIfUnigramExistInVector(*it, unigrams)) {
|
|
||||||
unigrams.push_back(*it);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
unigrams.insert(unigrams.begin(), userUnigrams.begin(), userUnigrams.end());
|
|
||||||
|
|
||||||
if (m_excludedPhrasesLM != NULL && m_excludedPhrasesLM->hasUnigramsForKey(combinedReading)) {
|
|
||||||
vector<Unigram> excludedUnigrams = m_excludedPhrasesLM->unigramsForKeys(combinedReading);
|
|
||||||
vector<Unigram> newUnigram;
|
|
||||||
for (std::vector<Unigram>::iterator it=unigrams.begin(); it!=unigrams.end(); ++it) {
|
|
||||||
if (!checkIfUnigramExistInVector(*it, excludedUnigrams)) {
|
|
||||||
newUnigram.push_back(*it);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
unigrams = newUnigram;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (unigrams.size() > 0) {
|
if (unigrams.size() > 0) {
|
||||||
Node n(combinedReading, unigrams, vector<Bigram>());
|
Node n(combinedReading, unigrams, vector<Bigram>());
|
||||||
|
|
|
@ -42,7 +42,7 @@ namespace Formosa {
|
||||||
virtual ~LanguageModel() {}
|
virtual ~LanguageModel() {}
|
||||||
|
|
||||||
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0;
|
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0;
|
||||||
virtual const vector<Unigram> unigramsForKeys(const string &key) = 0;
|
virtual const vector<Unigram> unigramsForKey(const string &key) = 0;
|
||||||
virtual bool hasUnigramsForKey(const string& key) = 0;
|
virtual bool hasUnigramsForKey(const string& key) = 0;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
#include "McBopomofoLM.h"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
|
using namespace McBopomofo;
|
||||||
|
|
||||||
|
// Constructs the facade. The constructor body does nothing itself; model data
// is loaded later through loadLanguageModel() and loadUserPhrases().
McBopomofoLM::McBopomofoLM() {}
|
||||||
|
|
||||||
|
// Closes all three underlying models when the facade is destroyed.
McBopomofoLM::~McBopomofoLM() {
    m_languageModel.close();    // main language model
    m_userPhrases.close();      // user-added phrases
    m_excluddePhrases.close();  // user-excluded phrases
}
|
||||||
|
|
||||||
|
void McBopomofoLM::loadLanguageModel(const char* languageModelDataPath)
|
||||||
|
{
|
||||||
|
m_languageModel.close();
|
||||||
|
m_languageModel.open(languageModelDataPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
void McBopomofoLM::loadUserPhrases(const char* userPhrasesDataPath,
|
||||||
|
const char* excludedPhrasesDataPath)
|
||||||
|
{
|
||||||
|
m_userPhrases.close();
|
||||||
|
m_userPhrases.open(userPhrasesDataPath);
|
||||||
|
m_excluddePhrases.close();
|
||||||
|
m_excluddePhrases.open(excludedPhrasesDataPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bigram lookup for the facade. Neither key is consulted: the result is
// always an empty list.
const vector<Bigram> McBopomofoLM::bigramsForKeys(const string& preceedingKey, const string& key)
{
    vector<Bigram> noBigrams;
    return noBigrams;
}
|
||||||
|
|
||||||
|
// Returns the unigrams for `key`, merged from the three underlying models.
//
// Result order:
//   1. user phrases for the key, minus any whose value is excluded;
//   2. main language model entries for the key, minus any whose value is
//      excluded or already supplied by a user phrase.
//
// Values are tracked in hash sets so each filter is a constant-time lookup
// rather than a scan of another vector.
const vector<Unigram> McBopomofoLM::unigramsForKey(const string& key)
{
    // Values the user has excluded for this key.
    unordered_set<string> excludedValues;
    if (m_excluddePhrases.hasUnigramsForKey(key)) {
        for (auto&& excluded : m_excluddePhrases.unigramsForKey(key)) {
            excludedValues.insert(excluded.keyValue.value);
        }
    }

    vector<Unigram> merged;

    // User phrases go first; remember their values so the main model does
    // not contribute the same value a second time.
    unordered_set<string> userValues;
    if (m_userPhrases.hasUnigramsForKey(key)) {
        for (auto&& candidate : m_userPhrases.unigramsForKey(key)) {
            if (excludedValues.count(candidate.keyValue.value) != 0) {
                continue;
            }
            merged.push_back(candidate);
            userValues.insert(candidate.keyValue.value);
        }
    }

    // Main model entries follow, skipping excluded and user-supplied values.
    if (m_languageModel.hasUnigramsForKey(key)) {
        for (auto&& candidate : m_languageModel.unigramsForKey(key)) {
            const auto& value = candidate.keyValue.value;
            const bool isExcluded = excludedValues.count(value) != 0;
            const bool isUserSupplied = userValues.count(value) != 0;
            if (!isExcluded && !isUserSupplied) {
                merged.push_back(candidate);
            }
        }
    }

    return merged;
}
|
||||||
|
|
||||||
|
bool McBopomofoLM::hasUnigramsForKey(const string& key)
|
||||||
|
{
|
||||||
|
if (!m_excluddePhrases.hasUnigramsForKey(key)) {
|
||||||
|
return m_userPhrases.hasUnigramsForKey(key) ||
|
||||||
|
m_languageModel.hasUnigramsForKey(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
return unigramsForKey(key).size() > 0;
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
#ifndef MCBOPOMOFOLM_H
|
||||||
|
#define MCBOPOMOFOLM_H
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "FastLM.h"
|
||||||
|
|
||||||
|
namespace McBopomofo {
|
||||||
|
|
||||||
|
using namespace Formosa::Gramambular;
|
||||||
|
|
||||||
|
class McBopomofoLM : public LanguageModel {
|
||||||
|
public:
|
||||||
|
McBopomofoLM();
|
||||||
|
~McBopomofoLM();
|
||||||
|
|
||||||
|
void loadLanguageModel(const char* languageModelDataPath);
|
||||||
|
void loadUserPhrases(const char* m_userPhrasesDataPath,
|
||||||
|
const char* m_excludedPhrasesDataPath);
|
||||||
|
|
||||||
|
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
|
||||||
|
const vector<Unigram> unigramsForKey(const string& key);
|
||||||
|
bool hasUnigramsForKey(const string& key);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
FastLM m_languageModel;
|
||||||
|
FastLM m_userPhrases;
|
||||||
|
FastLM m_excluddePhrases;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -36,7 +36,7 @@
|
||||||
#import <InputMethodKit/InputMethodKit.h>
|
#import <InputMethodKit/InputMethodKit.h>
|
||||||
#import "Mandarin.h"
|
#import "Mandarin.h"
|
||||||
#import "Gramambular.h"
|
#import "Gramambular.h"
|
||||||
#import "FastLM.h"
|
#import "McBopomofoLM.h"
|
||||||
#import "UserOverrideModel.h"
|
#import "UserOverrideModel.h"
|
||||||
|
|
||||||
@interface McBopomofoInputMethodController : IMKInputController
|
@interface McBopomofoInputMethodController : IMKInputController
|
||||||
|
@ -46,9 +46,7 @@
|
||||||
Formosa::Mandarin::BopomofoReadingBuffer* _bpmfReadingBuffer;
|
Formosa::Mandarin::BopomofoReadingBuffer* _bpmfReadingBuffer;
|
||||||
|
|
||||||
// language model
|
// language model
|
||||||
Formosa::Gramambular::FastLM *_languageModel;
|
McBopomofo::McBopomofoLM *_languageModel;
|
||||||
Formosa::Gramambular::FastLM *_userPhrasesModel;
|
|
||||||
Formosa::Gramambular::FastLM *_excludedPhraseModel;
|
|
||||||
|
|
||||||
// user override model
|
// user override model
|
||||||
McBopomofo::UserOverrideModel *_userOverrideModel;
|
McBopomofo::UserOverrideModel *_userOverrideModel;
|
||||||
|
|
|
@ -48,6 +48,7 @@
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Formosa::Mandarin;
|
using namespace Formosa::Mandarin;
|
||||||
using namespace Formosa::Gramambular;
|
using namespace Formosa::Gramambular;
|
||||||
|
using namespace McBopomofo;
|
||||||
using namespace OpenVanilla;
|
using namespace OpenVanilla;
|
||||||
|
|
||||||
// default, min and max candidate list text size
|
// default, min and max candidate list text size
|
||||||
|
@ -176,11 +177,9 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
|
|
||||||
// create the lattice builder
|
// create the lattice builder
|
||||||
_languageModel = [LanguageModelManager languageModelMcBopomofo];
|
_languageModel = [LanguageModelManager languageModelMcBopomofo];
|
||||||
_userPhrasesModel = [LanguageModelManager userPhraseLanguageModel];
|
|
||||||
_userOverrideModel = [LanguageModelManager userOverrideModel];
|
_userOverrideModel = [LanguageModelManager userOverrideModel];
|
||||||
_excludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelMcBopomofo];
|
|
||||||
|
|
||||||
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel, _excludedPhraseModel);
|
_builder = new BlockReadingBuilder(_languageModel);
|
||||||
|
|
||||||
// each Mandarin syllable is separated by a hyphen
|
// each Mandarin syllable is separated by a hyphen
|
||||||
_builder->setJoinSeparator("-");
|
_builder->setJoinSeparator("-");
|
||||||
|
@ -325,21 +324,15 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
- (void)setValue:(id)value forTag:(long)tag client:(id)sender
|
- (void)setValue:(id)value forTag:(long)tag client:(id)sender
|
||||||
{
|
{
|
||||||
NSString *newInputMode;
|
NSString *newInputMode;
|
||||||
FastLM *newLanguageModel;
|
McBopomofoLM *newLanguageModel;
|
||||||
FastLM *newUserPhrasesModel;
|
|
||||||
FastLM *newExcludedPhraseModel;
|
|
||||||
|
|
||||||
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
|
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
|
||||||
newInputMode = kPlainBopomofoModeIdentifier;
|
newInputMode = kPlainBopomofoModeIdentifier;
|
||||||
newLanguageModel = [LanguageModelManager languageModelPlainBopomofo];
|
newLanguageModel = [LanguageModelManager languageModelPlainBopomofo];
|
||||||
newUserPhrasesModel = NULL;
|
|
||||||
newExcludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelPlainBopomofo];
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
newInputMode = kBopomofoModeIdentifier;
|
newInputMode = kBopomofoModeIdentifier;
|
||||||
newLanguageModel = [LanguageModelManager languageModelMcBopomofo];
|
newLanguageModel = [LanguageModelManager languageModelMcBopomofo];
|
||||||
newUserPhrasesModel = [LanguageModelManager userPhraseLanguageModel];
|
|
||||||
newExcludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelMcBopomofo];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only apply the changes if the value is changed
|
// Only apply the changes if the value is changed
|
||||||
|
@ -355,8 +348,6 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
|
|
||||||
_inputMode = newInputMode;
|
_inputMode = newInputMode;
|
||||||
_languageModel = newLanguageModel;
|
_languageModel = newLanguageModel;
|
||||||
_userPhrasesModel = newUserPhrasesModel;
|
|
||||||
_excludedPhraseModel = newExcludedPhraseModel;
|
|
||||||
|
|
||||||
if (!_bpmfReadingBuffer->isEmpty()) {
|
if (!_bpmfReadingBuffer->isEmpty()) {
|
||||||
_bpmfReadingBuffer->clear();
|
_bpmfReadingBuffer->clear();
|
||||||
|
@ -369,7 +360,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
|
|
||||||
if (_builder) {
|
if (_builder) {
|
||||||
delete _builder;
|
delete _builder;
|
||||||
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel, _excludedPhraseModel);
|
_builder = new BlockReadingBuilder(_languageModel);
|
||||||
_builder->setJoinSeparator("-");
|
_builder->setJoinSeparator("-");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1096,46 +1087,9 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
|
|
||||||
- (vector<Unigram>)_collectUnigrams:(string)string
|
|
||||||
{
|
|
||||||
vector<Unigram> unigrams;
|
|
||||||
vector<Unigram> userUnigrams;
|
|
||||||
|
|
||||||
if (_userPhrasesModel != NULL && _userPhrasesModel->hasUnigramsForKey(string)) {
|
|
||||||
userUnigrams = _userPhrasesModel->unigramsForKeys(string);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_languageModel->hasUnigramsForKey(string)) {
|
|
||||||
vector<Unigram> globalUnigrams = _languageModel->unigramsForKeys(string);
|
|
||||||
for (std::vector<Unigram>::iterator it=globalUnigrams.begin(); it!=globalUnigrams.end(); ++it) {
|
|
||||||
if (!_builder->checkIfUnigramExistInVector(*it, unigrams)) {
|
|
||||||
unigrams.push_back(*it);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
unigrams.insert(unigrams.begin(), userUnigrams.begin(), userUnigrams.end());
|
|
||||||
|
|
||||||
if (_excludedPhraseModel != NULL && _excludedPhraseModel->hasUnigramsForKey(string)) {
|
|
||||||
vector<Unigram> excludedUnigrams = _excludedPhraseModel->unigramsForKeys(string);
|
|
||||||
vector<Unigram> newUnigram;
|
|
||||||
for (std::vector<Unigram>::iterator it=unigrams.begin(); it!=unigrams.end(); ++it) {
|
|
||||||
if (!_builder->checkIfUnigramExistInVector(*it, excludedUnigrams)) {
|
|
||||||
newUnigram.push_back(*it);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
unigrams = newUnigram;
|
|
||||||
}
|
|
||||||
return unigrams;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
- (BOOL)handlePunctuation:(string)customPunctuation usingVerticalMode:(BOOL)useVerticalMode client:(id)client
|
- (BOOL)handlePunctuation:(string)customPunctuation usingVerticalMode:(BOOL)useVerticalMode client:(id)client
|
||||||
{
|
{
|
||||||
vector<Unigram> collected = [self _collectUnigrams:customPunctuation];
|
if (_languageModel->hasUnigramsForKey(customPunctuation)) {
|
||||||
if (!collected.size()) {
|
|
||||||
return NO;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_bpmfReadingBuffer->isEmpty()) {
|
if (_bpmfReadingBuffer->isEmpty()) {
|
||||||
_builder->insertReadingAtCursor(customPunctuation);
|
_builder->insertReadingAtCursor(customPunctuation);
|
||||||
[self popOverflowComposingTextAndWalk:client];
|
[self popOverflowComposingTextAndWalk:client];
|
||||||
|
@ -1156,6 +1110,8 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
|
||||||
}
|
}
|
||||||
return YES;
|
return YES;
|
||||||
}
|
}
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
|
||||||
- (BOOL)handleCandidateEventWithInputText:(NSString *)inputText charCode:(UniChar)charCode keyCode:(NSUInteger)keyCode
|
- (BOOL)handleCandidateEventWithInputText:(NSString *)inputText charCode:(UniChar)charCode keyCode:(NSUInteger)keyCode
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#import <Foundation/Foundation.h>
|
#import <Foundation/Foundation.h>
|
||||||
#import "FastLM.h"
|
#import "FastLM.h"
|
||||||
#import "UserOverrideModel.h"
|
#import "UserOverrideModel.h"
|
||||||
|
#import "McBopomofoLM.h"
|
||||||
|
|
||||||
NS_ASSUME_NONNULL_BEGIN
|
NS_ASSUME_NONNULL_BEGIN
|
||||||
|
|
||||||
|
@ -15,11 +16,8 @@ NS_ASSUME_NONNULL_BEGIN
|
||||||
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathMcBopomofo;
|
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathMcBopomofo;
|
||||||
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathMcBopomofo;
|
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathMcBopomofo;
|
||||||
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathPlainBopomofo;
|
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathPlainBopomofo;
|
||||||
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelMcBopomofo;
|
@property (class, readonly, nonatomic) McBopomofo::McBopomofoLM *languageModelMcBopomofo;
|
||||||
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelPlainBopomofo;
|
@property (class, readonly, nonatomic) McBopomofo::McBopomofoLM *languageModelPlainBopomofo;
|
||||||
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *userPhraseLanguageModel;
|
|
||||||
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *excludedPhrasesLanguageModelMcBopomofo;
|
|
||||||
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *excludedPhrasesLanguageModelPlainBopomofo;
|
|
||||||
@property (class, readonly, nonatomic) McBopomofo::UserOverrideModel *userOverrideModel;
|
@property (class, readonly, nonatomic) McBopomofo::UserOverrideModel *userOverrideModel;
|
||||||
@end
|
@end
|
||||||
|
|
||||||
|
|
|
@ -7,61 +7,35 @@
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Formosa::Gramambular;
|
using namespace Formosa::Gramambular;
|
||||||
|
using namespace McBopomofo;
|
||||||
using namespace OpenVanilla;
|
using namespace OpenVanilla;
|
||||||
|
|
||||||
static const int kUserOverrideModelCapacity = 500;
|
static const int kUserOverrideModelCapacity = 500;
|
||||||
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
|
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
|
||||||
|
|
||||||
FastLM globalLanguageModel;
|
McBopomofoLM gLanguageModelMcBopomofo;
|
||||||
FastLM globalLanguageModelPlainBopomofo;
|
McBopomofoLM gLanguageModelPlainBopomofo;
|
||||||
FastLM globalUserPhraseLanguageModel;
|
UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||||
FastLM globalUserExcludedPhrasesMcBopomofo;
|
|
||||||
FastLM globalUserExcludedPhrasesPlainBopomofo;
|
|
||||||
McBopomofo::UserOverrideModel globalUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
|
||||||
|
|
||||||
@implementation LanguageModelManager
|
@implementation LanguageModelManager
|
||||||
|
|
||||||
static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &lm)
|
static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, McBopomofoLM &lm)
|
||||||
{
|
{
|
||||||
Class cls = NSClassFromString(@"McBopomofoInputMethodController");
|
Class cls = NSClassFromString(@"McBopomofoInputMethodController");
|
||||||
NSString *dataPath = [[NSBundle bundleForClass:cls] pathForResource:filenameWithoutExtension ofType:@"txt"];
|
NSString *dataPath = [[NSBundle bundleForClass:cls] pathForResource:filenameWithoutExtension ofType:@"txt"];
|
||||||
bool result = lm.open([dataPath UTF8String]);
|
lm.loadLanguageModel([dataPath UTF8String]);
|
||||||
return (BOOL)result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (void)loadDataModels
|
+ (void)loadDataModels
|
||||||
{
|
{
|
||||||
bool dataOpenResult = LTLoadLanguageModelFile(@"data", globalLanguageModel);
|
LTLoadLanguageModelFile(@"data", gLanguageModelMcBopomofo);
|
||||||
if (!dataOpenResult) {
|
LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelPlainBopomofo);
|
||||||
NSLog(@"Failed to open language model.");
|
|
||||||
}
|
|
||||||
bool plainBpmfOpenResult = LTLoadLanguageModelFile(@"data-plain-bpmf", globalLanguageModelPlainBopomofo);
|
|
||||||
if (!plainBpmfOpenResult) {
|
|
||||||
NSLog(@"Failed to open language model for plain bpmf.");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (void)loadUserPhrasesModel
|
+ (void)loadUserPhrasesModel
|
||||||
{
|
{
|
||||||
globalUserPhraseLanguageModel.close();
|
gLanguageModelMcBopomofo.loadUserPhrases([[self userPhrasesDataPathMcBopomofo] UTF8String], [[self excludedPhrasesDataPathMcBopomofo] UTF8String]);
|
||||||
globalUserExcludedPhrasesMcBopomofo.close();
|
gLanguageModelPlainBopomofo.loadUserPhrases("", [[self excludedPhrasesDataPathPlainBopomofo] UTF8String]);
|
||||||
globalUserExcludedPhrasesPlainBopomofo.close();
|
|
||||||
|
|
||||||
bool result = false;
|
|
||||||
|
|
||||||
result = globalUserPhraseLanguageModel.open([[self userPhrasesDataPathMcBopomofo] UTF8String]);
|
|
||||||
if (!result) {
|
|
||||||
NSLog(@"Failed to open user phrases. %@", [self userPhrasesDataPathMcBopomofo]);
|
|
||||||
}
|
|
||||||
result = globalUserExcludedPhrasesMcBopomofo.open([[self excludedPhrasesDataPathMcBopomofo] UTF8String]);
|
|
||||||
if (!result) {
|
|
||||||
NSLog(@"Failed to open excluded phrases McBopomofo. %@", [self excludedPhrasesDataPathMcBopomofo]);
|
|
||||||
}
|
|
||||||
|
|
||||||
result = globalUserExcludedPhrasesPlainBopomofo.open([[self excludedPhrasesDataPathPlainBopomofo] UTF8String]);
|
|
||||||
if (!result) {
|
|
||||||
NSLog(@"Failed to open excluded phrases Plain Bopomofo. %@", [self excludedPhrasesDataPathPlainBopomofo]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (BOOL)checkIfUserDataFolderExists
|
+ (BOOL)checkIfUserDataFolderExists
|
||||||
|
@ -163,34 +137,19 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &
|
||||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"];
|
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"];
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (FastLM *)languageModelMcBopomofo
|
+ (McBopomofoLM *)languageModelMcBopomofo
|
||||||
{
|
{
|
||||||
return &globalLanguageModel;
|
return &gLanguageModelMcBopomofo;
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (FastLM *)languageModelPlainBopomofo
|
+ (McBopomofoLM *)languageModelPlainBopomofo
|
||||||
{
|
{
|
||||||
return &globalLanguageModelPlainBopomofo;
|
return &gLanguageModelPlainBopomofo;
|
||||||
}
|
|
||||||
|
|
||||||
+ (FastLM *)userPhraseLanguageModel
|
|
||||||
{
|
|
||||||
return &globalUserPhraseLanguageModel;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ (FastLM *)excludedPhrasesLanguageModelMcBopomofo
|
|
||||||
{
|
|
||||||
return &globalUserExcludedPhrasesMcBopomofo;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ (FastLM *)excludedPhrasesLanguageModelPlainBopomofo
|
|
||||||
{
|
|
||||||
return &globalUserExcludedPhrasesPlainBopomofo;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (McBopomofo::UserOverrideModel *)userOverrideModel
|
+ (McBopomofo::UserOverrideModel *)userOverrideModel
|
||||||
{
|
{
|
||||||
return &globalUserOverrideModel;
|
return &gUserOverrideModel;
|
||||||
}
|
}
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
Loading…
Reference in New Issue