Uses user phrases in the block builder.
This commit is contained in:
parent
6f761ecbcd
commit
e909dc20b5
|
@ -38,7 +38,7 @@ namespace Formosa {
|
||||||
|
|
||||||
class BlockReadingBuilder {
|
class BlockReadingBuilder {
|
||||||
public:
|
public:
|
||||||
BlockReadingBuilder(LanguageModel *inLM);
|
BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM);
|
||||||
void clear();
|
void clear();
|
||||||
|
|
||||||
size_t length() const;
|
size_t length() const;
|
||||||
|
@ -73,11 +73,13 @@ namespace Formosa {
|
||||||
|
|
||||||
Grid m_grid;
|
Grid m_grid;
|
||||||
LanguageModel *m_LM;
|
LanguageModel *m_LM;
|
||||||
|
LanguageModel *m_UserPhraseLM;
|
||||||
string m_joinSeparator;
|
string m_joinSeparator;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM)
|
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM)
|
||||||
: m_LM(inLM)
|
: m_LM(inLM)
|
||||||
|
, m_UserPhraseLM(inUserPhraseLM)
|
||||||
, m_cursorIndex(0)
|
, m_cursorIndex(0)
|
||||||
, m_markerCursorIndex(SIZE_MAX)
|
, m_markerCursorIndex(SIZE_MAX)
|
||||||
{
|
{
|
||||||
|
@ -219,6 +221,13 @@ namespace Formosa {
|
||||||
for (size_t p = begin ; p < end ; p++) {
|
for (size_t p = begin ; p < end ; p++) {
|
||||||
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
|
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
|
||||||
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
||||||
|
if (m_UserPhraseLM != NULL) {
|
||||||
|
if (m_UserPhraseLM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
|
||||||
|
Node n(combinedReading, m_UserPhraseLM->unigramsForKeys(combinedReading), vector<Bigram>());
|
||||||
|
m_grid.insertNode(n, p, q);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (m_LM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
|
if (m_LM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
|
||||||
Node n(combinedReading, m_LM->unigramsForKeys(combinedReading), vector<Bigram>());
|
Node n(combinedReading, m_LM->unigramsForKeys(combinedReading), vector<Bigram>());
|
||||||
|
|
|
@ -47,7 +47,7 @@
|
||||||
|
|
||||||
// language model
|
// language model
|
||||||
Formosa::Gramambular::FastLM *_languageModel;
|
Formosa::Gramambular::FastLM *_languageModel;
|
||||||
Formosa::Gramambular::FastLM *_userPhrases;
|
Formosa::Gramambular::FastLM *_userPhrasesModel;
|
||||||
|
|
||||||
// the grid (lattice) builder for the unigrams (and bigrams)
|
// the grid (lattice) builder for the unigrams (and bigrams)
|
||||||
Formosa::Gramambular::BlockReadingBuilder* _builder;
|
Formosa::Gramambular::BlockReadingBuilder* _builder;
|
||||||
|
|
|
@ -116,6 +116,10 @@ FastLM gLanguageModel;
|
||||||
FastLM gLanguageModelPlainBopomofo;
|
FastLM gLanguageModelPlainBopomofo;
|
||||||
FastLM gUserPhraseLanguageModel;
|
FastLM gUserPhraseLanguageModel;
|
||||||
|
|
||||||
|
static const int kUserOverrideModelCapacity = 500;
|
||||||
|
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
|
||||||
|
McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||||
|
|
||||||
static NSString *userDataFolderPath()
|
static NSString *userDataFolderPath()
|
||||||
{
|
{
|
||||||
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
|
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
|
||||||
|
@ -129,12 +133,6 @@ static NSString *userPhrasesDataPath()
|
||||||
return [userDataFolderPath() stringByAppendingPathComponent:@"data.txt"];
|
return [userDataFolderPath() stringByAppendingPathComponent:@"data.txt"];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static const int kUserOverrideModelCapacity = 500;
|
|
||||||
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
|
|
||||||
McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
|
||||||
|
|
||||||
// https://clang-analyzer.llvm.org/faq.html
|
// https://clang-analyzer.llvm.org/faq.html
|
||||||
__attribute__((annotate("returns_localized_nsstring")))
|
__attribute__((annotate("returns_localized_nsstring")))
|
||||||
static inline NSString *LocalizationNotNeeded(NSString *s) {
|
static inline NSString *LocalizationNotNeeded(NSString *s) {
|
||||||
|
@ -206,7 +204,8 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
|
|
||||||
// create the lattice builder
|
// create the lattice builder
|
||||||
_languageModel = &gLanguageModel;
|
_languageModel = &gLanguageModel;
|
||||||
_builder = new BlockReadingBuilder(_languageModel);
|
_userPhrasesModel = &gUserPhraseLanguageModel;
|
||||||
|
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
|
||||||
_uom = &gUserOverrideModel;
|
_uom = &gUserOverrideModel;
|
||||||
|
|
||||||
// each Mandarin syllable is separated by a hyphen
|
// each Mandarin syllable is separated by a hyphen
|
||||||
|
@ -338,14 +337,17 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
{
|
{
|
||||||
NSString *newInputMode;
|
NSString *newInputMode;
|
||||||
Formosa::Gramambular::FastLM *newLanguageModel;
|
Formosa::Gramambular::FastLM *newLanguageModel;
|
||||||
|
Formosa::Gramambular::FastLM *userPhraseModel;
|
||||||
|
|
||||||
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
|
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
|
||||||
newInputMode = kPlainBopomofoModeIdentifier;
|
newInputMode = kPlainBopomofoModeIdentifier;
|
||||||
newLanguageModel = &gLanguageModelPlainBopomofo;
|
newLanguageModel = &gLanguageModelPlainBopomofo;
|
||||||
|
userPhraseModel = NULL;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
newInputMode = kBopomofoModeIdentifier;
|
newInputMode = kBopomofoModeIdentifier;
|
||||||
newLanguageModel = &gLanguageModel;
|
newLanguageModel = &gLanguageModel;
|
||||||
|
userPhraseModel = &gUserPhraseLanguageModel;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only apply the changes if the value is changed
|
// Only apply the changes if the value is changed
|
||||||
|
@ -361,6 +363,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
|
|
||||||
_inputMode = newInputMode;
|
_inputMode = newInputMode;
|
||||||
_languageModel = newLanguageModel;
|
_languageModel = newLanguageModel;
|
||||||
|
_userPhrasesModel = userPhraseModel;
|
||||||
|
|
||||||
if (!_bpmfReadingBuffer->isEmpty()) {
|
if (!_bpmfReadingBuffer->isEmpty()) {
|
||||||
_bpmfReadingBuffer->clear();
|
_bpmfReadingBuffer->clear();
|
||||||
|
@ -373,7 +376,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
|
|
||||||
if (_builder) {
|
if (_builder) {
|
||||||
delete _builder;
|
delete _builder;
|
||||||
_builder = new BlockReadingBuilder(_languageModel);
|
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
|
||||||
_builder->setJoinSeparator("-");
|
_builder->setJoinSeparator("-");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue