Uses user phrases in the block builder.

This commit is contained in:
zonble 2022-01-09 19:41:36 +08:00 committed by Lukhnos Liu
parent 6f761ecbcd
commit e909dc20b5
3 changed files with 23 additions and 11 deletions

View File

@ -38,7 +38,7 @@ namespace Formosa {
class BlockReadingBuilder { class BlockReadingBuilder {
public: public:
BlockReadingBuilder(LanguageModel *inLM); BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM);
void clear(); void clear();
size_t length() const; size_t length() const;
@ -73,11 +73,13 @@ namespace Formosa {
Grid m_grid; Grid m_grid;
LanguageModel *m_LM; LanguageModel *m_LM;
LanguageModel *m_UserPhraseLM;
string m_joinSeparator; string m_joinSeparator;
}; };
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM) inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM)
: m_LM(inLM) : m_LM(inLM)
, m_UserPhraseLM(inUserPhraseLM)
, m_cursorIndex(0) , m_cursorIndex(0)
, m_markerCursorIndex(SIZE_MAX) , m_markerCursorIndex(SIZE_MAX)
{ {
@ -219,6 +221,13 @@ namespace Formosa {
for (size_t p = begin ; p < end ; p++) { for (size_t p = begin ; p < end ; p++) {
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) { for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
// Consult the optional user phrase model first; when it is NULL this
// whole step is skipped and only m_LM is queried for the reading.
if (m_UserPhraseLM != NULL) {
// Insert a node only if the user phrase model has unigrams for this
// combined reading and the grid does not already hold a node with the
// same key at location p spanning q readings.
if (m_UserPhraseLM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
// The node is built from the user phrase unigrams with an empty bigram list.
Node n(combinedReading, m_UserPhraseLM->unigramsForKeys(combinedReading), vector<Bigram>());
m_grid.insertNode(n, p, q);
// Move on to the next span length: the m_LM lookup that follows in
// this loop body is not performed for this (p, q).
continue;
}
}
if (m_LM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { if (m_LM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
Node n(combinedReading, m_LM->unigramsForKeys(combinedReading), vector<Bigram>()); Node n(combinedReading, m_LM->unigramsForKeys(combinedReading), vector<Bigram>());

View File

@ -47,7 +47,7 @@
// language model // language model
Formosa::Gramambular::FastLM *_languageModel; Formosa::Gramambular::FastLM *_languageModel;
Formosa::Gramambular::FastLM *_userPhrases; Formosa::Gramambular::FastLM *_userPhrasesModel;
// the grid (lattice) builder for the unigrams (and bigrams) // the grid (lattice) builder for the unigrams (and bigrams)
Formosa::Gramambular::BlockReadingBuilder* _builder; Formosa::Gramambular::BlockReadingBuilder* _builder;

View File

@ -116,6 +116,10 @@ FastLM gLanguageModel;
FastLM gLanguageModelPlainBopomofo; FastLM gLanguageModelPlainBopomofo;
FastLM gUserPhraseLanguageModel; FastLM gUserPhraseLanguageModel;
static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
static NSString *userDataFolderPath() static NSString *userDataFolderPath()
{ {
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES); NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
@ -129,12 +133,6 @@ static NSString *userPhrasesDataPath()
return [userDataFolderPath() stringByAppendingPathComponent:@"data.txt"]; return [userDataFolderPath() stringByAppendingPathComponent:@"data.txt"];
} }
static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
// https://clang-analyzer.llvm.org/faq.html // https://clang-analyzer.llvm.org/faq.html
__attribute__((annotate("returns_localized_nsstring"))) __attribute__((annotate("returns_localized_nsstring")))
static inline NSString *LocalizationNotNeeded(NSString *s) { static inline NSString *LocalizationNotNeeded(NSString *s) {
@ -206,7 +204,8 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
// create the lattice builder // create the lattice builder
_languageModel = &gLanguageModel; _languageModel = &gLanguageModel;
_builder = new BlockReadingBuilder(_languageModel); _userPhrasesModel = &gUserPhraseLanguageModel;
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
_uom = &gUserOverrideModel; _uom = &gUserOverrideModel;
// each Mandarin syllable is separated by a hyphen // each Mandarin syllable is separated by a hyphen
@ -338,14 +337,17 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
{ {
NSString *newInputMode; NSString *newInputMode;
Formosa::Gramambular::FastLM *newLanguageModel; Formosa::Gramambular::FastLM *newLanguageModel;
Formosa::Gramambular::FastLM *userPhraseModel;
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) { if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
newInputMode = kPlainBopomofoModeIdentifier; newInputMode = kPlainBopomofoModeIdentifier;
newLanguageModel = &gLanguageModelPlainBopomofo; newLanguageModel = &gLanguageModelPlainBopomofo;
userPhraseModel = NULL;
} }
else { else {
newInputMode = kBopomofoModeIdentifier; newInputMode = kBopomofoModeIdentifier;
newLanguageModel = &gLanguageModel; newLanguageModel = &gLanguageModel;
userPhraseModel = &gUserPhraseLanguageModel;
} }
// Only apply the changes if the value is changed // Only apply the changes if the value is changed
@ -361,6 +363,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_inputMode = newInputMode; _inputMode = newInputMode;
_languageModel = newLanguageModel; _languageModel = newLanguageModel;
_userPhrasesModel = userPhraseModel;
if (!_bpmfReadingBuffer->isEmpty()) { if (!_bpmfReadingBuffer->isEmpty()) {
_bpmfReadingBuffer->clear(); _bpmfReadingBuffer->clear();
@ -373,7 +376,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
if (_builder) { if (_builder) {
delete _builder; delete _builder;
_builder = new BlockReadingBuilder(_languageModel); _builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
_builder->setJoinSeparator("-"); _builder->setJoinSeparator("-");
} }
} }