Uses user phrases in the block builder.
This commit is contained in:
parent
6f761ecbcd
commit
e909dc20b5
|
@ -38,7 +38,7 @@ namespace Formosa {
|
|||
|
||||
class BlockReadingBuilder {
|
||||
public:
|
||||
BlockReadingBuilder(LanguageModel *inLM);
|
||||
BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM);
|
||||
void clear();
|
||||
|
||||
size_t length() const;
|
||||
|
@ -73,11 +73,13 @@ namespace Formosa {
|
|||
|
||||
Grid m_grid;
|
||||
LanguageModel *m_LM;
|
||||
LanguageModel *m_UserPhraseLM;
|
||||
string m_joinSeparator;
|
||||
};
|
||||
|
||||
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM)
|
||||
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM)
|
||||
: m_LM(inLM)
|
||||
, m_UserPhraseLM(inUserPhraseLM)
|
||||
, m_cursorIndex(0)
|
||||
, m_markerCursorIndex(SIZE_MAX)
|
||||
{
|
||||
|
@ -219,6 +221,13 @@ namespace Formosa {
|
|||
for (size_t p = begin ; p < end ; p++) {
|
||||
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
|
||||
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
||||
if (m_UserPhraseLM != NULL) {
|
||||
if (m_UserPhraseLM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
|
||||
Node n(combinedReading, m_UserPhraseLM->unigramsForKeys(combinedReading), vector<Bigram>());
|
||||
m_grid.insertNode(n, p, q);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_LM->hasUnigramsForKey(combinedReading) && !m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
|
||||
Node n(combinedReading, m_LM->unigramsForKeys(combinedReading), vector<Bigram>());
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
|
||||
// language model
|
||||
Formosa::Gramambular::FastLM *_languageModel;
|
||||
Formosa::Gramambular::FastLM *_userPhrases;
|
||||
Formosa::Gramambular::FastLM *_userPhrasesModel;
|
||||
|
||||
// the grid (lattice) builder for the unigrams (and bigrams)
|
||||
Formosa::Gramambular::BlockReadingBuilder* _builder;
|
||||
|
|
|
@ -116,6 +116,10 @@ FastLM gLanguageModel;
|
|||
FastLM gLanguageModelPlainBopomofo;
|
||||
FastLM gUserPhraseLanguageModel;
|
||||
|
||||
static const int kUserOverrideModelCapacity = 500;
|
||||
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
|
||||
McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||
|
||||
static NSString *userDataFolderPath()
|
||||
{
|
||||
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
|
||||
|
@ -129,12 +133,6 @@ static NSString *userPhrasesDataPath()
|
|||
return [userDataFolderPath() stringByAppendingPathComponent:@"data.txt"];
|
||||
}
|
||||
|
||||
|
||||
|
||||
static const int kUserOverrideModelCapacity = 500;
|
||||
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
|
||||
McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||
|
||||
// https://clang-analyzer.llvm.org/faq.html
|
||||
__attribute__((annotate("returns_localized_nsstring")))
|
||||
static inline NSString *LocalizationNotNeeded(NSString *s) {
|
||||
|
@ -206,7 +204,8 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
|||
|
||||
// create the lattice builder
|
||||
_languageModel = &gLanguageModel;
|
||||
_builder = new BlockReadingBuilder(_languageModel);
|
||||
_userPhrasesModel = &gUserPhraseLanguageModel;
|
||||
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
|
||||
_uom = &gUserOverrideModel;
|
||||
|
||||
// each Mandarin syllable is separated by a hyphen
|
||||
|
@ -338,14 +337,17 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
|||
{
|
||||
NSString *newInputMode;
|
||||
Formosa::Gramambular::FastLM *newLanguageModel;
|
||||
Formosa::Gramambular::FastLM *userPhraseModel;
|
||||
|
||||
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
|
||||
newInputMode = kPlainBopomofoModeIdentifier;
|
||||
newLanguageModel = &gLanguageModelPlainBopomofo;
|
||||
userPhraseModel = NULL;
|
||||
}
|
||||
else {
|
||||
newInputMode = kBopomofoModeIdentifier;
|
||||
newLanguageModel = &gLanguageModel;
|
||||
userPhraseModel = &gUserPhraseLanguageModel;
|
||||
}
|
||||
|
||||
// Only apply the changes if the value is changed
|
||||
|
@ -361,6 +363,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
|||
|
||||
_inputMode = newInputMode;
|
||||
_languageModel = newLanguageModel;
|
||||
_userPhrasesModel = userPhraseModel;
|
||||
|
||||
if (!_bpmfReadingBuffer->isEmpty()) {
|
||||
_bpmfReadingBuffer->clear();
|
||||
|
@ -373,7 +376,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
|||
|
||||
if (_builder) {
|
||||
delete _builder;
|
||||
_builder = new BlockReadingBuilder(_languageModel);
|
||||
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
|
||||
_builder->setJoinSeparator("-");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue