Implements excluding phrases.

This commit is contained in:
zonble 2022-01-12 00:16:55 +08:00
parent 144d133463
commit 9b485b799c
6 changed files with 171 additions and 51 deletions

View File

@ -38,7 +38,7 @@ namespace Formosa {
class BlockReadingBuilder {
public:
BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM);
BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM, LanguageModel *inExcludedPhrasesLM);
void clear();
size_t length() const;
@ -58,6 +58,8 @@ namespace Formosa {
vector<string> readingsAtRange(size_t begin, size_t end) const;
Grid& grid();
bool checkIfUnigramExistInVector(Unigram& unigram, vector<Unigram>vector);
protected:
void build();
@ -73,13 +75,17 @@ namespace Formosa {
Grid m_grid;
LanguageModel *m_LM;
LanguageModel *m_UserPhraseLM;
LanguageModel *m_userPhraseLM;
LanguageModel *m_excludedPhrasesLM;
string m_joinSeparator;
};
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM)
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM,
LanguageModel *inUserPhraseLM,
LanguageModel *inExcludedPhrasesLM)
: m_LM(inLM)
, m_UserPhraseLM(inUserPhraseLM)
, m_userPhraseLM(inUserPhraseLM)
, m_excludedPhrasesLM(inExcludedPhrasesLM)
, m_cursorIndex(0)
, m_markerCursorIndex(SIZE_MAX)
{
@ -197,7 +203,17 @@ namespace Formosa {
{
return m_grid;
}
// Reports whether `vector` already holds an entry whose keyValue.value equals
// that of `unigram`. Only keyValue.value is compared; no other field of the
// unigram takes part in the match.
// Returns true on the first match, false when nothing matches (including an
// empty vector).
inline bool BlockReadingBuilder::checkIfUnigramExistInVector(Unigram& unigram, vector<Unigram>vector)
{
    const size_t entryCount = vector.size();
    for (size_t index = 0; index < entryCount; ++index) {
        if (vector[index].keyValue.value == unigram.keyValue.value) {
            return true;
        }
    }
    return false;
}
inline void BlockReadingBuilder::build()
{
if (!m_LM) {
@ -223,17 +239,31 @@ namespace Formosa {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
vector<Unigram> unigrams;
vector<Unigram> userUnigrams;
if (m_UserPhraseLM != NULL) {
if (m_UserPhraseLM->hasUnigramsForKey(combinedReading)) {
vector<Unigram> userUnigrams = m_UserPhraseLM->unigramsForKeys(combinedReading);
unigrams.insert(unigrams.end(), userUnigrams.begin(), userUnigrams.end());
}
if (m_userPhraseLM != NULL && m_userPhraseLM->hasUnigramsForKey(combinedReading)) {
userUnigrams = m_userPhraseLM->unigramsForKeys(combinedReading);
}
if (m_LM->hasUnigramsForKey(combinedReading)) {
vector<Unigram> globalUnigrams = m_LM->unigramsForKeys(combinedReading);
unigrams.insert(unigrams.end(), globalUnigrams.begin(), globalUnigrams.end());
for (std::vector<Unigram>::iterator it=globalUnigrams.begin(); it!=globalUnigrams.end(); ++it) {
if (!checkIfUnigramExistInVector(*it, unigrams)) {
unigrams.push_back(*it);
}
}
}
unigrams.insert(unigrams.begin(), userUnigrams.begin(), userUnigrams.end());
if (m_excludedPhrasesLM != NULL && m_excludedPhrasesLM->hasUnigramsForKey(combinedReading)) {
vector<Unigram> excludedUnigrams = m_excludedPhrasesLM->unigramsForKeys(combinedReading);
vector<Unigram> newUnigram;
for (std::vector<Unigram>::iterator it=unigrams.begin(); it!=unigrams.end(); ++it) {
if (!checkIfUnigramExistInVector(*it, excludedUnigrams)) {
newUnigram.push_back(*it);
}
}
unigrams = newUnigram;
}
if (unigrams.size() > 0) {

View File

@ -48,6 +48,7 @@
// language model
Formosa::Gramambular::FastLM *_languageModel;
Formosa::Gramambular::FastLM *_userPhrasesModel;
Formosa::Gramambular::FastLM *_excludedPhraseModel;
// user override model
McBopomofo::UserOverrideModel *_userOverrideModel;

View File

@ -178,8 +178,9 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_languageModel = [LanguageModelManager languageModelMcBopomofo];
_userPhrasesModel = [LanguageModelManager userPhraseLanguageModel];
_userOverrideModel = [LanguageModelManager userOverrideModel];
_excludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelMcBopomofo];
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel, _excludedPhraseModel);
// each Mandarin syllable is separated by a hyphen
_builder->setJoinSeparator("-");
@ -206,18 +207,23 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
chineseConversionMenuItem.state = _chineseConversionEnabled ? NSControlStateValueOn : NSControlStateValueOff;
[menu addItem:chineseConversionMenuItem];
if (_inputMode != kPlainBopomofoModeIdentifier) {
[menu addItem:[NSMenuItem separatorItem]];
[menu addItemWithTitle:NSLocalizedString(@"User Phrases", @"") action:NULL keyEquivalent:@""];
NSMenuItem *editUserPheaseItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit User Phrases", @"") action:@selector(openUserPhrases:) keyEquivalent:@""];
[editUserPheaseItem setIndentationLevel:2];
[menu addItem:editUserPheaseItem];
NSMenuItem *reloadUserPheaseItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Reload User Phrases", @"") action:@selector(reloadUserPhrases:) keyEquivalent:@""];
[reloadUserPheaseItem setIndentationLevel:2];
[menu addItem:reloadUserPheaseItem];
[menu addItem:[NSMenuItem separatorItem]];
[menu addItem:[NSMenuItem separatorItem]];
[menu addItemWithTitle:NSLocalizedString(@"User Phrases", @"") action:NULL keyEquivalent:@""];
if (_inputMode == kPlainBopomofoModeIdentifier) {
NSMenuItem *editExcludedPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit Excluded Phrases", @"") action:@selector(openExcludedPhrasesPlainBopomofo:) keyEquivalent:@""];
[menu addItem:editExcludedPhrasesItem];
}
else {
NSMenuItem *editUserPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit User Phrases", @"") action:@selector(openUserPhrases:) keyEquivalent:@""];
[menu addItem:editUserPhrasesItem];
NSMenuItem *editExcludedPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit Excluded Phrases", @"") action:@selector(openExcludedPhrasesMcBopomofo:) keyEquivalent:@""];
[menu addItem:editExcludedPhrasesItem];
}
NSMenuItem *reloadUserPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Reload User Phrases", @"") action:@selector(reloadUserPhrases:) keyEquivalent:@""];
[menu addItem:reloadUserPhrasesItem];
[menu addItem:[NSMenuItem separatorItem]];
NSMenuItem *updateCheckItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Check for Updates…", @"") action:@selector(checkForUpdate:) keyEquivalent:@""];
[menu addItem:updateCheckItem];
@ -318,18 +324,21 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
- (void)setValue:(id)value forTag:(long)tag client:(id)sender
{
NSString *newInputMode;
Formosa::Gramambular::FastLM *newLanguageModel;
Formosa::Gramambular::FastLM *newUserPhraseModel;
FastLM *newLanguageModel;
FastLM *newUserPhrasesModel;
FastLM *newExcludedPhraseModel;
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
newInputMode = kPlainBopomofoModeIdentifier;
newLanguageModel = [LanguageModelManager languageModelPlainBopomofo];
newUserPhraseModel = NULL;
newUserPhrasesModel = NULL;
newExcludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelPlainBopomofo];
}
else {
newInputMode = kBopomofoModeIdentifier;
newLanguageModel = [LanguageModelManager languageModelMcBopomofo];
newUserPhraseModel = [LanguageModelManager userPhraseLanguageModel];
newUserPhrasesModel = [LanguageModelManager userPhraseLanguageModel];
newExcludedPhraseModel = [LanguageModelManager excludedPhrasesLanguageModelMcBopomofo];
}
// Only apply the changes if the value is changed
@ -345,7 +354,8 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_inputMode = newInputMode;
_languageModel = newLanguageModel;
_userPhrasesModel = newUserPhraseModel;
_userPhrasesModel = newUserPhrasesModel;
_excludedPhraseModel = newExcludedPhraseModel;
if (!_bpmfReadingBuffer->isEmpty()) {
_bpmfReadingBuffer->clear();
@ -358,7 +368,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
if (_builder) {
delete _builder;
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel, _excludedPhraseModel);
_builder->setJoinSeparator("-");
}
}
@ -1489,24 +1499,44 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
[(AppDelegate *)[[NSApplication sharedApplication] delegate] checkForUpdateForced:YES];
}
- (void)openUserPhrases:(id)sender
- (BOOL)_checkUserFiles
{
NSLog(@"openUserPhrases called");
if (![LanguageModelManager checkIfUserLanguageModelFileExists] ) {
if (![LanguageModelManager checkIfUserLanguageModelFilesExist] ) {
NSString *content = [NSString stringWithFormat:NSLocalizedString(@"Please check the permission of at \"%@\".", @""), [LanguageModelManager dataFolderPath]];
[[NonModalAlertWindowController sharedInstance] showWithTitle:NSLocalizedString(@"Unable to create the user phrase file.", @"") content:content confirmButtonTitle:NSLocalizedString(@"OK", @"") cancelButtonTitle:nil cancelAsDefault:NO delegate:nil];
return;
return NO;
}
NSString *path = [LanguageModelManager userPhrasesDataPath];
NSLog(@"Open %@", path);
if (![[NSFileManager defaultManager] fileExistsAtPath:path]) {
[[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:path atomically:YES];
return YES;
}
// Opens the file at `path` through NSWorkspace, but only when
// -_checkUserFiles reports YES; otherwise nothing is opened.
- (void)_openUserFile:(NSString *)path
{
    if ([self _checkUserFiles]) {
        NSURL *fileURL = [NSURL fileURLWithPath:path];
        [[NSWorkspace sharedWorkspace] openURL:fileURL];
    }
}
// Menu action for "Edit User Phrases": logs the call, then opens the
// McBopomofo user phrases file.
- (void)openUserPhrases:(id)sender
{
    NSLog(@"openUserPhrases called");
    NSString *userPhrasesPath = [LanguageModelManager userPhrasesDataPathMcBopomofo];
    [self _openUserFile:userPhrasesPath];
}
// Menu action for "Edit Excluded Phrases" in Plain Bopomofo mode: logs the
// call, then opens the Plain Bopomofo excluded phrases file.
- (void)openExcludedPhrasesPlainBopomofo:(id)sender
{
    NSLog(@"openExcludedPhrasesPlainBopomofo called");
    NSString *excludedPhrasesPath = [LanguageModelManager excludedPhrasesDataPathPlainBopomofo];
    [self _openUserFile:excludedPhrasesPath];
}
// Menu action for "Edit Excluded Phrases" outside Plain Bopomofo mode: logs
// the call, then opens the McBopomofo excluded phrases file.
- (void)openExcludedPhrasesMcBopomofo:(id)sender
{
    NSLog(@"openExcludedPhrasesMcBopomofo called");
    NSString *excludedPhrasesPath = [LanguageModelManager excludedPhrasesDataPathMcBopomofo];
    [self _openUserFile:excludedPhrasesPath];
}
- (void)reloadUserPhrases:(id)sender
{
NSLog(@"reloadUserPhrases called");

View File

@ -8,16 +8,19 @@ NS_ASSUME_NONNULL_BEGIN
+ (void)loadDataModels;
+ (void)loadUserPhrasesModel;
+ (BOOL)checkIfUserLanguageModelFileExists;
+ (BOOL)checkIfUserLanguageModelFilesExist;
+ (BOOL)writeUserPhrase:(NSString *)userPhrase;
@property (class, readonly, nonatomic) NSString *dataFolderPath;
@property (class, readonly, nonatomic) NSString *userPhrasesDataPath;
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathMcBopomofo;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathMcBopomofo;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathPlainBopomofo;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelMcBopomofo;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *languageModelPlainBopomofo;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *userPhraseLanguageModel;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *excludedPhrasesLanguageModelMcBopomofo;
@property (class, readonly, nonatomic) Formosa::Gramambular::FastLM *excludedPhrasesLanguageModelPlainBopomofo;
@property (class, readonly, nonatomic) McBopomofo::UserOverrideModel *userOverrideModel;
@end
NS_ASSUME_NONNULL_END

View File

@ -15,6 +15,8 @@ static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
FastLM globalLanguageModel;
FastLM globalLanguageModelPlainBopomofo;
FastLM globalUserPhraseLanguageModel;
FastLM globalUserExcludedPhrasesMcBopomofo;
FastLM globalUserExcludedPhrasesPlainBopomofo;
McBopomofo::UserOverrideModel globalUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
@implementation LanguageModelManager
@ -42,13 +44,27 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &
+ (void)loadUserPhrasesModel
{
globalUserPhraseLanguageModel.close();
bool result = globalUserPhraseLanguageModel.open([[self userPhrasesDataPath] UTF8String]);
globalUserExcludedPhrasesMcBopomofo.close();
globalUserExcludedPhrasesPlainBopomofo.close();
bool result = false;
result = globalUserPhraseLanguageModel.open([[self userPhrasesDataPathMcBopomofo] UTF8String]);
if (!result) {
NSLog(@"Failed to open user phrases.");
NSLog(@"Failed to open user phrases. %@", [self userPhrasesDataPathMcBopomofo]);
}
result = globalUserExcludedPhrasesMcBopomofo.open([[self excludedPhrasesDataPathMcBopomofo] UTF8String]);
if (!result) {
NSLog(@"Failed to open excluded phrases McBopomofo. %@", [self excludedPhrasesDataPathMcBopomofo]);
}
result = globalUserExcludedPhrasesPlainBopomofo.open([[self excludedPhrasesDataPathPlainBopomofo] UTF8String]);
if (!result) {
NSLog(@"Failed to open excluded phrases Plain Bopomofo. %@", [self excludedPhrasesDataPathPlainBopomofo]);
}
}
+ (BOOL)checkIfUserLanguageModelFileExists
+ (BOOL)checkIfUserDataFolderExists
{
NSString *folderPath = [self dataFolderPath];
BOOL isFolder = NO;
@ -70,8 +86,11 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &
return NO;
}
}
return YES;
}
NSString *filePath = [self userPhrasesDataPath];
+ (BOOL)checkIfFileExist:(NSString *)filePath
{
if (![[NSFileManager defaultManager] fileExistsAtPath:filePath]) {
BOOL result = [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:filePath atomically:YES];
if (!result) {
@ -82,15 +101,32 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &
return YES;
}
// Verifies the user data folder first, then each user-editable file in turn:
// the user phrases file, the McBopomofo excluded phrases file, and the Plain
// Bopomofo excluded phrases file.
// Returns NO as soon as one check fails (later checks are not run), YES when
// every check passes.
+ (BOOL)checkIfUserLanguageModelFilesExist
{
    // Short-circuit evaluation keeps the checks in their fixed order and stops
    // at the first failure.
    return [self checkIfUserDataFolderExists]
        && [self checkIfFileExist:[self userPhrasesDataPathMcBopomofo]]
        && [self checkIfFileExist:[self excludedPhrasesDataPathMcBopomofo]]
        && [self checkIfFileExist:[self excludedPhrasesDataPathPlainBopomofo]];
}
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
{
if (![self checkIfUserLanguageModelFileExists]) {
if (![self checkIfUserLanguageModelFilesExist]) {
return NO;
}
NSString *currentMarkedPhrase = [userPhrase stringByAppendingString:@"\n"];
NSString *path = [self userPhrasesDataPath];
NSString *path = [self userPhrasesDataPathMcBopomofo];
NSFileHandle *file = [NSFileHandle fileHandleForUpdatingAtPath:path];
if (!file) {
return NO;
@ -112,26 +148,46 @@ static bool LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &
return userDictPath;
}
+ (NSString *)userPhrasesDataPath
// Path of the user phrases file: "data.txt" inside the data folder.
+ (NSString *)userPhrasesDataPathMcBopomofo
{
    NSString *folderPath = [self dataFolderPath];
    return [folderPath stringByAppendingPathComponent:@"data.txt"];
}
+ (Formosa::Gramambular::FastLM *)languageModelMcBopomofo
// Path of the McBopomofo excluded phrases file: "exclude-phrases.txt" inside
// the data folder.
+ (NSString *)excludedPhrasesDataPathMcBopomofo
{
    NSString *folderPath = [self dataFolderPath];
    return [folderPath stringByAppendingPathComponent:@"exclude-phrases.txt"];
}
// Path of the Plain Bopomofo excluded phrases file:
// "exclude-phrases-plain-bpmf.txt" inside the data folder.
+ (NSString *)excludedPhrasesDataPathPlainBopomofo
{
    NSString *folderPath = [self dataFolderPath];
    return [folderPath stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"];
}
// Returns a pointer to the file-level globalLanguageModel instance.
+ (FastLM *)languageModelMcBopomofo
{
    FastLM *model = &globalLanguageModel;
    return model;
}
+ (Formosa::Gramambular::FastLM *)languageModelPlainBopomofo
// Returns a pointer to the file-level globalLanguageModelPlainBopomofo instance.
+ (FastLM *)languageModelPlainBopomofo
{
    FastLM *model = &globalLanguageModelPlainBopomofo;
    return model;
}
+ (Formosa::Gramambular::FastLM *)userPhraseLanguageModel
// Returns a pointer to the file-level globalUserPhraseLanguageModel instance.
+ (FastLM *)userPhraseLanguageModel
{
    FastLM *model = &globalUserPhraseLanguageModel;
    return model;
}
// Returns a pointer to the file-level globalUserExcludedPhrasesMcBopomofo
// instance.
+ (FastLM *)excludedPhrasesLanguageModelMcBopomofo
{
    FastLM *model = &globalUserExcludedPhrasesMcBopomofo;
    return model;
}
// Returns a pointer to the file-level globalUserExcludedPhrasesPlainBopomofo
// instance.
+ (FastLM *)excludedPhrasesLanguageModelPlainBopomofo
{
    FastLM *model = &globalUserExcludedPhrasesPlainBopomofo;
    return model;
}
+ (McBopomofo::UserOverrideModel *)userOverrideModel
{
return &globalUserOverrideModel;

View File

@ -10,5 +10,5 @@
@interface LanguageModelManager : NSObject
+ (void)loadDataModels;
+ (void)loadUserPhrasesModel;
+ (BOOL)checkIfUserLanguageModelFileExists;
+ (BOOL)checkIfUserLanguageModelFilesExist;
@end