Shiki: Attempt to separate CHS / CHT lang models (buggy).

- Known bug that still needs a fix: UserDict entries are duplicated in the candidate list. Note that this bug was introduced by someone else's script from upstream.
This commit is contained in:
ShikiSuen 2022-01-19 11:00:05 +08:00
parent 735546e95e
commit eab615aef3
4 changed files with 138 additions and 46 deletions

View File

@ -32,9 +32,6 @@
// latest walked path (trellis) using the Viterbi algorithm // latest walked path (trellis) using the Viterbi algorithm
std::vector<Taiyan::Gramambular::NodeAnchor> _walkedNodes; std::vector<Taiyan::Gramambular::NodeAnchor> _walkedNodes;
// user override model
vChewing::UserOverrideModel *_uom;
// the latest composing buffer that is updated to the foreground app // the latest composing buffer that is updated to the foreground app
NSMutableString *_composingBuffer; NSMutableString *_composingBuffer;
NSInteger _latestReadingCursor; NSInteger _latestReadingCursor;

View File

@ -112,7 +112,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
// create the lattice builder // create the lattice builder
_languageModel = [LanguageModelManager languageModelCoreCHT]; _languageModel = [LanguageModelManager languageModelCoreCHT];
_languageModel->setPhraseReplacementEnabled(Preferences.phraseReplacementEnabled); _languageModel->setPhraseReplacementEnabled(Preferences.phraseReplacementEnabled);
_userOverrideModel = [LanguageModelManager userOverrideModel]; _userOverrideModel = [LanguageModelManager userOverrideModelCHT];
_builder = new BlockReadingBuilder(_languageModel); _builder = new BlockReadingBuilder(_languageModel);
@ -243,13 +243,16 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
{ {
NSString *newInputMode; NSString *newInputMode;
vChewingLM *newLanguageModel; vChewingLM *newLanguageModel;
UserOverrideModel *newUserOverrideModel;
if ([value isKindOfClass:[NSString class]] && [value isEqual:kBopomofoModeIdentifierCHS]) { if ([value isKindOfClass:[NSString class]] && [value isEqual:kBopomofoModeIdentifierCHS]) {
newInputMode = kBopomofoModeIdentifierCHS; newInputMode = kBopomofoModeIdentifierCHS;
newLanguageModel = [LanguageModelManager languageModelCoreCHS]; newLanguageModel = [LanguageModelManager languageModelCoreCHS];
newUserOverrideModel = [LanguageModelManager userOverrideModelCHS];
} else { } else {
newInputMode = kBopomofoModeIdentifierCHT; newInputMode = kBopomofoModeIdentifierCHT;
newLanguageModel = [LanguageModelManager languageModelCoreCHT]; newLanguageModel = [LanguageModelManager languageModelCoreCHT];
newUserOverrideModel = [LanguageModelManager userOverrideModelCHT];
} }
// 自 Preferences 模組讀入自訂語彙置換功能開關狀態。 // 自 Preferences 模組讀入自訂語彙置換功能開關狀態。
@ -265,6 +268,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_inputMode = newInputMode; _inputMode = newInputMode;
_languageModel = newLanguageModel; _languageModel = newLanguageModel;
_userOverrideModel = newUserOverrideModel;
if (!_bpmfReadingBuffer->isEmpty()) { if (!_bpmfReadingBuffer->isEmpty()) {
_bpmfReadingBuffer->clear(); _bpmfReadingBuffer->clear();
@ -1413,8 +1417,12 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
if (![currentMarkedPhrase length]) { if (![currentMarkedPhrase length]) {
return NO; return NO;
} }
return [LanguageModelManager writeUserPhrase:currentMarkedPhrase]; if (_inputMode == kBopomofoModeIdentifierCHT) {
return [LanguageModelManager writeUserPhraseCHT:currentMarkedPhrase];
} else {
return [LanguageModelManager writeUserPhraseCHS:currentMarkedPhrase];
}
} }
- (void)_showCurrentMarkedTextTooltipWithClient:(id)client - (void)_showCurrentMarkedTextTooltipWithClient:(id)client
@ -1499,9 +1507,15 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
- (void)togglePhraseReplacementEnabled:(id)sender - (void)togglePhraseReplacementEnabled:(id)sender
{ {
BOOL enabled = [Preferences togglePhraseReplacementEnabled]; if (_inputMode == kBopomofoModeIdentifierCHT) {
vChewingLM *lm = [LanguageModelManager languageModelCoreCHT]; BOOL enabled = [Preferences togglePhraseReplacementEnabled];
lm->setPhraseReplacementEnabled(enabled); vChewingLM *lm = [LanguageModelManager languageModelCoreCHT];
lm->setPhraseReplacementEnabled(enabled);
} else {
BOOL enabled = [Preferences togglePhraseReplacementEnabled];
vChewingLM *lm = [LanguageModelManager languageModelCoreCHS];
lm->setPhraseReplacementEnabled(enabled);
}
} }
- (void)checkForUpdate:(id)sender - (void)checkForUpdate:(id)sender
@ -1531,22 +1545,29 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
- (void)openUserPhrases:(id)sender - (void)openUserPhrases:(id)sender
{ {
[self _openUserFile:[LanguageModelManager userPhrasesDataPathBopomofo]]; if (_inputMode == kBopomofoModeIdentifierCHT) {
} [self _openUserFile:[LanguageModelManager userPhrasesDataPathCHT]];
} else {
- (void)openExcludedPhrasesSimpBopomofo:(id)sender [self _openUserFile:[LanguageModelManager userPhrasesDataPathCHS]];
{ }
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathSimpBopomofo]];
} }
- (void)openExcludedPhrases:(id)sender - (void)openExcludedPhrases:(id)sender
{ {
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathBopomofo]]; if (_inputMode == kBopomofoModeIdentifierCHT) {
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathCHT]];
} else {
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathCHS]];
}
} }
- (void)openPhraseReplacement:(id)sender - (void)openPhraseReplacement:(id)sender
{ {
[self _openUserFile:[LanguageModelManager phraseReplacementDataPathBopomofo]]; if (_inputMode == kBopomofoModeIdentifierCHT) {
[self _openUserFile:[LanguageModelManager phraseReplacementDataPathCHT]];
} else {
[self _openUserFile:[LanguageModelManager phraseReplacementDataPathCHS]];
}
} }
- (void)reloadUserPhrases:(id)sender - (void)reloadUserPhrases:(id)sender

View File

@ -19,16 +19,20 @@ NS_ASSUME_NONNULL_BEGIN
+ (void)loadUserPhrases; + (void)loadUserPhrases;
+ (void)loadUserPhraseReplacement; + (void)loadUserPhraseReplacement;
+ (BOOL)checkIfUserLanguageModelFilesExist; + (BOOL)checkIfUserLanguageModelFilesExist;
+ (BOOL)writeUserPhrase:(NSString *)userPhrase; + (BOOL)writeUserPhraseCHT:(NSString *)userPhraseCHT;
+ (BOOL)writeUserPhraseCHS:(NSString *)userPhraseCHS;
@property (class, readonly, nonatomic) NSString *dataFolderPath; @property (class, readonly, nonatomic) NSString *dataFolderPath;
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathBopomofo; @property (class, readonly, nonatomic) NSString *userPhrasesDataPathCHT;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathBopomofo; @property (class, readonly, nonatomic) NSString *userPhrasesDataPathCHS;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathSimpBopomofo; @property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathCHT;
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathBopomofo; @property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathCHS;
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathCHT;
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathCHS;
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHT; @property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHT;
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHS; @property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHS;
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModel; @property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT;
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS;
@end @end
NS_ASSUME_NONNULL_END NS_ASSUME_NONNULL_END

View File

@ -23,7 +23,8 @@ static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
vChewingLM glanguageModelCoreCHT; vChewingLM glanguageModelCoreCHT;
vChewingLM glanguageModelCoreCHS; vChewingLM glanguageModelCoreCHS;
UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife); UserOverrideModel gUserOverrideModelCHS(kUserOverrideModelCapacity, kObservedOverrideHalflife);
UserOverrideModel gUserOverrideModelCHT(kUserOverrideModelCapacity, kObservedOverrideHalflife);
@implementation LanguageModelManager @implementation LanguageModelManager
@ -42,13 +43,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
+ (void)loadUserPhrases + (void)loadUserPhrases
{ {
glanguageModelCoreCHT.loadUserPhrases([[self userPhrasesDataPathBopomofo] UTF8String], [[self excludedPhrasesDataPathBopomofo] UTF8String]); glanguageModelCoreCHT.loadUserPhrases([[self userPhrasesDataPathCHT] UTF8String], [[self excludedPhrasesDataPathCHT] UTF8String]);
glanguageModelCoreCHS.loadUserPhrases(NULL, [[self excludedPhrasesDataPathSimpBopomofo] UTF8String]); glanguageModelCoreCHS.loadUserPhrases([[self userPhrasesDataPathCHS] UTF8String], [[self excludedPhrasesDataPathCHS] UTF8String]);
} }
+ (void)loadUserPhraseReplacement + (void)loadUserPhraseReplacement
{ {
glanguageModelCoreCHT.loadPhraseReplacementMap([[self phraseReplacementDataPathBopomofo] UTF8String]); glanguageModelCoreCHT.loadPhraseReplacementMap([[self phraseReplacementDataPathCHT] UTF8String]);
glanguageModelCoreCHS.loadPhraseReplacementMap([[self phraseReplacementDataPathCHS] UTF8String]);
} }
+ (BOOL)checkIfUserDataFolderExists + (BOOL)checkIfUserDataFolderExists
@ -93,29 +95,35 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
if (![self checkIfUserDataFolderExists]) { if (![self checkIfUserDataFolderExists]) {
return NO; return NO;
} }
if (![self checkIfFileExist:[self userPhrasesDataPathBopomofo]]) { if (![self checkIfFileExist:[self userPhrasesDataPathCHT]]) {
return NO; return NO;
} }
if (![self checkIfFileExist:[self excludedPhrasesDataPathBopomofo]]) { if (![self checkIfFileExist:[self excludedPhrasesDataPathCHT]]) {
return NO; return NO;
} }
if (![self checkIfFileExist:[self excludedPhrasesDataPathSimpBopomofo]]) { if (![self checkIfFileExist:[self phraseReplacementDataPathCHT]]) {
return NO; return NO;
} }
if (![self checkIfFileExist:[self phraseReplacementDataPathBopomofo]]) { if (![self checkIfFileExist:[self userPhrasesDataPathCHS]]) {
return NO; return NO;
} }
return YES; if (![self checkIfFileExist:[self excludedPhrasesDataPathCHS]]) {
return NO;
}
if (![self checkIfFileExist:[self phraseReplacementDataPathCHS]]) {
return NO;
}
return YES;
} }
+ (BOOL)writeUserPhrase:(NSString *)userPhrase + (BOOL)writeUserPhraseCHT:(NSString *)userPhrase
{ {
if (![self checkIfUserLanguageModelFilesExist]) { if (![self checkIfUserLanguageModelFilesExist]) {
return NO; return NO;
} }
BOOL shuoldAddLineBreakAtFront = NO; BOOL shuoldAddLineBreakAtFront = NO;
NSString *path = [self userPhrasesDataPathBopomofo]; NSString *path = [self userPhrasesDataPathCHT];
if ([[NSFileManager defaultManager] fileExistsAtPath:path]) { if ([[NSFileManager defaultManager] fileExistsAtPath:path]) {
NSError *error = nil; NSError *error = nil;
@ -155,6 +163,53 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
return YES; return YES;
} }
/// Appends one entry to the CHS user phrase file (the file returned by
/// +userPhrasesDataPathCHS) and then calls +loadUserPhrases.
///
/// A line break is written before the entry when the existing file is
/// non-empty and does not already end with '\n', so the new entry always
/// starts on its own line. The entry itself is always terminated with '\n'.
///
/// @param userPhrase The text to append. An empty string is rejected.
/// @return YES when the entry has been handed to the file handle for writing;
///         NO when the phrase is empty, when +checkIfUserLanguageModelFilesExist
///         fails, or when the file cannot be opened for updating.
+ (BOOL)writeUserPhraseCHS:(NSString *)userPhrase
{
    // Refuse to append a blank line to the user phrase file.
    if (![userPhrase length]) {
        return NO;
    }
    if (![self checkIfUserLanguageModelFilesExist]) {
        return NO;
    }

    NSString *path = [self userPhrasesDataPathCHS];
    NSFileManager *fileManager = [NSFileManager defaultManager];

    BOOL shouldAddLineBreakAtFront = NO;
    if ([fileManager fileExistsAtPath:path]) {
        // Success is judged by the returned dictionary, not by the NSError
        // pointer: a nil dictionary yields a fileSize of 0 and skips the check.
        NSDictionary *attributes = [fileManager attributesOfItemAtPath:path error:nil];
        unsigned long long fileSize = [attributes fileSize];
        if (attributes && fileSize) {
            NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path];
            if (readFile) {
                // Inspect only the final byte of the file.
                [readFile seekToFileOffset:fileSize - 1];
                NSData *lastByte = [readFile readDataToEndOfFile];
                // -bytes may be NULL for empty data (e.g. the file shrank after
                // its size was read), so check the length before dereferencing.
                if ([lastByte length] > 0 && *(const char *)[lastByte bytes] != '\n') {
                    shouldAddLineBreakAtFront = YES;
                }
                [readFile closeFile];
            }
        }
    }

    NSMutableString *currentMarkedPhrase = [NSMutableString string];
    if (shouldAddLineBreakAtFront) {
        [currentMarkedPhrase appendString:@"\n"];
    }
    [currentMarkedPhrase appendString:userPhrase];
    [currentMarkedPhrase appendString:@"\n"];

    NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path];
    if (!writeFile) {
        return NO;
    }
    [writeFile seekToEndOfFile];
    NSData *data = [currentMarkedPhrase dataUsingEncoding:NSUTF8StringEncoding];
    [writeFile writeData:data];
    [writeFile closeFile];

    // Reload so the newly written entry takes effect.
    [self loadUserPhrases];
    return YES;
}
+ (NSString *)dataFolderPath + (NSString *)dataFolderPath
{ {
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES); NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
@ -163,24 +218,34 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
return userDictPath; return userDictPath;
} }
+ (NSString *)userPhrasesDataPathBopomofo + (NSString *)userPhrasesDataPathCHT
{ {
return [[self dataFolderPath] stringByAppendingPathComponent:@"data-cht.txt"]; return [[self dataFolderPath] stringByAppendingPathComponent:@"userdata-cht.txt"];
} }
+ (NSString *)excludedPhrasesDataPathBopomofo + (NSString *)userPhrasesDataPathCHS
{ {
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases.txt"]; return [[self dataFolderPath] stringByAppendingPathComponent:@"userdata-chs.txt"];
} }
+ (NSString *)excludedPhrasesDataPathSimpBopomofo + (NSString *)excludedPhrasesDataPathCHT
{ {
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"]; return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-cht.txt"];
} }
+ (NSString *)phraseReplacementDataPathBopomofo + (NSString *)excludedPhrasesDataPathCHS
{ {
return [[self dataFolderPath] stringByAppendingPathComponent:@"phrases-replacement.txt"]; return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-chs.txt"];
}
/// Returns the path of "phrases-replacement-cht.txt" inside the folder
/// returned by +dataFolderPath.
+ (NSString *)phraseReplacementDataPathCHT
{
    NSString *folder = [self dataFolderPath];
    NSString *fullPath = [folder stringByAppendingPathComponent:@"phrases-replacement-cht.txt"];
    return fullPath;
}
+ (NSString *)phraseReplacementDataPathCHS
{
return [[self dataFolderPath] stringByAppendingPathComponent:@"phrases-replacement-chs.txt"];
} }
+ (vChewingLM *)languageModelCoreCHT + (vChewingLM *)languageModelCoreCHT
@ -193,9 +258,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
return &glanguageModelCoreCHS; return &glanguageModelCoreCHS;
} }
+ (vChewing::UserOverrideModel *)userOverrideModel + (vChewing::UserOverrideModel *)userOverrideModelCHT
{ {
return &gUserOverrideModel; return &gUserOverrideModelCHT;
}
+ (vChewing::UserOverrideModel *)userOverrideModelCHS
{
return &gUserOverrideModelCHS;
} }
@end @end