Shiki: Attempt to separate CHS / CHT lang models (buggy).

- Known bug that still needs fixing: user-dictionary entries appear twice in the candidate list. Note that this bug was introduced by a third-party script inherited from upstream.
This commit is contained in:
ShikiSuen 2022-01-19 11:00:05 +08:00
parent 7c1412cfd5
commit 8453386057
4 changed files with 138 additions and 46 deletions

View File

@ -32,9 +32,6 @@
// latest walked path (trellis) using the Viterbi algorithm
std::vector<Taiyan::Gramambular::NodeAnchor> _walkedNodes;
// user override model
vChewing::UserOverrideModel *_uom;
// the latest composing buffer that is updated to the foreground app
NSMutableString *_composingBuffer;
NSInteger _latestReadingCursor;

View File

@ -112,7 +112,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
// create the lattice builder
_languageModel = [LanguageModelManager languageModelCoreCHT];
_languageModel->setPhraseReplacementEnabled(Preferences.phraseReplacementEnabled);
_userOverrideModel = [LanguageModelManager userOverrideModel];
_userOverrideModel = [LanguageModelManager userOverrideModelCHT];
_builder = new BlockReadingBuilder(_languageModel);
@ -243,13 +243,16 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
{
NSString *newInputMode;
vChewingLM *newLanguageModel;
UserOverrideModel *newUserOverrideModel;
if ([value isKindOfClass:[NSString class]] && [value isEqual:kBopomofoModeIdentifierCHS]) {
newInputMode = kBopomofoModeIdentifierCHS;
newLanguageModel = [LanguageModelManager languageModelCoreCHS];
newUserOverrideModel = [LanguageModelManager userOverrideModelCHS];
} else {
newInputMode = kBopomofoModeIdentifierCHT;
newLanguageModel = [LanguageModelManager languageModelCoreCHT];
newUserOverrideModel = [LanguageModelManager userOverrideModelCHT];
}
// 自 Preferences 模組讀入自訂語彙置換功能開關狀態。
@ -265,6 +268,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_inputMode = newInputMode;
_languageModel = newLanguageModel;
_userOverrideModel = newUserOverrideModel;
if (!_bpmfReadingBuffer->isEmpty()) {
_bpmfReadingBuffer->clear();
@ -1413,8 +1417,12 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
if (![currentMarkedPhrase length]) {
return NO;
}
return [LanguageModelManager writeUserPhrase:currentMarkedPhrase];
if (_inputMode == kBopomofoModeIdentifierCHT) {
return [LanguageModelManager writeUserPhraseCHT:currentMarkedPhrase];
} else {
return [LanguageModelManager writeUserPhraseCHS:currentMarkedPhrase];
}
}
- (void)_showCurrentMarkedTextTooltipWithClient:(id)client
@ -1499,9 +1507,15 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
/// Menu action that flips the phrase-replacement preference and applies the
/// resulting state to the language model of the active input mode.
///
/// @param sender The object that triggered the action; unused.
- (void)togglePhraseReplacementEnabled:(id)sender
{
    // Toggle exactly once per invocation. NOTE(review): this assumes
    // +togglePhraseReplacementEnabled flips the stored preference and returns
    // the new value, so a second call would undo the change -- confirm.
    BOOL enabled = [Preferences togglePhraseReplacementEnabled];

    // Compare by value: `==` on NSString pointers only tests object identity.
    BOOL isCHT = [_inputMode isEqualToString:kBopomofoModeIdentifierCHT];
    vChewingLM *lm = isCHT ? [LanguageModelManager languageModelCoreCHT]
                           : [LanguageModelManager languageModelCoreCHS];
    lm->setPhraseReplacementEnabled(enabled);
}
- (void)checkForUpdate:(id)sender
@ -1531,22 +1545,29 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
/// Menu action that opens the user phrase file matching the active input mode.
///
/// @param sender The object that triggered the action; unused.
- (void)openUserPhrases:(id)sender
{
    // Compare by value: `==` on NSString pointers only tests object identity.
    if ([_inputMode isEqualToString:kBopomofoModeIdentifierCHT]) {
        [self _openUserFile:[LanguageModelManager userPhrasesDataPathCHT]];
    } else {
        [self _openUserFile:[LanguageModelManager userPhrasesDataPathCHS]];
    }
}

/// Legacy menu action that opens the excluded-phrases file of the CHS model.
///
/// NOTE(review): kept only so that any menu item still wired to this selector
/// keeps working; -openExcludedPhrases: now covers both input modes. Confirm
/// whether this selector is still referenced before removing it.
///
/// @param sender The object that triggered the action; unused.
- (void)openExcludedPhrasesSimpBopomofo:(id)sender
{
    [self _openUserFile:[LanguageModelManager excludedPhrasesDataPathCHS]];
}
/// Menu action that opens the excluded-phrases file matching the active
/// input mode.
///
/// @param sender The object that triggered the action; unused.
- (void)openExcludedPhrases:(id)sender
{
    // Only one file is opened per invocation, chosen by input mode.
    // Compare by value: `==` on NSString pointers only tests object identity.
    if ([_inputMode isEqualToString:kBopomofoModeIdentifierCHT]) {
        [self _openUserFile:[LanguageModelManager excludedPhrasesDataPathCHT]];
    } else {
        [self _openUserFile:[LanguageModelManager excludedPhrasesDataPathCHS]];
    }
}
/// Menu action that opens the phrase-replacement file matching the active
/// input mode.
///
/// @param sender The object that triggered the action; unused.
- (void)openPhraseReplacement:(id)sender
{
    // Only one file is opened per invocation, chosen by input mode.
    // Compare by value: `==` on NSString pointers only tests object identity.
    if ([_inputMode isEqualToString:kBopomofoModeIdentifierCHT]) {
        [self _openUserFile:[LanguageModelManager phraseReplacementDataPathCHT]];
    } else {
        [self _openUserFile:[LanguageModelManager phraseReplacementDataPathCHS]];
    }
}
- (void)reloadUserPhrases:(id)sender

View File

@ -19,16 +19,20 @@ NS_ASSUME_NONNULL_BEGIN
+ (void)loadUserPhrases;
+ (void)loadUserPhraseReplacement;
+ (BOOL)checkIfUserLanguageModelFilesExist;
+ (BOOL)writeUserPhrase:(NSString *)userPhrase;
+ (BOOL)writeUserPhraseCHT:(NSString *)userPhraseCHT;
+ (BOOL)writeUserPhraseCHS:(NSString *)userPhraseCHS;
@property (class, readonly, nonatomic) NSString *dataFolderPath;
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathBopomofo;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathBopomofo;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathSimpBopomofo;
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathBopomofo;
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathCHT;
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathCHS;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathCHT;
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathCHS;
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathCHT;
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathCHS;
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHT;
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHS;
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModel;
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT;
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS;
@end
NS_ASSUME_NONNULL_END

View File

@ -23,7 +23,8 @@ static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
// Process-wide language model instance handed out for the CHT input mode.
vChewingLM glanguageModelCoreCHT;
// Process-wide language model instance handed out for the CHS input mode.
vChewingLM glanguageModelCoreCHS;
// NOTE(review): unsuffixed override model; it looks superseded by the
// per-variant instances below -- confirm whether anything still references it.
UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
// User override models, one per variant, built with the same capacity and
// half-life constants.
UserOverrideModel gUserOverrideModelCHS(kUserOverrideModelCapacity, kObservedOverrideHalflife);
UserOverrideModel gUserOverrideModelCHT(kUserOverrideModelCapacity, kObservedOverrideHalflife);
@implementation LanguageModelManager
@ -42,13 +43,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
/// Loads the user phrase file and the excluded-phrases file into each
/// language model, using that model's own CHT or CHS data files.
+ (void)loadUserPhrases
{
    // One load per model, each with its own pair of files. Loading a model a
    // second time from the legacy shared paths is unnecessary.
    // NOTE(review): repeated loads may also be related to the duplicated
    // candidate entries mentioned in the commit message -- confirm.
    glanguageModelCoreCHT.loadUserPhrases([[self userPhrasesDataPathCHT] UTF8String],
                                          [[self excludedPhrasesDataPathCHT] UTF8String]);
    glanguageModelCoreCHS.loadUserPhrases([[self userPhrasesDataPathCHS] UTF8String],
                                          [[self excludedPhrasesDataPathCHS] UTF8String]);
}
+ (void)loadUserPhraseReplacement
{
    // Each model reads its own phrase-replacement file exactly once; the
    // legacy shared file is no longer loaded into the CHT model first.
    glanguageModelCoreCHT.loadPhraseReplacementMap([[self phraseReplacementDataPathCHT] UTF8String]);
    glanguageModelCoreCHS.loadPhraseReplacementMap([[self phraseReplacementDataPathCHS] UTF8String]);
}
+ (BOOL)checkIfUserDataFolderExists
@ -93,29 +95,35 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
if (![self checkIfUserDataFolderExists]) {
return NO;
}
if (![self checkIfFileExist:[self userPhrasesDataPathBopomofo]]) {
if (![self checkIfFileExist:[self userPhrasesDataPathCHT]]) {
return NO;
}
if (![self checkIfFileExist:[self excludedPhrasesDataPathBopomofo]]) {
if (![self checkIfFileExist:[self excludedPhrasesDataPathCHT]]) {
return NO;
}
if (![self checkIfFileExist:[self excludedPhrasesDataPathSimpBopomofo]]) {
if (![self checkIfFileExist:[self phraseReplacementDataPathCHT]]) {
return NO;
}
if (![self checkIfFileExist:[self phraseReplacementDataPathBopomofo]]) {
return NO;
}
return YES;
if (![self checkIfFileExist:[self userPhrasesDataPathCHS]]) {
return NO;
}
if (![self checkIfFileExist:[self excludedPhrasesDataPathCHS]]) {
return NO;
}
if (![self checkIfFileExist:[self phraseReplacementDataPathCHS]]) {
return NO;
}
return YES;
}
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
+ (BOOL)writeUserPhraseCHT:(NSString *)userPhrase
{
if (![self checkIfUserLanguageModelFilesExist]) {
return NO;
}
BOOL shuoldAddLineBreakAtFront = NO;
NSString *path = [self userPhrasesDataPathBopomofo];
NSString *path = [self userPhrasesDataPathCHT];
if ([[NSFileManager defaultManager] fileExistsAtPath:path]) {
NSError *error = nil;
@ -155,6 +163,53 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
return YES;
}
/// Appends one entry to the CHS user phrase file and reloads the user phrases.
///
/// The entry is written on a line of its own: if the file is non-empty and
/// does not already end with a line feed, one is inserted before the entry,
/// and a trailing line feed is always appended after it.
///
/// @param userPhrase The text of the entry to append.
/// @return YES when the entry was appended and the user phrases were reloaded;
///         NO when the phrase is empty, the user data files are missing, or
///         the phrase file cannot be opened for updating.
+ (BOOL)writeUserPhraseCHS:(NSString *)userPhrase
{
    // Nothing to record; this also keeps -appendString: from raising on nil.
    if ([userPhrase length] == 0) {
        return NO;
    }
    if (![self checkIfUserLanguageModelFilesExist]) {
        return NO;
    }

    NSString *path = [self userPhrasesDataPathCHS];
    BOOL shouldAddLineBreakAtFront = NO;

    // Judge success by the returned dictionary, not by the NSError pointer.
    NSDictionary *attr = [[NSFileManager defaultManager] attributesOfItemAtPath:path error:NULL];
    unsigned long long fileSize = [attr fileSize];
    if (attr && fileSize > 0) {
        NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path];
        if (readFile) {
            [readFile seekToFileOffset:fileSize - 1];
            NSData *lastByte = [readFile readDataToEndOfFile];
            // The file may have shrunk since its size was read; only inspect
            // the byte when the read actually returned one.
            if ([lastByte length] > 0 && *(const char *)[lastByte bytes] != '\n') {
                shouldAddLineBreakAtFront = YES;
            }
            [readFile closeFile];
        }
    }

    NSMutableString *entry = [NSMutableString string];
    if (shouldAddLineBreakAtFront) {
        [entry appendString:@"\n"];
    }
    [entry appendString:userPhrase];
    [entry appendString:@"\n"];

    NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path];
    if (!writeFile) {
        return NO;
    }
    [writeFile seekToEndOfFile];
    [writeFile writeData:[entry dataUsingEncoding:NSUTF8StringEncoding]];
    [writeFile closeFile];

    // Reload so the new entry is picked up by the language models.
    [self loadUserPhrases];
    return YES;
}
+ (NSString *)dataFolderPath
{
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
@ -163,24 +218,34 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
return userDictPath;
}
+ (NSString *)userPhrasesDataPathBopomofo
+ (NSString *)userPhrasesDataPathCHT
{
return [[self dataFolderPath] stringByAppendingPathComponent:@"data-cht.txt"];
return [[self dataFolderPath] stringByAppendingPathComponent:@"userdata-cht.txt"];
}
+ (NSString *)excludedPhrasesDataPathBopomofo
+ (NSString *)userPhrasesDataPathCHS
{
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases.txt"];
return [[self dataFolderPath] stringByAppendingPathComponent:@"userdata-chs.txt"];
}
+ (NSString *)excludedPhrasesDataPathSimpBopomofo
+ (NSString *)excludedPhrasesDataPathCHT
{
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"];
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-cht.txt"];
}
+ (NSString *)phraseReplacementDataPathBopomofo
+ (NSString *)excludedPhrasesDataPathCHS
{
return [[self dataFolderPath] stringByAppendingPathComponent:@"phrases-replacement.txt"];
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-chs.txt"];
}
/// Path of the CHT phrase-replacement file inside the user data folder.
+ (NSString *)phraseReplacementDataPathCHT
{
    NSString *folder = [self dataFolderPath];
    NSString *fullPath = [folder stringByAppendingPathComponent:@"phrases-replacement-cht.txt"];
    return fullPath;
}
/// Path of the CHS phrase-replacement file inside the user data folder.
+ (NSString *)phraseReplacementDataPathCHS
{
    NSString *folder = [self dataFolderPath];
    NSString *fullPath = [folder stringByAppendingPathComponent:@"phrases-replacement-chs.txt"];
    return fullPath;
}
+ (vChewingLM *)languageModelCoreCHT
@ -193,9 +258,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
return &glanguageModelCoreCHS;
}
+ (vChewing::UserOverrideModel *)userOverrideModel
+ (vChewing::UserOverrideModel *)userOverrideModelCHT
{
return &gUserOverrideModel;
return &gUserOverrideModelCHT;
}
/// Returns a pointer to the file-level CHS user override model instance.
+ (vChewing::UserOverrideModel *)userOverrideModelCHS
{
    vChewing::UserOverrideModel *model = &gUserOverrideModelCHS;
    return model;
}
@end