Shiki: Attempt to separate the CHS / CHT language models (buggy).
- Known bug that still needs fixing: UserDict entries appear twice in the candidate list. Note that this bug was introduced by someone else's script from upstream.
This commit is contained in:
parent
735546e95e
commit
eab615aef3
|
@ -32,9 +32,6 @@
|
||||||
// latest walked path (trellis) using the Viterbi algorithm
|
// latest walked path (trellis) using the Viterbi algorithm
|
||||||
std::vector<Taiyan::Gramambular::NodeAnchor> _walkedNodes;
|
std::vector<Taiyan::Gramambular::NodeAnchor> _walkedNodes;
|
||||||
|
|
||||||
// user override model
|
|
||||||
vChewing::UserOverrideModel *_uom;
|
|
||||||
|
|
||||||
// the latest composing buffer that is updated to the foreground app
|
// the latest composing buffer that is updated to the foreground app
|
||||||
NSMutableString *_composingBuffer;
|
NSMutableString *_composingBuffer;
|
||||||
NSInteger _latestReadingCursor;
|
NSInteger _latestReadingCursor;
|
||||||
|
|
|
@ -112,7 +112,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
// create the lattice builder
|
// create the lattice builder
|
||||||
_languageModel = [LanguageModelManager languageModelCoreCHT];
|
_languageModel = [LanguageModelManager languageModelCoreCHT];
|
||||||
_languageModel->setPhraseReplacementEnabled(Preferences.phraseReplacementEnabled);
|
_languageModel->setPhraseReplacementEnabled(Preferences.phraseReplacementEnabled);
|
||||||
_userOverrideModel = [LanguageModelManager userOverrideModel];
|
_userOverrideModel = [LanguageModelManager userOverrideModelCHT];
|
||||||
|
|
||||||
_builder = new BlockReadingBuilder(_languageModel);
|
_builder = new BlockReadingBuilder(_languageModel);
|
||||||
|
|
||||||
|
@ -243,13 +243,16 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
{
|
{
|
||||||
NSString *newInputMode;
|
NSString *newInputMode;
|
||||||
vChewingLM *newLanguageModel;
|
vChewingLM *newLanguageModel;
|
||||||
|
UserOverrideModel *newUserOverrideModel;
|
||||||
|
|
||||||
if ([value isKindOfClass:[NSString class]] && [value isEqual:kBopomofoModeIdentifierCHS]) {
|
if ([value isKindOfClass:[NSString class]] && [value isEqual:kBopomofoModeIdentifierCHS]) {
|
||||||
newInputMode = kBopomofoModeIdentifierCHS;
|
newInputMode = kBopomofoModeIdentifierCHS;
|
||||||
newLanguageModel = [LanguageModelManager languageModelCoreCHS];
|
newLanguageModel = [LanguageModelManager languageModelCoreCHS];
|
||||||
|
newUserOverrideModel = [LanguageModelManager userOverrideModelCHS];
|
||||||
} else {
|
} else {
|
||||||
newInputMode = kBopomofoModeIdentifierCHT;
|
newInputMode = kBopomofoModeIdentifierCHT;
|
||||||
newLanguageModel = [LanguageModelManager languageModelCoreCHT];
|
newLanguageModel = [LanguageModelManager languageModelCoreCHT];
|
||||||
|
newUserOverrideModel = [LanguageModelManager userOverrideModelCHT];
|
||||||
}
|
}
|
||||||
|
|
||||||
// 自 Preferences 模組讀入自訂語彙置換功能開關狀態。
|
// 自 Preferences 模組讀入自訂語彙置換功能開關狀態。
|
||||||
|
@ -265,6 +268,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
||||||
|
|
||||||
_inputMode = newInputMode;
|
_inputMode = newInputMode;
|
||||||
_languageModel = newLanguageModel;
|
_languageModel = newLanguageModel;
|
||||||
|
_userOverrideModel = newUserOverrideModel;
|
||||||
|
|
||||||
if (!_bpmfReadingBuffer->isEmpty()) {
|
if (!_bpmfReadingBuffer->isEmpty()) {
|
||||||
_bpmfReadingBuffer->clear();
|
_bpmfReadingBuffer->clear();
|
||||||
|
@ -1413,8 +1417,12 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
|
||||||
if (![currentMarkedPhrase length]) {
|
if (![currentMarkedPhrase length]) {
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
|
|
||||||
return [LanguageModelManager writeUserPhrase:currentMarkedPhrase];
|
if (_inputMode == kBopomofoModeIdentifierCHT) {
|
||||||
|
return [LanguageModelManager writeUserPhraseCHT:currentMarkedPhrase];
|
||||||
|
} else {
|
||||||
|
return [LanguageModelManager writeUserPhraseCHS:currentMarkedPhrase];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void)_showCurrentMarkedTextTooltipWithClient:(id)client
|
- (void)_showCurrentMarkedTextTooltipWithClient:(id)client
|
||||||
|
@ -1499,9 +1507,15 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
|
||||||
|
|
||||||
- (void)togglePhraseReplacementEnabled:(id)sender
|
- (void)togglePhraseReplacementEnabled:(id)sender
|
||||||
{
|
{
|
||||||
BOOL enabled = [Preferences togglePhraseReplacementEnabled];
|
if (_inputMode == kBopomofoModeIdentifierCHT) {
|
||||||
vChewingLM *lm = [LanguageModelManager languageModelCoreCHT];
|
BOOL enabled = [Preferences togglePhraseReplacementEnabled];
|
||||||
lm->setPhraseReplacementEnabled(enabled);
|
vChewingLM *lm = [LanguageModelManager languageModelCoreCHT];
|
||||||
|
lm->setPhraseReplacementEnabled(enabled);
|
||||||
|
} else {
|
||||||
|
BOOL enabled = [Preferences togglePhraseReplacementEnabled];
|
||||||
|
vChewingLM *lm = [LanguageModelManager languageModelCoreCHS];
|
||||||
|
lm->setPhraseReplacementEnabled(enabled);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void)checkForUpdate:(id)sender
|
- (void)checkForUpdate:(id)sender
|
||||||
|
@ -1531,22 +1545,29 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
|
||||||
|
|
||||||
- (void)openUserPhrases:(id)sender
|
- (void)openUserPhrases:(id)sender
|
||||||
{
|
{
|
||||||
[self _openUserFile:[LanguageModelManager userPhrasesDataPathBopomofo]];
|
if (_inputMode == kBopomofoModeIdentifierCHT) {
|
||||||
}
|
[self _openUserFile:[LanguageModelManager userPhrasesDataPathCHT]];
|
||||||
|
} else {
|
||||||
- (void)openExcludedPhrasesSimpBopomofo:(id)sender
|
[self _openUserFile:[LanguageModelManager userPhrasesDataPathCHS]];
|
||||||
{
|
}
|
||||||
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathSimpBopomofo]];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void)openExcludedPhrases:(id)sender
|
- (void)openExcludedPhrases:(id)sender
|
||||||
{
|
{
|
||||||
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathBopomofo]];
|
if (_inputMode == kBopomofoModeIdentifierCHT) {
|
||||||
|
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathCHT]];
|
||||||
|
} else {
|
||||||
|
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathCHS]];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void)openPhraseReplacement:(id)sender
|
- (void)openPhraseReplacement:(id)sender
|
||||||
{
|
{
|
||||||
[self _openUserFile:[LanguageModelManager phraseReplacementDataPathBopomofo]];
|
if (_inputMode == kBopomofoModeIdentifierCHT) {
|
||||||
|
[self _openUserFile:[LanguageModelManager phraseReplacementDataPathCHT]];
|
||||||
|
} else {
|
||||||
|
[self _openUserFile:[LanguageModelManager phraseReplacementDataPathCHS]];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
- (void)reloadUserPhrases:(id)sender
|
- (void)reloadUserPhrases:(id)sender
|
||||||
|
|
|
@ -19,16 +19,20 @@ NS_ASSUME_NONNULL_BEGIN
|
||||||
+ (void)loadUserPhrases;
|
+ (void)loadUserPhrases;
|
||||||
+ (void)loadUserPhraseReplacement;
|
+ (void)loadUserPhraseReplacement;
|
||||||
+ (BOOL)checkIfUserLanguageModelFilesExist;
|
+ (BOOL)checkIfUserLanguageModelFilesExist;
|
||||||
+ (BOOL)writeUserPhrase:(NSString *)userPhrase;
|
+ (BOOL)writeUserPhraseCHT:(NSString *)userPhraseCHT;
|
||||||
|
+ (BOOL)writeUserPhraseCHS:(NSString *)userPhraseCHS;
|
||||||
|
|
||||||
@property (class, readonly, nonatomic) NSString *dataFolderPath;
|
@property (class, readonly, nonatomic) NSString *dataFolderPath;
|
||||||
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathBopomofo;
|
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathCHT;
|
||||||
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathBopomofo;
|
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathCHS;
|
||||||
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathSimpBopomofo;
|
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathCHT;
|
||||||
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathBopomofo;
|
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathCHS;
|
||||||
|
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathCHT;
|
||||||
|
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathCHS;
|
||||||
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHT;
|
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHT;
|
||||||
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHS;
|
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHS;
|
||||||
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModel;
|
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT;
|
||||||
|
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS;
|
||||||
@end
|
@end
|
||||||
|
|
||||||
NS_ASSUME_NONNULL_END
|
NS_ASSUME_NONNULL_END
|
||||||
|
|
|
@ -23,7 +23,8 @@ static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
|
||||||
|
|
||||||
vChewingLM glanguageModelCoreCHT;
|
vChewingLM glanguageModelCoreCHT;
|
||||||
vChewingLM glanguageModelCoreCHS;
|
vChewingLM glanguageModelCoreCHS;
|
||||||
UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
UserOverrideModel gUserOverrideModelCHS(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||||
|
UserOverrideModel gUserOverrideModelCHT(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||||
|
|
||||||
@implementation LanguageModelManager
|
@implementation LanguageModelManager
|
||||||
|
|
||||||
|
@ -42,13 +43,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
|
|
||||||
+ (void)loadUserPhrases
|
+ (void)loadUserPhrases
|
||||||
{
|
{
|
||||||
glanguageModelCoreCHT.loadUserPhrases([[self userPhrasesDataPathBopomofo] UTF8String], [[self excludedPhrasesDataPathBopomofo] UTF8String]);
|
glanguageModelCoreCHT.loadUserPhrases([[self userPhrasesDataPathCHT] UTF8String], [[self excludedPhrasesDataPathCHT] UTF8String]);
|
||||||
glanguageModelCoreCHS.loadUserPhrases(NULL, [[self excludedPhrasesDataPathSimpBopomofo] UTF8String]);
|
glanguageModelCoreCHS.loadUserPhrases([[self userPhrasesDataPathCHS] UTF8String], [[self excludedPhrasesDataPathCHS] UTF8String]);
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (void)loadUserPhraseReplacement
|
+ (void)loadUserPhraseReplacement
|
||||||
{
|
{
|
||||||
glanguageModelCoreCHT.loadPhraseReplacementMap([[self phraseReplacementDataPathBopomofo] UTF8String]);
|
glanguageModelCoreCHT.loadPhraseReplacementMap([[self phraseReplacementDataPathCHT] UTF8String]);
|
||||||
|
glanguageModelCoreCHS.loadPhraseReplacementMap([[self phraseReplacementDataPathCHS] UTF8String]);
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (BOOL)checkIfUserDataFolderExists
|
+ (BOOL)checkIfUserDataFolderExists
|
||||||
|
@ -93,29 +95,35 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
if (![self checkIfUserDataFolderExists]) {
|
if (![self checkIfUserDataFolderExists]) {
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
if (![self checkIfFileExist:[self userPhrasesDataPathBopomofo]]) {
|
if (![self checkIfFileExist:[self userPhrasesDataPathCHT]]) {
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
if (![self checkIfFileExist:[self excludedPhrasesDataPathBopomofo]]) {
|
if (![self checkIfFileExist:[self excludedPhrasesDataPathCHT]]) {
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
if (![self checkIfFileExist:[self excludedPhrasesDataPathSimpBopomofo]]) {
|
if (![self checkIfFileExist:[self phraseReplacementDataPathCHT]]) {
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
if (![self checkIfFileExist:[self phraseReplacementDataPathBopomofo]]) {
|
if (![self checkIfFileExist:[self userPhrasesDataPathCHS]]) {
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
return YES;
|
if (![self checkIfFileExist:[self excludedPhrasesDataPathCHS]]) {
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
if (![self checkIfFileExist:[self phraseReplacementDataPathCHS]]) {
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
return YES;
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
|
+ (BOOL)writeUserPhraseCHT:(NSString *)userPhrase
|
||||||
{
|
{
|
||||||
if (![self checkIfUserLanguageModelFilesExist]) {
|
if (![self checkIfUserLanguageModelFilesExist]) {
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOL shuoldAddLineBreakAtFront = NO;
|
BOOL shuoldAddLineBreakAtFront = NO;
|
||||||
NSString *path = [self userPhrasesDataPathBopomofo];
|
NSString *path = [self userPhrasesDataPathCHT];
|
||||||
|
|
||||||
if ([[NSFileManager defaultManager] fileExistsAtPath:path]) {
|
if ([[NSFileManager defaultManager] fileExistsAtPath:path]) {
|
||||||
NSError *error = nil;
|
NSError *error = nil;
|
||||||
|
@ -155,6 +163,53 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
return YES;
|
return YES;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
+ (BOOL)writeUserPhraseCHS:(NSString *)userPhrase
|
||||||
|
{
|
||||||
|
if (![self checkIfUserLanguageModelFilesExist]) {
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
|
||||||
|
BOOL shuoldAddLineBreakAtFront = NO;
|
||||||
|
NSString *path = [self userPhrasesDataPathCHS];
|
||||||
|
|
||||||
|
if ([[NSFileManager defaultManager] fileExistsAtPath:path]) {
|
||||||
|
NSError *error = nil;
|
||||||
|
NSDictionary *attr = [[NSFileManager defaultManager] attributesOfItemAtPath:path error:&error];
|
||||||
|
unsigned long long fileSize = [attr fileSize];
|
||||||
|
if (!error && fileSize) {
|
||||||
|
NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path];
|
||||||
|
if (readFile) {
|
||||||
|
[readFile seekToFileOffset:fileSize - 1];
|
||||||
|
NSData *data = [readFile readDataToEndOfFile];
|
||||||
|
const void *bytes = [data bytes];
|
||||||
|
if (*(char *)bytes != '\n') {
|
||||||
|
shuoldAddLineBreakAtFront = YES;
|
||||||
|
}
|
||||||
|
[readFile closeFile];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
NSMutableString *currentMarkedPhrase = [NSMutableString string];
|
||||||
|
if (shuoldAddLineBreakAtFront) {
|
||||||
|
[currentMarkedPhrase appendString:@"\n"];
|
||||||
|
}
|
||||||
|
[currentMarkedPhrase appendString:userPhrase];
|
||||||
|
[currentMarkedPhrase appendString:@"\n"];
|
||||||
|
|
||||||
|
NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path];
|
||||||
|
if (!writeFile) {
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
[writeFile seekToEndOfFile];
|
||||||
|
NSData *data = [currentMarkedPhrase dataUsingEncoding:NSUTF8StringEncoding];
|
||||||
|
[writeFile writeData:data];
|
||||||
|
[writeFile closeFile];
|
||||||
|
|
||||||
|
[self loadUserPhrases];
|
||||||
|
return YES;
|
||||||
|
}
|
||||||
|
|
||||||
+ (NSString *)dataFolderPath
|
+ (NSString *)dataFolderPath
|
||||||
{
|
{
|
||||||
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
|
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
|
||||||
|
@ -163,24 +218,34 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
return userDictPath;
|
return userDictPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (NSString *)userPhrasesDataPathBopomofo
|
+ (NSString *)userPhrasesDataPathCHT
|
||||||
{
|
{
|
||||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"data-cht.txt"];
|
return [[self dataFolderPath] stringByAppendingPathComponent:@"userdata-cht.txt"];
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (NSString *)excludedPhrasesDataPathBopomofo
|
+ (NSString *)userPhrasesDataPathCHS
|
||||||
{
|
{
|
||||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases.txt"];
|
return [[self dataFolderPath] stringByAppendingPathComponent:@"userdata-chs.txt"];
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (NSString *)excludedPhrasesDataPathSimpBopomofo
|
+ (NSString *)excludedPhrasesDataPathCHT
|
||||||
{
|
{
|
||||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"];
|
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-cht.txt"];
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (NSString *)phraseReplacementDataPathBopomofo
|
+ (NSString *)excludedPhrasesDataPathCHS
|
||||||
{
|
{
|
||||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"phrases-replacement.txt"];
|
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-chs.txt"];
|
||||||
|
}
|
||||||
|
|
||||||
|
+ (NSString *)phraseReplacementDataPathCHT
|
||||||
|
{
|
||||||
|
return [[self dataFolderPath] stringByAppendingPathComponent:@"phrases-replacement-cht.txt"];
|
||||||
|
}
|
||||||
|
|
||||||
|
+ (NSString *)phraseReplacementDataPathCHS
|
||||||
|
{
|
||||||
|
return [[self dataFolderPath] stringByAppendingPathComponent:@"phrases-replacement-chs.txt"];
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (vChewingLM *)languageModelCoreCHT
|
+ (vChewingLM *)languageModelCoreCHT
|
||||||
|
@ -193,9 +258,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
return &glanguageModelCoreCHS;
|
return &glanguageModelCoreCHS;
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (vChewing::UserOverrideModel *)userOverrideModel
|
+ (vChewing::UserOverrideModel *)userOverrideModelCHT
|
||||||
{
|
{
|
||||||
return &gUserOverrideModel;
|
return &gUserOverrideModelCHT;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ (vChewing::UserOverrideModel *)userOverrideModelCHS
|
||||||
|
{
|
||||||
|
return &gUserOverrideModelCHS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
Loading…
Reference in New Issue