Shiki: Attempt to separate the CHS / CHT language models (buggy).
- Known bug that still needs a fix: user dictionary (UserDict) entries appear twice in the candidate list. Note that this bug was introduced by someone else's script inherited from upstream.
This commit is contained in:
parent
735546e95e
commit
eab615aef3
|
@ -32,9 +32,6 @@
|
|||
// latest walked path (trellis) using the Viterbi algorithm
|
||||
std::vector<Taiyan::Gramambular::NodeAnchor> _walkedNodes;
|
||||
|
||||
// user override model
|
||||
vChewing::UserOverrideModel *_uom;
|
||||
|
||||
// the latest composing buffer that is updated to the foreground app
|
||||
NSMutableString *_composingBuffer;
|
||||
NSInteger _latestReadingCursor;
|
||||
|
|
|
@ -112,7 +112,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
|||
// create the lattice builder
|
||||
_languageModel = [LanguageModelManager languageModelCoreCHT];
|
||||
_languageModel->setPhraseReplacementEnabled(Preferences.phraseReplacementEnabled);
|
||||
_userOverrideModel = [LanguageModelManager userOverrideModel];
|
||||
_userOverrideModel = [LanguageModelManager userOverrideModelCHT];
|
||||
|
||||
_builder = new BlockReadingBuilder(_languageModel);
|
||||
|
||||
|
@ -243,13 +243,16 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
|||
{
|
||||
NSString *newInputMode;
|
||||
vChewingLM *newLanguageModel;
|
||||
UserOverrideModel *newUserOverrideModel;
|
||||
|
||||
if ([value isKindOfClass:[NSString class]] && [value isEqual:kBopomofoModeIdentifierCHS]) {
|
||||
newInputMode = kBopomofoModeIdentifierCHS;
|
||||
newLanguageModel = [LanguageModelManager languageModelCoreCHS];
|
||||
newUserOverrideModel = [LanguageModelManager userOverrideModelCHS];
|
||||
} else {
|
||||
newInputMode = kBopomofoModeIdentifierCHT;
|
||||
newLanguageModel = [LanguageModelManager languageModelCoreCHT];
|
||||
newUserOverrideModel = [LanguageModelManager userOverrideModelCHT];
|
||||
}
|
||||
|
||||
// 自 Preferences 模組讀入自訂語彙置換功能開關狀態。
|
||||
|
@ -265,6 +268,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
|
|||
|
||||
_inputMode = newInputMode;
|
||||
_languageModel = newLanguageModel;
|
||||
_userOverrideModel = newUserOverrideModel;
|
||||
|
||||
if (!_bpmfReadingBuffer->isEmpty()) {
|
||||
_bpmfReadingBuffer->clear();
|
||||
|
@ -1414,7 +1418,11 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
|
|||
return NO;
|
||||
}
|
||||
|
||||
return [LanguageModelManager writeUserPhrase:currentMarkedPhrase];
|
||||
if (_inputMode == kBopomofoModeIdentifierCHT) {
|
||||
return [LanguageModelManager writeUserPhraseCHT:currentMarkedPhrase];
|
||||
} else {
|
||||
return [LanguageModelManager writeUserPhraseCHS:currentMarkedPhrase];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)_showCurrentMarkedTextTooltipWithClient:(id)client
|
||||
|
@ -1499,9 +1507,15 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
|
|||
|
||||
/// Calls +[Preferences togglePhraseReplacementEnabled] and forwards the BOOL it
/// returns to the core language model that matches the current input mode.
/// @param sender The object that sent the action; not used here.
- (void)togglePhraseReplacementEnabled:(id)sender
{
    // The preference is toggled exactly once, whichever mode is active.
    BOOL enabled = [Preferences togglePhraseReplacementEnabled];

    // Compare string contents: `==` on NSString pointers only tests identity,
    // which is not a reliable equality check for strings.
    BOOL isCHT = [_inputMode isEqualToString:kBopomofoModeIdentifierCHT];

    // Only the language model of the active mode is updated; any other value
    // of _inputMode (including nil) falls back to the CHS model, as before.
    vChewingLM *lm = isCHT ? [LanguageModelManager languageModelCoreCHT]
                           : [LanguageModelManager languageModelCoreCHS];
    lm->setPhraseReplacementEnabled(enabled);
}
|
||||
|
||||
- (void)checkForUpdate:(id)sender
|
||||
|
@ -1531,22 +1545,29 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
|
|||
|
||||
- (void)openUserPhrases:(id)sender
|
||||
{
|
||||
[self _openUserFile:[LanguageModelManager userPhrasesDataPathBopomofo]];
|
||||
if (_inputMode == kBopomofoModeIdentifierCHT) {
|
||||
[self _openUserFile:[LanguageModelManager userPhrasesDataPathCHT]];
|
||||
} else {
|
||||
[self _openUserFile:[LanguageModelManager userPhrasesDataPathCHS]];
|
||||
}
|
||||
|
||||
- (void)openExcludedPhrasesSimpBopomofo:(id)sender
|
||||
{
|
||||
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathSimpBopomofo]];
|
||||
}
|
||||
|
||||
- (void)openExcludedPhrases:(id)sender
|
||||
{
|
||||
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathBopomofo]];
|
||||
if (_inputMode == kBopomofoModeIdentifierCHT) {
|
||||
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathCHT]];
|
||||
} else {
|
||||
[self _openUserFile:[LanguageModelManager excludedPhrasesDataPathCHS]];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)openPhraseReplacement:(id)sender
|
||||
{
|
||||
[self _openUserFile:[LanguageModelManager phraseReplacementDataPathBopomofo]];
|
||||
if (_inputMode == kBopomofoModeIdentifierCHT) {
|
||||
[self _openUserFile:[LanguageModelManager phraseReplacementDataPathCHT]];
|
||||
} else {
|
||||
[self _openUserFile:[LanguageModelManager phraseReplacementDataPathCHS]];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)reloadUserPhrases:(id)sender
|
||||
|
|
|
@ -19,16 +19,20 @@ NS_ASSUME_NONNULL_BEGIN
|
|||
+ (void)loadUserPhrases;
|
||||
+ (void)loadUserPhraseReplacement;
|
||||
+ (BOOL)checkIfUserLanguageModelFilesExist;
|
||||
+ (BOOL)writeUserPhrase:(NSString *)userPhrase;
|
||||
+ (BOOL)writeUserPhraseCHT:(NSString *)userPhraseCHT;
|
||||
+ (BOOL)writeUserPhraseCHS:(NSString *)userPhraseCHS;
|
||||
|
||||
@property (class, readonly, nonatomic) NSString *dataFolderPath;
|
||||
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathBopomofo;
|
||||
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathBopomofo;
|
||||
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathSimpBopomofo;
|
||||
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathBopomofo;
|
||||
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathCHT;
|
||||
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathCHS;
|
||||
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathCHT;
|
||||
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathCHS;
|
||||
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathCHT;
|
||||
@property (class, readonly, nonatomic) NSString *phraseReplacementDataPathCHS;
|
||||
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHT;
|
||||
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelCoreCHS;
|
||||
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModel;
|
||||
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT;
|
||||
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS;
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
|
|
|
@ -23,7 +23,8 @@ static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
|
|||
|
||||
vChewingLM glanguageModelCoreCHT;
|
||||
vChewingLM glanguageModelCoreCHS;
|
||||
UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||
UserOverrideModel gUserOverrideModelCHS(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||
UserOverrideModel gUserOverrideModelCHT(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||
|
||||
@implementation LanguageModelManager
|
||||
|
||||
|
@ -42,13 +43,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
|||
|
||||
+ (void)loadUserPhrases
|
||||
{
|
||||
glanguageModelCoreCHT.loadUserPhrases([[self userPhrasesDataPathBopomofo] UTF8String], [[self excludedPhrasesDataPathBopomofo] UTF8String]);
|
||||
glanguageModelCoreCHS.loadUserPhrases(NULL, [[self excludedPhrasesDataPathSimpBopomofo] UTF8String]);
|
||||
glanguageModelCoreCHT.loadUserPhrases([[self userPhrasesDataPathCHT] UTF8String], [[self excludedPhrasesDataPathCHT] UTF8String]);
|
||||
glanguageModelCoreCHS.loadUserPhrases([[self userPhrasesDataPathCHS] UTF8String], [[self excludedPhrasesDataPathCHS] UTF8String]);
|
||||
}
|
||||
|
||||
+ (void)loadUserPhraseReplacement
|
||||
{
|
||||
glanguageModelCoreCHT.loadPhraseReplacementMap([[self phraseReplacementDataPathBopomofo] UTF8String]);
|
||||
glanguageModelCoreCHT.loadPhraseReplacementMap([[self phraseReplacementDataPathCHT] UTF8String]);
|
||||
glanguageModelCoreCHS.loadPhraseReplacementMap([[self phraseReplacementDataPathCHS] UTF8String]);
|
||||
}
|
||||
|
||||
+ (BOOL)checkIfUserDataFolderExists
|
||||
|
@ -93,29 +95,82 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
|||
if (![self checkIfUserDataFolderExists]) {
|
||||
return NO;
|
||||
}
|
||||
if (![self checkIfFileExist:[self userPhrasesDataPathBopomofo]]) {
|
||||
if (![self checkIfFileExist:[self userPhrasesDataPathCHT]]) {
|
||||
return NO;
|
||||
}
|
||||
if (![self checkIfFileExist:[self excludedPhrasesDataPathBopomofo]]) {
|
||||
if (![self checkIfFileExist:[self excludedPhrasesDataPathCHT]]) {
|
||||
return NO;
|
||||
}
|
||||
if (![self checkIfFileExist:[self excludedPhrasesDataPathSimpBopomofo]]) {
|
||||
if (![self checkIfFileExist:[self phraseReplacementDataPathCHT]]) {
|
||||
return NO;
|
||||
}
|
||||
if (![self checkIfFileExist:[self phraseReplacementDataPathBopomofo]]) {
|
||||
if (![self checkIfFileExist:[self userPhrasesDataPathCHS]]) {
|
||||
return NO;
|
||||
}
|
||||
if (![self checkIfFileExist:[self excludedPhrasesDataPathCHS]]) {
|
||||
return NO;
|
||||
}
|
||||
if (![self checkIfFileExist:[self phraseReplacementDataPathCHS]]) {
|
||||
return NO;
|
||||
}
|
||||
return YES;
|
||||
}
|
||||
|
||||
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
|
||||
+ (BOOL)writeUserPhraseCHT:(NSString *)userPhrase
|
||||
{
|
||||
if (![self checkIfUserLanguageModelFilesExist]) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
BOOL shuoldAddLineBreakAtFront = NO;
|
||||
NSString *path = [self userPhrasesDataPathBopomofo];
|
||||
NSString *path = [self userPhrasesDataPathCHT];
|
||||
|
||||
if ([[NSFileManager defaultManager] fileExistsAtPath:path]) {
|
||||
NSError *error = nil;
|
||||
NSDictionary *attr = [[NSFileManager defaultManager] attributesOfItemAtPath:path error:&error];
|
||||
unsigned long long fileSize = [attr fileSize];
|
||||
if (!error && fileSize) {
|
||||
NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path];
|
||||
if (readFile) {
|
||||
[readFile seekToFileOffset:fileSize - 1];
|
||||
NSData *data = [readFile readDataToEndOfFile];
|
||||
const void *bytes = [data bytes];
|
||||
if (*(char *)bytes != '\n') {
|
||||
shuoldAddLineBreakAtFront = YES;
|
||||
}
|
||||
[readFile closeFile];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NSMutableString *currentMarkedPhrase = [NSMutableString string];
|
||||
if (shuoldAddLineBreakAtFront) {
|
||||
[currentMarkedPhrase appendString:@"\n"];
|
||||
}
|
||||
[currentMarkedPhrase appendString:userPhrase];
|
||||
[currentMarkedPhrase appendString:@"\n"];
|
||||
|
||||
NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path];
|
||||
if (!writeFile) {
|
||||
return NO;
|
||||
}
|
||||
[writeFile seekToEndOfFile];
|
||||
NSData *data = [currentMarkedPhrase dataUsingEncoding:NSUTF8StringEncoding];
|
||||
[writeFile writeData:data];
|
||||
[writeFile closeFile];
|
||||
|
||||
[self loadUserPhrases];
|
||||
return YES;
|
||||
}
|
||||
|
||||
+ (BOOL)writeUserPhraseCHS:(NSString *)userPhrase
|
||||
{
|
||||
if (![self checkIfUserLanguageModelFilesExist]) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
BOOL shuoldAddLineBreakAtFront = NO;
|
||||
NSString *path = [self userPhrasesDataPathCHS];
|
||||
|
||||
if ([[NSFileManager defaultManager] fileExistsAtPath:path]) {
|
||||
NSError *error = nil;
|
||||
|
@ -163,24 +218,34 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
|||
return userDictPath;
|
||||
}
|
||||
|
||||
+ (NSString *)userPhrasesDataPathBopomofo
|
||||
+ (NSString *)userPhrasesDataPathCHT
|
||||
{
|
||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"data-cht.txt"];
|
||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"userdata-cht.txt"];
|
||||
}
|
||||
|
||||
+ (NSString *)excludedPhrasesDataPathBopomofo
|
||||
+ (NSString *)userPhrasesDataPathCHS
|
||||
{
|
||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases.txt"];
|
||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"userdata-chs.txt"];
|
||||
}
|
||||
|
||||
+ (NSString *)excludedPhrasesDataPathSimpBopomofo
|
||||
+ (NSString *)excludedPhrasesDataPathCHT
|
||||
{
|
||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"];
|
||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-cht.txt"];
|
||||
}
|
||||
|
||||
+ (NSString *)phraseReplacementDataPathBopomofo
|
||||
+ (NSString *)excludedPhrasesDataPathCHS
|
||||
{
|
||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"phrases-replacement.txt"];
|
||||
return [[self dataFolderPath] stringByAppendingPathComponent:@"exclude-phrases-chs.txt"];
|
||||
}
|
||||
|
||||
/// Returns the path formed by appending "phrases-replacement-cht.txt" to the
/// value of +dataFolderPath.
+ (NSString *)phraseReplacementDataPathCHT
{
    NSString *folder = [self dataFolderPath];
    return [folder stringByAppendingPathComponent:@"phrases-replacement-cht.txt"];
}
|
||||
|
||||
/// Returns the path formed by appending "phrases-replacement-chs.txt" to the
/// value of +dataFolderPath.
+ (NSString *)phraseReplacementDataPathCHS
{
    NSString *folder = [self dataFolderPath];
    return [folder stringByAppendingPathComponent:@"phrases-replacement-chs.txt"];
}
|
||||
|
||||
+ (vChewingLM *)languageModelCoreCHT
|
||||
|
@ -193,9 +258,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
|||
return &glanguageModelCoreCHS;
|
||||
}
|
||||
|
||||
+ (vChewing::UserOverrideModel *)userOverrideModel
|
||||
+ (vChewing::UserOverrideModel *)userOverrideModelCHT
|
||||
{
|
||||
return &gUserOverrideModel;
|
||||
return &gUserOverrideModelCHT;
|
||||
}
|
||||
|
||||
/// Returns a pointer to the gUserOverrideModelCHS instance defined in this file.
+ (vChewing::UserOverrideModel *)userOverrideModelCHS
{
    vChewing::UserOverrideModel *model = &gUserOverrideModelCHS;
    return model;
}
|
||||
|
||||
@end
|
||||
|
|
Loading…
Reference in New Issue