Zonble: mgrLanguageModel // Including Custom Excluded Phrases

- Retiring Hiraku's PR01 due to upstream changes.
This commit is contained in:
ShikiSuen 2022-01-15 12:14:13 +08:00
parent d5d42b77f1
commit dc19c19521
19 changed files with 769 additions and 330 deletions

View File

@ -48,7 +48,6 @@ private let kTimeoutInterval: TimeInterval = 60.0
@objc (AppDelegate)
class AppDelegate: NSObject, NSApplicationDelegate,
NSUserNotificationCenterDelegate, // Hiraku PR#1
NonModalAlertWindowControllerDelegate {
@IBOutlet weak var window: NSWindow?
@ -66,15 +65,14 @@ class AppDelegate: NSObject, NSApplicationDelegate,
}
func applicationDidFinishLaunching(_ notification: Notification) {
LTLoadLanguageModel()
LTLoadUserLanguageModelFile();
LanguageModelManager.loadDataModels()
LanguageModelManager.loadUserPhrasesModel()
if UserDefaults.standard.object(forKey: kCheckUpdateAutomatically) == nil {
UserDefaults.standard.set(false, forKey: kCheckUpdateAutomatically)
UserDefaults.standard.synchronize()
}
checkForUpdate()
NSUserNotificationCenter.default.delegate = self // Hiraku PR#1
}
@objc func showPreferences() {
@ -238,11 +236,6 @@ class AppDelegate: NSObject, NSApplicationDelegate,
updateNextStepURL = nil
}
// Hiraku PR#1
func userNotificationCenter(_ center: NSUserNotificationCenter, shouldPresent notification: NSUserNotification) -> Bool {
return true
}
// New About Window
@IBAction func about(_ sender: Any) {
(NSApp.delegate as? AppDelegate)?.showAbout()

View File

@ -38,7 +38,7 @@ namespace Formosa {
class BlockReadingBuilder {
public:
BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM);
BlockReadingBuilder(LanguageModel *inLM);
void clear();
size_t length() const;
@ -73,13 +73,11 @@ namespace Formosa {
Grid m_grid;
LanguageModel *m_LM;
LanguageModel *m_UserPhraseLM;
string m_joinSeparator;
};
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM, LanguageModel *inUserPhraseLM)
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM)
: m_LM(inLM)
, m_UserPhraseLM(inUserPhraseLM)
, m_cursorIndex(0)
, m_markerCursorIndex(SIZE_MAX)
{
@ -222,19 +220,7 @@ namespace Formosa {
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
vector<Unigram> unigrams;
if (m_UserPhraseLM != NULL) {
if (m_UserPhraseLM->hasUnigramsForKey(combinedReading)) {
vector<Unigram> userUnigrams = m_UserPhraseLM->unigramsForKeys(combinedReading);
unigrams.insert(unigrams.end(), userUnigrams.begin(), userUnigrams.end());
}
}
if (m_LM->hasUnigramsForKey(combinedReading)) {
vector<Unigram> globalUnigrams = m_LM->unigramsForKeys(combinedReading);
unigrams.insert(unigrams.end(), globalUnigrams.begin(), globalUnigrams.end());
}
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) {
Node n(combinedReading, unigrams, vector<Bigram>());

View File

@ -42,7 +42,7 @@ namespace Formosa {
virtual ~LanguageModel() {}
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0;
virtual const vector<Unigram> unigramsForKeys(const string &key) = 0;
virtual const vector<Unigram> unigramsForKey(const string &key) = 0;
virtual bool hasUnigramsForKey(const string& key) = 0;
};
}

View File

@ -302,7 +302,7 @@ const vector<Bigram> FastLM::bigramsForKeys(const string& preceedingKey, const s
return vector<Bigram>();
}
const vector<Unigram> FastLM::unigramsForKeys(const string& key)
const vector<Unigram> FastLM::unigramsForKey(const string& key)
{
vector<Unigram> v;
map<const char *, vector<Row> >::const_iterator i = keyRowMap.find(key.c_str());

View File

@ -50,7 +50,7 @@ namespace Formosa {
void dump();
virtual const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
virtual const vector<Unigram> unigramsForKeys(const string& key);
virtual const vector<Unigram> unigramsForKey(const string& key);
virtual bool hasUnigramsForKey(const string& key);
protected:

View File

@ -4,6 +4,11 @@
// Copyright (c) 2021-2022 The vChewing Project.
// Copyright (c) 2011-2022 The OpenVanilla Project.
//
// Contributors:
// Weizhong Yang (@zonble) @ OpenVanilla
// Hiraku Wang (@hirakujira) @ vChewing
// Shiki Suen (@ShikiSuen) @ vChewing
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without

View File

@ -4,6 +4,11 @@
// Copyright (c) 2021-2022 The vChewing Project.
// Copyright (c) 2011-2022 The OpenVanilla Project.
//
// Contributors:
// Weizhong Yang (@zonble) @ OpenVanilla
// Hiraku Wang (@hirakujira) @ vChewing
// Shiki Suen (@ShikiSuen) @ vChewing
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without

View File

@ -0,0 +1,134 @@
//
// vChewingLM.cpp
//
// Copyright (c) 2021-2022 The vChewing Project.
// Copyright (c) 2011-2022 The OpenVanilla Project.
//
// Contributors:
// Weizhong Yang (@zonble) @ OpenVanilla
// Hiraku Wang (@hirakujira) @ vChewing
// Shiki Suen (@ShikiSuen) @ vChewing
//
// Based on the Syrup Project and the Formosana Library
// by Lukhnos Liu (@lukhnos).
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#include "vChewingLM.h"
#include <algorithm>
#include <iterator>
#include <unordered_set>
using namespace vChewing;
// Constructs the composite model. The constructor does no work of its own:
// the three FastLM members are simply default-constructed, and data files are
// only attached later through loadLanguageModel() / loadUserPhrases().
vChewingLM::vChewingLM() = default;
// Releases the three underlying models by calling close() on each of them,
// in the same order in which they are declared in the class.
vChewingLM::~vChewingLM()
{
    m_languageModel.close();
    m_userPhrases.close();
    m_excludedPhrases.close();
}
// (Re)loads the main language model from `languageModelDataPath`.
//
// A null path is ignored and leaves the currently loaded model untouched.
// Otherwise the model is closed first and then opened on the new path.
void vChewingLM::loadLanguageModel(const char* languageModelDataPath)
{
    if (!languageModelDataPath) {
        return;
    }

    m_languageModel.close();
    m_languageModel.open(languageModelDataPath);
}
// (Re)loads the user-phrase model and the excluded-phrase model.
//
// The two paths are handled independently: a null path leaves the
// corresponding model as it is, a non-null path closes that model and opens
// it again on the given file.
void vChewingLM::loadUserPhrases(const char* userPhrasesDataPath,
                                 const char* excludedPhrasesDataPath)
{
    if (userPhrasesDataPath != nullptr) {
        m_userPhrases.close();
        m_userPhrases.open(userPhrasesDataPath);
    }

    if (excludedPhrasesDataPath != nullptr) {
        m_excludedPhrases.close();
        m_excludedPhrases.open(excludedPhrasesDataPath);
    }
}
// This model does not provide bigrams: the result is always an empty vector,
// regardless of the two keys passed in.
const vector<Bigram> vChewingLM::bigramsForKeys(const string& preceedingKey, const string& key)
{
    vector<Bigram> noBigrams;
    return noBigrams;
}
// Returns the merged unigram list for `key`.
//
// The result is assembled as follows:
//   1. every value listed under `key` in the excluded-phrase model is
//      dropped from both other sources;
//   2. the remaining user-phrase unigrams come first, in their stored order;
//   3. the main-model unigrams follow, skipping any whose value is excluded
//      or already supplied by a user phrase.
const vector<Unigram> vChewingLM::unigramsForKey(const string& key)
{
    // Hash sets keep every membership test constant-time, so the filtering
    // below does not degrade into an O(n*m) pairwise comparison.
    unordered_set<string> blockedValues;
    unordered_set<string> acceptedUserValues;

    if (m_excludedPhrases.hasUnigramsForKey(key)) {
        const vector<Unigram> blockedEntries = m_excludedPhrases.unigramsForKey(key);
        for (const Unigram& entry : blockedEntries) {
            blockedValues.insert(entry.keyValue.value);
        }
    }

    // User phrases are appended first so that they lead the merged list.
    vector<Unigram> merged;
    if (m_userPhrases.hasUnigramsForKey(key)) {
        const vector<Unigram> userEntries = m_userPhrases.unigramsForKey(key);
        for (const Unigram& entry : userEntries) {
            if (blockedValues.count(entry.keyValue.value) != 0) {
                continue;
            }
            merged.push_back(entry);
            acceptedUserValues.insert(entry.keyValue.value);
        }
    }

    if (m_languageModel.hasUnigramsForKey(key)) {
        const vector<Unigram> builtInEntries = m_languageModel.unigramsForKey(key);
        for (const Unigram& entry : builtInEntries) {
            const bool isBlocked = blockedValues.count(entry.keyValue.value) != 0;
            const bool isShadowedByUser = acceptedUserValues.count(entry.keyValue.value) != 0;
            if (!isBlocked && !isShadowedByUser) {
                merged.push_back(entry);
            }
        }
    }

    return merged;
}
// Tells whether unigramsForKey(key) would return at least one unigram.
//
// When the excluded-phrase model has entries for `key`, the exclusions may
// remove every candidate, so the merged list has to be built and inspected.
// Otherwise it is enough that either the user phrases or the main model know
// the key.
bool vChewingLM::hasUnigramsForKey(const string& key)
{
    const bool keyHasExclusions = m_excludedPhrases.hasUnigramsForKey(key);
    if (keyHasExclusions) {
        return !unigramsForKey(key).empty();
    }

    return m_userPhrases.hasUnigramsForKey(key) ||
           m_languageModel.hasUnigramsForKey(key);
}

View File

@ -0,0 +1,67 @@
//
// vChewingLM.h
//
// Copyright (c) 2021-2022 The vChewing Project.
// Copyright (c) 2011-2022 The OpenVanilla Project.
//
// Contributors:
// Weizhong Yang (@zonble) @ OpenVanilla
// Hiraku Wang (@hirakujira) @ vChewing
// Shiki Suen (@ShikiSuen) @ vChewing
//
// Based on the Syrup Project and the Formosana Library
// by Lukhnos Liu (@lukhnos).
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#ifndef VCHEWINGLM_H
#define VCHEWINGLM_H
#include <stdio.h>
#include "FastLM.h"
namespace vChewing {
using namespace Formosa::Gramambular;
class vChewingLM : public LanguageModel {
public:
vChewingLM();
~vChewingLM();
void loadLanguageModel(const char* languageModelDataPath);
void loadUserPhrases(const char* m_userPhrasesDataPath,
const char* m_excludedPhrasesDataPath);
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
const vector<Unigram> unigramsForKey(const string& key);
bool hasUnigramsForKey(const string& key);
protected:
FastLM m_languageModel;
FastLM m_userPhrases;
FastLM m_excludedPhrases;
};
};
#endif

View File

@ -40,7 +40,7 @@
#import <InputMethodKit/InputMethodKit.h>
#import "Mandarin.h"
#import "Gramambular.h"
#import "FastLM.h"
#import "vChewingLM.h"
#import "UserOverrideModel.h"
#import "frmAboutWindow.h"
@ -51,8 +51,10 @@
Formosa::Mandarin::BopomofoReadingBuffer* _bpmfReadingBuffer;
// language model
Formosa::Gramambular::FastLM *_languageModel;
Formosa::Gramambular::FastLM *_userPhrasesModel;
vChewing::vChewingLM *_languageModel;
// user override model
vChewing::UserOverrideModel *_userOverrideModel;
// the grid (lattice) builder for the unigrams (and bigrams)
Formosa::Gramambular::BlockReadingBuilder* _builder;
@ -81,12 +83,5 @@
// if Chinese conversion is enabled
BOOL _chineseConversionEnabled;
// if Chinese conversion status has been changed
BOOL _previousChineseConversionEnabledStatus;
}
@end
// the shared language model object
extern "C" void LTLoadLanguageModel();
extern "C" void LTLoadUserLanguageModelFile();

View File

@ -42,19 +42,20 @@
#import <set>
#import "OVStringHelper.h"
#import "OVUTF8Helper.h"
#import "LanguageModelManager.h"
#import "vChewing-Swift.h"
//@import SwiftUI;
@import OpenCC;
// C++ namespace usages
using namespace std;
using namespace Formosa::Mandarin;
using namespace Formosa::Gramambular;
using namespace vChewing;
using namespace OpenVanilla;
// default, min and max candidate list text size
static const NSInteger kDefaultCandidateListTextSize = 18;
static const NSInteger kDefaultCandidateListTextSize = 16;
static const NSInteger kMinKeyLabelSize = 10;
static const NSInteger kMinCandidateListTextSize = 12;
static const NSInteger kMaxCandidateListTextSize = 196;
@ -113,76 +114,6 @@ VTCandidateController *gCurrentCandidateController = nil;
static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot";
#endif
// shared language model object that stores our phrase-term probability database
FastLM gLanguageModelCHT;
FastLM gLanguageModelCHS;
FastLM gUserPhraseLanguageModelCHT;
FastLM gUserPhraseLanguageModelCHS;
static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
vChewing::UserOverrideModel gUserOverrideModelCHT(kUserOverrideModelCapacity, kObservedOverrideHalflife);
vChewing::UserOverrideModel gUserOverrideModelCHS(kUserOverrideModelCapacity, kObservedOverrideHalflife);
static NSString *LTUserDataFolderPath()
{
NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDirectory, YES);
NSString *appSupportPath = [paths objectAtIndex:0];
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"];
return userDictPath;
}
static NSString *LTUserPhrasesDataPathCHT()
{
return [LTUserDataFolderPath() stringByAppendingPathComponent:@"userdata-cht.txt"];
}
static NSString *LTUserPhrasesDataPathCHS()
{
return [LTUserDataFolderPath() stringByAppendingPathComponent:@"userdata-chs.txt"];
}
static BOOL LTCheckIfUserLanguageModelFileExists() {
NSString *folderPath = LTUserDataFolderPath();
BOOL isFolder = NO;
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
if (folderExist && !isFolder) {
NSError *error = nil;
[[NSFileManager defaultManager] removeItemAtPath:folderPath error:&error];
if (error) {
NSLog(@"Failed to remove folder %@", error);
return NO;
}
folderExist = NO;
}
if (!folderExist) {
NSError *error = nil;
[[NSFileManager defaultManager] createDirectoryAtPath:folderPath withIntermediateDirectories:YES attributes:nil error:&error];
if (error) {
NSLog(@"Failed to create folder %@", error);
return NO;
}
}
NSString *filePathCHS = LTUserPhrasesDataPathCHS();
if (![[NSFileManager defaultManager] fileExistsAtPath:filePathCHS]) {
BOOL result = [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:filePathCHS atomically:YES];
if (!result) {
NSLog(@"Failed to write userdict CHS file");
return NO;
}
}
NSString *filePathCHT = LTUserPhrasesDataPathCHT();
if (![[NSFileManager defaultManager] fileExistsAtPath:filePathCHT]) {
BOOL result = [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:filePathCHT atomically:YES];
if (!result) {
NSLog(@"Failed to write userdict CHT file");
return NO;
}
}
return YES;
}
// https://clang-analyzer.llvm.org/faq.html
__attribute__((annotate("returns_localized_nsstring")))
static inline NSString *LocalizationNotNeeded(NSString *s) {
@ -190,19 +121,12 @@ static inline NSString *LocalizationNotNeeded(NSString *s) {
}
// private methods
@interface vChewingInputMethodController () <VTCandidateControllerDelegate>
@interface vChewingInputMethodController ()
+ (VTHorizontalCandidateController *)horizontalCandidateController;
+ (VTVerticalCandidateController *)verticalCandidateController;
@end
- (void)collectCandidates;
- (size_t)actualCandidateCursorIndex;
- (void)_showCandidateWindowUsingVerticalMode:(BOOL)useVerticalMode client:(id)client;
- (void)beep;
- (BOOL)handleInputText:(NSString*)inputText key:(NSInteger)keyCode modifiers:(NSUInteger)flags client:(id)client;
- (BOOL)handleCandidateEventWithInputText:(NSString *)inputText charCode:(UniChar)charCode keyCode:(NSUInteger)keyCode;
@interface vChewingInputMethodController (VTCandidateController) <VTCandidateControllerDelegate>
@end
// sort helper
@ -240,8 +164,6 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
}
// the two client pointers are weak pointers (i.e. we don't retain them)
// therefore we don't do anything about it
[[NSNotificationCenter defaultCenter] removeObserver:self name:@"ChineseConversionStatusChanged" object:nil];
}
- (id)initWithServer:(IMKServer *)server delegate:(id)delegate client:(id)client
@ -257,10 +179,10 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_bpmfReadingBuffer = new BopomofoReadingBuffer(BopomofoKeyboardLayout::StandardLayout());
// create the lattice builder
_languageModel = &gLanguageModelCHT;
_userPhrasesModel = &gUserPhraseLanguageModelCHT;
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
_uom = &gUserOverrideModelCHT;
_languageModel = [LanguageModelManager languageModelBopomofo];
_userOverrideModel = [LanguageModelManager userOverrideModel];
_builder = new BlockReadingBuilder(_languageModel);
// each Mandarin syllable is separated by a hyphen
_builder->setJoinSeparator("-");
@ -270,10 +192,6 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_inputMode = kBopomofoModeIdentifier;
_chineseConversionEnabled = [[NSUserDefaults standardUserDefaults] boolForKey:kChineseConversionEnabledKey];
_previousChineseConversionEnabledStatus = _chineseConversionEnabled;
[[NSNotificationCenter defaultCenter] removeObserver:self name:@"ChineseConversionStatusChanged" object:nil];
[[NSNotificationCenter defaultCenter] addObserver:self selector:@selector(handleChineseConversionStatusDidChanged:) name:@"ChineseConversionStatusChanged" object:nil];
}
return self;
@ -287,31 +205,34 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
[menu addItem:preferenceMenuItem];
NSMenuItem *chineseConversionMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Chinese Conversion", @"") action:@selector(toggleChineseConverter:) keyEquivalent:@"K"];
chineseConversionMenuItem.keyEquivalentModifierMask = NSEventModifierFlagCommand;
chineseConversionMenuItem.keyEquivalentModifierMask = NSEventModifierFlagCommand | NSEventModifierFlagControl;
chineseConversionMenuItem.state = _chineseConversionEnabled ? NSControlStateValueOn : NSControlStateValueOff;
[menu addItem:chineseConversionMenuItem];
[menu addItem:[NSMenuItem separatorItem]]; // -----------------------
NSMenuItem *editUserPheaseItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit User Phrases", @"") action:@selector(openUserPhrases:) keyEquivalent:@""];
[editUserPheaseItem setIndentationLevel:2];
[menu addItem:editUserPheaseItem];
[menu addItem:[NSMenuItem separatorItem]];
[menu addItemWithTitle:NSLocalizedString(@"User Phrases", @"") action:NULL keyEquivalent:@""];
NSMenuItem *reloadUserPheaseItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Reload User Phrases", @"") action:@selector(reloadUserPhrases:) keyEquivalent:@""];
[reloadUserPheaseItem setIndentationLevel:2];
[menu addItem:reloadUserPheaseItem];
if (_inputMode == kSimpBopomofoModeIdentifier) {
NSMenuItem *editExcludedPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit Excluded Phrases", @"") action:@selector(openExcludedPhrasesSimpBopomofo:) keyEquivalent:@""];
[menu addItem:editExcludedPhrasesItem];
}
else {
NSMenuItem *editUserPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit User Phrases", @"") action:@selector(openUserPhrases:) keyEquivalent:@""];
[menu addItem:editUserPhrasesItem];
NSMenuItem *editExcludedPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Edit Excluded Phrases", @"") action:@selector(openExcludedPhrasesvChewing:) keyEquivalent:@""];
[menu addItem:editExcludedPhrasesItem];
}
NSMenuItem *reloadUserPhrasesItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Reload User Phrases", @"") action:@selector(reloadUserPhrases:) keyEquivalent:@""];
[menu addItem:reloadUserPhrasesItem];
[menu addItem:[NSMenuItem separatorItem]];
[menu addItem:[NSMenuItem separatorItem]]; // -----------------------
NSMenuItem *updateCheckItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"Check for Updates…", @"") action:@selector(checkForUpdate:) keyEquivalent:@""];
[menu addItem:updateCheckItem];
NSMenuItem *aboutMenuItem = [[NSMenuItem alloc] initWithTitle:NSLocalizedString(@"About vChewing…", @"") action:@selector(showAbout:) keyEquivalent:@""];
[menu addItem:aboutMenuItem];
// Menu Debug Purposes...
NSLog(@"menu %@", menu);
return menu;
}
@ -406,28 +327,22 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
- (void)setValue:(id)value forTag:(long)tag client:(id)sender
{
NSString *newInputMode;
Formosa::Gramambular::FastLM *newLanguageModel;
Formosa::Gramambular::FastLM *userPhraseModel;
vChewing::UserOverrideModel *newUom;
vChewingLM *newLanguageModel;
if ([value isKindOfClass:[NSString class]] && [value isEqual:kSimpBopomofoModeIdentifier]) {
newInputMode = kSimpBopomofoModeIdentifier;
newLanguageModel = &gLanguageModelCHS;
newUom = &gUserOverrideModelCHS;
userPhraseModel = &gUserPhraseLanguageModelCHS;
newLanguageModel = [LanguageModelManager languageModelSimpBopomofo];
}
else {
newInputMode = kBopomofoModeIdentifier;
newLanguageModel = &gLanguageModelCHT;
newUom = &gUserOverrideModelCHT;
userPhraseModel = &gUserPhraseLanguageModelCHT;
newLanguageModel = [LanguageModelManager languageModelBopomofo];
}
// Only apply the changes if the value is changed
if (![_inputMode isEqualToString:newInputMode]) {
[[NSUserDefaults standardUserDefaults] synchronize];
// Remember to override the keyboard layout again -- treat this as an activate event
// Remember to override the keyboard layout again -- treat this as an activate event
NSString *basisKeyboardLayoutID = [[NSUserDefaults standardUserDefaults] stringForKey:kBasisKeyboardLayoutPreferenceKey];
if (!basisKeyboardLayoutID) {
basisKeyboardLayoutID = @"com.apple.keylayout.US";
@ -436,8 +351,6 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
_inputMode = newInputMode;
_languageModel = newLanguageModel;
_userPhrasesModel = userPhraseModel;
_uom = newUom;
if (!_bpmfReadingBuffer->isEmpty()) {
_bpmfReadingBuffer->clear();
@ -450,7 +363,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
if (_builder) {
delete _builder;
_builder = new BlockReadingBuilder(_languageModel, _userPhrasesModel);
_builder = new BlockReadingBuilder(_languageModel);
_builder->setJoinSeparator("-");
}
}
@ -463,8 +376,7 @@ static double FindHighestScore(const vector<NodeAnchor>& nodes, double epsilon)
// if it's Terminal, we don't commit at the first call (the client of which will not be IPMDServerClientWrapper)
// then we defer the update in the next runloop round -- so that the composing buffer is not
// meaninglessly flushed, an annoying bug in Terminal.app since Mac OS X 10.5
if ([[client bundleIdentifier] isEqualToString:@"com.apple.Terminal"] && ![NSStringFromClass([client class]) isEqualToString:@"IPMDServerClientWrapper"])
{
if ([[client bundleIdentifier] isEqualToString:@"com.apple.Terminal"] && ![NSStringFromClass([client class]) isEqualToString:@"IPMDServerClientWrapper"]) {
if (_currentDeferredClient) {
[self performSelector:@selector(updateClientComposingBuffer:) withObject:_currentDeferredClient afterDelay:0.0];
}
@ -563,7 +475,8 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
// i.e. the client app needs to take care of where to put this composing buffer
[client setMarkedText:attrString selectionRange:NSMakeRange((NSInteger)_builder->markerCursorIndex(), 0) replacementRange:NSMakeRange(NSNotFound, NSNotFound)];
_latestReadingCursor = (NSInteger)_builder->markerCursorIndex();
} else {
}
else {
// we must use NSAttributedString so that the cursor is visible --
// can't just use NSString
NSDictionary *attrDict = @{NSUnderlineStyleAttributeName: @(NSUnderlineStyleSingle),
@ -591,13 +504,13 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
reverse(_walkedNodes.begin(), _walkedNodes.end());
// if DEBUG is defined, a GraphViz file is written to kGraphVizOutputfile
#if DEBUG
#if DEBUG
string dotDump = _builder->grid().dumpDOT();
NSString *dotStr = [NSString stringWithUTF8String:dotDump.c_str()];
NSError *error = nil;
BOOL __unused success = [dotStr writeToFile:kGraphVizOutputfile atomically:YES encoding:NSUTF8StringEncoding error:&error];
#endif
#endif
}
- (void)popOverflowComposingTextAndWalk:(id)client
@ -695,12 +608,12 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
}
NSRange range = NSMakeRange((NSInteger)begin, (NSInteger)(end - begin));
NSString *reading = [_composingBuffer substringWithRange:range];
NSString *phrase = [_composingBuffer substringWithRange:range];
NSMutableString *string = [[NSMutableString alloc] init];
[string appendString:reading];
[string appendString:phrase];
[string appendString:@" "];
NSMutableArray *readingsArray = [[NSMutableArray alloc] init];
vector<std::string> v = _builder->readingsAtRange(begin,end);
vector<std::string> v = _builder->readingsAtRange(begin, end);
for(vector<std::string>::iterator it_i=v.begin(); it_i!=v.end(); ++it_i) {
[readingsArray addObject:[NSString stringWithUTF8String:it_i->c_str()]];
}
@ -712,28 +625,12 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
- (BOOL)_writeUserPhrase
{
if (!LTCheckIfUserLanguageModelFileExists()) {
return NO;
}
NSString *currentMarkedPhrase = [self _currentMarkedText];
if (![currentMarkedPhrase length]) {
return NO;
}
currentMarkedPhrase = [currentMarkedPhrase stringByAppendingString:@"\n"];
NSString *path = _inputMode == kSimpBopomofoModeIdentifier ? LTUserPhrasesDataPathCHS() : LTUserPhrasesDataPathCHT();
NSFileHandle *file = [NSFileHandle fileHandleForUpdatingAtPath:path];
if (!file) {
return NO;
}
[file seekToEndOfFile];
NSData *data = [currentMarkedPhrase dataUsingEncoding:NSUTF8StringEncoding];
[file writeData:data];
[file closeFile];
LTLoadUserLanguageModelFile();
return YES;
return [LanguageModelManager writeUserPhrase:currentMarkedPhrase];
}
- (BOOL)handleInputText:(NSString*)inputText key:(NSInteger)keyCode modifiers:(NSUInteger)flags client:(id)client
@ -831,7 +728,8 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
if (charCode == 13) {
if ([self _writeUserPhrase]) {
_builder->setMarkerCursorIndex(SIZE_MAX);
} else {
}
else {
[self beep];
}
[self updateClientComposingBuffer:client];
@ -862,13 +760,14 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
_builder->setMarkerCursorIndex(SIZE_MAX);
}
// see if it's valid BPMF reading
if (_bpmfReadingBuffer->isValidKey((char)charCode)) {
_bpmfReadingBuffer->combineKey((char)charCode);
// if we have a tone marker, we have to insert the reading to the builder
// in other words, if we don't have a tone marker, we just update the composing buffer
// if we have a tone marker, we have to insert the reading to the
// builder; in other words, if we don't have a tone marker, we just
// update the composing buffer
composeReading = _bpmfReadingBuffer->hasToneMarker();
if (!composeReading) {
[self updateClientComposingBuffer:client];
@ -897,8 +796,9 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
[self popOverflowComposingTextAndWalk:client];
// get user override model suggestion
string overrideValue =
_uom->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]);
string overrideValue = (_inputMode == kSimpBopomofoModeIdentifier) ? "" :
_userOverrideModel->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]);
if (!overrideValue.empty()) {
size_t cursorIndex = [self actualCandidateCursorIndex];
vector<NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
@ -910,6 +810,10 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
_bpmfReadingBuffer->clear();
[self updateClientComposingBuffer:client];
if (_inputMode == kSimpBopomofoModeIdentifier) {
[self _showCandidateWindowUsingVerticalMode:useVerticalMode client:client];
}
// and tells the client that the key is consumed
return YES;
}
@ -939,39 +843,39 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
// Esc
if (charCode == 27) {
BOOL escToClearInputBufferEnabled = [[NSUserDefaults standardUserDefaults] boolForKey:kEscToCleanInputBufferKey];
BOOL escToClearInputBufferEnabled = [[NSUserDefaults standardUserDefaults] boolForKey:kEscToCleanInputBufferKey];
if (escToClearInputBufferEnabled) {
// if the option is enabled, we clear everything including the composing
// buffer, walked nodes and the reading.
if (![_composingBuffer length]) {
return NO;
}
_bpmfReadingBuffer->clear();
_builder->clear();
_walkedNodes.clear();
[_composingBuffer setString:@""];
}
else {
// if reading is not empty, we cancel the reading; Apple's built-in
// Zhuyin (and the erstwhile Hanin) has a default option that Esc
// "cancels" the current composed character and revert it to
// Bopomofo reading, in odds with the expectation of users from
// other platforms
if (_bpmfReadingBuffer->isEmpty()) {
// no need to beep since the event is deliberately triggered by user
if (escToClearInputBufferEnabled) {
// if the option is enabled, we clear everything including the composing
// buffer, walked nodes and the reading.
if (![_composingBuffer length]) {
return NO;
}
_bpmfReadingBuffer->clear();
_builder->clear();
_walkedNodes.clear();
[_composingBuffer setString:@""];
}
else {
// if reading is not empty, we cancel the reading; Apple's built-in
// Zhuyin (and the erstwhile Hanin) has a default option that Esc
// "cancels" the current composed character and revert it to
// Bopomofo reading, in odds with the expectation of users from
// other platforms
if (_bpmfReadingBuffer->isEmpty()) {
// no need to beep since the event is deliberately triggered by user
if (![_composingBuffer length]) {
return NO;
}
}
else {
_bpmfReadingBuffer->clear();
}
_bpmfReadingBuffer->clear();
}
}
[self updateClientComposingBuffer:client];
[self updateClientComposingBuffer:client];
return YES;
}
@ -1161,33 +1065,16 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
}
}
// if nothing is matched, see if it's a punctuation key for current layout.
string layout = [self currentLayout];
string customPunctuation = string("_punctuation_") + layout + string(1, (char)charCode);
if (_languageModel->hasUnigramsForKey(customPunctuation)) {
if (_bpmfReadingBuffer->isEmpty()) {
_builder->insertReadingAtCursor(customPunctuation);
[self popOverflowComposingTextAndWalk:client];
}
else { // If there is still unfinished bpmf reading, ignore the punctuation
[self beep];
}
[self updateClientComposingBuffer:client];
if ([self handlePunctuation:customPunctuation usingVerticalMode:useVerticalMode client:client]) {
return YES;
}
// if nothing is matched, see if it's a punctuation key
// if nothing is matched, see if it's a punctuation key.
string punctuation = string("_punctuation_") + string(1, (char)charCode);
if (_languageModel->hasUnigramsForKey(punctuation)) {
if (_bpmfReadingBuffer->isEmpty()) {
_builder->insertReadingAtCursor(punctuation);
[self popOverflowComposingTextAndWalk:client];
}
else { // If there is still unfinished bpmf reading, ignore the punctuation
[self beep];
}
[self updateClientComposingBuffer:client];
if ([self handlePunctuation:punctuation usingVerticalMode:useVerticalMode client:client]) {
return YES;
}
@ -1203,16 +1090,48 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
return NO;
}
// Tries to handle `customPunctuation` as a punctuation reading key.
// Returns NO (touching nothing) when the active language model has no unigrams
// for the key, so the caller can try another key form. Otherwise the key is
// consumed and YES is returned.
- (BOOL)handlePunctuation:(string)customPunctuation usingVerticalMode:(BOOL)useVerticalMode client:(id)client
{
    if (!_languageModel->hasUnigramsForKey(customPunctuation)) {
        return NO;
    }

    if (_bpmfReadingBuffer->isEmpty()) {
        _builder->insertReadingAtCursor(customPunctuation);
        [self popOverflowComposingTextAndWalk:client];
    }
    else {
        // If there is still unfinished bpmf reading, ignore the punctuation
        [self beep];
    }
    [self updateClientComposingBuffer:client];

    // In the SimpBopomofo mode the candidates are resolved right away:
    // a single candidate is committed, several open the candidate window.
    BOOL resolveImmediately = (_inputMode == kSimpBopomofoModeIdentifier) && _bpmfReadingBuffer->isEmpty();
    if (!resolveImmediately) {
        return YES;
    }

    [self collectCandidates];
    if ([_candidates count] != 1) {
        [self _showCandidateWindowUsingVerticalMode:useVerticalMode client:client];
    }
    else {
        [self commitComposition:client];
    }
    return YES;
}
- (BOOL)handleCandidateEventWithInputText:(NSString *)inputText charCode:(UniChar)charCode keyCode:(NSUInteger)keyCode
{
BOOL cancelCandidateKey = (charCode == 27);
BOOL cancelCandidateKey =
(charCode == 27) ||
((_inputMode == kSimpBopomofoModeIdentifier) &&
(charCode == 8 || keyCode == kDeleteKeyCode));
if (cancelCandidateKey) {
gCurrentCandidateController.visible = NO;
[_candidates removeAllObjects];
if (_inputMode == kSimpBopomofoModeIdentifier) {
_builder->clear();
_walkedNodes.clear();
[_composingBuffer setString:@""];
}
[self updateClientComposingBuffer:_currentCandidateClient];
return YES;
}
@ -1349,6 +1268,23 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
}
}
if (_inputMode == kSimpBopomofoModeIdentifier) {
string layout = [self currentLayout];
string customPunctuation = string("_punctuation_") + layout + string(1, (char)charCode);
string punctuation = string("_punctuation_") + string(1, (char)charCode);
BOOL shouldAutoSelectCandidate = _bpmfReadingBuffer->isValidKey((char)charCode) || _languageModel->hasUnigramsForKey(customPunctuation) ||
_languageModel->hasUnigramsForKey(punctuation);
if (shouldAutoSelectCandidate) {
NSUInteger candidateIndex = [gCurrentCandidateController candidateIndexAtKeyLabelIndex:0];
if (candidateIndex != NSUIntegerMax) {
[self candidateController:gCurrentCandidateController didSelectCandidateAtIndex:candidateIndex];
return [self handleInputText:inputText key:keyCode modifiers:0 client:_currentCandidateClient];
}
}
}
[self beep];
[self updateClientComposingBuffer:_currentCandidateClient];
return YES;
@ -1491,8 +1427,8 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
NSString *klFontName = [[NSUserDefaults standardUserDefaults] stringForKey:kCandidateKeyLabelFontName];
NSString *ckeys = [[NSUserDefaults standardUserDefaults] stringForKey:kCandidateKeys];
gCurrentCandidateController.keyLabelFont = klFontName ? [NSFont fontWithName:klFontName size:keyLabelSize] : [NSFont monospacedDigitSystemFontOfSize:keyLabelSize weight:NSFontWeightMedium];
gCurrentCandidateController.candidateFont = ctFontName ? [NSFont fontWithName:ctFontName size:textSize] : [NSFont systemFontOfSize:textSize weight:NSFontWeightRegular];
gCurrentCandidateController.keyLabelFont = klFontName ? [NSFont fontWithName:klFontName size:keyLabelSize] : [NSFont systemFontOfSize:keyLabelSize];
gCurrentCandidateController.candidateFont = ctFontName ? [NSFont fontWithName:ctFontName size:textSize] : [NSFont systemFontOfSize:textSize];
NSMutableArray *keyLabels = [NSMutableArray arrayWithObjects:@"1", @"2", @"3", @"4", @"5", @"6", @"7", @"8", @"9", nil];
@ -1506,6 +1442,11 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
gCurrentCandidateController.keyLabels = keyLabels;
[self collectCandidates];
if (_inputMode == kSimpBopomofoModeIdentifier && [_candidates count] == 1) {
[self commitComposition:client];
return;
}
gCurrentCandidateController.delegate = self;
[gCurrentCandidateController reloadData];
@ -1556,41 +1497,48 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
[(AppDelegate *)[[NSApplication sharedApplication] delegate] checkForUpdateForced:YES];
}
// Makes sure the user data files are available. When they are not, a non-modal
// alert pointing at the data folder is shown and NO is returned.
- (BOOL)_checkUserFiles
{
    if ([LanguageModelManager checkIfUserLanguageModelFilesExist]) {
        return YES;
    }

    NSString *content = [NSString stringWithFormat:NSLocalizedString(@"Please check the permission of at \"%@\".", @""), [LanguageModelManager dataFolderPath]];
    NSString *title = NSLocalizedString(@"Unable to create the user phrase file.", @"");
    NSString *confirmTitle = NSLocalizedString(@"OK", @"");
    [[NonModalAlertWindowController sharedInstance] showWithTitle:title
                                                          content:content
                                               confirmButtonTitle:confirmTitle
                                                cancelButtonTitle:nil
                                                  cancelAsDefault:NO
                                                         delegate:nil];
    return NO;
}
// Opens the file at `path` through NSWorkspace, but only after the user data
// files have passed -_checkUserFiles.
- (void)_openUserFile:(NSString *)path
{
    if ([self _checkUserFiles]) {
        NSURL *fileURL = [NSURL fileURLWithPath:path];
        [[NSWorkspace sharedWorkspace] openURL:fileURL];
    }
}
- (void)openUserPhrases:(id)sender
{
NSLog(@"openUserPhrases called");
if (!LTCheckIfUserLanguageModelFileExists()) {
NSString *content = [NSString stringWithFormat:NSLocalizedString(@"Please check the permission of at \"%@\".", @""), LTUserDataFolderPath()];
[[NonModalAlertWindowController sharedInstance] showWithTitle:NSLocalizedString(@"Unable to create the user phrase file.", @"") content:content confirmButtonTitle:NSLocalizedString(@"OK", @"") cancelButtonTitle:nil cancelAsDefault:NO delegate:nil];
return;
}
if (_inputMode == kSimpBopomofoModeIdentifier) {
NSLog(@"editUserPhrases CHS called");
NSString *path = LTUserPhrasesDataPathCHS();
NSLog(@"Open %@", path);
if (![[NSFileManager defaultManager] fileExistsAtPath:path]) {
[[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:path atomically:YES];
}
NSURL *url = [NSURL fileURLWithPath:path];
[[NSWorkspace sharedWorkspace] openURL:url];
} else {
NSLog(@"editUserPhrases CHT called");
NSString *path = LTUserPhrasesDataPathCHT();
NSLog(@"Open %@", path);
if (![[NSFileManager defaultManager] fileExistsAtPath:path]) {
[[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:path atomically:YES];
}
NSURL *url = [NSURL fileURLWithPath:path];
[[NSWorkspace sharedWorkspace] openURL:url];
}
[self _openUserFile:[LanguageModelManager userPhrasesDataPathBopomofo]];
}
// Action: opens the excluded-phrases file used by the SimpBopomofo mode.
- (void)openExcludedPhrasesSimpBopomofo:(id)sender
{
    NSLog(@"openExcludedPhrasesSimpBopomofo called");
    NSString *excludedPhrasesPath = [LanguageModelManager excludedPhrasesDataPathSimpBopomofo];
    [self _openUserFile:excludedPhrasesPath];
}
// Action: opens the excluded-phrases file used by the regular Bopomofo mode.
- (void)openExcludedPhrasesvChewing:(id)sender
{
    NSLog(@"openExcludedPhrasesvChewing called");
    NSString *excludedPhrasesPath = [LanguageModelManager excludedPhrasesDataPathBopomofo];
    [self _openUserFile:excludedPhrasesPath];
}
- (void)reloadUserPhrases:(id)sender
{
LTLoadUserLanguageModelFile();
NSLog(@"reloadUserPhrases called");
[LanguageModelManager loadUserPhrasesModel];
}
- (void)showAbout:(id)sender
@ -1604,9 +1552,14 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
{
_chineseConversionEnabled = !_chineseConversionEnabled;
[[NSUserDefaults standardUserDefaults] setBool:_chineseConversionEnabled forKey:kChineseConversionEnabledKey];
[[NSNotificationCenter defaultCenter] postNotificationName:@"ChineseConversionStatusChanged" object:nil];
}
@end
#pragma mark -
@implementation vChewingInputMethodController (VTCandidateController)
- (NSUInteger)candidateCountForController:(VTCandidateController *)controller
{
return [_candidates count];
@ -1626,69 +1579,19 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
size_t cursorIndex = [self actualCandidateCursorIndex];
_builder->grid().fixNodeSelectedCandidate(cursorIndex, selectedValue);
_uom->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]);
if (_inputMode != kSimpBopomofoModeIdentifier) {
_userOverrideModel->observe(_walkedNodes, cursorIndex, selectedValue, [[NSDate date] timeIntervalSince1970]);
}
[_candidates removeAllObjects];
[self walk];
[self updateClientComposingBuffer:_currentCandidateClient];
}
- (void)handleChineseConversionStatusDidChanged:(NSNotification *)notification
{
// Do not post the notification if status doesn't change.
// This is because the input method can be initiated by multiple applications, then all of them would post the notification.
if (_previousChineseConversionEnabledStatus == _chineseConversionEnabled) {
if (_inputMode == kSimpBopomofoModeIdentifier) {
[self commitComposition:_currentCandidateClient];
return;
}
NSUserNotification *userNotification = [[NSUserNotification alloc] init];
userNotification.title = @"vChewing";
userNotification.informativeText = [NSString stringWithFormat:@"%@%@", NSLocalizedString(@"Chinese Conversion", @""), _chineseConversionEnabled ? NSLocalizedString(@"NotificationSwitchON", @"") : NSLocalizedString(@"NotificationSwitchOFF", @"")];
userNotification.soundName = NSUserNotificationDefaultSoundName;
[[NSUserNotificationCenter defaultUserNotificationCenter] deliverNotification:userNotification];
_previousChineseConversionEnabledStatus = _chineseConversionEnabled;
}
@end
// Opens the bundled "<filenameWithoutExtension>.txt" resource with `lm` and
// logs the path when FastLM::open reports failure.
static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, FastLM &lm)
{
    NSBundle *bundle = [NSBundle bundleForClass:[vChewingInputMethodController class]];
    NSString *dataPath = [bundle pathForResource:filenameWithoutExtension ofType:@"txt"];
    if (lm.open([dataPath UTF8String])) {
        return;
    }
    NSLog(@"Failed opening language model: %@", dataPath);
}
// Loads both bundled language models: "data" into the CHT model and
// "data-chs" into the CHS model.
void LTLoadLanguageModel()
{
    // Traditional Chinese data set.
    LTLoadLanguageModelFile(@"data", gLanguageModelCHT);

    // Simplified Chinese data set.
    LTLoadLanguageModelFile(@"data-chs", gLanguageModelCHS);
}
// Closes and reopens both user-phrase language models from their data files,
// logging a message for each model that fails to open.
void LTLoadUserLanguageModelFile()
{
    // Disabled autofix, kept for reference: append a line break to the CHT user
    // file so that it always ends with a new line.
    //   NSString *lineBreak = @"\n";
    //   NSOutputStream *stream = [[NSOutputStream alloc] initToFileAtPath:LTUserPhrasesDataPathCHT() append:YES];
    //   [stream open];
    //   NSData *strData = [lineBreak dataUsingEncoding:NSUTF8StringEncoding];
    //   [stream write:(uint8_t *)[strData bytes] maxLength:[strData length]];
    //   [stream close];

    gUserPhraseLanguageModelCHT.close();
    gUserPhraseLanguageModelCHS.close();

    const bool openedCHT = gUserPhraseLanguageModelCHT.open([LTUserPhrasesDataPathCHT() UTF8String]);
    const bool openedCHS = gUserPhraseLanguageModelCHS.open([LTUserPhrasesDataPathCHS() UTF8String]);

    if (!openedCHT) {
        NSLog(@"Failed opening language model for CHT user phrases.");
    }
    if (!openedCHS) {
        NSLog(@"Failed opening language model for CHS user phrases.");
    }
}

View File

@ -0,0 +1,60 @@
//
// LanguageModelManager.h
//
// Copyright (c) 2021-2022 The vChewing Project.
// Copyright (c) 2011-2022 The OpenVanilla Project.
//
// Contributors:
// Weizhong Yang (@zonble) @ OpenVanilla
// Hiraku Wang (@hirakujira) @ vChewing
// Shiki Suen (@ShikiSuen) @ vChewing
//
// Based on the Syrup Project and the Formosana Library
// by Lukhnos Liu (@lukhnos).
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#import <Foundation/Foundation.h>
#import "FastLM.h"
#import "UserOverrideModel.h"
#import "vChewingLM.h"
NS_ASSUME_NONNULL_BEGIN
// Class-level access point for the shared language models and for the user
// data files kept in the "vChewing" folder. Every member is a class
// method or class property; no instance state is declared here.
@interface LanguageModelManager : NSObject
// Loads the bundled "data" and "data-plain-bpmf" text resources into the two shared models.
+ (void)loadDataModels;
// (Re)loads the user phrases and the excluded phrases from the user data files.
// The SimpBopomofo model receives an excluded-phrases file only.
+ (void)loadUserPhrasesModel;
// Ensures the data folder and the three user files exist, creating them when
// missing. Returns NO when any of them cannot be created.
+ (BOOL)checkIfUserLanguageModelFilesExist;
// Appends `userPhrase` plus a trailing "\n" to the Bopomofo user phrases file,
// then reloads the user phrases. Returns NO when the file cannot be prepared or opened.
+ (BOOL)writeUserPhrase:(NSString *)userPhrase;
// "vChewing" folder inside the first Application Support search path.
@property (class, readonly, nonatomic) NSString *dataFolderPath;
// <dataFolderPath>/data.txt
@property (class, readonly, nonatomic) NSString *userPhrasesDataPathBopomofo;
// <dataFolderPath>/exclude-phrases.txt
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathBopomofo;
// <dataFolderPath>/exclude-phrases-plain-bpmf.txt
@property (class, readonly, nonatomic) NSString *excludedPhrasesDataPathSimpBopomofo;
// Pointers to file-level objects owned by the implementation file; callers must not delete them.
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelBopomofo;
@property (class, readonly, nonatomic) vChewing::vChewingLM *languageModelSimpBopomofo;
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModel;
@end
NS_ASSUME_NONNULL_END

View File

@ -0,0 +1,191 @@
//
// LanguageModelManager.mm
//
// Copyright (c) 2021-2022 The vChewing Project.
// Copyright (c) 2011-2022 The OpenVanilla Project.
//
// Contributors:
// Weizhong Yang (@zonble) @ OpenVanilla
// Hiraku Wang (@hirakujira) @ vChewing
// Shiki Suen (@ShikiSuen) @ vChewing
//
// Based on the Syrup Project and the Formosana Library
// by Lukhnos Liu (@lukhnos).
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#import "LanguageModelManager.h"
#import <fstream>
#import <iostream>
#import <set>
#import "OVStringHelper.h"
#import "OVUTF8Helper.h"
using namespace std;
using namespace Formosa::Gramambular;
using namespace vChewing;
using namespace OpenVanilla;
// Constructor arguments for the shared UserOverrideModel below.
// NOTE(review): presumably the maximum number of remembered overrides and the
// decay half-life in seconds — confirm against UserOverrideModel.h.
static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
// Shared model objects exposed (by pointer) through the class accessors.
// NOTE(review): `glanguageModelBopomofo` starts with a lowercase "l", unlike
// its sibling; renaming it would have to cover every reference in this file.
vChewingLM glanguageModelBopomofo;
vChewingLM gLanguageModelSimpBopomofo;
UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
@implementation LanguageModelManager
// Loads the bundled "<filenameWithoutExtension>.txt" resource into `lm`.
// The bundle is the one containing the vChewingInputMethodController class,
// looked up by name. A missing resource is logged and the model is left
// untouched, instead of handing a NULL path to the loader.
static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewingLM &lm)
{
    Class cls = NSClassFromString(@"vChewingInputMethodController");
    NSString *dataPath = [[NSBundle bundleForClass:cls] pathForResource:filenameWithoutExtension ofType:@"txt"];
    if (!dataPath) {
        // -pathForResource:ofType: returns nil when the resource is not in the bundle.
        NSLog(@"Language model resource not found: %@.txt", filenameWithoutExtension);
        return;
    }
    lm.loadLanguageModel([dataPath UTF8String]);
}
// Loads the two bundled language model data sets into the shared models.
+ (void)loadDataModels
{
    // Regular Bopomofo data set.
    LTLoadLanguageModelFile(@"data", glanguageModelBopomofo);

    // Plain ("SimpBopomofo") data set.
    LTLoadLanguageModelFile(@"data-plain-bpmf", gLanguageModelSimpBopomofo);
}
// (Re)loads the user-maintained phrase files into both shared models.
+ (void)loadUserPhrasesModel
{
    // The NSString objects are held in locals so the UTF8String buffers stay
    // valid for the duration of each call.
    NSString *userPhrasesPath = [self userPhrasesDataPathBopomofo];
    NSString *excludedPhrasesPath = [self excludedPhrasesDataPathBopomofo];
    glanguageModelBopomofo.loadUserPhrases([userPhrasesPath UTF8String], [excludedPhrasesPath UTF8String]);

    // The SimpBopomofo model takes no user phrases, only an exclusion list.
    NSString *excludedPhrasesPathSimp = [self excludedPhrasesDataPathSimpBopomofo];
    gLanguageModelSimpBopomofo.loadUserPhrases(NULL, [excludedPhrasesPathSimp UTF8String]);
}
// Ensures that +dataFolderPath exists and is a directory.
// A non-directory item occupying the path is removed first; a missing folder
// is created together with its intermediate directories.
// Returns NO (after logging the error) when removal or creation fails.
+ (BOOL)checkIfUserDataFolderExists
{
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSString *folderPath = [self dataFolderPath];
    BOOL isFolder = NO;
    BOOL folderExist = [fileManager fileExistsAtPath:folderPath isDirectory:&isFolder];

    if (folderExist && !isFolder) {
        // Success is signalled by the BOOL return value; the NSError out-parameter
        // is only meaningful once the call has reported failure.
        NSError *error = nil;
        if (![fileManager removeItemAtPath:folderPath error:&error]) {
            NSLog(@"Failed to remove folder %@", error);
            return NO;
        }
        folderExist = NO;
    }

    if (!folderExist) {
        NSError *error = nil;
        if (![fileManager createDirectoryAtPath:folderPath withIntermediateDirectories:YES attributes:nil error:&error]) {
            NSLog(@"Failed to create folder %@", error);
            return NO;
        }
    }
    return YES;
}
// Ensures a file exists at `filePath`, creating an empty one when nothing is there.
// Returns NO when the empty file cannot be written.
+ (BOOL)checkIfFileExist:(NSString *)filePath
{
    if ([[NSFileManager defaultManager] fileExistsAtPath:filePath]) {
        return YES;
    }

    NSData *emptyContent = [@"" dataUsingEncoding:NSUTF8StringEncoding];
    if ([emptyContent writeToFile:filePath atomically:YES]) {
        return YES;
    }

    NSLog(@"Failed to write file");
    return NO;
}
// Ensures the data folder and all three user files exist, creating whatever is
// missing. Checks run in order and stop at the first failure.
+ (BOOL)checkIfUserLanguageModelFilesExist
{
    return [self checkIfUserDataFolderExists]
        && [self checkIfFileExist:[self userPhrasesDataPathBopomofo]]
        && [self checkIfFileExist:[self excludedPhrasesDataPathBopomofo]]
        && [self checkIfFileExist:[self excludedPhrasesDataPathSimpBopomofo]];
}
// Appends `userPhrase` followed by "\n" to the Bopomofo user phrases file and
// reloads the user phrases on success.
// Returns NO when the user files cannot be prepared, the phrase cannot be
// encoded, the file cannot be opened, or the write itself fails.
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
{
    if (![self checkIfUserLanguageModelFilesExist]) {
        return NO;
    }

    NSString *currentMarkedPhrase = [userPhrase stringByAppendingString:@"\n"];
    NSData *data = [currentMarkedPhrase dataUsingEncoding:NSUTF8StringEncoding];
    if (!data) {
        // A nil phrase (or an unencodable one) yields no data; nothing to write.
        return NO;
    }

    NSString *path = [self userPhrasesDataPathBopomofo];
    NSFileHandle *file = [NSFileHandle fileHandleForUpdatingAtPath:path];
    if (!file) {
        return NO;
    }

    // -seekToEndOfFile and -writeData: raise on I/O failure (e.g. a full disk),
    // so guard them and always release the descriptor.
    BOOL written = NO;
    @try {
        [file seekToEndOfFile];
        [file writeData:data];
        written = YES;
    }
    @catch (NSException *exception) {
        NSLog(@"Failed to append user phrase: %@", exception);
    }
    @finally {
        [file closeFile];
    }

    if (!written) {
        return NO;
    }
    [self loadUserPhrasesModel];
    return YES;
}
// Returns the "vChewing" folder inside the current user's Application Support
// directory. The folder itself is not created here.
+ (NSString *)dataFolderPath
{
    // The second argument is a domain mask. The previous NSUserDirectory is a
    // search-path *directory* constant and only worked because its raw value
    // happens to include the user-domain bit.
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDomainMask, YES);
    NSString *appSupportPath = [paths objectAtIndex:0];
    NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"];
    return userDictPath;
}
// Path of the user phrases file ("data.txt") inside the data folder.
+ (NSString *)userPhrasesDataPathBopomofo
{
    NSString *folder = [self dataFolderPath];
    return [folder stringByAppendingPathComponent:@"data.txt"];
}
// Path of the excluded phrases file ("exclude-phrases.txt") inside the data folder.
+ (NSString *)excludedPhrasesDataPathBopomofo
{
    NSString *folder = [self dataFolderPath];
    return [folder stringByAppendingPathComponent:@"exclude-phrases.txt"];
}
// Path of the SimpBopomofo excluded phrases file
// ("exclude-phrases-plain-bpmf.txt") inside the data folder.
+ (NSString *)excludedPhrasesDataPathSimpBopomofo
{
    NSString *folder = [self dataFolderPath];
    return [folder stringByAppendingPathComponent:@"exclude-phrases-plain-bpmf.txt"];
}
// Pointer to the shared Bopomofo language model (a file-level object).
+ (vChewingLM *)languageModelBopomofo
{
    vChewingLM *sharedModel = &glanguageModelBopomofo;
    return sharedModel;
}
// Pointer to the shared SimpBopomofo language model (a file-level object).
+ (vChewingLM *)languageModelSimpBopomofo
{
    vChewingLM *sharedModel = &gLanguageModelSimpBopomofo;
    return sharedModel;
}
// Pointer to the shared user override model (a file-level object).
+ (vChewing::UserOverrideModel *)userOverrideModel
{
    vChewing::UserOverrideModel *sharedModel = &gUserOverrideModel;
    return sharedModel;
}
@end

71
Source/Shit4Migration.txt Normal file
View File

@ -0,0 +1,71 @@
// Shared language model objects that store the phrase-term probability
// databases: one bundled model and one user-phrase model per variant (CHT / CHS).
FastLM gLanguageModelCHT;
FastLM gLanguageModelCHS;
FastLM gUserPhraseLanguageModelCHT;
FastLM gUserPhraseLanguageModelCHS;
// Constructor arguments shared by both user override models below.
// NOTE(review): presumably capacity = number of remembered overrides and
// half-life in seconds — confirm against UserOverrideModel.h.
static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
vChewing::UserOverrideModel gUserOverrideModelCHT(kUserOverrideModelCapacity, kObservedOverrideHalflife);
vChewing::UserOverrideModel gUserOverrideModelCHS(kUserOverrideModelCapacity, kObservedOverrideHalflife);
// Returns the "vChewing" folder inside the current user's Application Support
// directory. The folder itself is not created here.
static NSString *LTUserDataFolderPath()
{
    // The second argument is a domain mask. The previous NSUserDirectory is a
    // search-path *directory* constant and only worked because its raw value
    // happens to include the user-domain bit.
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSApplicationSupportDirectory, NSUserDomainMask, YES);
    NSString *appSupportPath = [paths objectAtIndex:0];
    NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"];
    return userDictPath;
}
// Path of the CHT user phrases file ("userdata-cht.txt") inside the user data folder.
static NSString *LTUserPhrasesDataPathCHT()
{
    NSString *folder = LTUserDataFolderPath();
    return [folder stringByAppendingPathComponent:@"userdata-cht.txt"];
}
// Path of the CHS user phrases file ("userdata-chs.txt") inside the user data folder.
static NSString *LTUserPhrasesDataPathCHS()
{
    NSString *folder = LTUserDataFolderPath();
    return [folder stringByAppendingPathComponent:@"userdata-chs.txt"];
}
// Ensures the user data folder and both user phrase files (CHS, then CHT) exist.
// A non-directory item occupying the folder path is removed first; a missing
// folder or file is created. Returns NO after logging when any step fails.
static BOOL LTCheckIfUserLanguageModelFileExists() {
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSString *folderPath = LTUserDataFolderPath();
    BOOL isFolder = NO;
    BOOL folderExist = [fileManager fileExistsAtPath:folderPath isDirectory:&isFolder];

    if (folderExist && !isFolder) {
        // Success is signalled by the BOOL return value; the NSError out-parameter
        // is only meaningful once the call has reported failure.
        NSError *error = nil;
        if (![fileManager removeItemAtPath:folderPath error:&error]) {
            NSLog(@"Failed to remove folder %@", error);
            return NO;
        }
        folderExist = NO;
    }

    if (!folderExist) {
        NSError *error = nil;
        if (![fileManager createDirectoryAtPath:folderPath withIntermediateDirectories:YES attributes:nil error:&error]) {
            NSLog(@"Failed to create folder %@", error);
            return NO;
        }
    }

    NSString *filePathCHS = LTUserPhrasesDataPathCHS();
    if (![fileManager fileExistsAtPath:filePathCHS]) {
        BOOL result = [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:filePathCHS atomically:YES];
        if (!result) {
            NSLog(@"Failed to write userdict CHS file");
            return NO;
        }
    }

    NSString *filePathCHT = LTUserPhrasesDataPathCHT();
    if (![fileManager fileExistsAtPath:filePathCHT]) {
        BOOL result = [[@"" dataUsingEncoding:NSUTF8StringEncoding] writeToFile:filePathCHT atomically:YES];
        if (!result) {
            NSLog(@"Failed to write userdict CHT file");
            return NO;
        }
    }
    return YES;
}

View File

@ -23,3 +23,4 @@
"Reload User Phrases" = "Reload User Phrases";
"Unable to create the user phrase file." = "Unable to create the user phrase file.";
"Please check the permission of at \"%@\"." = "Please check the permission of at \"%@\".";
"Edit Excluded Phrases" = "Edit Excluded Phrases";

View File

@ -3,5 +3,9 @@
//
#import "frmAboutWindow.h"
extern void LTLoadLanguageModel(void);
extern void LTLoadUserLanguageModelFile(void);
#import <Foundation/Foundation.h> // @import Foundation;
// Reduced declaration of LanguageModelManager listing only plain Objective-C
// class methods (no C++ types).
// NOTE(review): presumably kept here so Swift code can call these through the
// bridging header — confirm, and keep it in sync with LanguageModelManager.h.
@interface LanguageModelManager : NSObject
// Loads the bundled language model data files.
+ (void)loadDataModels;
// (Re)loads the user phrases and excluded phrases files.
+ (void)loadUserPhrasesModel;
// Ensures the user data folder and user files exist; NO when they cannot be created.
+ (BOOL)checkIfUserLanguageModelFilesExist;
@end

View File

@ -23,3 +23,4 @@
"Reload User Phrases" = "重载自订语汇";
"Unable to create the user phrase file." = "无法创建自订语汇档案。";
"Please check the permission of at \"%@\"." = "请检查此处的存取权限:\"%@\".";
"Edit Excluded Phrases" = "编辑要滤除的语汇";

View File

@ -23,3 +23,4 @@
"Reload User Phrases" = "重載自訂語彙";
"Unable to create the user phrase file." = "無法創建自訂語彙檔案。";
"Please check the permission of at \"%@\"." = "請檢查此處的存取權限:\"%@\".";
"Edit Excluded Phrases" = "編輯要濾除的語彙";

View File

@ -12,6 +12,8 @@
5B1958522788A2BF00FAEB14 /* MITLicense.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B58E87D278413E7003EA2AD /* MITLicense.txt */; };
5B42B64027876FDC00BB9B9F /* UserOverrideModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5B42B63E27876FDC00BB9B9F /* UserOverrideModel.cpp */; };
5B58E87F278413E7003EA2AD /* MITLicense.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B58E87D278413E7003EA2AD /* MITLicense.txt */; };
5B5F4F8E27928F9300922DC2 /* vChewingLM.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5B5F4F8D27928F9300922DC2 /* vChewingLM.cpp */; };
5B5F4F93279294A300922DC2 /* LanguageModelManager.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B5F4F92279294A300922DC2 /* LanguageModelManager.mm */; };
5BC3EE1B278FC48C00F5E44C /* VerticalCandidateController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BC3EE18278FC48C00F5E44C /* VerticalCandidateController.swift */; };
5BC3EE1C278FC48C00F5E44C /* VTCandidateController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BC3EE19278FC48C00F5E44C /* VTCandidateController.swift */; };
5BC3EE1D278FC48C00F5E44C /* HorizontalCandidateController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BC3EE1A278FC48C00F5E44C /* HorizontalCandidateController.swift */; };
@ -83,6 +85,11 @@
5B58E87E278413E7003EA2AD /* en */ = {isa = PBXFileReference; lastKnownFileType = text; name = en; path = Source/en.lproj/MITLicense.txt; sourceTree = SOURCE_ROOT; };
5B58E880278413EF003EA2AD /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hans"; path = "zh-Hans.lproj/MITLicense.txt"; sourceTree = "<group>"; };
5B58E881278413F1003EA2AD /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/MITLicense.txt"; sourceTree = "<group>"; };
5B5F4F8C27928F9300922DC2 /* vChewingLM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = vChewingLM.h; sourceTree = "<group>"; };
5B5F4F8D27928F9300922DC2 /* vChewingLM.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = vChewingLM.cpp; sourceTree = "<group>"; };
5B5F4F91279294A300922DC2 /* LanguageModelManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LanguageModelManager.h; sourceTree = "<group>"; };
5B5F4F92279294A300922DC2 /* LanguageModelManager.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LanguageModelManager.mm; sourceTree = "<group>"; };
5B5F4F9427929ADC00922DC2 /* Shit4Migration.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = Shit4Migration.txt; sourceTree = "<group>"; };
5B9781D32763850700897999 /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "zh-Hans.lproj/InfoPlist.strings"; sourceTree = "<group>"; };
5B9781D52763850700897999 /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "zh-Hans.lproj/Localizable.strings"; sourceTree = "<group>"; };
5B9781D72763850700897999 /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "Source/zh-Hans.lproj/InfoPlist.strings"; sourceTree = "<group>"; };
@ -229,6 +236,19 @@
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
5BA8DAFE27928120009C9FFF /* LanguageModel */ = {
isa = PBXGroup;
children = (
5B5F4F8D27928F9300922DC2 /* vChewingLM.cpp */,
5B5F4F8C27928F9300922DC2 /* vChewingLM.h */,
6A0421A615FEF3F50061ED63 /* FastLM.cpp */,
6A0421A715FEF3F50061ED63 /* FastLM.h */,
5B42B63E27876FDC00BB9B9F /* UserOverrideModel.cpp */,
5B42B63F27876FDC00BB9B9F /* UserOverrideModel.h */,
);
path = LanguageModel;
sourceTree = "<group>";
};
6A0D4E9215FC0CFA00ABF4B3 = {
isa = PBXGroup;
children = (
@ -272,6 +292,9 @@
5BF4A6FC27844738007DC6E7 /* frmAboutWindow.m */,
6A0D4EC615FC0D6400ABF4B3 /* InputMethodController.h */,
6A0D4EC715FC0D6400ABF4B3 /* InputMethodController.mm */,
5B5F4F9427929ADC00922DC2 /* Shit4Migration.txt */,
5B5F4F91279294A300922DC2 /* LanguageModelManager.h */,
5B5F4F92279294A300922DC2 /* LanguageModelManager.mm */,
6A0D4EC815FC0D6400ABF4B3 /* main.m */,
6A0D4EF615FC0DA600ABF4B3 /* vChewing-Prefix.pch */,
5BDF2D022791C71200838ADB /* NonModalAlertWindowController.swift */,
@ -280,8 +303,6 @@
5BDF2CFD2791BE4400838ADB /* InputSourceHelper.swift */,
D427A9BF25ED28CC005D43E0 /* vChewing-Bridging-Header.h */,
5BA923AC2791B7C20001323A /* vChewingInstaller-Bridging-Header.h */,
5B42B63E27876FDC00BB9B9F /* UserOverrideModel.cpp */,
5B42B63F27876FDC00BB9B9F /* UserOverrideModel.h */,
);
path = Source;
sourceTree = "<group>";
@ -311,11 +332,10 @@
6A0D4F1215FC0EB100ABF4B3 /* Engine */ = {
isa = PBXGroup;
children = (
5BA8DAFE27928120009C9FFF /* LanguageModel */,
6A0D4F1315FC0EB100ABF4B3 /* Gramambular */,
6A0D4F1F15FC0EB100ABF4B3 /* Mandarin */,
6A0D4F2215FC0EB100ABF4B3 /* OpenVanilla */,
6A0421A615FEF3F50061ED63 /* FastLM.cpp */,
6A0421A715FEF3F50061ED63 /* FastLM.h */,
);
path = Engine;
sourceTree = "<group>";
@ -618,9 +638,11 @@
buildActionMask = 2147483647;
files = (
5BDF2CFE2791BE4400838ADB /* InputSourceHelper.swift in Sources */,
5B5F4F93279294A300922DC2 /* LanguageModelManager.mm in Sources */,
6A0D4ED215FC0D6400ABF4B3 /* InputMethodController.mm in Sources */,
6A0D4ED315FC0D6400ABF4B3 /* main.m in Sources */,
5BF4A6FE27844738007DC6E7 /* frmAboutWindow.m in Sources */,
5B5F4F8E27928F9300922DC2 /* vChewingLM.cpp in Sources */,
5BDF2D062791DFF200838ADB /* AppDelegate.swift in Sources */,
5BC3EE1B278FC48C00F5E44C /* VerticalCandidateController.swift in Sources */,
5B42B64027876FDC00BB9B9F /* UserOverrideModel.cpp in Sources */,