diff --git a/Source/AppDelegate.swift b/Source/AppDelegate.swift
index f93bf8b2..f8d348fb 100644
--- a/Source/AppDelegate.swift
+++ b/Source/AppDelegate.swift
@@ -51,6 +51,7 @@ class AppDelegate: NSObject, NSApplicationDelegate, NonModalAlertWindowControlle
private var updateNextStepURL: URL?
func applicationDidFinishLaunching(_ notification: Notification) {
+ LanguageModelManager.setupDataModelValueConverter()
LanguageModelManager.loadDataModels()
LanguageModelManager.loadUserPhrases()
LanguageModelManager.loadUserPhraseReplacement()
diff --git a/Source/Base.lproj/preferences.xib b/Source/Base.lproj/preferences.xib
index 451d8d39..25ee9a38 100644
--- a/Source/Base.lproj/preferences.xib
+++ b/Source/Base.lproj/preferences.xib
@@ -19,14 +19,14 @@
-
+
-
+
-
+
@@ -35,7 +35,7 @@
-
+
@@ -47,7 +47,7 @@
-
+
@@ -56,7 +56,7 @@
-
+
@@ -84,7 +84,7 @@
-
+
@@ -93,7 +93,7 @@
-
+
@@ -114,7 +114,7 @@
-
+
@@ -134,7 +134,7 @@
-
+
@@ -143,7 +143,7 @@
-
+
@@ -152,7 +152,7 @@
-
+
@@ -161,7 +161,7 @@
-
+
@@ -187,7 +187,7 @@
-
+
@@ -212,8 +212,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
@@ -239,7 +274,7 @@
-
+
@@ -261,12 +296,8 @@
-
-
-
-
+
+
+
+
-
+
diff --git a/Source/Engine/McBopomofoLM.cpp b/Source/Engine/McBopomofoLM.cpp
index ea85c2dc..9c73bcef 100644
--- a/Source/Engine/McBopomofoLM.cpp
+++ b/Source/Engine/McBopomofoLM.cpp
@@ -24,7 +24,6 @@
#include "McBopomofoLM.h"
#include
#include
-#include
using namespace McBopomofo;
@@ -49,7 +48,7 @@ void McBopomofoLM::loadLanguageModel(const char* languageModelDataPath)
}
void McBopomofoLM::loadUserPhrases(const char* userPhrasesDataPath,
- const char* excludedPhrasesDataPath)
+ const char* excludedPhrasesDataPath)
{
if (userPhrasesDataPath) {
m_userPhrases.close();
@@ -61,7 +60,8 @@ void McBopomofoLM::loadUserPhrases(const char* userPhrasesDataPath,
}
}
-void McBopomofoLM::loadPhraseReplacementMap(const char* phraseReplacementPath) {
+void McBopomofoLM::loadPhraseReplacementMap(const char* phraseReplacementPath)
+{
if (phraseReplacementPath) {
m_phraseReplacement.close();
m_phraseReplacement.open(phraseReplacementPath);
@@ -75,75 +75,37 @@ const vector McBopomofoLM::bigramsForKeys(const string& preceedingKey, c
const vector McBopomofoLM::unigramsForKey(const string& key)
{
- vector unigrams;
+ vector allUnigrams;
vector userUnigrams;
- // Use unordered_set so that you don't have to do O(n*m)
unordered_set excludedValues;
- unordered_set userValues;
+ unordered_set insertedValues;
if (m_excludedPhrases.hasUnigramsForKey(key)) {
vector excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
transform(excludedUnigrams.begin(), excludedUnigrams.end(),
- inserter(excludedValues, excludedValues.end()),
- [](const Unigram &u) { return u.keyValue.value; });
+ inserter(excludedValues, excludedValues.end()),
+ [](const Unigram& u) { return u.keyValue.value; });
}
if (m_userPhrases.hasUnigramsForKey(key)) {
vector rawUserUnigrams = m_userPhrases.unigramsForKey(key);
- vector filterredUserUnigrams = m_userPhrases.unigramsForKey(key);
-
- for (auto&& unigram : rawUserUnigrams) {
- if (excludedValues.find(unigram.keyValue.value) == excludedValues.end()) {
- filterredUserUnigrams.push_back(unigram);
- }
- }
-
- transform(filterredUserUnigrams.begin(), filterredUserUnigrams.end(),
- inserter(userValues, userValues.end()),
- [](const Unigram &u) { return u.keyValue.value; });
-
- if (m_phraseReplacementEnabled) {
- for (auto&& unigram : filterredUserUnigrams) {
- string value = unigram.keyValue.value;
- string replacement = m_phraseReplacement.valueForKey(value);
- if (replacement != "") {
- unigram.keyValue.value = replacement;
- }
- unigrams.push_back(unigram);
- }
- } else {
- unigrams = filterredUserUnigrams;
- }
+ userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
}
if (m_languageModel.hasUnigramsForKey(key)) {
- vector globalUnigrams = m_languageModel.unigramsForKey(key);
-
- for (auto&& unigram : globalUnigrams) {
- string value = unigram.keyValue.value;
- if (excludedValues.find(value) == excludedValues.end() &&
- userValues.find(value) == userValues.end()) {
- if (m_phraseReplacementEnabled) {
- string replacement = m_phraseReplacement.valueForKey(value);
- if (replacement != "") {
- unigram.keyValue.value = replacement;
- }
- }
- unigrams.push_back(unigram);
- }
- }
+ vector rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
+ allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
}
- unigrams.insert(unigrams.begin(), userUnigrams.begin(), userUnigrams.end());
- return unigrams;
+ allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end());
+ return allUnigrams;
}
bool McBopomofoLM::hasUnigramsForKey(const string& key)
{
if (!m_excludedPhrases.hasUnigramsForKey(key)) {
- return m_userPhrases.hasUnigramsForKey(key) ||
- m_languageModel.hasUnigramsForKey(key);
+ return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key);
}
return unigramsForKey(key).size() > 0;
@@ -159,3 +121,52 @@ bool McBopomofoLM::phraseReplacementEnabled()
return m_phraseReplacementEnabled;
}
+// Stores the flag that filterAndTransformUnigrams() checks before it runs
+// the external converter on a unigram value.
+void McBopomofoLM::setExternalConverterEnabled(bool enabled)
+{
+    this->m_externalConverterEnabled = enabled;
+}
+
+// Reports the current value of the external-converter flag.
+bool McBopomofoLM::externalConverterEnabled()
+{
+    const bool isEnabled = m_externalConverterEnabled;
+    return isEnabled;
+}
+
+// Remembers the callable that filterAndTransformUnigrams() applies to each
+// unigram value while the external converter is enabled.
+void McBopomofoLM::setExternalConverter(std::function externalConverter)
+{
+    this->m_externalConverter = externalConverter;
+}
+
+// Builds a new list from `unigrams` by, in order: dropping entries whose
+// original value is in `excludedValues`, applying the phrase replacement map
+// (when enabled), applying the external converter (when enabled and set), and
+// dropping entries whose final value is already in `insertedValues`.
+//
+// Side effect: every value that makes it into the result is also added to
+// `insertedValues`, so later calls sharing the same set skip duplicates.
+const vector McBopomofoLM::filterAndTransformUnigrams(const vector unigrams, const unordered_set& excludedValues, unordered_set& insertedValues)
+{
+    vector filtered;
+
+    for (const auto& source : unigrams) {
+        // Exclusion is matched against the untouched value, before any
+        // replacement or conversion is applied.
+        const string& rawValue = source.keyValue.value;
+        if (excludedValues.count(rawValue) > 0) {
+            continue;
+        }
+
+        string converted = rawValue;
+        if (m_phraseReplacementEnabled) {
+            const string substitute = m_phraseReplacement.valueForKey(converted);
+            // An empty result leaves the value unchanged.
+            if (!substitute.empty()) {
+                converted = substitute;
+            }
+        }
+        if (m_externalConverterEnabled && m_externalConverter) {
+            converted = m_externalConverter(converted);
+        }
+
+        // insert() reports whether the value was new, so one lookup both
+        // detects duplicates and records the value.
+        if (!insertedValues.insert(converted).second) {
+            continue;
+        }
+
+        Unigram entry;
+        entry.keyValue.key = source.keyValue.key;
+        entry.keyValue.value = converted;
+        entry.score = source.score;
+        filtered.push_back(entry);
+    }
+    return filtered;
+}
diff --git a/Source/Engine/McBopomofoLM.h b/Source/Engine/McBopomofoLM.h
index 00babc01..de90a4a3 100644
--- a/Source/Engine/McBopomofoLM.h
+++ b/Source/Engine/McBopomofoLM.h
@@ -28,34 +28,91 @@
#include "UserPhrasesLM.h"
#include "ParselessLM.h"
#include "PhraseReplacementMap.h"
+#include
namespace McBopomofo {
using namespace Formosa::Gramambular;
+/// McBopomofoLM is a facade for managing a set of models including
+/// the input method language model, user phrases and excluded phrases.
+///
+/// It is the primary model class that the input controller and grammar builder
+/// of McBopomofo talk to. When the grammar builder starts to build a sentence
+/// from a series of BPMF readings, it passes the readings to the model to see
+/// if there are valid unigrams, and uses the returned unigrams to produce the
+/// final results.
+///
+/// McBopomofoLM combines and transforms the unigrams from the primary language
+/// model and user phrases. The process is
+///
+/// 1) Get the original unigrams.
+/// 2) Drop the unigrams whose value is contained in the exclusion map.
+/// 3) Replace the values of the unigrams using the phrase replacement map.
+/// 4) Replace the values of the unigrams using an external converter lambda.
+/// 5) Drop the duplicated phrases.
+///
+/// The controller can ask the model to load the primary input method language
+/// model while launching and to load the user phrases anytime if the custom
+/// files are modified. It does not keep references to the data paths, so
+/// you have to pass the paths whenever you ask it to load.
class McBopomofoLM : public LanguageModel {
public:
McBopomofoLM();
~McBopomofoLM();
- void loadLanguageModel(const char* languageModelDataPath);
- void loadUserPhrases(const char* userPhrasesDataPath,
- const char* excludedPhrasesDataPath);
+ /// Asks to load the primary language model at the given path.
+ /// @param languageModelPath The path of the language model.
+ void loadLanguageModel(const char* languageModelPath);
+ /// Asks to load the user phrases and excluded phrases at the given path.
+ /// @param userPhrasesPath The path of user phrases.
+ /// @param excludedPhrasesPath The path of excluded phrases.
+ void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath);
+ /// Asks to load the phrase replacement table at the given path.
+ /// @param phraseReplacementPath The path of the phrase replacement table.
void loadPhraseReplacementMap(const char* phraseReplacementPath);
+ /// Not implemented since we do not have data to provide bigram function.
const vector bigramsForKeys(const string& preceedingKey, const string& key);
+ /// Returns a list of available unigrams for the given key.
+ /// @param key A string that represents the BPMF reading or a symbol key.
+ /// For example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on.
const vector unigramsForKey(const string& key);
+ /// If the model has unigrams for the given key.
+ /// @param key The key.
bool hasUnigramsForKey(const string& key);
+ /// Enables or disables phrase replacement.
void setPhraseReplacementEnabled(bool enabled);
+ /// If phrase replacement is enabled or not.
bool phraseReplacementEnabled();
+ /// Enables or disables the external converter.
+ void setExternalConverterEnabled(bool enabled);
+ /// If the external converter is enabled or not.
+ bool externalConverterEnabled();
+ /// Sets the callable used to convert the values of unigrams.
+ void setExternalConverter(std::function externalConverter);
+
protected:
+ /// Filters and converts the input unigrams and returns a new list of unigrams.
+ ///
+ /// @param unigrams The unigrams to be processed.
+ /// @param excludedValues The values of the unigrams to exclude.
+ /// @param insertedValues The values for unigrams already in the results.
+ /// It helps to prevent duplicated unigrams. Please note that the method
+ /// has a side effect that it inserts values to `insertedValues`.
+ const vector filterAndTransformUnigrams(const vector unigrams,
+ const std::unordered_set& excludedValues,
+ std::unordered_set& insertedValues);
+
ParselessLM m_languageModel;
UserPhrasesLM m_userPhrases;
UserPhrasesLM m_excludedPhrases;
PhraseReplacementMap m_phraseReplacement;
bool m_phraseReplacementEnabled;
+ /// Defaults to disabled so the flag has a defined value even when it is
+ /// read before setExternalConverterEnabled() has been called.
+ bool m_externalConverterEnabled = false;
+ std::function m_externalConverter;
};
};
diff --git a/Source/InputMethodController.mm b/Source/InputMethodController.mm
index 762e7665..a1e30e27 100644
--- a/Source/InputMethodController.mm
+++ b/Source/InputMethodController.mm
@@ -243,6 +243,8 @@ static double FindHighestScore(const vector& nodes, double epsilon)
Preferences.keyboardLayout = KeyboardLayoutStandard;
}
+ _languageModel->setExternalConverterEnabled(Preferences.chineseConversionStyle == 1);
+
[(AppDelegate *)[NSApp delegate] checkForUpdate];
}
@@ -275,12 +277,14 @@ static double FindHighestScore(const vector& nodes, double epsilon)
if ([value isKindOfClass:[NSString class]] && [value isEqual:kPlainBopomofoModeIdentifier]) {
newInputMode = kPlainBopomofoModeIdentifier;
newLanguageModel = [LanguageModelManager languageModelPlainBopomofo];
+ newLanguageModel->setPhraseReplacementEnabled(false);
}
else {
newInputMode = kBopomofoModeIdentifier;
newLanguageModel = [LanguageModelManager languageModelMcBopomofo];
newLanguageModel->setPhraseReplacementEnabled(Preferences.phraseReplacementEnabled);
}
+ newLanguageModel->setExternalConverterEnabled(Preferences.chineseConversionStyle == 1);
// Only apply the changes if the value is changed
if (![_inputMode isEqualToString:newInputMode]) {
@@ -312,8 +316,16 @@ static double FindHighestScore(const vector& nodes, double epsilon)
#pragma mark - IMKServerInput protocol methods
-- (NSString *)_convertToSimplifiedChinese:(NSString *)text
+- (NSString *)_convertToSimplifiedChineseIfRequired:(NSString *)text
{
+ if (!Preferences.chineseConversionEnabled) {
+ return text;
+ }
+
+ if (Preferences.chineseConversionStyle == 1) {
+ return text;
+ }
+
if (Preferences.chineneConversionEngine == 1) {
return [VXHanConvert convertToSimplifiedFrom:text];
}
@@ -333,11 +345,7 @@ static double FindHighestScore(const vector& nodes, double epsilon)
}
// Chinese conversion.
- NSString *buffer = _composingBuffer;
-
- if (Preferences.chineseConversionEnabled) {
- buffer = [self _convertToSimplifiedChinese:_composingBuffer];
- }
+ NSString *buffer = [self _convertToSimplifiedChineseIfRequired:_composingBuffer];
// commit the text, clear the state
[client insertText:buffer replacementRange:NSMakeRange(NSNotFound, NSNotFound)];
@@ -483,10 +491,7 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
NodeAnchor &anchor = _walkedNodes[0];
NSString *popedText = [NSString stringWithUTF8String:anchor.node->currentKeyValue().value.c_str()];
// Chinese conversion.
- BOOL chineseConversionEnabled = Preferences.chineseConversionEnabled;
- if (chineseConversionEnabled) {
- popedText = [self _convertToSimplifiedChinese:popedText];
- }
+ popedText = [self _convertToSimplifiedChineseIfRequired:popedText];
[client insertText:popedText replacementRange:NSMakeRange(NSNotFound, NSNotFound)];
_builder->removeHeadReadings(anchor.spanningLength);
}
@@ -1504,7 +1509,7 @@ NS_INLINE size_t max(size_t a, size_t b) { return a > b ? a : b; }
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
- [Preferences tooglePhraseReplacementEnabled];
+ [Preferences toogleHalfWidthPunctuationEnabled];
#pragma GCC diagnostic pop
}
diff --git a/Source/LanguageModelManager.h b/Source/LanguageModelManager.h
index 6a82c47a..ce28eaf5 100644
--- a/Source/LanguageModelManager.h
+++ b/Source/LanguageModelManager.h
@@ -9,6 +9,7 @@ NS_ASSUME_NONNULL_BEGIN
+ (void)loadDataModels;
+ (void)loadUserPhrases;
+ (void)loadUserPhraseReplacement;
++ (void)setupDataModelValueConverter;
+ (BOOL)checkIfUserLanguageModelFilesExist;
+ (BOOL)writeUserPhrase:(NSString *)userPhrase;
diff --git a/Source/LanguageModelManager.mm b/Source/LanguageModelManager.mm
index 189e2eb6..bdf0ac9f 100644
--- a/Source/LanguageModelManager.mm
+++ b/Source/LanguageModelManager.mm
@@ -4,6 +4,10 @@
#import
#import "OVStringHelper.h"
#import "OVUTF8Helper.h"
+#import "McBopomofo-Swift.h"
+
+@import VXHanConvert;
+@import OpenCCBridge;
using namespace std;
using namespace Formosa::Gramambular;
@@ -43,6 +47,31 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, McBopomo
gLanguageModelMcBopomofo.loadPhraseReplacementMap([[self phraseReplacementDataPathMcBopomofo] UTF8String]);
}
+// Installs one shared value converter on both language models
+// (gLanguageModelMcBopomofo and gLanguageModelPlainBopomofo).
+//
+// The converter returns its input unchanged when Chinese conversion is
+// disabled, when the conversion style is 0 (convert the output rather than
+// the phrase models), or when the text cannot be converted. Otherwise it
+// returns the Simplified Chinese form produced by the configured engine.
++ (void)setupDataModelValueConverter
+{
+    auto converter = [] (string input) {
+        if (!Preferences.chineseConversionEnabled) {
+            return input;
+        }
+
+        // Style 0 converts the output text, so the model values stay as is.
+        if (Preferences.chineseConversionStyle == 0) {
+            return input;
+        }
+
+        NSString *text = [NSString stringWithUTF8String:input.c_str()];
+        if (text == nil) {
+            // The bytes could not be turned into an NSString; keep the
+            // original value instead of converting nothing.
+            return input;
+        }
+
+        // Engine 1 is VXHanConvert; any other value uses OpenCC.
+        if (Preferences.chineneConversionEngine == 1) {
+            text = [VXHanConvert convertToSimplifiedFrom:text];
+        }
+        else {
+            text = [OpenCCBridge convertToSimplified:text];
+        }
+
+        // Messaging nil yields a NULL pointer, and constructing a
+        // std::string from NULL is undefined behaviour, so fall back to the
+        // unconverted input when no C string is available.
+        const char *convertedBytes = text.UTF8String;
+        if (convertedBytes == NULL) {
+            return input;
+        }
+        return string(convertedBytes);
+    };
+
+    gLanguageModelMcBopomofo.setExternalConverter(converter);
+    gLanguageModelPlainBopomofo.setExternalConverter(converter);
+}
+
+ (BOOL)checkIfUserDataFolderExists
{
NSString *folderPath = [self dataFolderPath];
diff --git a/Source/McBopomofo-Bridging-Header.h b/Source/McBopomofo-Bridging-Header.h
index 8310cc67..69a7fc4f 100644
--- a/Source/McBopomofo-Bridging-Header.h
+++ b/Source/McBopomofo-Bridging-Header.h
@@ -8,4 +8,5 @@
+ (void)loadDataModels;
+ (void)loadUserPhrases;
+ (void)loadUserPhraseReplacement;
++ (void)setupDataModelValueConverter;
@end
diff --git a/Source/Preferences.swift b/Source/Preferences.swift
index e1f2303c..61ecead0 100644
--- a/Source/Preferences.swift
+++ b/Source/Preferences.swift
@@ -50,8 +50,9 @@ private let kEscToCleanInputBufferKey = "EscToCleanInputBuffer"
private let kCandidateTextFontName = "CandidateTextFontName"
private let kCandidateKeyLabelFontName = "CandidateKeyLabelFontName"
private let kCandidateKeys = "CandidateKeys"
-private let kChineseConversionEngineKey = "ChineseConversionEngine"
private let kPhraseReplacementEnabledKey = "PhraseReplacementEnabled"
+private let kChineseConversionEngineKey = "ChineseConversionEngine"
+private let kChineseConversionStyle = "ChineseConversionStyle"
private let kDefaultCandidateListTextSize: CGFloat = 16
private let kMinKeyLabelSize: CGFloat = 10
@@ -217,6 +218,20 @@ struct ComposingKeys {
}
}
+/// Selects what the Chinese conversion is applied to.
+@objc enum ChineseConversionStyle: Int {
+    /// Convert the output.
+    case output = 0
+    /// Convert the phrase models.
+    case model = 1
+
+    /// The string identifier of the style.
+    var name: String {
+        switch self {
+        case .output: return "output"
+        case .model: return "model"
+        }
+    }
+}
+
// MARK: -
class Preferences: NSObject {
@@ -285,13 +300,6 @@ class Preferences: NSObject {
kDefaultKeys
}
- @UserDefault(key: kChineseConversionEngineKey, defaultValue: 0)
- @objc static var chineneConversionEngine: Int
-
- @objc static var chineneConversionEngineName: String? {
- return ChineseConversionEngine(rawValue: chineneConversionEngine)?.name
- }
-
@UserDefault(key: kPhraseReplacementEnabledKey, defaultValue: false)
@objc static var phraseReplacementEnabled: Bool
@@ -300,4 +308,26 @@ class Preferences: NSObject {
return phraseReplacementEnabled;
}
+ /// The conversion engine.
+ ///
+ /// - 0: OpenCC
+ /// - 1: VXHanConvert
+ @UserDefault(key: kChineseConversionEngineKey, defaultValue: 0)
+ @objc static var chineneConversionEngine: Int
+
+ /// The name of the selected conversion engine, or `nil` when the stored
+ /// raw value does not map to a `ChineseConversionEngine` case.
+ @objc static var chineneConversionEngineName: String? {
+     guard let engine = ChineseConversionEngine(rawValue: chineneConversionEngine) else {
+         return nil
+     }
+     return engine.name
+ }
+
+ /// The conversion style.
+ ///
+ /// - 0: convert the output
+ /// - 1: convert the phrase models.
+ @UserDefault(key: kChineseConversionStyle, defaultValue: 0)
+ @objc static var chineseConversionStyle: Int
+
+ /// The name of the selected conversion style, or `nil` when the stored
+ /// raw value does not map to a `ChineseConversionStyle` case.
+ @objc static var chineseConversionStyleName: String? {
+     guard let style = ChineseConversionStyle(rawValue: chineseConversionStyle) else {
+         return nil
+     }
+     return style.name
+ }
+
}
diff --git a/Source/zh-Hant.lproj/preferences.xib b/Source/zh-Hant.lproj/preferences.xib
index 6cd9981c..3aad70ae 100644
--- a/Source/zh-Hant.lproj/preferences.xib
+++ b/Source/zh-Hant.lproj/preferences.xib
@@ -19,14 +19,14 @@
-
+
-
+
-
+
@@ -49,7 +49,7 @@
-
+
@@ -58,7 +58,7 @@
-
+
@@ -70,7 +70,7 @@
-
+
@@ -79,7 +79,7 @@
-
+
@@ -88,7 +88,7 @@
-
+
@@ -97,7 +97,7 @@
-
+
@@ -123,7 +123,7 @@
-
+
@@ -149,7 +149,7 @@
-
+
@@ -172,7 +172,7 @@
-
+
@@ -200,7 +200,7 @@
-
+
@@ -209,7 +209,7 @@
-
+
@@ -229,16 +229,16 @@
-
+
-
+
-
+
@@ -263,12 +263,43 @@
-
-
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+