diff --git a/Source/Modules/LangModelRelated/LMInstantiator.h b/Source/Modules/LangModelRelated/LMInstantiator.h index d4f49eb9..594cbe23 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.h +++ b/Source/Modules/LangModelRelated/LMInstantiator.h @@ -21,9 +21,11 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #define LMInstantiator_H #include "AssociatedPhrases.h" +#include "CoreLM.h" #include "CNSLM.h" #include "ParselessLM.h" #include "PhraseReplacementMap.h" +#include "SymbolLM.h" #include "UserPhrasesLM.h" #include #include @@ -65,6 +67,18 @@ public: /// If the data model is already loaded. bool isDataModelLoaded(); + /// Asks to load the misc data model at the given path. + /// @param miscDataPath The path of the misc data model. + void loadMiscData(const char* miscDataPath); + /// If the misc data model is already loaded. + bool isMiscDataLoaded(); + + /// Asks to load the symbol data model at the given path. + /// @param symbolDataPath The path of the symbol data model. + void loadSymbolData(const char* symbolDataPath); + /// If the symbol data model is already loaded. + bool isSymbolDataLoaded(); + /// Asks to load the primary language model at the given path. /// @param cnsDataPath The path of the CNS data model. 
void loadCNSData(const char* cnsDataPath); @@ -126,6 +140,8 @@ protected: std::unordered_set& insertedValues); ParselessLM m_languageModel; + CoreLM m_miscModel; + SymbolLM m_symbolModel; CNSLM m_cnsModel; UserPhrasesLM m_userPhrases; UserPhrasesLM m_excludedPhrases; diff --git a/Source/Modules/LangModelRelated/LMInstantiator.mm b/Source/Modules/LangModelRelated/LMInstantiator.mm index 6f61e65c..4fe8d376 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.mm +++ b/Source/Modules/LangModelRelated/LMInstantiator.mm @@ -30,6 +30,7 @@ LMInstantiator::LMInstantiator() LMInstantiator::~LMInstantiator() { m_languageModel.close(); + m_miscModel.close(); m_userPhrases.close(); m_cnsModel.close(); m_excludedPhrases.close(); @@ -63,6 +64,32 @@ bool LMInstantiator::isCNSDataLoaded() return m_cnsModel.isLoaded(); } +void LMInstantiator::loadMiscData(const char* miscDataPath) +{ + if (miscDataPath) { + m_miscModel.close(); + m_miscModel.open(miscDataPath); + } +} + +bool LMInstantiator::isMiscDataLoaded() +{ + return m_miscModel.isLoaded(); +} + +void LMInstantiator::loadSymbolData(const char* symbolDataPath) +{ + if (symbolDataPath) { + m_symbolModel.close(); + m_symbolModel.open(symbolDataPath); + } +} + +bool LMInstantiator::isSymbolDataLoaded() +{ + return m_symbolModel.isLoaded(); +} + void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath, const char* excludedPhrasesDataPath) { @@ -110,6 +137,8 @@ const std::vector LMInstantiator::unigramsForKey(c } std::vector allUnigrams; + std::vector miscUnigrams; + std::vector symbolUnigrams; std::vector userUnigrams; std::vector cnsUnigrams; @@ -136,6 +165,16 @@ const std::vector LMInstantiator::unigramsForKey(c allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues); } + if (m_miscModel.hasUnigramsForKey(key)) { + std::vector rawMiscUnigrams = m_miscModel.unigramsForKey(key); + miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues); + } + + 
if (m_symbolModel.hasUnigramsForKey(key)) { + std::vector rawSymbolUnigrams = m_symbolModel.unigramsForKey(key); + symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues); + } + if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) { std::vector rawCNSUnigrams = m_cnsModel.unigramsForKey(key); cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues); @@ -143,6 +182,8 @@ const std::vector LMInstantiator::unigramsForKey(c allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end()); allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end()); + allUnigrams.insert(allUnigrams.begin(), miscUnigrams.begin(), miscUnigrams.end()); + allUnigrams.insert(allUnigrams.end(), symbolUnigrams.begin(), symbolUnigrams.end()); return allUnigrams; } diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h new file mode 100644 index 00000000..e2402f91 --- /dev/null +++ b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h @@ -0,0 +1,44 @@ +// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). +// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +2. 
No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, + except as required to fulfill notice requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef SYMBOLLM_H +#define SYMBOLLM_H + +#include +#include +#include +#include "LanguageModel.h" +#include "UserPhrasesLM.h" + +namespace vChewing { + +class SymbolLM: public UserPhrasesLM +{ +public: + virtual bool allowConsolidation() override { + return false; + } + virtual float overridedValue() override { + return -12.0; + } +}; + +} + +#endif diff --git a/Source/Modules/LangModelRelated/mgrLangModel.h b/Source/Modules/LangModelRelated/mgrLangModel.h index 0aa14424..e003fd70 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.h +++ b/Source/Modules/LangModelRelated/mgrLangModel.h @@ -36,13 +36,13 @@ NS_ASSUME_NONNULL_BEGIN + (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating; + (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled; + (void)setCNSEnabled:(BOOL)cnsEnabled; ++ (NSString *)specifyBundleDataPath:(NSString *)filename; + (NSString *)userPhrasesDataPath:(InputMode)mode; + (NSString *)userAssociatedPhrasesDataPath:(InputMode)mode; + (NSString *)excludedPhrasesDataPath:(InputMode)mode; + (NSString *)phraseReplacementDataPath:(InputMode)mode; @property (class, readonly, nonatomic) NSString *dataFolderPath; -@property (class, readonly, nonatomic) NSString *cnsDataPath; @end diff --git a/Source/Modules/LangModelRelated/mgrLangModel.mm 
b/Source/Modules/LangModelRelated/mgrLangModel.mm index c6f830f6..9c696d6c 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.mm +++ b/Source/Modules/LangModelRelated/mgrLangModel.mm @@ -45,19 +45,38 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing lm.loadLanguageModel([dataPath UTF8String]); } ++ (NSString *)specifyBundleDataPath:(NSString *)filenameWithoutExtension; +{ + Class cls = NSClassFromString(@"ctlInputMethod"); + return [[NSBundle bundleForClass:cls] pathForResource:filenameWithoutExtension ofType:@"txt"]; +} + + (void)loadDataModels { if (!gLangModelCHT.isDataModelLoaded()) { LTLoadLanguageModelFile(@"data-cht", gLangModelCHT); } - if (!gLangModelCHT.isCNSDataLoaded()){ - gLangModelCHT.loadCNSData([[self cnsDataPath] UTF8String]); + if (!gLangModelCHT.isMiscDataLoaded()) { + gLangModelCHT.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); } + if (!gLangModelCHT.isSymbolDataLoaded()){ + gLangModelCHT.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); + } + if (!gLangModelCHT.isCNSDataLoaded()){ + gLangModelCHT.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); + } + // ----------------- if (!gLangModelCHS.isDataModelLoaded()) { LTLoadLanguageModelFile(@"data-chs", gLangModelCHS); } + if (!gLangModelCHS.isMiscDataLoaded()) { + gLangModelCHS.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); + } + if (!gLangModelCHS.isSymbolDataLoaded()){ + gLangModelCHS.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); + } if (!gLangModelCHS.isCNSDataLoaded()){ - gLangModelCHS.loadCNSData([[self cnsDataPath] UTF8String]); + gLangModelCHS.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); } } @@ -67,8 +86,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing if (!gLangModelCHT.isDataModelLoaded()) { LTLoadLanguageModelFile(@"data-cht", gLangModelCHT); } 
+ if (!gLangModelCHT.isMiscDataLoaded()) { + gLangModelCHT.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); + } + if (!gLangModelCHT.isSymbolDataLoaded()){ + gLangModelCHT.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); + } if (!gLangModelCHT.isCNSDataLoaded()){ - gLangModelCHT.loadCNSData([[self cnsDataPath] UTF8String]); + gLangModelCHT.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); } } @@ -76,8 +101,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing if (!gLangModelCHS.isDataModelLoaded()) { LTLoadLanguageModelFile(@"data-chs", gLangModelCHS); } + if (!gLangModelCHS.isMiscDataLoaded()) { + gLangModelCHS.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); + } + if (!gLangModelCHS.isSymbolDataLoaded()){ + gLangModelCHS.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); + } if (!gLangModelCHS.isCNSDataLoaded()){ - gLangModelCHS.loadCNSData([[self cnsDataPath] UTF8String]); + gLangModelCHS.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); } } } @@ -308,12 +339,6 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing return [[self dataFolderPath] stringByAppendingPathComponent:fileName]; } -+ (NSString *)cnsDataPath -{ - Class cls = NSClassFromString(@"ctlInputMethod"); - return [[NSBundle bundleForClass:cls] pathForResource:@"char-kanji-cns" ofType:@"txt"]; -} - + (vChewing::LMInstantiator *)lmCHT { return &gLangModelCHT; diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index 41a88e28..b28153e9 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -11,6 +11,8 @@ 5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B11328827B94CFB00E58451 /* AppleKeyboardConverter.swift */; }; 5B2DB16F27AF6891006D874E /* data-chs.txt in 
Resources */ = {isa = PBXBuildFile; fileRef = 5B2DB16D27AF6891006D874E /* data-chs.txt */; }; 5B2DB17027AF6891006D874E /* data-cht.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B2DB16E27AF6891006D874E /* data-cht.txt */; }; + 5B4D47C127C9304000220DDC /* data-zhuyinwen.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B4D47BD27C9304000220DDC /* data-zhuyinwen.txt */; }; + 5B4D47C227C9304000220DDC /* data-symbols.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B4D47BE27C9304000220DDC /* data-symbols.txt */; }; 5B62A31727AE73A700A19448 /* unzip.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A30927AE73A700A19448 /* unzip.m */; }; 5B62A31827AE73A700A19448 /* zip.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A30A27AE73A700A19448 /* zip.m */; }; 5B62A31927AE73A700A19448 /* ioapi.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A30B27AE73A700A19448 /* ioapi.m */; }; @@ -174,6 +176,8 @@ 5B2DB16E27AF6891006D874E /* data-cht.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = "data-cht.txt"; path = "Data/data-cht.txt"; sourceTree = ""; }; 5B2DB17127AF8771006D874E /* Makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; name = Makefile; path = Data/Makefile; sourceTree = ""; }; 5B30F11227BA568800484E24 /* vChewingKeyLayout.bundle */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.plug-in"; path = vChewingKeyLayout.bundle; sourceTree = ""; }; + 5B4D47BD27C9304000220DDC /* data-zhuyinwen.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = "data-zhuyinwen.txt"; path = "../../libvchewing-data/components/common/data-zhuyinwen.txt"; sourceTree = ""; }; + 5B4D47BE27C9304000220DDC /* data-symbols.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = "data-symbols.txt"; path = "../../libvchewing-data/components/common/data-symbols.txt"; sourceTree = ""; }; 5B62A30927AE73A700A19448 /* unzip.m */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = unzip.m; sourceTree = ""; }; 5B62A30A27AE73A700A19448 /* zip.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = zip.m; sourceTree = ""; }; 5B62A30B27AE73A700A19448 /* ioapi.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = ioapi.m; sourceTree = ""; }; @@ -206,6 +210,7 @@ 5B73FB5F27B2BE1300E9BF49 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = ""; }; 5B7BC4AF27AFFBE800F66C24 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Source/WindowNIBs/Base.lproj/frmPrefWindow.xib; sourceTree = ""; }; 5B7BC4B227AFFC0B00F66C24 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = Source/WindowNIBs/en.lproj/frmPrefWindow.strings; sourceTree = ""; }; + 5B8F43ED27C9BC220069AC27 /* SymbolLM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SymbolLM.h; sourceTree = ""; }; 5BBBB75D27AED54C0023B93A /* Beep.m4a */ = {isa = PBXFileReference; lastKnownFileType = file; path = Beep.m4a; sourceTree = ""; }; 5BBBB75E27AED54C0023B93A /* Fart.m4a */ = {isa = PBXFileReference; lastKnownFileType = file; path = Fart.m4a; sourceTree = ""; }; 5BBBB76627AED5DB0023B93A /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/frmNonModalAlertWindow.xib; sourceTree = ""; }; @@ -365,6 +370,7 @@ 5B4D47B627C9186900220DDC /* InstantiatedModels */ = { isa = PBXGroup; children = ( + 5B8F43ED27C9BC220069AC27 /* SymbolLM.h */, 5B62A32B27AE78B000A19448 /* CNSLM.h */, ); path = InstantiatedModels; @@ -596,6 +602,8 @@ 5BD05B8027B22F3C004C4F1D /* char-kanji-cns.txt */, 5B2DB16D27AF6891006D874E /* data-chs.txt */, 5B2DB16E27AF6891006D874E /* data-cht.txt */, + 5B4D47BE27C9304000220DDC /* data-symbols.txt */, + 
5B4D47BD27C9304000220DDC /* data-zhuyinwen.txt */, 5B2DB17127AF8771006D874E /* Makefile */, ); name = Data; @@ -962,6 +970,7 @@ D4E33D8A27A838CF006DB1CF /* Localizable.strings in Resources */, 5BDCBB2E27B4E67A00D0CC59 /* vChewingPhraseEditor.app in Resources */, 5BBBB76027AED54C0023B93A /* Fart.m4a in Resources */, + 5B4D47C227C9304000220DDC /* data-symbols.txt in Resources */, 6A2E40F6253A69DA00D1AE1D /* Images.xcassets in Resources */, D4E33D8F27A838F0006DB1CF /* InfoPlist.strings in Resources */, 5BBBB76B27AED5DB0023B93A /* frmNonModalAlertWindow.xib in Resources */, @@ -969,6 +978,7 @@ 5BBBB77527AED70B0023B93A /* MenuIcon-SCVIM.png in Resources */, 5B7BC4B027AFFBE800F66C24 /* frmPrefWindow.xib in Resources */, 5BD05B8127B22F3C004C4F1D /* char-kanji-cns.txt in Resources */, + 5B4D47C127C9304000220DDC /* data-zhuyinwen.txt in Resources */, 5B2DB17027AF6891006D874E /* data-cht.txt in Resources */, 5BBBB77327AED70B0023B93A /* MenuIcon-TCVIM@2x.png in Resources */, 5BBBB77627AED70B0023B93A /* MenuIcon-TCVIM.png in Resources */,