diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/CNSLM.mm b/Source/Modules/LangModelRelated/SubLanguageModels/CNSLM.mm deleted file mode 100644 index 6c4ddcad..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/CNSLM.mm +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "CNSLM.h" - -#include -#include -#include -#include -#include -#include - -#include "KeyValueBlobReader.h" - -namespace vChewing { - -CNSLM::CNSLM() - : fd(-1) - , data(0) - , length(0) -{ -} - -CNSLM::~CNSLM() -{ - if (data) { - close(); - } -} - -bool CNSLM::isLoaded() -{ - if (data) { - return true; - } - return false; -} - -bool CNSLM::open(const char *path) -{ - if (data) { - syslog(LOG_CONS, "CNSLM: Failed at Open Step 1.\n"); - return false; - } - - fd = ::open(path, O_RDONLY); - if (fd == -1) { - syslog(LOG_CONS, "CNSLM: Failed at Open Step 2.\n"); - printf("open:: file not exist"); - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) { - syslog(LOG_CONS, "CNSLM: Failed at Open Step 3.\n"); - printf("open:: cannot open file"); - return false; - } - - length = (size_t)sb.st_size; - - data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) { - ::close(fd); - syslog(LOG_CONS, "CNSLM: Failed at Open Step 4.\n"); - return false; - } - - KeyValueBlobReader reader(static_cast(data), length); - KeyValueBlobReader::KeyValue keyValue; - KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { - // We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading. - keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key); - } - // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) { - // close(); - syslog(LOG_CONS, "CNSLM: Failed at Open Step 5. On Error Resume Next.\n"); - // return false; - } - return true; -} - -void CNSLM::close() -{ - if (data) { - munmap(data, length); - ::close(fd); - data = 0; - } - - keyRowMap.clear(); -} - -void CNSLM::dump() -{ - for (const auto& entry : keyRowMap) { - const std::vector& rows = entry.second; - for (const auto& row : rows) { - std::cerr << row.key << " " << row.value << "\n"; - } - } -} - -const std::vector CNSLM::bigramsForKeys(const std::string& preceedingKey, const std::string& key) -{ - return std::vector(); -} - -const std::vector CNSLM::unigramsForKey(const std::string& key) -{ - std::vector v; - auto iter = keyRowMap.find(key); - if (iter != keyRowMap.end()) { - const std::vector& rows = iter->second; - for (const auto& row : rows) { - Taiyan::Gramambular::Unigram g; - g.keyValue.key = row.key; - g.keyValue.value = row.value; - g.score = -17.0; - v.push_back(g); - } - } - - return v; -} - -bool CNSLM::hasUnigramsForKey(const std::string& key) -{ - return keyRowMap.find(key) != keyRowMap.end(); -} - -}; // namespace vChewing diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/CNSLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h similarity index 67% rename from Source/Modules/LangModelRelated/SubLanguageModels/CNSLM.h rename to Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h index d15bc734..80c47b96 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/CNSLM.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h @@ -24,35 +24,19 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include #include #include "LanguageModel.h" +#include "UserPhrasesLM.h" namespace vChewing { -class CNSLM : public Taiyan::Gramambular::LanguageModel +class CNSLM: public UserPhrasesLM { public: - CNSLM(); - ~CNSLM(); - - bool isLoaded(); - bool open(const char *path); - void close(); - void dump(); - - virtual const std::vector bigramsForKeys(const std::string& preceedingKey, const std::string& key); - virtual const std::vector unigramsForKey(const std::string& key); - virtual bool hasUnigramsForKey(const std::string& key); - -protected: - struct Row { - Row(std::string_view& k, std::string_view& v) : key(k), value(v) {} - std::string_view key; - std::string_view value; - }; - - std::map> keyRowMap; - int fd; - void *data; - size_t length; + virtual bool allowConsolidation() override { + return false; + } + virtual float overridedValue() override { + return -11.0; + } }; } diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h index 6d0f30d6..33d57c61 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h @@ -37,7 +37,15 @@ public: bool open(const char *path); void close(); void dump(); - + + virtual bool allowConsolidation() { + return true; + } + + virtual float overridedValue() { + return 0.0; + } + virtual const std::vector bigramsForKeys(const std::string& preceedingKey, const std::string& key); virtual const std::vector unigramsForKey(const std::string& key); virtual bool hasUnigramsForKey(const std::string& key); diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm index cd82d11c..6ae55429 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm +++ b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm @@ -59,8 +59,10 @@ bool UserPhrasesLM::open(const char *path) return false; } - LMConsolidator::FixEOF(path); - LMConsolidator::ConsolidateContent(path, true); + if (allowConsolidation()) { + LMConsolidator::FixEOF(path); + LMConsolidator::ConsolidateContent(path, true); + } fd = ::open(path, O_RDONLY); if (fd == -1) { @@ -134,7 +136,7 @@ const std::vector UserPhrasesLM::unigramsForKey(co Taiyan::Gramambular::Unigram g; g.keyValue.key = row.key; g.keyValue.value = row.value; - g.score = 0.0; + g.score = overridedValue(); v.push_back(g); } } diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index fdbd60be..41a88e28 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -18,7 +18,6 @@ 5B62A31B27AE73A700A19448 /* SSZipArchive.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A31327AE73A700A19448 /* SSZipArchive.m */; }; 5B62A31C27AE73A700A19448 /* AWFileHash.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A31627AE73A700A19448 /* AWFileHash.m */; }; 5B62A32927AE77D100A19448 /* FSEventStreamHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */; }; - 5B62A32E27AE78B000A19448 /* CNSLM.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32A27AE78B000A19448 /* CNSLM.mm */; }; 5B62A32F27AE78B000A19448 /* CoreLM.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32D27AE78B000A19448 /* CoreLM.mm */; }; 5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A33127AE792F00A19448 /* InputSourceHelper.swift */; }; 5B62A33627AE795800A19448 /* PreferencesModule.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A33527AE795800A19448 /* PreferencesModule.swift */; }; @@ -191,7 +190,6 @@ 5B62A32627AE77BB00A19448 /* LMConsolidator.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LMConsolidator.h; sourceTree = ""; }; 5B62A32727AE77BB00A19448 /* LMConsolidator.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LMConsolidator.mm; sourceTree = ""; }; 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FSEventStreamHelper.swift; sourceTree = ""; }; - 5B62A32A27AE78B000A19448 /* CNSLM.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CNSLM.mm; sourceTree = ""; }; 5B62A32B27AE78B000A19448 /* CNSLM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CNSLM.h; sourceTree = ""; }; 5B62A32C27AE78B000A19448 /* CoreLM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CoreLM.h; sourceTree = ""; }; 5B62A32D27AE78B000A19448 /* CoreLM.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CoreLM.mm; sourceTree = ""; }; @@ -364,6 +362,14 @@ name = MiscRootFiles; sourceTree = ""; }; + 5B4D47B627C9186900220DDC /* InstantiatedModels */ = { + isa = PBXGroup; + children = ( + 5B62A32B27AE78B000A19448 /* CNSLM.h */, + ); + path = InstantiatedModels; + sourceTree = ""; + }; 5B62A30127AE732800A19448 /* 3rdParty */ = { isa = PBXGroup; children = ( @@ -495,8 +501,7 @@ 5B62A32527AE758000A19448 /* SubLanguageModels */ = { isa = PBXGroup; children = ( - 5B62A32B27AE78B000A19448 /* CNSLM.h */, - 5B62A32A27AE78B000A19448 /* CNSLM.mm */, + 5B4D47B627C9186900220DDC /* InstantiatedModels */, 5B62A32C27AE78B000A19448 /* CoreLM.h */, 5B62A32D27AE78B000A19448 /* CoreLM.mm */, D41355DC278EA3ED005E5CBD /* UserPhrasesLM.mm */, @@ -1054,7 +1059,6 @@ 5B62A34A27AE7CD900A19448 /* NotifierController.swift in Sources */, 5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */, 5B62A31827AE73A700A19448 /* zip.m in Sources */, - 5B62A32E27AE78B000A19448 /* CNSLM.mm in Sources */, D41355DB278E6D17005E5CBD /* LMInstantiator.mm in Sources */, 5B62A31A27AE73A700A19448 /* mztools.m in Sources */, 5B62A32927AE77D100A19448 /* FSEventStreamHelper.swift in Sources */,