diff --git a/Source/Modules/FileHandlers/LMConsolidator.h b/Source/Modules/FileHandlers/LMConsolidator.h deleted file mode 100644 index 9bda0d9e..00000000 --- a/Source/Modules/FileHandlers/LMConsolidator.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef LMConsolidator_hpp -#define LMConsolidator_hpp - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -namespace vChewing -{ - -class LMConsolidator -{ - public: - static bool CheckPragma(const char *path); - static bool FixEOF(const char *path); - static bool ConsolidateContent(const char *path, bool shouldCheckPragma); -}; - -} // namespace vChewing -#endif /* LMConsolidator_hpp */ diff --git a/Source/Modules/FileHandlers/LMConsolidator.mm b/Source/Modules/FileHandlers/LMConsolidator.mm deleted file mode 100644 index 0843e93d..00000000 --- a/Source/Modules/FileHandlers/LMConsolidator.mm +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "LMConsolidator.h" -#include "vChewing-Swift.h" - -namespace vChewing -{ - -constexpr std::string_view FORMATTED_PRAGMA_HEADER = - "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍"; - -// HEADER VERIFIER. CREDIT: Shiki Suen -bool LMConsolidator::CheckPragma(const char *path) -{ - ifstream zfdCheckPragma(path); - if (zfdCheckPragma.good()) - { - string firstLine; - getline(zfdCheckPragma, firstLine); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str()); - if (firstLine != FORMATTED_PRAGMA_HEADER) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS."); - return false; - } - } - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL."); - return true; -} - -// EOF FIXER. CREDIT: Shiki Suen. -bool LMConsolidator::FixEOF(const char *path) -{ - std::fstream zfdEOFFixerIncomingStream(path); - zfdEOFFixerIncomingStream.seekg(-1, std::ios_base::end); - char z; - zfdEOFFixerIncomingStream.get(z); - if (z != '\n') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n"); - std::ofstream zfdEOFFixerOutput(path, std::ios_base::app); - zfdEOFFixerOutput << std::endl; - zfdEOFFixerOutput.close(); - if (zfdEOFFixerOutput.fail()) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - return false; - } - } - zfdEOFFixerIncomingStream.close(); - if (zfdEOFFixerIncomingStream.fail()) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, - "// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - return false; - } - return true; -} // END: EOF FIXER. - -// CONTENT CONSOLIDATOR. CREDIT: Shiki Suen. -bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma) -{ - bool pragmaCheckResult = LMConsolidator::CheckPragma(path); - if (pragmaCheckResult && shouldCheckPragma) - { - return true; - } - - ifstream zfdContentConsolidatorIncomingStream(path); - vector vecEntry; - while (!zfdContentConsolidatorIncomingStream.eof()) - { // Xcode 13 能用的 ObjCpp 與 Cpp 並無原生支援「\h」這個 Regex 參數的能力,只能逐行處理。 - string zfdBuffer; - getline(zfdContentConsolidatorIncomingStream, zfdBuffer); - vecEntry.push_back(zfdBuffer); - } - // 第一遍 for 用來統整每行內的內容。 - // regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"), - // sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp / - // objCpp 可能對某些 Regex 寫法有相容性問題。 regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "), - // sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。 - regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)"); - for (int i = 0; i < vecEntry.size(); i++) - { // 第一遍 for 用來統整每行內的內容。 - if (vecEntry[i].size() != 0) - { // 不要理會空行,否則給空行加上 endl 等於再加空行。 - // RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab - // 等),最後去除每行首尾空格。 vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); // - // 中日韓全形空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, " - // ").c_str(); // Non-Break 型空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedWhiteSpace, - // " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。 vecEntry[i] = - // regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。 vecEntry[i] = - // regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。 - // 上述命令分步驟執行容易產生效能問題,故濃縮為下述兩句。 - vecEntry[i] = regex_replace(vecEntry[i], sedToConsolidate, " ").c_str(); - vecEntry[i] = regex_replace(vecEntry[i], sedToTrim, "").c_str(); - } - } - // 在第二遍 for 運算之前,針對 vecEntry 去除重複條目。 - std::reverse(vecEntry.begin(), vecEntry.end()); // 先首尾顛倒,免得破壞最新的 override 資訊。 - vecEntry.erase(unique(vecEntry.begin(), vecEntry.end()), vecEntry.end()); // 去重複。 - std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。 - // 統整完畢。開始將統整過的內容寫入檔案。 - ofstream zfdContentConsolidatorOutput(path); // 這裡是要從頭開始重寫檔案內容,所以不需要「 ios_base::app 」。 - if (!pragmaCheckResult) - { - zfdContentConsolidatorOutput << FORMATTED_PRAGMA_HEADER << endl; // 寫入經過整理處理的 HEADER。 - } - for (int i = 0; i < vecEntry.size(); i++) - { // 第二遍 for 用來寫入統整過的內容。 - if (vecEntry[i].size() != 0) - { // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。 - zfdContentConsolidatorOutput << vecEntry[i] - << endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。 - } - } - zfdContentConsolidatorOutput.close(); - if (zfdContentConsolidatorOutput.fail()) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, - "// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - return false; - } - zfdContentConsolidatorIncomingStream.close(); - if (zfdContentConsolidatorIncomingStream.fail()) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. " - "Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - return false; - } - return true; -} // END: CONTENT CONSOLIDATOR. - -} // namespace vChewing diff --git a/Source/Modules/LangModelRelated/KeyValueBlobReader.cpp b/Source/Modules/LangModelRelated/KeyValueBlobReader.cpp deleted file mode 100644 index eee32bbf..00000000 --- a/Source/Modules/LangModelRelated/KeyValueBlobReader.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "KeyValueBlobReader.h" - -namespace vChewing -{ - -KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue *out) -{ - static auto new_line = [](char c) { return c == '\n' || c == '\r'; }; - static auto blank = [](char c) { return c == ' ' || c == '\t'; }; - static auto blank_or_newline = [](char c) { return blank(c) || new_line(c); }; - static auto content_char = [](char c) { return !blank(c) && !new_line(c); }; - - if (state_ == State::ERROR) - { - return state_; - } - - const char *key_begin = nullptr; - size_t key_length = 0; - const char *value_begin = nullptr; - size_t value_length = 0; - - while (true) - { - state_ = SkipUntilNot(blank_or_newline); - if (state_ != State::CAN_CONTINUE) - { - return state_; - } - - // Check if it's a comment line; if so, read until end of line. - if (*current_ != '#') - { - break; - } - state_ = SkipUntil(new_line); - if (state_ != State::CAN_CONTINUE) - { - return state_; - } - } - - // No need to check whether* current_ is a content_char, since content_char - // is defined as not blank and not new_line. - - key_begin = current_; - state_ = SkipUntilNot(content_char); - if (state_ != State::CAN_CONTINUE) - { - goto error; - } - key_length = current_ - key_begin; - - // There should be at least one blank character after the key string. - if (!blank(*current_)) - { - goto error; - } - - state_ = SkipUntilNot(blank); - if (state_ != State::CAN_CONTINUE) - { - goto error; - } - - if (!content_char(*current_)) - { - goto error; - } - - value_begin = current_; - // value must only contain content characters, blanks not are allowed. - // also, there's no need to check the state after this, since we will always - // emit the value. This also avoids the situation where trailing spaces in a - // line would become part of the value. - SkipUntilNot(content_char); - value_length = current_ - value_begin; - - // Unconditionally skip until the end of the line. This prevents the case - // like "foo bar baz\n" where baz should not be treated as the Next key. - SkipUntil(new_line); - - if (out != nullptr) - { - *out = KeyValue{std::string_view{key_begin, key_length}, std::string_view{value_begin, value_length}}; - } - state_ = State::HAS_PAIR; - return state_; - -error: - state_ = State::ERROR; - return state_; -} - -KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(const std::function &f) -{ - while (current_ != end_ && *current_) - { - if (!f(*current_)) - { - return State::CAN_CONTINUE; - } - ++current_; - } - - return State::END; -} - -KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(const std::function &f) -{ - while (current_ != end_ && *current_) - { - if (f(*current_)) - { - return State::CAN_CONTINUE; - } - ++current_; - } - - return State::END; -} - -std::ostream &operator<<(std::ostream &os, const KeyValueBlobReader::KeyValue &kv) -{ - os << "(key: " << kv.key << ", value: " << kv.value << ")"; - return os; -} - -} // namespace vChewing diff --git a/Source/Modules/LangModelRelated/KeyValueBlobReader.h b/Source/Modules/LangModelRelated/KeyValueBlobReader.h deleted file mode 100644 index 8ca313be..00000000 --- a/Source/Modules/LangModelRelated/KeyValueBlobReader.h +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_ -#define SOURCE_ENGINE_KEYVALUEBLOBREADER_H_ - -#include -#include -#include -#include - -// A reader for text-based, blank-separated key-value pairs in a binary blob. -// -// This reader is suitable for reading language model files that entirely -// consist of key-value pairs. Leading or trailing spaces are ignored. -// Lines that start with "#" are treated as comments. Values cannot contain -// spaces. Any space after the value string is parsed is ignored. This implies -// that after a blank, anything that comes after the value can be used as -// comment. Both ' ' and '\t' are treated as blank characters, and the parser -// is agnostic to how lines are ended, and so LF, CR LF, and CR are all valid -// line endings. -// -// std::string_view is used to allow returning results efficiently. As a result, -// the blob is a const char* and will never be mutated. This implies, for -// example, read-only mmap can be used to parse large files. -namespace vChewing -{ - -class KeyValueBlobReader -{ - public: - enum class State : int - { - // There are no more key-value pairs in this blob. - END = 0, - // The reader has produced a new key-value pair. - HAS_PAIR = 1, - // An error is encountered and the parsing stopped. - ERROR = -1, - // Internal-only state: the parser can continue parsing. - CAN_CONTINUE = 2 - }; - - struct KeyValue - { - constexpr KeyValue() : key(""), value("") - { - } - constexpr KeyValue(std::string_view k, std::string_view v) : key(k), value(v) - { - } - - bool operator==(const KeyValue &another) const - { - return key == another.key && value == another.value; - } - - std::string_view key; - std::string_view value; - }; - - KeyValueBlobReader(const char *blob, size_t size) : current_(blob), end_(blob + size) - { - } - - // Parse the next key-value pair and return the state of the reader. If - // `out` is passed, out will be set to the produced key-value pair if there - // is one. - State Next(KeyValue *out = nullptr); - - private: - State SkipUntil(const std::function &f); - State SkipUntilNot(const std::function &f); - - const char *current_; - const char *end_; - State state_ = State::CAN_CONTINUE; -}; - -std::ostream &operator<<(std::ostream &, const KeyValueBlobReader::KeyValue &); - -} // namespace vChewing - -#endif // SOURCE_ENGINE_KEYVALUEBLOBREADER_H_ diff --git a/Source/Modules/LangModelRelated/LMInstantiator.h b/Source/Modules/LangModelRelated/LMInstantiator.h deleted file mode 100644 index fdbf92a7..00000000 --- a/Source/Modules/LangModelRelated/LMInstantiator.h +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef LMInstantiator_H -#define LMInstantiator_H - -#include "AssociatedPhrases.h" -#include "CNSLM.h" -#include "CoreLM.h" -#include "ParselessLM.h" -#include "PhraseReplacementMap.h" -#include "SymbolLM.h" -#include "UserPhrasesLM.h" -#include "UserSymbolLM.h" -#include -#include - -namespace vChewing -{ - -using namespace Gramambular; - -/// LMInstantiator is a facade for managing a set of models including -/// the input method language model, user phrases and excluded phrases. -/// -/// It is the primary model class that the input controller and grammar builder -/// of vChewing talks to. When the grammar builder starts to build a sentence -/// from a series of BPMF readings, it passes the readings to the model to see -/// if there are valid unigrams, and use returned unigrams to produce the final -/// results. -/// -/// LMInstantiator combine and transform the unigrams from the primary language -/// model and user phrases. The process is -/// -/// 1) Get the original unigrams. -/// 2) Drop the unigrams whose value is contained in the exclusion map. -/// 3) Replace the values of the unigrams using the phrase replacement map. -/// 4) Replace the values of the unigrams using an external converter lambda. -/// 5) Drop the duplicated phrases. -/// -/// The controller can ask the model to load the primary input method language -/// model while launching and to load the user phrases anytime if the custom -/// files are modified. It does not keep the reference of the data pathes but -/// you have to pass the paths when you ask it to do loading. -class LMInstantiator : public Gramambular::LanguageModel -{ - public: - LMInstantiator(); - ~LMInstantiator(); - - /// Asks to load the primary language model at the given path. - /// @param languageModelPath The path of the language model. - void loadLanguageModel(const char *languageModelPath); - /// If the data model is already loaded. - bool isDataModelLoaded(); - - /// Asks to load the primary language model at the given path. - /// @param miscDataPath The path of the misc data model. - void loadMiscData(const char *miscDataPath); - /// If the data model is already loaded. - bool isMiscDataLoaded(); - - /// Asks to load the primary language model at the given path. - /// @param symbolDataPath The path of the symbol data model. - void loadSymbolData(const char *symbolDataPath); - /// If the data model is already loaded. - bool isSymbolDataLoaded(); - - /// Asks to load the primary language model at the given path. - /// @param cnsDataPath The path of the CNS data model. - void loadCNSData(const char *cnsDataPath); - /// If the data model is already loaded. - bool isCNSDataLoaded(); - - /// Asks to load the user phrases and excluded phrases at the given path. - /// @param userPhrasesPath The path of user phrases. - /// @param excludedPhrasesPath The path of excluded phrases. - void loadUserPhrases(const char *userPhrasesPath, const char *excludedPhrasesPath); - /// Asks to load the user symbol data at the given path. - /// @param userSymbolDataPath The path of user symbol data. - void loadUserSymbolData(const char *userPhrasesPath); - /// Asks to load the user associated phrases at the given path. - /// @param userAssociatedPhrasesPath The path of the user associated phrases. - void loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath); - /// Asks to load the phrase replacement table at the given path. - /// @param phraseReplacementPath The path of the phrase replacement table. - void loadPhraseReplacementMap(const char *phraseReplacementPath); - - /// Not implemented since we do not have data to provide bigram function. - const std::vector bigramsForKeys(const std::string &preceedingKey, const std::string &key); - /// Returns a list of available unigram for the given key. - /// @param key A std::string represents the BPMF reading or a symbol key. For - /// example, it you pass "ㄇㄚ", it returns "嗎", "媽", and so on. - const std::vector unigramsForKey(const std::string &key); - /// If the model has unigrams for the given key. - /// @param key The key. - bool hasUnigramsForKey(const std::string &key); - - /// Enables or disables phrase replacement. - void setPhraseReplacementEnabled(bool enabled); - /// If phrase replacement is enabled or not. - bool phraseReplacementEnabled(); - - /// Enables or disables symbol input. - void setSymbolEnabled(bool enabled); - /// If symbol input is enabled or not. - bool symbolEnabled(); - - /// Enables or disables CNS11643 input. - void setCNSEnabled(bool enabled); - /// If CNS11643 input is enabled or not. - bool cnsEnabled(); - - const std::vector associatedPhrasesForKey(const std::string &key); - bool hasAssociatedPhrasesForKey(const std::string &key); - - protected: - /// Filters and converts the input unigrams and return a new list of unigrams. - /// - /// @param unigrams The unigrams to be processed. - /// @param excludedValues The values to excluded unigrams. - /// @param insertedValues The values for unigrams already in the results. - /// It helps to prevent duplicated unigrams. Please note that the method - /// has a side effect that it inserts values to `insertedValues`. - const std::vector filterAndTransformUnigrams( - const std::vector unigrams, const std::unordered_set &excludedValues, - std::unordered_set &insertedValues); - - ParselessLM m_languageModel; - CoreLM m_miscModel; - SymbolLM m_symbolModel; - CNSLM m_cnsModel; - UserPhrasesLM m_userPhrases; - UserPhrasesLM m_excludedPhrases; - UserSymbolLM m_userSymbolModel; - PhraseReplacementMap m_phraseReplacement; - AssociatedPhrases m_associatedPhrases; - bool m_phraseReplacementEnabled; - bool m_cnsEnabled; - bool m_symbolEnabled; -}; -}; // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/LMInstantiator.mm b/Source/Modules/LangModelRelated/LMInstantiator.mm deleted file mode 100644 index 2873cbf2..00000000 --- a/Source/Modules/LangModelRelated/LMInstantiator.mm +++ /dev/null @@ -1,323 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "LMInstantiator.h" -#include -#include - -namespace vChewing -{ - -LMInstantiator::LMInstantiator() -{ -} - -LMInstantiator::~LMInstantiator() -{ - m_languageModel.close(); - m_miscModel.close(); - m_userPhrases.close(); - m_userSymbolModel.close(); - m_cnsModel.close(); - m_excludedPhrases.close(); - m_phraseReplacement.close(); - m_associatedPhrases.close(); -} - -void LMInstantiator::loadLanguageModel(const char *languageModelDataPath) -{ - if (languageModelDataPath) - { - m_languageModel.close(); - m_languageModel.open(languageModelDataPath); - } -} - -bool LMInstantiator::isDataModelLoaded() -{ - return m_languageModel.isLoaded(); -} - -void LMInstantiator::loadCNSData(const char *cnsDataPath) -{ - if (cnsDataPath) - { - m_cnsModel.close(); - m_cnsModel.open(cnsDataPath); - } -} - -bool LMInstantiator::isCNSDataLoaded() -{ - return m_cnsModel.isLoaded(); -} - -void LMInstantiator::loadMiscData(const char *miscDataPath) -{ - if (miscDataPath) - { - m_miscModel.close(); - m_miscModel.open(miscDataPath); - } -} - -bool LMInstantiator::isMiscDataLoaded() -{ - return m_miscModel.isLoaded(); -} - -void LMInstantiator::loadSymbolData(const char *symbolDataPath) -{ - if (symbolDataPath) - { - m_symbolModel.close(); - m_symbolModel.open(symbolDataPath); - } -} - -bool LMInstantiator::isSymbolDataLoaded() -{ - return m_symbolModel.isLoaded(); -} - -void LMInstantiator::loadUserPhrases(const char *userPhrasesDataPath, const char *excludedPhrasesDataPath) -{ - if (userPhrasesDataPath) - { - m_userPhrases.close(); - m_userPhrases.open(userPhrasesDataPath); - } - if (excludedPhrasesDataPath) - { - m_excludedPhrases.close(); - m_excludedPhrases.open(excludedPhrasesDataPath); - } -} - -void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath) -{ - if (userSymbolDataPath) - { - m_userSymbolModel.close(); - m_userSymbolModel.open(userSymbolDataPath); - } -} - -void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath) -{ - if (userAssociatedPhrasesPath) - { - m_associatedPhrases.close(); - m_associatedPhrases.open(userAssociatedPhrasesPath); - } -} - -void LMInstantiator::loadPhraseReplacementMap(const char *phraseReplacementPath) -{ - if (phraseReplacementPath) - { - m_phraseReplacement.close(); - m_phraseReplacement.open(phraseReplacementPath); - } -} - -const std::vector LMInstantiator::bigramsForKeys(const std::string &preceedingKey, - const std::string &key) -{ - return std::vector(); -} - -const std::vector LMInstantiator::unigramsForKey(const std::string &key) -{ - if (key == " ") - { - std::vector spaceUnigrams; - Gramambular::Unigram g; - g.keyValue.key = " "; - g.keyValue.value = " "; - g.score = 0; - spaceUnigrams.push_back(g); - return spaceUnigrams; - } - - std::vector allUnigrams; - std::vector miscUnigrams; - std::vector symbolUnigrams; - std::vector userUnigrams; - std::vector userSymbolUnigrams; - std::vector cnsUnigrams; - - std::unordered_set excludedValues; - std::unordered_set insertedValues; - - if (m_excludedPhrases.hasUnigramsForKey(key)) - { - std::vector excludedUnigrams = m_excludedPhrases.unigramsForKey(key); - transform(excludedUnigrams.begin(), excludedUnigrams.end(), inserter(excludedValues, excludedValues.end()), - [](const Gramambular::Unigram &u) { return u.keyValue.value; }); - } - - if (m_userPhrases.hasUnigramsForKey(key)) - { - std::vector rawUserUnigrams = m_userPhrases.unigramsForKey(key); - // 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。 - // 這樣一來就可以在就地新增語彙時徹底複寫優先權。 - std::reverse(rawUserUnigrams.begin(), rawUserUnigrams.end()); - userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues); - } - - if (m_languageModel.hasUnigramsForKey(key)) - { - std::vector rawGlobalUnigrams = m_languageModel.unigramsForKey(key); - allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues); - } - - if (m_miscModel.hasUnigramsForKey(key)) - { - std::vector rawMiscUnigrams = m_miscModel.unigramsForKey(key); - miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues); - } - - if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled) - { - std::vector rawSymbolUnigrams = m_symbolModel.unigramsForKey(key); - symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues); - } - - if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled) - { - std::vector rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key); - userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues); - } - - if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) - { - std::vector rawCNSUnigrams = m_cnsModel.unigramsForKey(key); - cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues); - } - - allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end()); - allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end()); - allUnigrams.insert(allUnigrams.begin(), miscUnigrams.begin(), miscUnigrams.end()); - allUnigrams.insert(allUnigrams.end(), userSymbolUnigrams.begin(), userSymbolUnigrams.end()); - allUnigrams.insert(allUnigrams.end(), symbolUnigrams.begin(), symbolUnigrams.end()); - return allUnigrams; -} - -bool LMInstantiator::hasUnigramsForKey(const std::string &key) -{ - if (key == " ") - { - return true; - } - - if (!m_excludedPhrases.hasUnigramsForKey(key)) - { - return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key); - } - - return unigramsForKey(key).size() > 0; -} - -void LMInstantiator::setPhraseReplacementEnabled(bool enabled) -{ - m_phraseReplacementEnabled = enabled; -} - -bool LMInstantiator::phraseReplacementEnabled() -{ - return m_phraseReplacementEnabled; -} - -void LMInstantiator::setCNSEnabled(bool enabled) -{ - m_cnsEnabled = enabled; -} - -bool LMInstantiator::cnsEnabled() -{ - return m_cnsEnabled; -} - -void LMInstantiator::setSymbolEnabled(bool enabled) -{ - m_symbolEnabled = enabled; -} - -bool LMInstantiator::symbolEnabled() -{ - return m_symbolEnabled; -} - -const std::vector LMInstantiator::filterAndTransformUnigrams( - const std::vector unigrams, const std::unordered_set &excludedValues, - std::unordered_set &insertedValues) -{ - std::vector results; - - for (auto &&unigram : unigrams) - { - // excludedValues filters out the unigrams with the original value. - // insertedValues filters out the ones with the converted value - std::string originalValue = unigram.keyValue.value; - if (excludedValues.find(originalValue) != excludedValues.end()) - { - continue; - } - - std::string value = originalValue; - if (m_phraseReplacementEnabled) - { - std::string replacement = m_phraseReplacement.valueForKey(value); - if (replacement != "") - { - value = replacement; - } - } - if (insertedValues.find(value) == insertedValues.end()) - { - Gramambular::Unigram g; - g.keyValue.value = value; - g.keyValue.key = unigram.keyValue.key; - g.score = unigram.score; - results.push_back(g); - insertedValues.insert(value); - } - } - return results; -} - -const std::vector LMInstantiator::associatedPhrasesForKey(const std::string &key) -{ - return m_associatedPhrases.valuesForKey(key); -} - -bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string &key) -{ - return m_associatedPhrases.hasValuesForKey(key); -} - -} // namespace vChewing diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.cpp b/Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.cpp similarity index 100% rename from Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.cpp rename to Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.cpp diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.h b/Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.h similarity index 100% rename from Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.h rename to Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.h diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.cpp b/Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.cpp similarity index 100% rename from Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.cpp rename to Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.cpp diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.h b/Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.h similarity index 100% rename from Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.h rename to Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.h diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.cpp b/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.cpp similarity index 100% rename from Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.cpp rename to Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.cpp diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.h b/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.h similarity index 100% rename from Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.h rename to Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.h diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h b/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h deleted file mode 100644 index 63f6aca1..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef ASSOCIATEDPHRASES_H -#define ASSOCIATEDPHRASES_H - -#include -#include -#include -#include - -namespace vChewing -{ - -class AssociatedPhrases -{ - public: - AssociatedPhrases(); - ~AssociatedPhrases(); - - const bool isLoaded(); - bool open(const char *path); - void close(); - const std::vector valuesForKey(const std::string &key); - const bool hasValuesForKey(const std::string &key); - - protected: - struct Row - { - Row(std::string_view &k, std::string_view &v) : key(k), value(v) - { - } - std::string_view key; - std::string_view value; - }; - - std::map> keyRowMap; - - int fd; - void *data; - size_t length; -}; - -} // namespace vChewing - -#endif /* AssociatedPhrases_hpp */ diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm b/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm deleted file mode 100644 index ac0f223e..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "AssociatedPhrases.h" -#include "vChewing-Swift.h" -#include -#include -#include -#include -#include - -#include "KeyValueBlobReader.h" -#include "LMConsolidator.h" - -namespace vChewing -{ - -AssociatedPhrases::AssociatedPhrases() : fd(-1), data(0), length(0) -{ -} - -AssociatedPhrases::~AssociatedPhrases() -{ - if (data) - { - close(); - } -} - -const bool AssociatedPhrases::isLoaded() -{ - if (data) - { - return true; - } - return false; -} - -bool AssociatedPhrases::open(const char *path) -{ - if (data) - { - return false; - } - - LMConsolidator::FixEOF(path); - LMConsolidator::ConsolidateContent(path, true); - - fd = ::open(path, O_RDONLY); - if (fd == -1) - { - printf("open:: file not exist"); - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) - { - printf("open:: cannot open file"); - return false; - } - - length = (size_t)sb.st_size; - - data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) - { - ::close(fd); - return false; - } - - KeyValueBlobReader reader(static_cast(data), length); - KeyValueBlobReader::KeyValue keyValue; - KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) - { - keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value); - } - // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) - { - // close(); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n"); - // return false; - } - return true; -} - -void AssociatedPhrases::close() -{ - if (data) - { - munmap(data, length); - ::close(fd); - data = 0; - } - - keyRowMap.clear(); -} - -const std::vector AssociatedPhrases::valuesForKey(const std::string &key) -{ - std::vector v; - auto iter = keyRowMap.find(key); - if (iter != keyRowMap.end()) - { - const std::vector &rows = iter->second; - for (const auto &row : rows) - { - std::string_view value = row.value; - v.push_back({value.data(), value.size()}); - } - } - return v; -} - -const bool AssociatedPhrases::hasValuesForKey(const std::string &key) -{ - return keyRowMap.find(key) != keyRowMap.end(); -} - -}; // namespace vChewing diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h deleted file mode 100644 index 46625e74..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef CoreLM_H -#define CoreLM_H - -#include "LanguageModel.h" -#include -#include -#include -#include - -// this class relies on the fact that we have a space-separated data -// format, and we use mmap and zero-out the separators and line feeds -// to avoid creating new string objects; the parser is a simple DFA - -using namespace std; -using namespace Gramambular; - -namespace vChewing -{ - -class CoreLM : public Gramambular::LanguageModel -{ - public: - CoreLM(); - ~CoreLM(); - - bool isLoaded(); - bool open(const char *path); - void close(); - void dump(); - - virtual const std::vector bigramsForKeys(const string &preceedingKey, const string &key); - virtual const std::vector unigramsForKey(const string &key); - virtual bool hasUnigramsForKey(const string &key); - - protected: - struct CStringCmp - { - bool operator()(const char *s1, const char *s2) const - { - return strcmp(s1, s2) < 0; - } - }; - - struct Row - { - const char *key; - const char *value; - const char *logProbability; - }; - - map, CStringCmp> keyRowMap; - int fd; - void *data; - size_t length; -}; - -}; // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm b/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm deleted file mode 100644 index de24f821..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "CoreLM.h" -#include "vChewing-Swift.h" -#include -#include -#include -#include -#include -#include - -using namespace Gramambular; - -vChewing::CoreLM::CoreLM() : fd(-1), data(0), length(0) -{ -} - -vChewing::CoreLM::~CoreLM() -{ - if (data) - { - close(); - } -} - -bool vChewing::CoreLM::isLoaded() -{ - if (data) - { - return true; - } - return false; -} - -bool vChewing::CoreLM::open(const char *path) -{ - if (data) - { - return false; - } - - fd = ::open(path, O_RDONLY); - if (fd == -1) - { - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) - { - return false; - } - - length = (size_t)sb.st_size; - - data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0); - if (!data) - { - ::close(fd); - return false; - } - - // Regular expression for parsing: - // (\n*\w\w*\s\w\w*\s\w\w*)*$ - // - // Expanded as DFA (in Graphviz): - // - // digraph finite_state_machine { - // rankdir = LR; - // size = "10"; - // - // node [shape = doublecircle]; End; - // node [shape = circle]; - // - // Start -> End [ label = "EOF"]; - // Start -> Error [ label = "\\s" ]; - // Start -> Start [ label = "\\n" ]; - // Start -> 1 [ label = "\\w" ]; - // - // 1 -> Error [ label = "\\n, EOF" ]; - // 1 -> 2 [ label = "\\s" ]; - // 1 -> 1 [ label = "\\w" ]; - // - // 2 -> Error [ label = "\\n, \\s, EOF" ]; - // 2 -> 3 [ label = "\\w" ]; - // - // 3 -> Error [ label = "\\n, EOF "]; - // 3 -> 4 [ label = "\\s" ]; - // 3 -> 3 [ label = "\\w" ]; - // - // 4 -> Error [ label = "\\n, \\s, EOF" ]; - // 4 -> 5 [ label = "\\w" ]; - // - // 5 -> Error [ label = "\\s, EOF" ]; - // 5 -> Start [ label = "\\n" ]; - // 5 -> 5 [ label = "\\w" ]; - // } - - char *head = (char *)data; - char *end = (char *)data + length; - char c; - Row row; - -start: - // EOF -> end - if (head == end) - { - goto end; - } - - c = *head; - // \s -> error - if (c == ' ') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error"); - goto error; - } - // \n -> start - else if (c == '\n') - { - head++; - goto start; - } - - // \w -> record column star, state1 - row.value = head; - head++; - // fall through to state 1 - -state1: - // EOF -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error"); - goto error; - } - - c = *head; - // \n -> error - if (c == '\n') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error"); - goto error; - } - // \s -> state2 + zero out ending + record column start - else if (c == ' ') - { - *head = 0; - head++; - row.key = head; - goto state2; - } - - // \w -> state1 - head++; - goto state1; - -state2: - // eof -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error"); - goto error; - } - - c = *head; - // \n, \s -> error - if (c == '\n' || c == ' ') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error"); - goto error; - } - - // \w -> state3 - head++; - - // fall through to state 3 - -state3: - // eof -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error"); - goto error; - } - - c = *head; - - // \n -> error - if (c == '\n') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error"); - goto error; - } - // \s -> state4 + zero out ending + record column start - else if (c == ' ') - { - *head = 0; - head++; - row.logProbability = head; - goto state4; - } - - // \w -> state3 - head++; - goto state3; - -state4: - // eof -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error"); - goto error; - } - - c = *head; - // \n, \s -> error - if (c == '\n' || c == ' ') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error"); - goto error; - } - - // \w -> state5 - head++; - - // fall through to state 5 - -state5: - // eof -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error"); - goto error; - } - - c = *head; - // \s -> error - if (c == ' ') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error"); - goto error; - } - // \n -> start - else if (c == '\n') - { - *head = 0; - head++; - keyRowMap[row.key].push_back(row); - goto start; - } - - // \w -> state 5 - head++; - goto state5; - -error: - close(); - return false; - -end: - static const char *space = " "; - static const char *zero = "0.0"; - Row emptyRow; - emptyRow.key = space; - emptyRow.value = space; - emptyRow.logProbability = zero; - keyRowMap[space].push_back(emptyRow); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete."); - return true; -} - -void vChewing::CoreLM::close() -{ - if (data) - { - munmap(data, length); - ::close(fd); - data = 0; - } - - keyRowMap.clear(); -} - -void vChewing::CoreLM::dump() -{ - size_t rows = 0; - for (map>::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i) - { - const vector &r = (*i).second; - for (vector::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri) - { - const Row &row = *ri; - cerr << row.key << " " << row.value << " " << row.logProbability << "\n"; - rows++; - } - } -} - -const std::vector vChewing::CoreLM::bigramsForKeys(const string &preceedingKey, const string &key) -{ - return std::vector(); -} - -const std::vector vChewing::CoreLM::unigramsForKey(const string &key) -{ - std::vector v; - map>::const_iterator i = keyRowMap.find(key.c_str()); - - if (i != keyRowMap.end()) - { - for (vector::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri) - { - Unigram g; - const Row &r = *ri; - g.keyValue.key = r.key; - g.keyValue.value = r.value; - g.score = atof(r.logProbability); - v.push_back(g); - } - } - - return v; -} - -bool vChewing::CoreLM::hasUnigramsForKey(const string &key) -{ - return keyRowMap.find(key.c_str()) != keyRowMap.end(); -} diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h deleted file mode 100644 index f464255f..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef CNSLM_H -#define CNSLM_H - -#include "LanguageModel.h" -#include "UserPhrasesLM.h" -#include -#include -#include - -namespace vChewing -{ - -class CNSLM : public UserPhrasesLM -{ - public: - bool allowConsolidation() override - { - return false; - } - float overridedValue() override - { - return -11.0; - } -}; - -} // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h deleted file mode 100644 index 7c385307..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef SYMBOLLM_H -#define SYMBOLLM_H - -#include "LanguageModel.h" -#include "UserPhrasesLM.h" -#include -#include -#include - -namespace vChewing -{ - -class SymbolLM : public UserPhrasesLM -{ - public: - bool allowConsolidation() override - { - return false; - } - float overridedValue() override - { - return -13.0; - } -}; - -} // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/UserSymbolLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/UserSymbolLM.h deleted file mode 100644 index 7f37c3ac..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/UserSymbolLM.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef USERSYMBOLLM_H -#define USERSYMBOLLM_H - -#include "LanguageModel.h" -#include "UserPhrasesLM.h" -#include -#include -#include - -namespace vChewing -{ - -class UserSymbolLM : public UserPhrasesLM -{ - public: - bool allowConsolidation() override - { - return true; - } - float overridedValue() override - { - return -12.0; - } -}; - -} // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h b/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h deleted file mode 100644 index 43263923..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef PHRASEREPLACEMENTMAP_H -#define PHRASEREPLACEMENTMAP_H - -#include -#include -#include - -namespace vChewing -{ - -class PhraseReplacementMap -{ - public: - PhraseReplacementMap(); - ~PhraseReplacementMap(); - - bool open(const char *path); - void close(); - const std::string valueForKey(const std::string &key); - - protected: - std::map keyValueMap; - int fd; - void *data; - size_t length; -}; - -} // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm b/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm deleted file mode 100644 index 7fde339b..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "PhraseReplacementMap.h" -#include "vChewing-Swift.h" -#include -#include -#include -#include -#include -#include - -#include "KeyValueBlobReader.h" -#include "LMConsolidator.h" - -namespace vChewing -{ - -using std::string; - -PhraseReplacementMap::PhraseReplacementMap() : fd(-1), data(0), length(0) -{ -} - -PhraseReplacementMap::~PhraseReplacementMap() -{ - if (data) - { - close(); - } -} - -bool PhraseReplacementMap::open(const char *path) -{ - if (data) - { - return false; - } - - LMConsolidator::FixEOF(path); - LMConsolidator::ConsolidateContent(path, true); - - fd = ::open(path, O_RDONLY); - if (fd == -1) - { - printf("open:: file not exist"); - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) - { - printf("open:: cannot open file"); - return false; - } - - length = (size_t)sb.st_size; - - data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) - { - ::close(fd); - return false; - } - - KeyValueBlobReader reader(static_cast(data), length); - KeyValueBlobReader::KeyValue keyValue; - KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) - { - keyValueMap[keyValue.key] = keyValue.value; - } - // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) - { - // close(); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n"); - // return false; - } - return true; -} - -void PhraseReplacementMap::close() -{ - if (data) - { - munmap(data, length); - ::close(fd); - data = 0; - } - - keyValueMap.clear(); -} - -const std::string PhraseReplacementMap::valueForKey(const std::string &key) -{ - auto iter = keyValueMap.find(key); - if (iter != keyValueMap.end()) - { - const std::string_view v = iter->second; - return {v.data(), v.size()}; - } - return string(""); -} - -} diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h deleted file mode 100644 index 4c27d748..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef USERPHRASESLM_H -#define USERPHRASESLM_H - -#include "LanguageModel.h" -#include -#include -#include - -namespace vChewing -{ - -class UserPhrasesLM : public Gramambular::LanguageModel -{ - public: - UserPhrasesLM(); - ~UserPhrasesLM(); - - bool isLoaded(); - bool open(const char *path); - void close(); - void dump(); - - virtual bool allowConsolidation() - { - return true; - } - - virtual float overridedValue() - { - return 0.0; - } - - virtual const std::vector bigramsForKeys(const std::string &preceedingKey, - const std::string &key); - virtual const std::vector unigramsForKey(const std::string &key); - virtual bool hasUnigramsForKey(const std::string &key); - - protected: - struct Row - { - Row(std::string_view &k, std::string_view &v) : key(k), value(v) - { - } - std::string_view key; - std::string_view value; - }; - - std::map> keyRowMap; - int fd; - void *data; - size_t length; -}; - -} // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm deleted file mode 100644 index e3565d0e..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "UserPhrasesLM.h" -#include "vChewing-Swift.h" -#include -#include -#include -#include -#include -#include - -#include "KeyValueBlobReader.h" -#include "LMConsolidator.h" - -namespace vChewing -{ - -UserPhrasesLM::UserPhrasesLM() : fd(-1), data(0), length(0) -{ -} - -UserPhrasesLM::~UserPhrasesLM() -{ - if (data) - { - close(); - } -} - -bool UserPhrasesLM::isLoaded() -{ - if (data) - { - return true; - } - return false; -} - -bool UserPhrasesLM::open(const char *path) -{ - if (data) - { - return false; - } - - if (allowConsolidation()) - { - LMConsolidator::FixEOF(path); - LMConsolidator::ConsolidateContent(path, true); - } - - fd = ::open(path, O_RDONLY); - if (fd == -1) - { - printf("open:: file not exist"); - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) - { - printf("open:: cannot open file"); - return false; - } - - length = (size_t)sb.st_size; - - data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) - { - ::close(fd); - return false; - } - - KeyValueBlobReader reader(static_cast(data), length); - KeyValueBlobReader::KeyValue keyValue; - KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) - { - // We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF - // reading. - keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key); - } - // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) - { - // close(); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n"); - // return false; - } - return true; -} - -void UserPhrasesLM::close() -{ - if (data) - { - munmap(data, length); - ::close(fd); - data = 0; - } - - keyRowMap.clear(); -} - -void UserPhrasesLM::dump() -{ - for (const auto &entry : keyRowMap) - { - const std::vector &rows = entry.second; - for (const auto &row : rows) - { - std::cerr << row.key << " " << row.value << "\n"; - } - } -} - -const std::vector UserPhrasesLM::bigramsForKeys(const std::string &preceedingKey, - const std::string &key) -{ - return std::vector(); -} - -const std::vector UserPhrasesLM::unigramsForKey(const std::string &key) -{ - std::vector v; - auto iter = keyRowMap.find(key); - if (iter != keyRowMap.end()) - { - const std::vector &rows = iter->second; - for (const auto &row : rows) - { - Gramambular::Unigram g; - g.keyValue.key = row.key; - g.keyValue.value = row.value; - g.score = overridedValue(); - v.push_back(g); - } - } - - return v; -} - -bool UserPhrasesLM::hasUnigramsForKey(const std::string &key) -{ - return keyRowMap.find(key) != keyRowMap.end(); -} - -}; // namespace vChewing diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index 35d90f9e..e4a08b84 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -206,12 +206,7 @@ 5B5D28AB281EA1E800523D4D /* lmLite.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmLite.swift; sourceTree = ""; }; 5B5E535127EF261400C6AA1E /* IME.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = IME.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B61B0C9280BEFD4002E3CFA /* KeyHandler_Misc.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_Misc.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A32627AE77BB00A19448 /* LMConsolidator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = LMConsolidator.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 5B62A32727AE77BB00A19448 /* LMConsolidator.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = LMConsolidator.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = FSEventStreamHelper.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A32B27AE78B000A19448 /* CNSLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = CNSLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 5B62A32C27AE78B000A19448 /* CoreLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = CoreLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 5B62A32D27AE78B000A19448 /* CoreLM.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = CoreLM.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 5B62A33127AE792F00A19448 /* InputSourceHelper.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputSourceHelper.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B62A33527AE795800A19448 /* mgrPrefs.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = mgrPrefs.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B62A33727AE79CD00A19448 /* NSStringUtils.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = NSStringUtils.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; @@ -223,14 +218,12 @@ 5B62A34527AE7CD900A19448 /* NotifierController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = NotifierController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B707CE527D9F3A10099EF99 /* SwiftyOpenCC */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = SwiftyOpenCC; path = Packages/SwiftyOpenCC; sourceTree = ""; }; 5B707CE727D9F4590099EF99 /* OpenCCBridge.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = OpenCCBridge.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B7111C727DEF9FF00444310 /* UserSymbolLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserSymbolLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 5B73FB5427B2BD6900E9BF49 /* PhraseEditor-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = "PhraseEditor-Info.plist"; path = "UserPhraseEditor/PhraseEditor-Info.plist"; sourceTree = SOURCE_ROOT; }; 5B73FB5F27B2BE1300E9BF49 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = ""; }; 5B782EC3280C243C007276DE /* KeyHandler_HandleCandidate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_HandleCandidate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B7BC4AF27AFFBE800F66C24 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/frmPrefWindow.xib; sourceTree = ""; }; 5B7BC4B227AFFC0B00F66C24 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/frmPrefWindow.strings; sourceTree = ""; }; 5B7F225C2808501000DDD3CB /* KeyHandler_HandleInput.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_HandleInput.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B8F43ED27C9BC220069AC27 /* SymbolLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = SymbolLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 5B949BD82816DC5400D87B5D /* LineReader.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LineReader.swift; sourceTree = ""; usesTabs = 1; }; 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LMConsolidator.swift; sourceTree = ""; usesTabs = 1; }; 5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmUserOverride.swift; sourceTree = ""; usesTabs = 1; }; @@ -324,25 +317,15 @@ 6ACA41EF15FC1D9000935EF6 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = ""; }; 6ACA41F215FC1D9000935EF6 /* Installer-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = "Installer-Info.plist"; path = "Installer/Installer-Info.plist"; sourceTree = SOURCE_ROOT; }; 6ACA41F315FC1D9000935EF6 /* Installer-Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "Installer-Prefix.pch"; path = "Installer/Installer-Prefix.pch"; sourceTree = SOURCE_ROOT; }; - 6ACC3D3C27914AAB00F1B140 /* KeyValueBlobReader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = KeyValueBlobReader.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6ACC3D3E27914F2400F1B140 /* KeyValueBlobReader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = KeyValueBlobReader.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessPhraseDB.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessPhraseDB.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6ACC3D422793701600F1B140 /* ParselessLM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessLM.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6ACC3D432793701600F1B140 /* ParselessLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355D9278E6D17005E5CBD /* LMInstantiator.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = LMInstantiator.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355DA278E6D17005E5CBD /* LMInstantiator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = LMInstantiator.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355DC278EA3ED005E5CBD /* UserPhrasesLM.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = UserPhrasesLM.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355DD278EA3ED005E5CBD /* UserPhrasesLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserPhrasesLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D427A9BF25ED28CC005D43E0 /* vChewing-Bridging-Header.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = "vChewing-Bridging-Header.h"; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D427F76B278CA1BA004A2160 /* AppDelegate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = AppDelegate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - D44FB74B2792189A003C80A6 /* PhraseReplacementMap.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = PhraseReplacementMap.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D44FB74C2792189A003C80A6 /* PhraseReplacementMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = PhraseReplacementMap.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D456576D279E4F7B00DF6BC9 /* InputHandler.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputHandler.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; D461B791279DAC010070E734 /* InputState.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputState.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; D47B92BF27972AC800458394 /* main.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = main.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - D47D73AA27A6CAE600255A50 /* AssociatedPhrases.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = AssociatedPhrases.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D47D73AB27A6CAE600255A50 /* AssociatedPhrases.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = AssociatedPhrases.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D47F7DCD278BFB57002F9DD7 /* ctlPrefWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlPrefWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; D47F7DCF278C0897002F9DD7 /* ctlNonModalAlertWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlNonModalAlertWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserOverrideModel.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; @@ -417,16 +400,6 @@ path = SubLMs; sourceTree = ""; }; - 5B4D47B627C9186900220DDC /* InstantiatedModels */ = { - isa = PBXGroup; - children = ( - 5B62A32B27AE78B000A19448 /* CNSLM.h */, - 5B8F43ED27C9BC220069AC27 /* SymbolLM.h */, - 5B7111C727DEF9FF00444310 /* UserSymbolLM.h */, - ); - path = InstantiatedModels; - sourceTree = ""; - }; 5B62A30127AE732800A19448 /* 3rdParty */ = { isa = PBXGroup; children = ( @@ -489,8 +462,6 @@ isa = PBXGroup; children = ( 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */, - 5B62A32627AE77BB00A19448 /* LMConsolidator.h */, - 5B62A32727AE77BB00A19448 /* LMConsolidator.mm */, 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */, ); path = FileHandlers; @@ -520,39 +491,26 @@ 5B62A32427AE757300A19448 /* LangModelRelated */ = { isa = PBXGroup; children = ( - 5B62A32527AE758000A19448 /* SubLanguageModels */, + 5B62A32527AE758000A19448 /* OldFileReferences */, 5B407308281672610023DFFF /* SubLMs */, - 6ACC3D3E27914F2400F1B140 /* KeyValueBlobReader.cpp */, - 6ACC3D3C27914AAB00F1B140 /* KeyValueBlobReader.h */, 5BE33BEC28169B5D00CE5BB0 /* KeyValueStructs.swift */, 5BD0113A28180D6100609769 /* LMInstantiator.swift */, - D41355DA278E6D17005E5CBD /* LMInstantiator.h */, - D41355D9278E6D17005E5CBD /* LMInstantiator.mm */, 5BAEFACF28012565001F42C9 /* mgrLangModel.swift */, ); path = LangModelRelated; sourceTree = ""; }; - 5B62A32527AE758000A19448 /* SubLanguageModels */ = { + 5B62A32527AE758000A19448 /* OldFileReferences */ = { isa = PBXGroup; children = ( - 5B4D47B627C9186900220DDC /* InstantiatedModels */, - D47D73AB27A6CAE600255A50 /* AssociatedPhrases.h */, - D47D73AA27A6CAE600255A50 /* AssociatedPhrases.mm */, - 5B62A32C27AE78B000A19448 /* CoreLM.h */, - 5B62A32D27AE78B000A19448 /* CoreLM.mm */, 6ACC3D422793701600F1B140 /* ParselessLM.cpp */, 6ACC3D432793701600F1B140 /* ParselessLM.h */, 6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */, 6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */, - D44FB74C2792189A003C80A6 /* PhraseReplacementMap.h */, - D44FB74B2792189A003C80A6 /* PhraseReplacementMap.mm */, D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */, D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */, - D41355DD278EA3ED005E5CBD /* UserPhrasesLM.h */, - D41355DC278EA3ED005E5CBD /* UserPhrasesLM.mm */, ); - path = SubLanguageModels; + path = OldFileReferences; sourceTree = ""; }; 5B62A33027AE78E500A19448 /* Resources */ = {