From 69b8c361865d7bf028d32610279683d8856e97be Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Thu, 12 May 2022 15:10:14 +0800 Subject: [PATCH] Repo // Remove remained ParselessLM files. --- .../OldFileReferences/ParselessLM.cpp | 168 ------------------ .../OldFileReferences/ParselessLM.h | 63 ------- .../OldFileReferences/ParselessPhraseDB.cpp | 163 ----------------- .../OldFileReferences/ParselessPhraseDB.h | 61 ------- vChewing.xcodeproj/project.pbxproj | 8 - 5 files changed, 463 deletions(-) delete mode 100644 Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.cpp delete mode 100644 Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.h delete mode 100644 Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.cpp delete mode 100644 Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.h diff --git a/Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.cpp b/Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.cpp deleted file mode 100644 index 4f40bb8c..00000000 --- a/Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "ParselessLM.h" - -#include -#include -#include -#include - -#include - -vChewing::ParselessLM::~ParselessLM() -{ - close(); -} - -bool vChewing::ParselessLM::isLoaded() -{ - if (data_) - { - return true; - } - return false; -} - -bool vChewing::ParselessLM::open(const std::string_view &path) -{ - if (data_) - { - return false; - } - - fd_ = ::open(path.data(), O_RDONLY); - if (fd_ == -1) - { - return false; - } - - struct stat sb; - if (fstat(fd_, &sb) == -1) - { - ::close(fd_); - fd_ = -1; - return false; - } - - length_ = static_cast(sb.st_size); - - data_ = mmap(NULL, length_, PROT_READ, MAP_SHARED, fd_, 0); - if (data_ == nullptr) - { - ::close(fd_); - fd_ = -1; - length_ = 0; - return false; - } - - db_ = std::unique_ptr(new ParselessPhraseDB(static_cast(data_), length_)); - return true; -} - -void vChewing::ParselessLM::close() -{ - if (data_ != nullptr) - { - munmap(data_, length_); - ::close(fd_); - fd_ = -1; - length_ = 0; - data_ = nullptr; - } -} - -const std::vector vChewing::ParselessLM::bigramsForKeys(const std::string &preceedingKey, - const std::string &key) -{ - return std::vector(); -} - -const std::vector vChewing::ParselessLM::unigramsForKey(const std::string &key) -{ - if (db_ == nullptr) - { - return std::vector(); - } - - std::vector results; - for (const auto &row : db_->findRows(key + " ")) - { - Gramambular::Unigram unigram; - - // Move ahead until we encounter the first space. This is the key. - auto it = row.begin(); - while (it != row.end() && *it != ' ') - { - ++it; - } - - unigram.keyValue.key = std::string(row.begin(), it); - - // Read past the space. - if (it != row.end()) - { - ++it; - } - - if (it != row.end()) - { - // Now it is the start of the value portion. - auto value_begin = it; - - // Move ahead until we encounter the second space. This is the - // value. - while (it != row.end() && *it != ' ') - { - ++it; - } - unigram.keyValue.value = std::string(value_begin, it); - } - - // Read past the space. The remainder, if it exists, is the score. - if (it != row.end()) - { - ++it; - } - - if (it != row.end()) - { - unigram.score = std::stod(std::string(it, row.end())); - } - results.push_back(unigram); - } - return results; -} - -bool vChewing::ParselessLM::hasUnigramsForKey(const std::string &key) -{ - if (db_ == nullptr) - { - return false; - } - - return db_->findFirstMatchingLine(key + " ") != nullptr; -} diff --git a/Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.h b/Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.h deleted file mode 100644 index 698bcecc..00000000 --- a/Source/Modules/LangModelRelated/OldFileReferences/ParselessLM.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef SOURCE_ENGINE_PARSELESSLM_H_ -#define SOURCE_ENGINE_PARSELESSLM_H_ - -#include -#include -#include - -#include "LanguageModel.h" -#include "ParselessPhraseDB.h" - -namespace vChewing -{ - -class ParselessLM : public Gramambular::LanguageModel -{ - public: - ~ParselessLM() override; - - bool isLoaded(); - bool open(const std::string_view &path); - void close(); - - const std::vector bigramsForKeys(const std::string &preceedingKey, - const std::string &key) override; - const std::vector unigramsForKey(const std::string &key) override; - bool hasUnigramsForKey(const std::string &key) override; - - private: - int fd_ = -1; - void *data_ = nullptr; - size_t length_ = 0; - std::unique_ptr db_; -}; - -}; // namespace vChewing - -#endif // SOURCE_ENGINE_PARSELESSLM_H_ diff --git a/Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.cpp b/Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.cpp deleted file mode 100644 index a0097e69..00000000 --- a/Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "ParselessPhraseDB.h" - -#include -#include - -namespace vChewing -{ - -ParselessPhraseDB::ParselessPhraseDB(const char *buf, size_t length) : begin_(buf), end_(buf + length) -{ -} - -std::vector ParselessPhraseDB::findRows(const std::string_view &key) -{ - std::vector rows; - - const char *ptr = findFirstMatchingLine(key); - if (ptr == nullptr) - { - return rows; - } - - while (ptr + key.length() <= end_ && memcmp(ptr, key.data(), key.length()) == 0) - { - const char *eol = ptr; - - while (eol != end_ && *eol != '\n') - { - ++eol; - } - - rows.emplace_back(ptr, eol - ptr); - if (eol == end_) - { - break; - } - - ptr = ++eol; - } - - return rows; -} - -// Implements a binary search that returns the pointer to the first matching -// row. In its core it's just a standard binary search, but we use backtracking -// to locate the line start. We also check the previous line to see if the -// current line is actually the first matching line: if the previous line is -// less to the key and the current line starts exactly with the key, then -// the current line is the first matching line. -const char *ParselessPhraseDB::findFirstMatchingLine(const std::string_view &key) -{ - if (key.empty()) - { - return begin_; - } - - const char *top = begin_; - const char *bottom = end_; - - while (top < bottom) - { - const char *mid = top + (bottom - top) / 2; - const char *ptr = mid; - - if (ptr != begin_) - { - --ptr; - } - - while (ptr != begin_ && *ptr != '\n') - { - --ptr; - } - - const char *prev = nullptr; - if (*ptr == '\n') - { - prev = ptr; - ++ptr; - } - - // ptr is now in the "current" line we're interested in. - if (ptr + key.length() > end_) - { - // not enough data to compare at this point, bail. - break; - } - - int current_cmp = memcmp(ptr, key.data(), key.length()); - - if (current_cmp > 0) - { - bottom = mid - 1; - continue; - } - - if (current_cmp < 0) - { - top = mid + 1; - continue; - } - - if (!prev) - { - return ptr; - } - - // Move the prev so that it reaches the previous line. - if (prev != begin_) - { - --prev; - } - while (prev != begin_ && *prev != '\n') - { - --prev; - } - if (*prev == '\n') - { - ++prev; - } - - int prev_cmp = memcmp(prev, key.data(), key.length()); - - // This is the first occurrence. - if (prev_cmp < 0 && current_cmp == 0) - { - return ptr; - } - - // This is not, which means ptr is "larger" than the keyData. - bottom = mid - 1; - } - - return nullptr; -} - -}; // namespace vChewing diff --git a/Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.h b/Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.h deleted file mode 100644 index 3ac28768..00000000 --- a/Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.h +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef SOURCE_ENGINE_PARSELESSPHRASEDB_H_ -#define SOURCE_ENGINE_PARSELESSPHRASEDB_H_ - -#include -#include -#include - -namespace vChewing -{ - -// Defines phrase database that consists of (key, value, score) rows that are -// pre-sorted by the byte value of the keys. It is way faster than FastLM -// because it does not need to parse anything. Instead, it relies on the fact -// that the database is already sorted, and binary search is used to find the -// rows. -class ParselessPhraseDB -{ - public: - ParselessPhraseDB(const char *buf, size_t length); - - // Find the rows that match the key. Note that prefix match is used. If you - // need exact match, the key will need to have a delimiter (usually a space) - // at the end. - std::vector findRows(const std::string_view &key); - - const char *findFirstMatchingLine(const std::string_view &key); - - private: - const char *begin_; - const char *end_; -}; - -}; // namespace vChewing - -#endif // SOURCE_ENGINE_PARSELESSPHRASEDB_H_ diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index f8bb64b8..6418bc0f 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -315,10 +315,6 @@ 6ACA41EF15FC1D9000935EF6 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = ""; }; 6ACA41F215FC1D9000935EF6 /* Installer-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = "Installer-Info.plist"; path = "Installer/Installer-Info.plist"; sourceTree = SOURCE_ROOT; }; 6ACA41F315FC1D9000935EF6 /* Installer-Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "Installer-Prefix.pch"; path = "Installer/Installer-Prefix.pch"; sourceTree = SOURCE_ROOT; }; - 6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessPhraseDB.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessPhraseDB.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6ACC3D422793701600F1B140 /* ParselessLM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessLM.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6ACC3D432793701600F1B140 /* ParselessLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D427A9BF25ED28CC005D43E0 /* vChewing-Bridging-Header.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = "vChewing-Bridging-Header.h"; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D427F76B278CA1BA004A2160 /* AppDelegate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = AppDelegate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; D456576D279E4F7B00DF6BC9 /* InputHandler.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputHandler.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; @@ -501,10 +497,6 @@ 5B62A32527AE758000A19448 /* OldFileReferences */ = { isa = PBXGroup; children = ( - 6ACC3D422793701600F1B140 /* ParselessLM.cpp */, - 6ACC3D432793701600F1B140 /* ParselessLM.h */, - 6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */, - 6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */, D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */, D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */, );