diff --git a/Source/Modules/ControllerModules/KeyHandlerSputnik.swift b/Source/Modules/ControllerModules/KeyHandlerSputnik.swift index c6a933cf..fc100936 100644 --- a/Source/Modules/ControllerModules/KeyHandlerSputnik.swift +++ b/Source/Modules/ControllerModules/KeyHandlerSputnik.swift @@ -25,7 +25,6 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ import Cocoa -import Megrez // MARK: - KeyHandler Sputnik. diff --git a/Source/Modules/ControllerModules/KeyHandler_Kernel.swift b/Source/Modules/ControllerModules/KeyHandler_Kernel.swift index f6e55991..a04cf18d 100644 --- a/Source/Modules/ControllerModules/KeyHandler_Kernel.swift +++ b/Source/Modules/ControllerModules/KeyHandler_Kernel.swift @@ -25,7 +25,6 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ import Cocoa -import Megrez public enum InputMode: String { case imeModeCHS = "org.atelierInmu.inputmethod.vChewing.IMECHS" diff --git a/Source/Modules/ControllerModules/KeyHandler_States.swift b/Source/Modules/ControllerModules/KeyHandler_States.swift index e36be109..e2d0a645 100644 --- a/Source/Modules/ControllerModules/KeyHandler_States.swift +++ b/Source/Modules/ControllerModules/KeyHandler_States.swift @@ -25,7 +25,6 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ import Cocoa -import Megrez // MARK: - § State managements. diff --git a/Source/Modules/LangModelRelated/mgrLangModel.swift b/Source/Modules/LangModelRelated/mgrLangModel.swift index c768853a..d12ee66c 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.swift +++ b/Source/Modules/LangModelRelated/mgrLangModel.swift @@ -25,7 +25,6 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ import Cocoa -import Megrez /// 我們不能讓 mgrLangModel 這個靜態管理器來承載下面這些副本變數。 /// 所以,這些副本變數只能放在 mgrLangModel 的外部。 diff --git a/Source/Modules/LanguageParsers/Gramambular/Bigram.h b/Source/Modules/LanguageParsers/Gramambular/Bigram.h deleted file mode 100644 index a4b8c8b2..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Bigram.h +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef BIGRAM_H_ -#define BIGRAM_H_ - -#include - -#include "KeyValuePair.h" - -namespace Gramambular -{ -class Bigram -{ - public: - Bigram(); - - KeyValuePair preceedingKeyValue; - KeyValuePair keyValue; - double score; - - bool operator==(const Bigram &another) const; - bool operator<(const Bigram &another) const; -}; - -inline std::ostream &operator<<(std::ostream &stream, const Bigram &gram) -{ - std::streamsize p = stream.precision(); - stream.precision(6); - stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," << gram.score << ")"; - stream.precision(p); - return stream; -} - -inline std::ostream &operator<<(std::ostream &stream, const std::vector &grams) -{ - stream << "[" << grams.size() << "]=>{"; - - size_t index = 0; - - for (std::vector::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index) - { - stream << index << "=>"; - stream << *gi; - if (gi + 1 != grams.end()) - { - stream << ","; - } - } - - stream << "}"; - return stream; -} - -inline Bigram::Bigram() : score(0.0) -{ -} - -inline bool Bigram::operator==(const Bigram &another) const -{ - return preceedingKeyValue == another.preceedingKeyValue && keyValue == another.keyValue && score == another.score; -} - -inline bool Bigram::operator<(const Bigram &another) const -{ - if (preceedingKeyValue < another.preceedingKeyValue) - { - return true; - } - else if (preceedingKeyValue == another.preceedingKeyValue) - { - if (keyValue < another.keyValue) - { - return true; - } - else if (keyValue == another.keyValue) - { - return score < another.score; - } - return false; - } - - return false; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h b/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h deleted file mode 100644 index 12046b15..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h +++ /dev/null @@ -1,242 +0,0 @@ -// Copyright (c) 2011 and onwards The 
OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef BLOCKREADINGBUILDER_H_ -#define BLOCKREADINGBUILDER_H_ - -#include -#include - -#include "Grid.h" -#include "LanguageModel.h" - -namespace Gramambular -{ - -class BlockReadingBuilder -{ - public: - explicit BlockReadingBuilder(LanguageModel *lm); - void clear(); - - size_t length() const; - size_t cursorIndex() const; - void setCursorIndex(size_t newIndex); - void insertReadingAtCursor(const std::string &reading); - bool deleteReadingBeforeCursor(); // backspace - bool deleteReadingAfterCursor(); // delete - - bool removeHeadReadings(size_t count); - - void setJoinSeparator(const std::string &separator); - const std::string joinSeparator() const; - - std::vector readings() const; - - Grid &grid(); - - protected: - void build(); - - static const std::string Join(std::vector::const_iterator begin, - std::vector::const_iterator end, const std::string &separator); - - // 規定最多可以組成的詞的字數上限為 10 - static const size_t MaximumBuildSpanLength = 10; - - size_t m_cursorIndex; - std::vector m_readings; - - Grid m_grid; - LanguageModel *m_LM; - std::string m_joinSeparator; -}; - -inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *lm) : m_LM(lm), m_cursorIndex(0) -{ -} - -inline void BlockReadingBuilder::clear() -{ - m_cursorIndex = 0; - m_readings.clear(); - m_grid.clear(); -} - -inline size_t BlockReadingBuilder::length() const -{ - return m_readings.size(); -} - -inline size_t BlockReadingBuilder::cursorIndex() const -{ - return m_cursorIndex; -} - -inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) -{ - m_cursorIndex = newIndex > m_readings.size() ? 
m_readings.size() : newIndex; -} - -inline void BlockReadingBuilder::insertReadingAtCursor(const std::string &reading) -{ - m_readings.insert(m_readings.begin() + m_cursorIndex, reading); - - m_grid.expandGridByOneAtLocation(m_cursorIndex); - build(); - m_cursorIndex++; -} - -inline std::vector BlockReadingBuilder::readings() const -{ - return m_readings; -} - -inline bool BlockReadingBuilder::deleteReadingBeforeCursor() -{ - if (!m_cursorIndex) - { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex); - m_cursorIndex--; - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; -} - -inline bool BlockReadingBuilder::deleteReadingAfterCursor() -{ - if (m_cursorIndex == m_readings.size()) - { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1); - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; -} - -inline bool BlockReadingBuilder::removeHeadReadings(size_t count) -{ - if (count > length()) - { - return false; - } - - for (size_t i = 0; i < count; i++) - { - if (m_cursorIndex) - { - m_cursorIndex--; - } - m_readings.erase(m_readings.begin(), m_readings.begin() + 1); - m_grid.shrinkGridByOneAtLocation(0); - build(); - } - - return true; -} - -inline void BlockReadingBuilder::setJoinSeparator(const std::string &separator) -{ - m_joinSeparator = separator; -} - -inline const std::string BlockReadingBuilder::joinSeparator() const -{ - return m_joinSeparator; -} - -inline Grid &BlockReadingBuilder::grid() -{ - return m_grid; -} - -inline void BlockReadingBuilder::build() -{ - if (!m_LM) - { - return; - } - - size_t begin = 0; - size_t end = m_cursorIndex + MaximumBuildSpanLength; - - if (m_cursorIndex < MaximumBuildSpanLength) - { - begin = 0; - } - else - { - begin = m_cursorIndex - MaximumBuildSpanLength; - } - - if (end > m_readings.size()) - { - end = m_readings.size(); - } - - for (size_t p = 
begin; p < end; p++) - { - for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) - { - std::string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); - if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) - { - std::vector unigrams = m_LM->unigramsForKey(combinedReading); - - if (unigrams.size() > 0) - { - Node n(combinedReading, unigrams, std::vector()); - m_grid.insertNode(n, p, q); - } - } - } - } -} - -inline const std::string BlockReadingBuilder::Join(std::vector::const_iterator begin, - std::vector::const_iterator end, - const std::string &separator) -{ - std::string result; - for (std::vector::const_iterator iter = begin; iter != end;) - { - result += *iter; - ++iter; - if (iter != end) - { - result += separator; - } - } - return result; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Grid.h b/Source/Modules/LanguageParsers/Gramambular/Grid.h deleted file mode 100644 index 5a39fe7a..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Grid.h +++ /dev/null @@ -1,313 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. 
No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef GRID_H_ -#define GRID_H_ - -#include -#include -#include - -#include "NodeAnchor.h" -#include "Span.h" - -namespace Gramambular -{ - -class Grid -{ - public: - void clear(); - void insertNode(const Node &node, size_t location, size_t spanningLength); - bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, const std::string &key); - - void expandGridByOneAtLocation(size_t location); - void shrinkGridByOneAtLocation(size_t location); - - size_t width() const; - std::vector nodesEndingAt(size_t location); - std::vector nodesCrossingOrEndingAt(size_t location); - - // "Freeze" the node with the unigram that represents the selected candidate - // value. After this, the node that contains the unigram will always be - // evaluated to that unigram, while all other overlapping nodes will be reset - // to their initial state (that is, if any of those nodes were "frozen" or - // fixed, they will be unfrozen.) - NodeAnchor fixNodeSelectedCandidate(size_t location, const std::string &value); - - // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, - // only boost the unigram that represents the value with an overriding score. - // This has the same side effect as fixNodeSelectedCandidate, which is that - // all other overlapping nodes will be reset to their initial state. 
- void overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, float overridingScore); - - std::string dumpDOT() - { - std::stringstream sst; - sst << "digraph {" << std::endl; - sst << "graph [ rankdir=LR ];" << std::endl; - sst << "BOS;" << std::endl; - - for (size_t p = 0; p < m_spans.size(); p++) - { - Span &span = m_spans[p]; - for (size_t ni = 0; ni <= span.maximumLength(); ni++) - { - Node *np = span.nodeOfLength(ni); - if (np) - { - if (!p) - { - sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl; - } - - sst << np->currentKeyValue().value << ";" << std::endl; - - if (p + ni < m_spans.size()) - { - Span &dstSpan = m_spans[p + ni]; - for (size_t q = 0; q <= dstSpan.maximumLength(); q++) - { - Node *dn = dstSpan.nodeOfLength(q); - if (dn) - { - sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" - << std::endl; - } - } - } - - if (p + ni == m_spans.size()) - { - sst << np->currentKeyValue().value << " -> " - << "EOS;" << std::endl; - } - } - } - } - - sst << "EOS;" << std::endl; - sst << "}"; - return sst.str(); - } - - protected: - std::vector m_spans; -}; - -inline void Grid::clear() -{ - m_spans.clear(); -} - -inline void Grid::insertNode(const Node &node, size_t location, size_t spanningLength) -{ - if (location >= m_spans.size()) - { - size_t diff = location - m_spans.size() + 1; - - for (size_t i = 0; i < diff; i++) - { - m_spans.push_back(Span()); - } - } - - m_spans[location].insertNodeOfLength(node, spanningLength); -} - -inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, - const std::string &key) -{ - if (location > m_spans.size()) - { - return false; - } - - const Node *n = m_spans[location].nodeOfLength(spanningLength); - if (!n) - { - return false; - } - - return key == n->key(); -} - -inline void Grid::expandGridByOneAtLocation(size_t location) -{ - if (!location || location == m_spans.size()) - { - 
m_spans.insert(m_spans.begin() + location, Span()); - } - else - { - m_spans.insert(m_spans.begin() + location, Span()); - for (size_t i = 0; i < location; i++) - { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(location - i); - } - } -} - -inline void Grid::shrinkGridByOneAtLocation(size_t location) -{ - if (location >= m_spans.size()) - { - return; - } - - m_spans.erase(m_spans.begin() + location); - for (size_t i = 0; i < location; i++) - { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(location - i); - } -} - -inline size_t Grid::width() const -{ - return m_spans.size(); -} - -// macOS 10.6 開始的內建注音的游標前置選字風格 -inline std::vector Grid::nodesEndingAt(size_t location) -{ - std::vector result; - - if (m_spans.size() && location <= m_spans.size()) - { - for (size_t i = 0; i < location; i++) - { - Span &span = m_spans[i]; - if (i + span.maximumLength() >= location) - { - Node *np = span.nodeOfLength(location - i); - if (np) - { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = location - i; - - result.push_back(na); - } - } - } - } - - return result; -} - -// Windows 版奇摩注音輸入法的游標後置的選字風格。 -// 與微軟新注音相異的是,這個風格允許在詞的中間叫出候選字窗。 -inline std::vector Grid::nodesCrossingOrEndingAt(size_t location) -{ - std::vector result; - - if (m_spans.size() && location <= m_spans.size()) - { - for (size_t i = 0; i < location; i++) - { - Span &span = m_spans[i]; - - if (i + span.maximumLength() >= location) - { - for (size_t j = 1, m = span.maximumLength(); j <= m; j++) - { - if (i + j < location) - { - continue; - } - - Node *np = span.nodeOfLength(j); - if (np) - { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = location - i; - - result.push_back(na); - } - } - } - } - } - - return result; -} - -// For nodes found at the location, fix their currently-selected candidate using -// the supplied string value. 
-inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const std::string &value) -{ - std::vector nodes = nodesCrossingOrEndingAt(location); - NodeAnchor node; - for (auto nodeAnchor : nodes) - { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) - { - if (candidates[i].value == value) - { - const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); - node = nodeAnchor; - break; - } - } - } - return node; -} - -inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, - float overridingScore) -{ - std::vector nodes = nodesCrossingOrEndingAt(location); - for (auto nodeAnchor : nodes) - { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) - { - if (candidates[i].value == value) - { - const_cast(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); - break; - } - } - } -} - -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Node.h b/Source/Modules/LanguageParsers/Gramambular/Node.h deleted file mode 100644 index 16b69fdf..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Node.h +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
-/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef NODE_H_ -#define NODE_H_ - -#include -#include -#include -#include - -#include "LanguageModel.h" - -namespace Gramambular -{ - -class Node -{ - public: - Node(); - Node(const std::string &key, const std::vector &unigrams, const std::vector &bigrams); - - void primeNodeWithPreceedingKeyValues(const std::vector &keyValues); - - bool isCandidateFixed() const; - const std::vector &candidates() const; - void selectCandidateAtIndex(size_t index = 0, bool fix = true); - void resetCandidate(); - void selectFloatingCandidateAtIndex(size_t index, double score); - - const std::string &key() const; - double score() const; - double scoreForCandidate(const std::string &candidate) const; - const KeyValuePair currentKeyValue() const; - double highestUnigramScore() const; - - protected: - const LanguageModel *m_LM; - - std::string m_key; - double m_score; - - std::vector m_unigrams; - std::vector m_candidates; - std::map m_valueUnigramIndexMap; - std::map> m_preceedingGramBigramMap; - - bool m_candidateFixed; - size_t m_selectedUnigramIndex; - - friend std::ostream &operator<<(std::ostream &stream, const Node &node); -}; - -inline std::ostream &operator<<(std::ostream &stream, const Node &node) -{ - stream << "(node,key:" << node.m_key << ",fixed:" << (node.m_candidateFixed ? 
"true" : "false") - << ",selected:" << node.m_selectedUnigramIndex << "," << node.m_unigrams << ")"; - return stream; -} - -inline Node::Node() : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) -{ -} - -inline Node::Node(const std::string &key, const std::vector &unigrams, const std::vector &bigrams) - : m_key(key), m_unigrams(unigrams), m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) -{ - stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); - - if (m_unigrams.size()) - { - m_score = m_unigrams[0].score; - } - - size_t i = 0; - for (std::vector::const_iterator ui = m_unigrams.begin(); ui != m_unigrams.end(); ++ui) - { - m_valueUnigramIndexMap[(*ui).keyValue.value] = i; - i++; - - m_candidates.push_back((*ui).keyValue); - } - - for (std::vector::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi) - { - m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); - } -} - -inline void Node::primeNodeWithPreceedingKeyValues(const std::vector &keyValues) -{ - size_t newIndex = m_selectedUnigramIndex; - double max = m_score; - - if (!isCandidateFixed()) - { - for (std::vector::const_iterator kvi = keyValues.begin(); kvi != keyValues.end(); ++kvi) - { - std::map>::const_iterator f = m_preceedingGramBigramMap.find(*kvi); - if (f != m_preceedingGramBigramMap.end()) - { - const std::vector &bigrams = (*f).second; - - for (std::vector::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi) - { - const Bigram &bigram = *bi; - if (bigram.score > max) - { - std::map::const_iterator uf = - m_valueUnigramIndexMap.find((*bi).keyValue.value); - if (uf != m_valueUnigramIndexMap.end()) - { - newIndex = (*uf).second; - max = bigram.score; - } - } - } - } - } - } - - if (m_score != max) - { - m_score = max; - } - - if (newIndex != m_selectedUnigramIndex) - { - m_selectedUnigramIndex = newIndex; - } -} - -inline bool Node::isCandidateFixed() const -{ - return m_candidateFixed; -} - -inline const 
std::vector &Node::candidates() const -{ - return m_candidates; -} - -inline void Node::selectCandidateAtIndex(size_t index, bool fix) -{ - if (index >= m_unigrams.size()) - { - m_selectedUnigramIndex = 0; - } - else - { - m_selectedUnigramIndex = index; - } - - m_candidateFixed = fix; - m_score = 99; -} - -inline void Node::resetCandidate() -{ - m_selectedUnigramIndex = 0; - m_candidateFixed = 0; - if (m_unigrams.size()) - { - m_score = m_unigrams[0].score; - } -} - -inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) -{ - if (index >= m_unigrams.size()) - { - m_selectedUnigramIndex = 0; - } - else - { - m_selectedUnigramIndex = index; - } - m_candidateFixed = false; - m_score = score; -} - -inline const std::string &Node::key() const -{ - return m_key; -} - -inline double Node::score() const -{ - return m_score; -} - -inline double Node::scoreForCandidate(const std::string &candidate) const -{ - for (auto unigram : m_unigrams) - { - if (unigram.keyValue.value == candidate) - { - return unigram.score; - } - } - return 0.0; -} - -inline double Node::highestUnigramScore() const -{ - if (m_unigrams.empty()) - { - return 0.0; - } - return m_unigrams[0].score; -} - -inline const KeyValuePair Node::currentKeyValue() const -{ - if (m_selectedUnigramIndex >= m_unigrams.size()) - { - return KeyValuePair(); - } - else - { - return m_candidates[m_selectedUnigramIndex]; - } -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Span.h b/Source/Modules/LanguageParsers/Gramambular/Span.h deleted file mode 100644 index 57c9a64c..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Span.h +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
-/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef SPAN_H_ -#define SPAN_H_ - -#include -#include -#include - -#include "Node.h" - -namespace Gramambular -{ -class Span -{ - public: - void clear(); - void insertNodeOfLength(const Node &node, size_t length); - void removeNodeOfLengthGreaterThan(size_t length); - - Node *nodeOfLength(size_t length); - size_t maximumLength() const; - - protected: - std::map m_lengthNodeMap; - size_t m_maximumLength = 0; -}; - -inline void Span::clear() -{ - m_lengthNodeMap.clear(); - m_maximumLength = 0; -} - -inline void Span::insertNodeOfLength(const Node &node, size_t length) -{ - m_lengthNodeMap[length] = node; - if (length > m_maximumLength) - { - m_maximumLength = length; - } -} - -inline void Span::removeNodeOfLengthGreaterThan(size_t length) -{ - if (length > m_maximumLength) - { - return; - } - - size_t max = 0; - std::set removeSet; - for (std::map::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end(); i != e; ++i) - { - if ((*i).first > length) - { - removeSet.insert((*i).first); - } - else - { - if ((*i).first > max) - { - max = (*i).first; - } - } - } - - for (std::set::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) - { - m_lengthNodeMap.erase(*i); - } - - m_maximumLength = max; -} - -inline Node *Span::nodeOfLength(size_t length) -{ - std::map::iterator f = m_lengthNodeMap.find(length); - return f == m_lengthNodeMap.end() ? 0 : &(*f).second; -} - -inline size_t Span::maximumLength() const -{ - return m_maximumLength; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Unigram.h b/Source/Modules/LanguageParsers/Gramambular/Unigram.h deleted file mode 100644 index 7faac48d..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Unigram.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
-/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef UNIGRAM_H_ -#define UNIGRAM_H_ - -#include - -#include "KeyValuePair.h" - -namespace Gramambular -{ - -class Unigram -{ - public: - Unigram(); - - KeyValuePair keyValue; - double score; - - bool operator==(const Unigram &another) const; - bool operator<(const Unigram &another) const; - - static bool ScoreCompare(const Unigram &a, const Unigram &b); -}; - -inline std::ostream &operator<<(std::ostream &stream, const Unigram &gram) -{ - std::streamsize p = stream.precision(); - stream.precision(6); - stream << "(" << gram.keyValue << "," << gram.score << ")"; - stream.precision(p); - return stream; -} - -inline std::ostream &operator<<(std::ostream &stream, const std::vector &grams) -{ - stream << "[" << grams.size() << "]=>{"; - - size_t index = 0; - - for (std::vector::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index) - { - stream << index << "=>"; - stream << *gi; - if (gi + 1 != grams.end()) - { - stream << ","; - } - } - - stream << "}"; - return stream; -} - -inline Unigram::Unigram() : score(0.0) -{ -} - -inline bool Unigram::operator==(const Unigram &another) const -{ - return keyValue == another.keyValue && score == another.score; -} - -inline bool Unigram::operator<(const Unigram &another) const -{ - if (keyValue < another.keyValue) - { - return true; - } - else if (keyValue == another.keyValue) - { - return score < another.score; - } - return false; -} - -inline bool Unigram::ScoreCompare(const Unigram &a, const Unigram &b) -{ - return a.score > b.score; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Walker.h b/Source/Modules/LanguageParsers/Gramambular/Walker.h deleted file mode 100644 index c5ef2e3d..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Walker.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). 
-// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef WALKER_H_ -#define WALKER_H_ - -#include -#include - -#include "Grid.h" - -namespace Gramambular -{ - -class Walker -{ - public: - explicit Walker(Grid *inGrid); - const std::vector reverseWalk(size_t location, double accumulatedScore = 0.0); - - protected: - Grid *m_grid; -}; - -inline Walker::Walker(Grid *inGrid) : m_grid(inGrid) -{ -} - -inline const std::vector Walker::reverseWalk(size_t location, double accumulatedScore) -{ - if (!location || location > m_grid->width()) - { - return std::vector(); - } - - std::vector> paths; - - std::vector nodes = m_grid->nodesEndingAt(location); - - for (std::vector::iterator ni = nodes.begin(); ni != nodes.end(); ++ni) - { - if (!(*ni).node) - { - continue; - } - - (*ni).accumulatedScore = accumulatedScore + (*ni).node->score(); - - std::vector path = reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore); - path.insert(path.begin(), *ni); - - paths.push_back(path); - } - - if (!paths.size()) - { - return std::vector(); - } - - std::vector *result = &*(paths.begin()); - for (std::vector>::iterator pi = paths.begin(); pi != paths.end(); ++pi) - { - if ((*pi).back().accumulatedScore > result->back().accumulatedScore) - { - result = &*pi; - } - } - - return *result; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Gramambular.h b/Source/Modules/LanguageParsers/Megrez/0_Megrez.swift similarity index 71% rename from Source/Modules/LanguageParsers/Gramambular/Gramambular.h rename to Source/Modules/LanguageParsers/Megrez/0_Megrez.swift index d33a298b..cc4b4804 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Gramambular.h +++ b/Source/Modules/LanguageParsers/Megrez/0_Megrez.swift @@ -1,6 +1,5 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
+// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). /* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -24,18 +23,5 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef GRAMAMBULAR_H_ -#define GRAMAMBULAR_H_ - -#include "Bigram.h" -#include "BlockReadingBuilder.h" -#include "Grid.h" -#include "KeyValuePair.h" -#include "LanguageModel.h" -#include "Node.h" -#include "NodeAnchor.h" -#include "Span.h" -#include "Unigram.h" -#include "Walker.h" - -#endif +/// The namespace for this package. +public enum Megrez {} diff --git a/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift b/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift new file mode 100644 index 00000000..64578605 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift @@ -0,0 +1,146 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. 
No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +extension Megrez { + public class BlockReadingBuilder { + let kMaximumBuildSpanLength = 10 // 規定最多可以組成的詞的字數上限為 10 + var mutCursorIndex: Int = 0 + var mutReadings: [String] = [] + var mutGrid: Grid = .init() + var mutLM: LanguageModel + var mutJoinSeparator: String = "" + + public init(lm: LanguageModel) { + mutLM = lm + } + + public func clear() { + mutCursorIndex = 0 + mutReadings.removeAll() + mutGrid.clear() + } + + public func length() -> Int { mutReadings.count } + + public func cursorIndex() -> Int { mutCursorIndex } + + public func setCursorIndex(newIndex: Int) { + mutCursorIndex = min(newIndex, mutReadings.count) + } + + public func insertReadingAtCursor(reading: String) { + mutReadings.insert(reading, at: mutCursorIndex) + mutGrid.expandGridByOneAt(location: mutCursorIndex) + build() + mutCursorIndex += 1 + } + + public func readings() -> [String] { mutReadings } + + @discardableResult public func deleteReadingBeforeCursor() -> Bool { + if mutCursorIndex == 0 { + return false + } + + mutReadings.remove(at: mutCursorIndex - 1) + mutCursorIndex -= 1 + mutGrid.shrinkGridByOneAt(location: mutCursorIndex) + build() + return true + } + + @discardableResult public func deleteReadingAfterCursor() -> Bool { + if mutCursorIndex == mutReadings.count { + return false + } + + mutReadings.remove(at: mutCursorIndex) + 
mutGrid.shrinkGridByOneAt(location: mutCursorIndex) + build() + return true + } + + @discardableResult public func removeHeadReadings(count: Int) -> Bool { + if count > length() { + return false + } + + var i = 0 + while i < count { + if mutCursorIndex != 0 { + mutCursorIndex -= 1 + } + mutReadings.removeFirst() + mutGrid.shrinkGridByOneAt(location: 0) + build() + i += 1 + } + + return true + } + + public func setJoinSeparator(separator: String) { + mutJoinSeparator = separator + } + + public func joinSeparator() -> String { mutJoinSeparator } + + public func grid() -> Grid { mutGrid } + + public func build() { + // if (mutLM == nil) { return } // 這個出不了 nil,所以註釋掉。 + + let itrBegin: Int = + (mutCursorIndex < kMaximumBuildSpanLength) ? 0 : mutCursorIndex - kMaximumBuildSpanLength + let itrEnd: Int = min(mutCursorIndex + kMaximumBuildSpanLength, mutReadings.count) + + var p = itrBegin + while p < itrEnd { + var q = 1 + while q <= kMaximumBuildSpanLength, p + q <= itrEnd { + let strSlice = mutReadings[p..<(p + q)] + let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator) + if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { + let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading) + if !unigrams.isEmpty { + let n = Node(key: combinedReading, unigrams: unigrams) + mutGrid.insertNode(node: n, location: p, spanningLength: q) + } + } + q += 1 + } + p += 1 + } + } + + public func join(slice strSlice: ArraySlice, separator: String) -> String { + var arrResult: [String] = [] + for value in strSlice { + arrResult.append(value) + } + return arrResult.joined(separator: separator) + } + } +} diff --git a/Source/Modules/LanguageParsers/Megrez/1_Walker.swift b/Source/Modules/LanguageParsers/Megrez/1_Walker.swift new file mode 100644 index 00000000..d6590be8 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/1_Walker.swift @@ -0,0 +1,74 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL 
License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +extension Megrez { + public class Walker { + var mutGrid: Grid + + public init(grid: Megrez.Grid = Megrez.Grid()) { + mutGrid = grid + } + + public func reverseWalk(at location: Int, score accumulatedScore: Double = 0.0) -> [NodeAnchor] { + if location == 0 || location > mutGrid.width() { + return [] as [NodeAnchor] + } + + var paths: [[NodeAnchor]] = [] + let nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location) + + for n in nodes { + var n = n + if n.node == nil { + continue + } + + n.accumulatedScore = accumulatedScore + n.node!.score() + + var path: [NodeAnchor] = reverseWalk( + at: location - n.spanningLength, + score: n.accumulatedScore + ) + path.insert(n, at: 0) + + paths.append(path) + } + + if !paths.isEmpty { + if var result = paths.first { + for value in paths { + if let vLast = value.last, let rLast = result.last { + if vLast.accumulatedScore > rLast.accumulatedScore { + result = value + } + } + } + return result + } + } + return [] as [NodeAnchor] + } + } +} diff --git a/Source/Modules/LanguageParsers/Megrez/2_Grid.swift b/Source/Modules/LanguageParsers/Megrez/2_Grid.swift new file mode 100644 index 00000000..db4ac907 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/2_Grid.swift @@ -0,0 +1,180 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. 
No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +extension Megrez { + public class Grid { // An array of spans indexed by location; each span maps a spanning length to a node. + var mutSpans: [Megrez.Span] + + public init() { + mutSpans = [Megrez.Span]() + } + + public func clear() { + mutSpans = [Megrez.Span]() + } + + public func insertNode(node: Node, location: Int, spanningLength: Int) { + if location >= mutSpans.count { + let diff = location - mutSpans.count + 1 + var i = 0 + while i < diff { + mutSpans.append(Span()) + i += 1 + } + } + mutSpans[location].insert(node: node, length: spanningLength) + } + + public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool { + if location < 0 || location >= mutSpans.count { // mutSpans[location] below traps unless 0 <= location < count + return false + } + + let n = mutSpans[location].node(length: spanningLength) + return n == nil ? 
false : key == n?.key() + } + + public func expandGridByOneAt(location: Int) { + mutSpans.insert(Span(), at: min(max(location, 0), mutSpans.count)) // insert at the location (clamped) so later spans shift together with their readings + if location > 0, location < mutSpans.count { + var i = 0 + while i < location { + // zaps overlapping spans + mutSpans[i].removeNodeOfLengthGreaterThan(location - i) + i += 1 + } + } + } + + public func shrinkGridByOneAt(location: Int) { + if location >= mutSpans.count { + return + } + + mutSpans.remove(at: location) + var i = 0 + while i < location { + // zaps overlapping spans + mutSpans[i].removeNodeOfLengthGreaterThan(location - i) + i += 1 + } + } + + public func width() -> Int { mutSpans.count } + + public func nodesEndingAt(location: Int) -> [NodeAnchor] { + var results: [NodeAnchor] = [] + if !mutSpans.isEmpty, location <= mutSpans.count { + var i = 0 + while i < location { + let span = mutSpans[i] + if i + span.maximumLength >= location { + if let np = span.node(length: location - i) { + results.append( + NodeAnchor( + node: np, + location: i, + spanningLength: location - i + ) + ) + } + } + i += 1 + } + } + return results + } + + public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] { + var results: [NodeAnchor] = [] + if !mutSpans.isEmpty, location <= mutSpans.count { + var i = 0 + while i < location { + let span = mutSpans[i] + if i + span.maximumLength >= location { + var j = 1 + while j <= span.maximumLength { + if i + j < location { + j += 1 + continue + } + if let np = span.node(length: j) { + results.append( + NodeAnchor( + node: np, + location: i, + spanningLength: location - i + ) + ) + } + j += 1 + } + } + i += 1 + } + } + return results + } + + public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor { + var node = NodeAnchor() + let nodes = nodesCrossingOrEndingAt(location: location) + for nodeAnchor in nodes { + // Reset the candidate-fixed state of every node at the location. 
+ let candidates = nodeAnchor.node?.candidates() ?? 
[] + nodeAnchor.node?.resetCandidate() + + for (i, candidate) in candidates.enumerated() { + if candidate.value == value { + nodeAnchor.node?.selectCandidateAt(index: i) + node = nodeAnchor + break + } + } + } + return node + } + + public func overrideNodeScoreForSelectedCandidate(location: Int, value: inout String, overridingScore: Double) { + for nodeAnchor in nodesCrossingOrEndingAt(location: location) { + var nodeAnchor = nodeAnchor + if let theNode = nodeAnchor.node { + let candidates = theNode.candidates() + // Reset the candidate-fixed state of every node at the location. + theNode.resetCandidate() + nodeAnchor.node = theNode + + for (i, candidate) in candidates.enumerated() { + if candidate.value == value { + theNode.selectFloatingCandidateAt(index: i, score: overridingScore) + nodeAnchor.node = theNode + break + } + } + } + } + } + } +} diff --git a/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h b/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift similarity index 62% rename from Source/Modules/LanguageParsers/Gramambular/LanguageModel.h rename to Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift index 1049c011..48bc364d 100644 --- a/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h +++ b/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift @@ -1,6 +1,5 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). 
/* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -24,29 +23,14 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef LANGUAGEMODEL_H_ -#define LANGUAGEMODEL_H_ - -#include -#include - -#include "Bigram.h" -#include "Unigram.h" - -namespace Gramambular -{ - -class LanguageModel -{ - public: - virtual ~LanguageModel() - { - } - - virtual const std::vector bigramsForKeys(const std::string &preceedingKey, const std::string &key) = 0; - virtual const std::vector unigramsForKey(const std::string &key) = 0; - virtual bool hasUnigramsForKey(const std::string &key) = 0; -}; -} // namespace Gramambular - -#endif +extension Megrez { + @frozen public struct NodeAnchor { + public var node: Node? + public var location: Int = 0 + public var spanningLength: Int = 0 + public var accumulatedScore: Double = 0.0 + public var keyLength: Int { + node?.key().count ?? 0 + } + } +} diff --git a/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h b/Source/Modules/LanguageParsers/Megrez/3_Span.swift similarity index 50% rename from Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h rename to Source/Modules/LanguageParsers/Megrez/3_Span.swift index 432566a0..0db3a889 100644 --- a/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h +++ b/Source/Modules/LanguageParsers/Megrez/3_Span.swift @@ -1,6 +1,5 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). 
/* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -24,52 +23,52 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef NODEANCHOR_H_ -#define NODEANCHOR_H_ +extension Megrez { + @frozen public struct Span { + private var mutLengthNodeMap: [Int: Megrez.Node] + private var mutMaximumLength: Int + var maximumLength: Int { + mutMaximumLength + } -#include + public init() { + mutLengthNodeMap = [:] + mutMaximumLength = 0 + } -#include "Node.h" + mutating func clear() { + mutLengthNodeMap.removeAll() + mutMaximumLength = 0 + } -namespace Gramambular -{ + mutating func insert(node: Node, length: Int) { + mutLengthNodeMap[length] = node + if length > mutMaximumLength { + mutMaximumLength = length + } + } -struct NodeAnchor -{ - const Node *node = nullptr; - size_t location = 0; - size_t spanningLength = 0; - double accumulatedScore = 0.0; -}; + mutating func removeNodeOfLengthGreaterThan(_ length: Int) { + if length > mutMaximumLength { return } + var max = 0 + var removalList: [Int: Megrez.Node] = [:] + for key in mutLengthNodeMap.keys { + if key > length { + removalList[key] = mutLengthNodeMap[key] + } else { + if key > max { + max = key + } + } + } + for key in removalList.keys { + mutLengthNodeMap.removeValue(forKey: key) + } + mutMaximumLength = max + } -inline std::ostream &operator<<(std::ostream &stream, const NodeAnchor &anchor) -{ - stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),"; - if (anchor.node) - { - stream << *(anchor.node); - } - else - { - stream << "null"; - } - stream << "}"; - return stream; + public func node(length: Int) -> Node? 
{ + mutLengthNodeMap[length] + } + } } - -inline std::ostream &operator<<(std::ostream &stream, const std::vector &anchor) -{ - for (std::vector::const_iterator i = anchor.begin(); i != anchor.end(); ++i) - { - stream << *i; - if (i + 1 != anchor.end()) - { - stream << "<-"; - } - } - - return stream; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Megrez/4_Node.swift b/Source/Modules/LanguageParsers/Megrez/4_Node.swift new file mode 100644 index 00000000..9744086a --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/4_Node.swift @@ -0,0 +1,161 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +extension Megrez { + public class Node { // One key with its unigrams (kept in descending score order), candidate list and preceding-bigram map. + let mutLM: LanguageModel + var mutKey: String + var mutScore: Double = 0 + var mutUnigrams: [Unigram] + var mutCandidates: [KeyValuePair] + var mutValueUnigramIndexMap: [String: Int] + var mutPrecedingBigramMap: [KeyValuePair: [Megrez.Bigram]] + + var mutCandidateFixed: Bool = false + var mutSelectedUnigramIndex: Int = 0 + + public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) { + mutLM = LanguageModel() + + mutKey = key + mutScore = 0 + + mutUnigrams = unigrams + mutCandidates = [] + mutValueUnigramIndexMap = [:] + mutPrecedingBigramMap = [:] + + mutCandidateFixed = false + mutSelectedUnigramIndex = 0 + + if bigrams.isEmpty { + node(key: key, unigrams: unigrams) + } else { + node(key: key, unigrams: unigrams, bigrams: bigrams) // forward the supplied bigrams instead of dropping them + } + } + + public func node(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) { + mutKey = key + // Store the unigrams sorted by descending score so that indices into mutUnigrams and mutCandidates agree. + mutUnigrams = unigrams.sorted { + $0.score > $1.score + } + + if !mutUnigrams.isEmpty { + mutScore = mutUnigrams[0].score + } + + for (i, theGram) in mutUnigrams.enumerated() { + mutValueUnigramIndexMap[theGram.keyValue.value] = i + mutCandidates.append(theGram.keyValue) + } + + for gram in bigrams { + mutPrecedingBigramMap[gram.precedingKeyValue, default: []].append(gram) // `?.append` on a missing key is a no-op, so create the entry on demand + } + } + + public func primeNodeWith(precedingKeyValues: [KeyValuePair]) { + var newIndex = mutSelectedUnigramIndex + var max = mutScore + + if !isCandidateFixed() { + for neta in precedingKeyValues { + let bigrams = mutPrecedingBigramMap[neta] ?? 
[] + for bigram in bigrams { + if bigram.score > max { + if let valRetrieved = mutValueUnigramIndexMap[bigram.keyValue.value] { + newIndex = valRetrieved as Int + max = bigram.score + } + } + } + } + } + + if mutScore != max { + mutScore = max + } + + if mutSelectedUnigramIndex != newIndex { + mutSelectedUnigramIndex = newIndex + } + } + + public func isCandidateFixed() -> Bool { mutCandidateFixed } + + public func candidates() -> [KeyValuePair] { mutCandidates } + + public func selectCandidateAt(index: Int = 0, fix: Bool = false) { + mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index + mutCandidateFixed = fix + mutScore = 99 + } + + public func resetCandidate() { + mutSelectedUnigramIndex = 0 + mutCandidateFixed = false + if !mutUnigrams.isEmpty { + mutScore = mutUnigrams[0].score + } + } + + public func selectFloatingCandidateAt(index: Int, score: Double) { + mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index + mutCandidateFixed = false + mutScore = score + } + + public func key() -> String { mutKey } + + public func score() -> Double { mutScore } + + public func scoreFor(candidate: String) -> Double { + for unigram in mutUnigrams { + if unigram.keyValue.value == candidate { + return unigram.score + } + } + return 0.0 + } + + public func currentKeyValue() -> KeyValuePair { + mutSelectedUnigramIndex >= mutUnigrams.count ? KeyValuePair() : mutCandidates[mutSelectedUnigramIndex] + } + + public func highestUnigramScore() -> Double { + mutUnigrams.isEmpty ? 
0.0 : mutUnigrams[0].score + } + + public static func == (lhs: Node, rhs: Node) -> Bool { + lhs.mutUnigrams == rhs.mutUnigrams && lhs.mutCandidates == rhs.mutCandidates + && lhs.mutValueUnigramIndexMap == rhs.mutValueUnigramIndexMap + && lhs.mutPrecedingBigramMap == rhs.mutPrecedingBigramMap + && lhs.mutCandidateFixed == rhs.mutCandidateFixed + && lhs.mutSelectedUnigramIndex == rhs.mutSelectedUnigramIndex + } + } +} diff --git a/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h b/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift similarity index 53% rename from Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h rename to Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift index 231d6342..ce12ffaf 100644 --- a/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h +++ b/Source/Modules/LanguageParsers/Megrez/5_LanguageModel.swift @@ -1,6 +1,5 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). /* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -24,48 +23,22 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef KEYVALUEPAIR_H_ -#define KEYVALUEPAIR_H_ +extension Megrez { + // This is only a skeleton: real use requires deriving a subclass and overriding these functions. + // The bodies contain dummy content, because otherwise some Swift formatters would mangle the functions' parameter design. 
+ open class LanguageModel { + public init() {} -#include -#include + open func unigramsFor(key: String) -> [Megrez.Unigram] { + key.isEmpty ? 
[Megrez.Unigram]() : [Megrez.Unigram]() + } -namespace Gramambular -{ + open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] { + precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() + } -class KeyValuePair -{ - public: - std::string key; - std::string value; - - bool operator==(const KeyValuePair &another) const; - bool operator<(const KeyValuePair &another) const; -}; - -inline std::ostream &operator<<(std::ostream &stream, const KeyValuePair &pair) -{ - stream << "(" << pair.key << "," << pair.value << ")"; - return stream; + open func hasUnigramsFor(key: String) -> Bool { + key.count != 0 + } + } } - -inline bool KeyValuePair::operator==(const KeyValuePair &another) const -{ - return key == another.key && value == another.value; -} - -inline bool KeyValuePair::operator<(const KeyValuePair &another) const -{ - if (key < another.key) - { - return true; - } - else if (key == another.key) - { - return value < another.value; - } - return false; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift b/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift new file mode 100644 index 00000000..a8f25ba3 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/6_Bigram.swift @@ -0,0 +1,74 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. 
The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +extension Megrez { + @frozen public struct Bigram: Equatable { // A key-value pair, the key-value pair preceding it, and a score. + public var keyValue: KeyValuePair + public var precedingKeyValue: KeyValuePair + public var score: Double + // var paired: String + + public init(precedingKeyValue: KeyValuePair, keyValue: KeyValuePair, score: Double) { + self.keyValue = keyValue + self.precedingKeyValue = precedingKeyValue + self.score = score + // paired = "(" + keyValue.paired + "|" + precedingKeyValue.paired + "," + String(score) + ")" + } + + public func hash(into hasher: inout Hasher) { + hasher.combine(keyValue) + hasher.combine(precedingKeyValue) + hasher.combine(score) + // hasher.combine(paired) + } + + // static func getPairedBigrams(grams: [Bigram]) -> String { + // var arrOutputContent = [""] + // var index = 0 + // for gram in grams { + // arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.paired]) + // index += 1 + // } + // return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}" + // } + + public static func == (lhs: Bigram, rhs: Bigram) -> Bool { + lhs.precedingKeyValue == rhs.precedingKeyValue && lhs.keyValue == rhs.keyValue && 
lhs.score == rhs.score + } + + public static func < (lhs: Bigram, rhs: 
Bigram) -> Bool { // Lexicographic order: preceding pair first, then pair, then score. + lhs.precedingKeyValue < rhs.precedingKeyValue + || (lhs.precedingKeyValue == rhs.precedingKeyValue && (lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.score < rhs.score))) + } + + var description: String { + "\(keyValue):\(score)" + } + + var debugDescription: String { + "Bigram(keyValue: \(keyValue), score: \(score))" + } + } +} diff --git a/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift b/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift new file mode 100644 index 00000000..a7bc881e --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/6_Unigram.swift @@ -0,0 +1,75 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/
+
+extension Megrez {
+  /// A key-value pair with a score attached.
+  ///
+  /// NOTE(review): this declaration lists only `Equatable`, so `hash(into:)`
+  /// and `<` below are plain members rather than `Hashable` / `Comparable`
+  /// witnesses. Confirm whether those conformances were intended.
+  @frozen public struct Unigram: Equatable {
+    /// The key-value pair this unigram describes.
+    public var keyValue: KeyValuePair
+    /// The score stored for `keyValue`.
+    public var score: Double
+    // var paired: String
+
+    /// Creates a unigram from a key-value pair and its score.
+    ///
+    /// - Parameters:
+    ///   - keyValue: The pair stored in `keyValue`.
+    ///   - score: The value stored in `score`.
+    public init(keyValue: KeyValuePair, score: Double) {
+      self.keyValue = keyValue
+      self.score = score
+      // paired = "(" + keyValue.paired + "," + String(score) + ")"
+    }
+
+    /// Feeds `keyValue` and `score` into the hasher, i.e. the same two fields
+    /// that `==` compares.
+    public func hash(into hasher: inout Hasher) {
+      hasher.combine(keyValue)
+      hasher.combine(score)
+      // hasher.combine(paired)
+    }
+
+    // This function is no longer needed.
+    /// Returns `true` when `a` has a strictly higher score than `b`.
+    public static func compareScore(a: Unigram, b: Unigram) -> Bool {
+      a.score > b.score
+    }
+
+    // static func getPairedUnigrams(grams: [Unigram]) -> String {
+    // var arrOutputContent = [""]
+    // var index = 0
+    // for gram in grams {
+    // arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.paired])
+    // index += 1
+    // }
+    // return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}"
+    // }
+
+    /// Two unigrams are equal when their pairs and their scores are equal.
+    public static func == (lhs: Unigram, rhs: Unigram) -> Bool {
+      lhs.keyValue == rhs.keyValue && lhs.score == rhs.score
+    }
+
+    /// Orders unigrams by `keyValue`, then by `score`.
+    ///
+    /// `score` is consulted only when the two pairs compare equal, so it
+    /// breaks ties between unigrams that `==` would otherwise tell apart
+    /// by score alone.
+    public static func < (lhs: Unigram, rhs: Unigram) -> Bool {
+      if lhs.keyValue != rhs.keyValue {
+        return lhs.keyValue < rhs.keyValue
+      }
+      return lhs.score < rhs.score
+    }
+
+    /// Short text form: the interpolated `keyValue` and `score` joined by a colon.
+    var description: String {
+      "\(keyValue):\(score)"
+    }
+
+    /// Text form `Unigram(keyValue: …, score: …)`.
+    var debugDescription: String {
+      "Unigram(keyValue: \(keyValue), score: \(score))"
+    }
+  }
+}
diff --git a/Source/Modules/LanguageParsers/Megrez/7_KeyValuePair.swift b/Source/Modules/LanguageParsers/Megrez/7_KeyValuePair.swift
new file mode 100644
index 00000000..23a58295
--- /dev/null
+++ b/Source/Modules/LanguageParsers/Megrez/7_KeyValuePair.swift
@@ -0,0 +1,72 @@
+// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
+// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
+/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/
+
+extension Megrez {
+  /// A `key` string paired with a `value` string.
+  ///
+  /// Equality and ordering look at the *length* of `key` (`key.count`) and at
+  /// `value`; the characters of `key` are not compared.
+  ///
+  /// NOTE(review): comparing keys by length only looks unusual. Confirm it is
+  /// intended before relying on `==` to tell two different keys apart.
+  @frozen public struct KeyValuePair: Equatable, Hashable, Comparable {
+    /// The key string.
+    public var key: String
+    /// The value string.
+    public var value: String
+    // public var paired: String
+
+    /// Creates a pair; both fields default to the empty string.
+    ///
+    /// - Parameters:
+    ///   - key: The string stored in `key`.
+    ///   - value: The string stored in `value`.
+    public init(key: String = "", value: String = "") {
+      self.key = key
+      self.value = value
+      // paired = "(" + key + "," + value + ")"
+    }
+
+    /// Hashes exactly what `==` compares: the key length and the value.
+    ///
+    /// `Hashable` requires values that are equal to hash equally. Because
+    /// `==` treats keys of the same length as equal, feeding the key's
+    /// characters into the hasher would let two equal pairs hash differently.
+    public func hash(into hasher: inout Hasher) {
+      hasher.combine(key.count)
+      hasher.combine(value)
+      // hasher.combine(paired)
+    }
+
+    /// Two pairs are equal when their keys have the same length and their
+    /// values are equal.
+    public static func == (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
+      lhs.key.count == rhs.key.count && lhs.value == rhs.value
+    }
+
+    /// Orders by key length first, then by `value` when the lengths match.
+    public static func < (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
+      (lhs.key.count < rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value < rhs.value)
+    }
+
+    /// `lhs > rhs` holds exactly when `rhs < lhs`.
+    public static func > (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
+      rhs < lhs
+    }
+
+    /// `lhs <= rhs` holds exactly when `rhs < lhs` does not, so it can never
+    /// be true at the same time as `lhs > rhs`.
+    public static func <= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
+      !(rhs < lhs)
+    }
+
+    /// `lhs >= rhs` holds exactly when `lhs < rhs` does not, so it can never
+    /// be true at the same time as `lhs < rhs`.
+    public static func >= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
+      !(lhs < rhs)
+    }
+
+    /// Text form `(key, value)`.
+    public var description: String {
+      "(\(key), \(value))"
+    }
+
+    /// Text form `KeyValuePair(key: …, value: …)`.
+    public var debugDescription: String {
+      "KeyValuePair(key: \(key), value: \(value))"
+    }
+  }
+}
diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj
index 0a2718f6..ffdce246 100644
--- a/vChewing.xcodeproj/project.pbxproj
+++ b/vChewing.xcodeproj/project.pbxproj
@@ -14,6 +14,17 @@
 5B2DB16F27AF6891006D874E /* data-chs.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B2DB16D27AF6891006D874E /* data-chs.txt */; };
 5B2DB17027AF6891006D874E /* data-cht.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B2DB16E27AF6891006D874E /* data-cht.txt */; };
 5B3133BF280B229700A4A505 /* KeyHandler_States.swift in
Sources */ = {isa = PBXBuildFile; fileRef = 5B3133BE280B229700A4A505 /* KeyHandler_States.swift */; }; + 5B38F59A281E2E49007D5F5D /* 6_Unigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */; }; + 5B38F59B281E2E49007D5F5D /* 7_KeyValuePair.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */; }; + 5B38F59C281E2E49007D5F5D /* 2_Grid.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */; }; + 5B38F59D281E2E49007D5F5D /* 4_Node.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */; }; + 5B38F59E281E2E49007D5F5D /* 6_Bigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */; }; + 5B38F59F281E2E49007D5F5D /* 3_NodeAnchor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */; }; + 5B38F5A0281E2E49007D5F5D /* 1_Walker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */; }; + 5B38F5A1281E2E49007D5F5D /* 1_BlockReadingBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */; }; + 5B38F5A2281E2E49007D5F5D /* 0_Megrez.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */; }; + 5B38F5A3281E2E49007D5F5D /* 3_Span.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */; }; + 5B38F5A4281E2E49007D5F5D /* 5_LanguageModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */; }; 5B40730C281672610023DFFF /* lmAssociates.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B407309281672610023DFFF /* lmAssociates.swift */; }; 5B40730D281672610023DFFF /* lmReplacements.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B40730A281672610023DFFF /* 
lmReplacements.swift */; }; 5B5E535227EF261400C6AA1E /* IME.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B5E535127EF261400C6AA1E /* IME.swift */; }; @@ -288,17 +299,17 @@ 6A0D4EA215FC0D2D00ABF4B3 /* vChewing.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = vChewing.app; sourceTree = BUILT_PRODUCTS_DIR; }; 6A0D4EF515FC0DA600ABF4B3 /* IME-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "IME-Info.plist"; sourceTree = ""; }; 6A0D4EF615FC0DA600ABF4B3 /* vChewing-Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "vChewing-Prefix.pch"; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1415FC0EB100ABF4B3 /* Bigram.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Bigram.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = BlockReadingBuilder.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Gramambular.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1715FC0EB100ABF4B3 /* Grid.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Grid.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = KeyValuePair.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = LanguageModel.h; sourceTree = 
""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1A15FC0EB100ABF4B3 /* Node.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Node.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1B15FC0EB100ABF4B3 /* NodeAnchor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = NodeAnchor.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1C15FC0EB100ABF4B3 /* Span.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Span.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1D15FC0EB100ABF4B3 /* Unigram.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Unigram.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1E15FC0EB100ABF4B3 /* Walker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Walker.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; + 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 6_Bigram.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 1_BlockReadingBuilder.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 0_Megrez.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 2_Grid.swift; sourceTree = ""; tabWidth = 
2; usesTabs = 1; }; + 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 7_KeyValuePair.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 5_LanguageModel.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 4_Node.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 3_NodeAnchor.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 3_Span.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 6_Unigram.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 1_Walker.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 6A0D4F2015FC0EB100ABF4B3 /* Mandarin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = Mandarin.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6A0D4F2115FC0EB100ABF4B3 /* Mandarin.h */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Mandarin.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6A15B32421A51F2300B92CD3 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = ""; }; @@ -498,7 +509,7 @@ 5B62A32327AE756800A19448 /* LanguageParsers */ = { isa = PBXGroup; children = ( - 6A0D4F1315FC0EB100ABF4B3 /* Gramambular */, + 6A0D4F1315FC0EB100ABF4B3 /* Megrez */, ); path = LanguageParsers; sourceTree = ""; @@ -829,22 +840,22 @@ path = Modules; sourceTree = ""; }; - 6A0D4F1315FC0EB100ABF4B3 /* Gramambular */ = { + 6A0D4F1315FC0EB100ABF4B3 /* Megrez */ = { isa = PBXGroup; children = ( - 6A0D4F1415FC0EB100ABF4B3 /* Bigram.h */, - 6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */, - 6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */, - 6A0D4F1715FC0EB100ABF4B3 /* Grid.h */, - 6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */, - 6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */, - 6A0D4F1A15FC0EB100ABF4B3 /* Node.h */, - 6A0D4F1B15FC0EB100ABF4B3 /* NodeAnchor.h */, - 6A0D4F1C15FC0EB100ABF4B3 /* Span.h */, - 6A0D4F1D15FC0EB100ABF4B3 /* Unigram.h */, - 6A0D4F1E15FC0EB100ABF4B3 /* Walker.h */, + 6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */, + 6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */, + 6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */, + 6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */, + 6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */, + 6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */, + 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */, + 6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */, + 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */, + 6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */, + 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */, ); - path = Gramambular; + path = Megrez; sourceTree = ""; }; 6ACA41E715FC1D9000935EF6 /* Installer */ = { @@ -1098,6 +1109,8 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 5B38F59D281E2E49007D5F5D /* 4_Node.swift in 
Sources */, + 5B38F5A3281E2E49007D5F5D /* 3_Span.swift in Sources */, 5B40730C281672610023DFFF /* lmAssociates.swift in Sources */, 5B707CE827D9F4590099EF99 /* OpenCCBridge.swift in Sources */, D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */, @@ -1116,6 +1129,7 @@ D4E569DC27A34D0E00AC2CEF /* KeyHandler.mm in Sources */, 5BA9FD4627FEF3C9002DE248 /* Container.swift in Sources */, D47F7DD0278C0897002F9DD7 /* ctlNonModalAlertWindow.swift in Sources */, + 5B38F5A2281E2E49007D5F5D /* 0_Megrez.swift in Sources */, 5B949BD92816DC5400D87B5D /* LineReader.swift in Sources */, D456576E279E4F7B00DF6BC9 /* InputHandler.swift in Sources */, 5BA9FD1027FEDB6B002DE248 /* suiPrefPaneKeyboard.swift in Sources */, @@ -1131,7 +1145,9 @@ 5B62A34A27AE7CD900A19448 /* NotifierController.swift in Sources */, 5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */, 5B62A32927AE77D100A19448 /* FSEventStreamHelper.swift in Sources */, + 5B38F59B281E2E49007D5F5D /* 7_KeyValuePair.swift in Sources */, 5B62A33627AE795800A19448 /* mgrPrefs.swift in Sources */, + 5B38F5A4281E2E49007D5F5D /* 5_LanguageModel.swift in Sources */, 5BAEFAD028012565001F42C9 /* mgrLangModel.swift in Sources */, 5B782EC4280C243C007276DE /* KeyHandler_HandleCandidate.swift in Sources */, 5B62A33827AE79CD00A19448 /* NSStringUtils.swift in Sources */, @@ -1139,13 +1155,17 @@ 5BA9FD4927FEF3C9002DE248 /* Section.swift in Sources */, 5BA9FD3E27FEF3C8002DE248 /* Utilities.swift in Sources */, 5BA9FD1127FEDB6B002DE248 /* ctlPrefUI.swift in Sources */, + 5B38F59C281E2E49007D5F5D /* 2_Grid.swift in Sources */, 5B40730D281672610023DFFF /* lmReplacements.swift in Sources */, + 5B38F59E281E2E49007D5F5D /* 6_Bigram.swift in Sources */, 5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */, 5BE33BED28169B5D00CE5BB0 /* KeyValueStructs.swift in Sources */, 5B5E535227EF261400C6AA1E /* IME.swift in Sources */, 5B62A34927AE7CD900A19448 /* TooltipController.swift in Sources */, 6A0D4F4515FC0EB100ABF4B3 /* 
Mandarin.cpp in Sources */, 5B61B0CA280BEFD4002E3CFA /* KeyHandler_Misc.swift in Sources */, + 5B38F59A281E2E49007D5F5D /* 6_Unigram.swift in Sources */, + 5B38F5A0281E2E49007D5F5D /* 1_Walker.swift in Sources */, 5B62A34827AE7CD900A19448 /* ctlCandidateVertical.swift in Sources */, 5BA9FD4027FEF3C8002DE248 /* Localization.swift in Sources */, 5BA9FD1327FEDB6B002DE248 /* suiPrefPaneDictionary.swift in Sources */, @@ -1153,10 +1173,12 @@ 5BA9FD4727FEF3C9002DE248 /* PreferencesStyleController.swift in Sources */, 5BF8423127BAA942008E7E4C /* vChewingKanjiConverter.swift in Sources */, 5B949BDB2816DDBC00D87B5D /* LMConsolidator.swift in Sources */, + 5B38F59F281E2E49007D5F5D /* 3_NodeAnchor.swift in Sources */, 5B62A34627AE7CD900A19448 /* ctlCandidateHorizontal.swift in Sources */, 5B62A34727AE7CD900A19448 /* ctlCandidate.swift in Sources */, 5BA9FD3F27FEF3C8002DE248 /* Pane.swift in Sources */, 5BB802DA27FABA8300CF1C19 /* ctlInputMethod_Menu.swift in Sources */, + 5B38F5A1281E2E49007D5F5D /* 1_BlockReadingBuilder.swift in Sources */, 5BDC1CFA27FDF1310052C2B9 /* apiUpdate.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0;