From 3648b62e94c9765b8a2114cceb8de9feaa9ca180 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sun, 20 Feb 2022 22:33:46 +0800 Subject: [PATCH] Lukhnos: Gramambular // Modernization. --- .../LanguageParsers/Gramambular/Bigram.h | 133 +++--- .../Gramambular/BlockReadingBuilder.h | 342 +++++++------- .../LanguageParsers/Gramambular/Gramambular.h | 4 +- .../LanguageParsers/Gramambular/Grid.h | 419 ++++++++---------- .../LanguageParsers/Gramambular/Grid.mm | 70 +++ .../Gramambular/KeyValuePair.h | 64 ++- .../Gramambular/LanguageModel.h | 32 +- .../LanguageParsers/Gramambular/Node.h | 358 ++++++++------- .../LanguageParsers/Gramambular/NodeAnchor.h | 79 ++-- .../LanguageParsers/Gramambular/Span.h | 133 +++--- .../LanguageParsers/Gramambular/Unigram.h | 129 +++--- .../LanguageParsers/Gramambular/Walker.h | 104 ++--- vChewing.xcodeproj/project.pbxproj | 4 + 13 files changed, 926 insertions(+), 945 deletions(-) create mode 100644 Source/Modules/LanguageParsers/Gramambular/Grid.mm diff --git a/Source/Modules/LanguageParsers/Gramambular/Bigram.h b/Source/Modules/LanguageParsers/Gramambular/Bigram.h index 3f232750..5995238d 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Bigram.h +++ b/Source/Modules/LanguageParsers/Gramambular/Bigram.h @@ -17,82 +17,77 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef Bigram_h -#define Bigram_h +#ifndef BIGRAM_H_ +#define BIGRAM_H_ #include #include "KeyValuePair.h" namespace Taiyan { - namespace Gramambular { - class Bigram { - public: - Bigram(); - - KeyValuePair preceedingKeyValue; - KeyValuePair keyValue; - double score; - - bool operator==(const Bigram& inAnother) const; - bool operator<(const Bigram& inAnother) const; - }; +namespace Gramambular { +class Bigram { +public: + Bigram(); + + KeyValuePair preceedingKeyValue; + KeyValuePair keyValue; + double score; + + bool operator==(const Bigram& another) const; + bool operator<(const Bigram& another) const; +}; - inline ostream& operator<<(ostream& inStream, const Bigram& inGram) - { - streamsize p = inStream.precision(); - inStream.precision(6); - inStream << "(" << inGram.keyValue << "|" <& inGrams) - { - inStream << "[" << inGrams.size() << "]=>{"; - - size_t index = 0; - - for (vector::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { - inStream << index << "=>"; - inStream << *gi; - if (gi + 1 != inGrams.end()) { - inStream << ","; - } - } - - inStream << "}"; - return inStream; - } - - inline Bigram::Bigram() - : score(0.0) - { - } - - inline bool Bigram::operator==(const Bigram& inAnother) const - { - return preceedingKeyValue == inAnother.preceedingKeyValue && keyValue == inAnother.keyValue && score == inAnother.score; - } - - inline bool Bigram::operator<(const Bigram& inAnother) const - { - if (preceedingKeyValue < inAnother.preceedingKeyValue) { - return true; - } - else if (preceedingKeyValue == inAnother.preceedingKeyValue) { - if (keyValue < inAnother.keyValue) { - return true; - } - else if (keyValue == inAnother.keyValue) { - return score < inAnother.score; - } - return false; - } - - return false; - } - } +inline std::ostream& operator<<(std::ostream& stream, const Bigram& gram) { + std::streamsize p = stream.precision(); + stream.precision(6); + stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," + << gram.score << ")"; + stream.precision(p); + return stream; } +inline std::ostream& operator<<(std::ostream& stream, + const std::vector& grams) { + stream << "[" << grams.size() << "]=>{"; + + size_t index = 0; + + for (std::vector::const_iterator gi = grams.begin(); + gi != grams.end(); ++gi, ++index) { + stream << index << "=>"; + stream << *gi; + if (gi + 1 != grams.end()) { + stream << ","; + } + } + + stream << "}"; + return stream; +} + +inline Bigram::Bigram() : score(0.0) {} + +inline bool Bigram::operator==(const Bigram& another) const { + return preceedingKeyValue == another.preceedingKeyValue && + keyValue == another.keyValue && score == another.score; +} + +inline bool Bigram::operator<(const Bigram& another) const { + if (preceedingKeyValue < another.preceedingKeyValue) { + return true; + } else if (preceedingKeyValue == another.preceedingKeyValue) { + if (keyValue < another.keyValue) { + return true; + } else if (keyValue == another.keyValue) { + return score < another.score; + } + return false; + } + + return false; +} +} // namespace Gramambular +} // namespace Taiyan + #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h b/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h index 039a7f88..07fc9add 100644 --- a/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h +++ b/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h @@ -17,202 +17,190 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef BlockReadingBuilder_h -#define BlockReadingBuilder_h +#ifndef BLOCKREADINGBUILDER_H_ +#define BLOCKREADINGBUILDER_H_ +#include #include + #include "Grid.h" #include "LanguageModel.h" namespace Taiyan { - namespace Gramambular { - using namespace std; - - class BlockReadingBuilder { - public: - BlockReadingBuilder(LanguageModel *inLM); - void clear(); - - size_t length() const; - size_t cursorIndex() const; - void setCursorIndex(size_t inNewIndex); - void insertReadingAtCursor(const string& inReading); - bool deleteReadingBeforeCursor(); // backspace - bool deleteReadingAfterCursor(); // delete - - bool removeHeadReadings(size_t count); - - void setJoinSeparator(const string& separator); - const string joinSeparator() const; +namespace Gramambular { - vector readings() const; +class BlockReadingBuilder { +public: + explicit BlockReadingBuilder(LanguageModel* lm); + void clear(); + + size_t length() const; + size_t cursorIndex() const; + void setCursorIndex(size_t newIndex); + void insertReadingAtCursor(const std::string& reading); + bool deleteReadingBeforeCursor(); // backspace + bool deleteReadingAfterCursor(); // delete + + bool removeHeadReadings(size_t count); + + void setJoinSeparator(const std::string& separator); + const std::string joinSeparator() const; + + std::vector readings() const; + + Grid& grid(); + +protected: + void build(); + + static const std::string Join(std::vector::const_iterator begin, + std::vector::const_iterator end, + const std::string& separator); + + // 最多使用六個字組成一個詞 + static const size_t MaximumBuildSpanLength = 6; + + size_t m_cursorIndex; + std::vector m_readings; + + Grid m_grid; + LanguageModel* m_LM; + std::string m_joinSeparator; +}; - Grid& grid(); - - protected: - void build(); - - static const string Join(vector::const_iterator begin, vector::const_iterator end, const string& separator); - - //最多使用六個字組成一個詞 - static const size_t MaximumBuildSpanLength = 6; - - size_t m_cursorIndex; - vector m_readings; - - Grid m_grid; - LanguageModel *m_LM; - string m_joinSeparator; - }; - - inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM) - : m_LM(inLM) - , m_cursorIndex(0) - { - } - - inline void BlockReadingBuilder::clear() - { - m_cursorIndex = 0; - m_readings.clear(); - m_grid.clear(); - } - - inline size_t BlockReadingBuilder::length() const - { - return m_readings.size(); - } - - inline size_t BlockReadingBuilder::cursorIndex() const - { - return m_cursorIndex; - } +inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* lm) +: m_LM(lm), m_cursorIndex(0) {} - inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) - { - m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex; - } - - inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading) - { - m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); - - m_grid.expandGridByOneAtLocation(m_cursorIndex); - build(); - m_cursorIndex++; - } +inline void BlockReadingBuilder::clear() { + m_cursorIndex = 0; + m_readings.clear(); + m_grid.clear(); +} - inline vector BlockReadingBuilder::readings() const - { - return m_readings; - } - - inline bool BlockReadingBuilder::deleteReadingBeforeCursor() - { - if (!m_cursorIndex) { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex); +inline size_t BlockReadingBuilder::length() const { return m_readings.size(); } + +inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; } + +inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) { + m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex; +} + +inline void BlockReadingBuilder::insertReadingAtCursor( + const std::string& reading) { + m_readings.insert(m_readings.begin() + m_cursorIndex, reading); + + m_grid.expandGridByOneAtLocation(m_cursorIndex); + build(); + m_cursorIndex++; +} + +inline std::vector BlockReadingBuilder::readings() const { + return m_readings; +} + +inline bool BlockReadingBuilder::deleteReadingBeforeCursor() { + if (!m_cursorIndex) { + return false; + } + + m_readings.erase(m_readings.begin() + m_cursorIndex - 1, + m_readings.begin() + m_cursorIndex); + m_cursorIndex--; + m_grid.shrinkGridByOneAtLocation(m_cursorIndex); + build(); + return true; +} + +inline bool BlockReadingBuilder::deleteReadingAfterCursor() { + if (m_cursorIndex == m_readings.size()) { + return false; + } + + m_readings.erase(m_readings.begin() + m_cursorIndex, + m_readings.begin() + m_cursorIndex + 1); + m_grid.shrinkGridByOneAtLocation(m_cursorIndex); + build(); + return true; +} + +inline bool BlockReadingBuilder::removeHeadReadings(size_t count) { + if (count > length()) { + return false; + } + + for (size_t i = 0; i < count; i++) { + if (m_cursorIndex) { m_cursorIndex--; - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; - } - - inline bool BlockReadingBuilder::deleteReadingAfterCursor() - { - if (m_cursorIndex == m_readings.size()) { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1); - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; - } - - inline bool BlockReadingBuilder::removeHeadReadings(size_t count) - { - if (count > length()) { - return false; - } - - for (size_t i = 0; i < count; i++) { - if (m_cursorIndex) { - m_cursorIndex--; - } - m_readings.erase(m_readings.begin(), m_readings.begin() + 1); - m_grid.shrinkGridByOneAtLocation(0); - build(); - } - - return true; - } - - inline void BlockReadingBuilder::setJoinSeparator(const string& separator) - { - m_joinSeparator = separator; - } - - inline const string BlockReadingBuilder::joinSeparator() const - { - return m_joinSeparator; } + m_readings.erase(m_readings.begin(), m_readings.begin() + 1); + m_grid.shrinkGridByOneAtLocation(0); + build(); + } + + return true; +} - inline Grid& BlockReadingBuilder::grid() - { - return m_grid; - } +inline void BlockReadingBuilder::setJoinSeparator( + const std::string& separator) { + m_joinSeparator = separator; +} - inline void BlockReadingBuilder::build() - { - if (!m_LM) { - return; - } - - size_t begin = 0; - size_t end = m_cursorIndex + MaximumBuildSpanLength; - - if (m_cursorIndex < MaximumBuildSpanLength) { - begin = 0; - } - else { - begin = m_cursorIndex - MaximumBuildSpanLength; - } - - if (end > m_readings.size()) { - end = m_readings.size(); - } - - for (size_t p = begin ; p < end ; p++) { - for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) { - string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); - if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { - vector unigrams = m_LM->unigramsForKey(combinedReading); +inline const std::string BlockReadingBuilder::joinSeparator() const { + return m_joinSeparator; +} - if (unigrams.size() > 0) { - Node n(combinedReading, unigrams, vector()); - m_grid.insertNode(n, p, q); - } - } +inline Grid& BlockReadingBuilder::grid() { return m_grid; } + +inline void BlockReadingBuilder::build() { + if (!m_LM) { + return; + } + + size_t begin = 0; + size_t end = m_cursorIndex + MaximumBuildSpanLength; + + if (m_cursorIndex < MaximumBuildSpanLength) { + begin = 0; + } else { + begin = m_cursorIndex - MaximumBuildSpanLength; + } + + if (end > m_readings.size()) { + end = m_readings.size(); + } + + for (size_t p = begin; p < end; p++) { + for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) { + std::string combinedReading = Join( + m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); + if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, + combinedReading)) { + std::vector unigrams = m_LM->unigramsForKey(combinedReading); + + if (unigrams.size() > 0) { + Node n(combinedReading, unigrams, std::vector()); + m_grid.insertNode(n, p, q); } } } - - inline const string BlockReadingBuilder::Join(vector::const_iterator begin, vector::const_iterator end, const string& separator) - { - string result; - for (vector::const_iterator iter = begin ; iter != end ; ) { - result += *iter; - ++iter; - if (iter != end) { - result += separator; - } - } - return result; - } } } +inline const std::string BlockReadingBuilder::Join( + std::vector::const_iterator begin, + std::vector::const_iterator end, + const std::string& separator) { + std::string result; + for (std::vector::const_iterator iter = begin; iter != end;) { + result += *iter; + ++iter; + if (iter != end) { + result += separator; + } + } + return result; +} +} // namespace Gramambular +} // namespace Taiyan + #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Gramambular.h b/Source/Modules/LanguageParsers/Gramambular/Gramambular.h index bf40892f..d2601d3f 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Gramambular.h +++ b/Source/Modules/LanguageParsers/Gramambular/Gramambular.h @@ -17,8 +17,8 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef Gramambular_h -#define Gramambular_h +#ifndef GRAMAMBULAR_H_ +#define GRAMAMBULAR_H_ #include "Bigram.h" #include "BlockReadingBuilder.h" diff --git a/Source/Modules/LanguageParsers/Gramambular/Grid.h b/Source/Modules/LanguageParsers/Gramambular/Grid.h index 8872cfae..f81c9301 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Grid.h +++ b/Source/Modules/LanguageParsers/Gramambular/Grid.h @@ -17,248 +17,207 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef Grid_h -#define Grid_h +#ifndef GRID_H_ +#define GRID_H_ #include +#include +#include + #include "NodeAnchor.h" #include "Span.h" namespace Taiyan { - namespace Gramambular { +namespace Gramambular { + +class Grid { +public: + void clear(); + void insertNode(const Node& node, size_t location, size_t spanningLength); + bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, + size_t spanningLength, + const std::string& key); + + void expandGridByOneAtLocation(size_t location); + void shrinkGridByOneAtLocation(size_t location); + + size_t width() const; + std::vector nodesEndingAt(size_t location); + std::vector nodesCrossingOrEndingAt(size_t location); + + // "Freeze" the node with the unigram that represents the selected candidate + // value. After this, the node that contains the unigram will always be + // evaluated to that unigram, while all other overlapping nodes will be reset + // to their initial state (that is, if any of those nodes were "frozen" or + // fixed, they will be unfrozen.) + NodeAnchor fixNodeSelectedCandidate(size_t location, + const std::string& value); + + // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, + // only boost the unigram that represents the value with an overriding score. + // This has the same side effect as fixNodeSelectedCandidate, which is that + // all other overlapping nodes will be reset to their initial state. + void overrideNodeScoreForSelectedCandidate(size_t location, + const std::string& value, + float overridingScore); + + std::string dumpDOT(); + +protected: + std::vector m_spans; +}; + +inline void Grid::clear() { m_spans.clear(); } + +inline void Grid::insertNode(const Node& node, size_t location, + size_t spanningLength) { + if (location >= m_spans.size()) { + size_t diff = location - m_spans.size() + 1; - class Grid { - public: - void clear(); - void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength); - bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey); - - void expandGridByOneAtLocation(size_t inLocation); - void shrinkGridByOneAtLocation(size_t inLocation); - - size_t width() const; - vector nodesEndingAt(size_t inLocation); - vector nodesCrossingOrEndingAt(size_t inLocation); - - // "Freeze" the node with the unigram that represents the selected candidate value. - // After this, the node that contains the unigram will always be evaluated to that - // unigram, while all other overlapping nodes will be reset to their initial state - // (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.) - NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); - - // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only - // boost the unigram that represents the value with an overriding score. This - // has the same side effect as fixNodeSelectedCandidate, which is that all other - // overlapping nodes will be reset to their initial state. - void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); - - const string dumpDOT(); - - protected: - vector m_spans; - }; - - inline void Grid::clear() - { - m_spans.clear(); + for (size_t i = 0; i < diff; i++) { + m_spans.push_back(Span()); } - - inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength) - { - if (inLocation >= m_spans.size()) { - size_t diff = inLocation - m_spans.size() + 1; - - for (size_t i = 0 ; i < diff ; i++) { - m_spans.push_back(Span()); - } - } + } + + m_spans[location].insertNodeOfLength(node, spanningLength); +} - m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); - } +inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey( + size_t location, size_t spanningLength, const std::string& key) { + if (location > m_spans.size()) { + return false; + } + + const Node* n = m_spans[location].nodeOfLength(spanningLength); + if (!n) { + return false; + } + + return key == n->key(); +} - inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey) - { - if (inLocation > m_spans.size()) { - return false; - } - - const Node *n = m_spans[inLocation].nodeOfLength(inSpanningLength); - if (!n) { - return false; - } - - return inKey == n->key(); - } - - inline void Grid::expandGridByOneAtLocation(size_t inLocation) - { - if (!inLocation || inLocation == m_spans.size()) { - m_spans.insert(m_spans.begin() + inLocation, Span()); - } - else { - m_spans.insert(m_spans.begin() + inLocation, Span()); - for (size_t i = 0 ; i < inLocation ; i++) { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); - } - } - } - - inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) - { - if (inLocation >= m_spans.size()) { - return; - } - - m_spans.erase(m_spans.begin() + inLocation); - for (size_t i = 0 ; i < inLocation ; i++) { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); - } - } - - inline size_t Grid::width() const - { - return m_spans.size(); - } - - inline vector Grid::nodesEndingAt(size_t inLocation) - { - vector result; - - if (m_spans.size() && inLocation <= m_spans.size()) { - for (size_t i = 0 ; i < inLocation ; i++) { - Span& span = m_spans[i]; - if (i + span.maximumLength() >= inLocation) { - Node *np = span.nodeOfLength(inLocation - i); - if (np) { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = inLocation - i; - - result.push_back(na); - } - } - } - } - - return result; - } - - inline vector Grid::nodesCrossingOrEndingAt(size_t inLocation) - { - vector result; - - if (m_spans.size() && inLocation <= m_spans.size()) { - for (size_t i = 0 ; i < inLocation ; i++) { - Span& span = m_spans[i]; - - if (i + span.maximumLength() >= inLocation) { - - for (size_t j = 1, m = span.maximumLength(); j <= m ; j++) { - - if (i + j < inLocation) { - continue; - } - - Node *np = span.nodeOfLength(j); - if (np) { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = inLocation - i; - - result.push_back(na); - } - } - } - } - } - - return result; - } - - // For nodes found at the location, fix their currently-selected candidate using the supplied string value. - inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value) - { - vector nodes = nodesCrossingOrEndingAt(location); - NodeAnchor node; - for (auto nodeAnchor : nodes) { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == value) { - const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); - node = nodeAnchor; - break;; - } - } - } - return node; - } - - inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore) - { - vector nodes = nodesCrossingOrEndingAt(location); - for (auto nodeAnchor : nodes) { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == value) { - const_cast(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); - break; - } - } - } - } - - inline const string Grid::dumpDOT() - { - stringstream sst; - sst << "digraph {" << endl; - sst << "graph [ rankdir=LR ];" << endl; - sst << "BOS;" << endl; - - for (size_t p = 0 ; p < m_spans.size() ; p++) { - Span& span = m_spans[p]; - for (size_t ni = 0 ; ni <= span.maximumLength() ; ni++) { - Node* np = span.nodeOfLength(ni); - if (np) { - if (!p) { - sst << "BOS -> " << np->currentKeyValue().value << ";" << endl; - } - - sst << np->currentKeyValue().value << ";" << endl; - - if (p + ni < m_spans.size()) { - Span& dstSpan = m_spans[p+ni]; - for (size_t q = 0 ; q <= dstSpan.maximumLength() ; q++) { - Node *dn = dstSpan.nodeOfLength(q); - if (dn) { - sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl; - } - } - } - - if (p + ni == m_spans.size()) { - sst << np->currentKeyValue().value << " -> " << "EOS;" << endl; - } - } - } - } - - sst << "EOS;" << endl; - sst << "}"; - return sst.str(); +inline void Grid::expandGridByOneAtLocation(size_t location) { + if (!location || location == m_spans.size()) { + m_spans.insert(m_spans.begin() + location, Span()); + } else { + m_spans.insert(m_spans.begin() + location, Span()); + for (size_t i = 0; i < location; i++) { + // zaps overlapping spans + m_spans[i].removeNodeOfLengthGreaterThan(location - i); } } } +inline void Grid::shrinkGridByOneAtLocation(size_t location) { + if (location >= m_spans.size()) { + return; + } + + m_spans.erase(m_spans.begin() + location); + for (size_t i = 0; i < location; i++) { + // zaps overlapping spans + m_spans[i].removeNodeOfLengthGreaterThan(location - i); + } +} + +inline size_t Grid::width() const { return m_spans.size(); } + +inline std::vector Grid::nodesEndingAt(size_t location) { + std::vector result; + + if (m_spans.size() && location <= m_spans.size()) { + for (size_t i = 0; i < location; i++) { + Span& span = m_spans[i]; + if (i + span.maximumLength() >= location) { + Node* np = span.nodeOfLength(location - i); + if (np) { + NodeAnchor na; + na.node = np; + na.location = i; + na.spanningLength = location - i; + + result.push_back(na); + } + } + } + } + + return result; +} + +inline std::vector Grid::nodesCrossingOrEndingAt(size_t location) { + std::vector result; + + if (m_spans.size() && location <= m_spans.size()) { + for (size_t i = 0; i < location; i++) { + Span& span = m_spans[i]; + + if (i + span.maximumLength() >= location) { + for (size_t j = 1, m = span.maximumLength(); j <= m; j++) { + if (i + j < location) { + continue; + } + + Node* np = span.nodeOfLength(j); + if (np) { + NodeAnchor na; + na.node = np; + na.location = i; + na.spanningLength = location - i; + + result.push_back(na); + } + } + } + } + } + + return result; +} + +// For nodes found at the location, fix their currently-selected candidate using +// the supplied string value. +inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, + const std::string& value) { + std::vector nodes = nodesCrossingOrEndingAt(location); + NodeAnchor node; + for (auto nodeAnchor : nodes) { + auto candidates = nodeAnchor.node->candidates(); + + // Reset the candidate-fixed state of every node at the location. + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == value) { + const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); + node = nodeAnchor; + break; + } + } + } + return node; +} + +inline void Grid::overrideNodeScoreForSelectedCandidate( + size_t location, const std::string& value, float overridingScore) { + std::vector nodes = nodesCrossingOrEndingAt(location); + for (auto nodeAnchor : nodes) { + auto candidates = nodeAnchor.node->candidates(); + + // Reset the candidate-fixed state of every node at the location. + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == value) { + const_cast(nodeAnchor.node) + ->selectFloatingCandidateAtIndex(i, overridingScore); + break; + } + } + } +} + +} // namespace Gramambular +} // namespace Taiyan + #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Grid.mm b/Source/Modules/LanguageParsers/Gramambular/Grid.mm new file mode 100644 index 00000000..ba76a6e0 --- /dev/null +++ b/Source/Modules/LanguageParsers/Gramambular/Grid.mm @@ -0,0 +1,70 @@ +// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). +// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, + except as required to fulfill notice requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include "Grid.h" + +#include +#include + +namespace Taiyan { +namespace Gramambular { + +std::string Grid::dumpDOT() { + std::stringstream sst; + sst << "digraph {" << std::endl; + sst << "graph [ rankdir=LR ];" << std::endl; + sst << "BOS;" << std::endl; + + for (size_t p = 0; p < m_spans.size(); p++) { + Span& span = m_spans[p]; + for (size_t ni = 0; ni <= span.maximumLength(); ni++) { + Node* np = span.nodeOfLength(ni); + if (np) { + if (!p) { + sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl; + } + + sst << np->currentKeyValue().value << ";" << std::endl; + + if (p + ni < m_spans.size()) { + Span& dstSpan = m_spans[p + ni]; + for (size_t q = 0; q <= dstSpan.maximumLength(); q++) { + Node* dn = dstSpan.nodeOfLength(q); + if (dn) { + sst << np->currentKeyValue().value << " -> " + << dn->currentKeyValue().value << ";" << std::endl; + } + } + } + + if (p + ni == m_spans.size()) { + sst << np->currentKeyValue().value << " -> " + << "EOS;" << std::endl; + } + } + } + } + + sst << "EOS;" << std::endl; + sst << "}"; + return sst.str(); +} + +} // namespace Gramambular +} // namespace Taiyan diff --git a/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h b/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h index 1b6bdded..e22a96bd 100644 --- a/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h +++ b/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h @@ -17,47 +17,43 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef KeyValuePair_h -#define KeyValuePair_h +#ifndef KEYVALUEPAIR_H_ +#define KEYVALUEPAIR_H_ #include #include namespace Taiyan { - namespace Gramambular { - using namespace std; - - class KeyValuePair { - public: - string key; - string value; +namespace Gramambular { - bool operator==(const KeyValuePair& inAnother) const; - bool operator<(const KeyValuePair& inAnother) const; - }; +class KeyValuePair { +public: + std::string key; + std::string value; + + bool operator==(const KeyValuePair& another) const; + bool operator<(const KeyValuePair& another) const; +}; - inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) - { - inStream << "(" << inPair.key << "," << inPair.value << ")"; - return inStream; - } - - inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const - { - return key == inAnother.key && value == inAnother.value; - } - - inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const - { - if (key < inAnother.key) { - return true; - } - else if (key == inAnother.key) { - return value < inAnother.value; - } - return false; - } - } +inline std::ostream& operator<<(std::ostream& stream, + const KeyValuePair& pair) { + stream << "(" << pair.key << "," << pair.value << ")"; + return stream; } +inline bool KeyValuePair::operator==(const KeyValuePair& another) const { + return key == another.key && value == another.value; +} + +inline bool KeyValuePair::operator<(const KeyValuePair& another) const { + if (key < another.key) { + return true; + } else if (key == another.key) { + return value < another.value; + } + return false; +} +} // namespace Gramambular +} // namespace Taiyan + #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h b/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h index bea86f7b..b24f7fff 100644 --- a/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h +++ b/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h @@ -17,28 +17,28 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef LanguageModel_h -#define LanguageModel_h +#ifndef LANGUAGEMODEL_H_ +#define LANGUAGEMODEL_H_ +#include #include + #include "Bigram.h" #include "Unigram.h" namespace Taiyan { - namespace Gramambular { - - using namespace std; - - class LanguageModel { - public: - virtual ~LanguageModel() {} - - virtual const vector bigramsForKeys(const string &preceedingKey, const string& key) = 0; - virtual const vector unigramsForKey(const string &key) = 0; - virtual bool hasUnigramsForKey(const string& key) = 0; - }; - } -} +namespace Gramambular { +class LanguageModel { +public: + virtual ~LanguageModel() {} + + virtual const std::vector bigramsForKeys( + const std::string& preceedingKey, const std::string& key) = 0; + virtual const std::vector unigramsForKey(const std::string& key) = 0; + virtual bool hasUnigramsForKey(const std::string& key) = 0; +}; +} // namespace Gramambular +} // namespace Taiyan #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Node.h b/Source/Modules/LanguageParsers/Gramambular/Node.h index bf9e568f..ff1d3fd2 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Node.h +++ b/Source/Modules/LanguageParsers/Gramambular/Node.h @@ -17,208 +17,198 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef Node_h -#define Node_h +#ifndef NODE_H_ +#define NODE_H_ #include +#include +#include #include + #include "LanguageModel.h" namespace Taiyan { - namespace Gramambular { - using namespace std; +namespace Gramambular { - class Node { - public: - Node(); - Node(const string& inKey, const vector& inUnigrams, const vector& inBigrams); - - void primeNodeWithPreceedingKeyValues(const vector& inKeyValues); - - bool isCandidateFixed() const; - const vector& candidates() const; - void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); - void resetCandidate(); - void selectFloatingCandidateAtIndex(size_t index, double score); - - const string& key() const; - double score() const; - // double scoreForCandidate(string &candidate) const; // Prevents the override model to remember symbols with scode -X or lower. - const KeyValuePair currentKeyValue() const; - double highestUnigramScore() const; - - protected: - const LanguageModel* m_LM; - - string m_key; - double m_score; - - vector m_unigrams; - vector m_candidates; - map m_valueUnigramIndexMap; - map > m_preceedingGramBigramMap; - - bool m_candidateFixed; - size_t m_selectedUnigramIndex; - - friend ostream& operator<<(ostream& inStream, const Node& inNode); - }; +class Node { +public: + Node(); + Node(const std::string& key, const std::vector& unigrams, + const std::vector& bigrams); + + void primeNodeWithPreceedingKeyValues( + const std::vector& keyValues); + + bool isCandidateFixed() const; + const std::vector& candidates() const; + void selectCandidateAtIndex(size_t index = 0, bool fix = true); + void resetCandidate(); + void selectFloatingCandidateAtIndex(size_t index, double score); + + const std::string& key() const; + double score() const; + double scoreForCandidate(const std::string& candidate) const; + const KeyValuePair currentKeyValue() const; + double highestUnigramScore() const; + +protected: + const LanguageModel* m_LM; + + std::string m_key; + double m_score; + + std::vector m_unigrams; + std::vector m_candidates; + std::map m_valueUnigramIndexMap; + std::map > m_preceedingGramBigramMap; + + bool m_candidateFixed; + size_t m_selectedUnigramIndex; + + friend std::ostream& operator<<(std::ostream& stream, const Node& node); +}; - inline ostream& operator<<(ostream& inStream, const Node& inNode) - { - inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") - << ",selected:" << inNode.m_selectedUnigramIndex - << "," << inNode.m_unigrams << ")"; - return inStream; - } +inline std::ostream& operator<<(std::ostream& stream, const Node& node) { + stream << "(node,key:" << node.m_key + << ",fixed:" << (node.m_candidateFixed ? "true" : "false") + << ",selected:" << node.m_selectedUnigramIndex << "," + << node.m_unigrams << ")"; + return stream; +} - inline Node::Node() - : m_candidateFixed(false) - , m_selectedUnigramIndex(0) - , m_score(0.0) - { - } +inline Node::Node() +: m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {} - inline Node::Node(const string& inKey, const vector& inUnigrams, const vector& inBigrams) - : m_key(inKey) - , m_unigrams(inUnigrams) - , m_candidateFixed(false) - , m_selectedUnigramIndex(0) - , m_score(0.0) - { - stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); - - if (m_unigrams.size()) { - m_score = m_unigrams[0].score; - } - - size_t i = 0; - for (vector::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) { - m_valueUnigramIndexMap[(*ui).keyValue.value] = i; - i++; - - m_candidates.push_back((*ui).keyValue); - } - - for (vector::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) { - m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); - } - } +inline Node::Node(const std::string& key, const std::vector& unigrams, + const std::vector& bigrams) +: m_key(key), +m_unigrams(unigrams), +m_candidateFixed(false), +m_selectedUnigramIndex(0), +m_score(0.0) { + stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); + + if (m_unigrams.size()) { + m_score = m_unigrams[0].score; + } + + size_t i = 0; + for (std::vector::const_iterator ui = m_unigrams.begin(); + ui != m_unigrams.end(); ++ui) { + m_valueUnigramIndexMap[(*ui).keyValue.value] = i; + i++; - inline void Node::primeNodeWithPreceedingKeyValues(const vector& inKeyValues) - { - size_t newIndex = m_selectedUnigramIndex; - double max = m_score; + m_candidates.push_back((*ui).keyValue); + } + + for (std::vector::const_iterator bi = bigrams.begin(); + bi != bigrams.end(); ++bi) { + m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); + } +} - if (!isCandidateFixed()) { - for (vector::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) { - map >::const_iterator f = m_preceedingGramBigramMap.find(*kvi); - if (f != m_preceedingGramBigramMap.end()) { - const vector& bigrams = (*f).second; - - for (vector::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) { - const Bigram& bigram = *bi; - if (bigram.score > max) { - map::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value); - if (uf != m_valueUnigramIndexMap.end()) { - newIndex = (*uf).second; - max = bigram.score; - } - } +inline void Node::primeNodeWithPreceedingKeyValues( + const std::vector& keyValues) { + size_t newIndex = m_selectedUnigramIndex; + double max = m_score; + + if (!isCandidateFixed()) { + for (std::vector::const_iterator kvi = keyValues.begin(); + kvi != keyValues.end(); ++kvi) { + std::map >::const_iterator f = + m_preceedingGramBigramMap.find(*kvi); + if (f != m_preceedingGramBigramMap.end()) { + const std::vector& bigrams = (*f).second; + + for (std::vector::const_iterator bi = bigrams.begin(); + bi != bigrams.end(); ++bi) { + const Bigram& bigram = *bi; + if (bigram.score > max) { + std::map::const_iterator uf = + m_valueUnigramIndexMap.find((*bi).keyValue.value); + if (uf != m_valueUnigramIndexMap.end()) { + newIndex = (*uf).second; + max = bigram.score; } } } } - - if (m_score != max) { - m_score = max; - } - - if (newIndex != m_selectedUnigramIndex) { - m_selectedUnigramIndex = newIndex; - } } - - inline bool Node::isCandidateFixed() const - { - return m_candidateFixed; - } - - inline const vector& Node::candidates() const - { - return m_candidates; - } - - inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) - { - if (inIndex >= m_unigrams.size()) { - m_selectedUnigramIndex = 0; - } - else { - m_selectedUnigramIndex = inIndex; - } - - m_candidateFixed = inFix; - m_score = 99; - } - - inline void Node::resetCandidate() - { - m_selectedUnigramIndex = 0; - m_candidateFixed = 0; - if (m_unigrams.size()) { - m_score = m_unigrams[0].score; - } - } - - inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { - if (index >= m_unigrams.size()) { - m_selectedUnigramIndex = 0; - } else { - m_selectedUnigramIndex = index; - } - m_candidateFixed = false; - m_score = score; - } - - inline const string& Node::key() const - { - return m_key; - } - - inline double Node::score() const - { - return m_score; - } - - // Prevents the override model to remember symbols with scode -X or lower. -// inline double Node::scoreForCandidate(string &candidate) const -// { -// for (auto unigram : m_unigrams) { -// if (unigram.keyValue.value == candidate) { -// return unigram.score; -// } -// } -// return 0.0; -// } - - inline double Node::highestUnigramScore() const { - if (m_unigrams.empty()) { - return 0.0; - } - return m_unigrams[0].score; - } - - inline const KeyValuePair Node::currentKeyValue() const - { - if(m_selectedUnigramIndex >= m_unigrams.size()) { - return KeyValuePair(); - } - else { - return m_candidates[m_selectedUnigramIndex]; - } - } + } + + if (m_score != max) { + m_score = max; + } + + if (newIndex != m_selectedUnigramIndex) { + m_selectedUnigramIndex = newIndex; } } +inline bool Node::isCandidateFixed() const { return m_candidateFixed; } + +inline const std::vector& Node::candidates() const { + return m_candidates; +} + +inline void Node::selectCandidateAtIndex(size_t index, bool fix) { + if (index >= m_unigrams.size()) { + m_selectedUnigramIndex = 0; + } else { + m_selectedUnigramIndex = index; + } + + m_candidateFixed = fix; + m_score = 99; +} + +inline void Node::resetCandidate() { + m_selectedUnigramIndex = 0; + m_candidateFixed = 0; + if (m_unigrams.size()) { + m_score = m_unigrams[0].score; + } +} + +inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { + if (index >= m_unigrams.size()) { + m_selectedUnigramIndex = 0; + } else { + m_selectedUnigramIndex = index; + } + m_candidateFixed = false; + m_score = score; +} + +inline const std::string& Node::key() const { return m_key; } + +inline double Node::score() const { return m_score; } + +// Prevents the override model to remember symbols with scode -X or lower. +//inline double Node::scoreForCandidate(const std::string& candidate) const { +// for (auto unigram : m_unigrams) { +// if (unigram.keyValue.value == candidate) { +// return unigram.score; +// } +// } +// return 0.0; +//} + +inline double Node::highestUnigramScore() const { + if (m_unigrams.empty()) { + return 0.0; + } + return m_unigrams[0].score; +} + +inline const KeyValuePair Node::currentKeyValue() const { + if (m_selectedUnigramIndex >= m_unigrams.size()) { + return KeyValuePair(); + } else { + return m_candidates[m_selectedUnigramIndex]; + } +} +} // namespace Gramambular +} // namespace Taiyan + #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h b/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h index 29462d85..f215c92e 100644 --- a/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h +++ b/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h @@ -17,55 +17,48 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef NodeAnchor_h -#define NodeAnchor_h +#ifndef NODEANCHOR_H_ +#define NODEANCHOR_H_ + +#include #include "Node.h" namespace Taiyan { - namespace Gramambular { - class NodeAnchor { - public: - NodeAnchor(); - const Node *node; - size_t location; - size_t spanningLength; - double accumulatedScore; - }; - - inline NodeAnchor::NodeAnchor() - : node(0) - , location(0) - , spanningLength(0) - , accumulatedScore(0.0) - { - } +namespace Gramambular { - inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) - { - inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),"; - if (inAnchor.node) { - inStream << *(inAnchor.node); - } - else { - inStream << "null"; - } - inStream << "}"; - return inStream; - } - - inline ostream& operator<<(ostream& inStream, const vector& inAnchor) - { - for (vector::const_iterator i = inAnchor.begin() ; i != inAnchor.end() ; ++i) { - inStream << *i; - if (i + 1 != inAnchor.end()) { - inStream << "<-"; - } - } - - return inStream; - } +struct NodeAnchor { + const Node* node = nullptr; + size_t location = 0; + size_t spanningLength = 0; + double accumulatedScore = 0.0; +}; + +inline std::ostream& operator<<(std::ostream& stream, + const NodeAnchor& anchor) { + stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),"; + if (anchor.node) { + stream << *(anchor.node); + } else { + stream << "null"; } + stream << "}"; + return stream; } +inline std::ostream& operator<<(std::ostream& stream, + const std::vector& anchor) { + for (std::vector::const_iterator i = anchor.begin(); + i != anchor.end(); ++i) { + stream << *i; + if (i + 1 != anchor.end()) { + stream << "<-"; + } + } + + return stream; +} +} // namespace Gramambular +} // namespace Taiyan + #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Span.h b/Source/Modules/LanguageParsers/Gramambular/Span.h index d4336d47..e46df777 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Span.h +++ b/Source/Modules/LanguageParsers/Gramambular/Span.h @@ -17,88 +17,77 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef Span_h -#define Span_h +#ifndef SPAN_H_ +#define SPAN_H_ #include #include #include + #include "Node.h" namespace Taiyan { - namespace Gramambular { - class Span { - public: - Span(); +namespace Gramambular { +class Span { +public: + void clear(); + void insertNodeOfLength(const Node& node, size_t length); + void removeNodeOfLengthGreaterThan(size_t length); + + Node* nodeOfLength(size_t length); + size_t maximumLength() const; + +protected: + std::map m_lengthNodeMap; + size_t m_maximumLength = 0; +}; - void clear(); - void insertNodeOfLength(const Node& inNode, size_t inLength); - void removeNodeOfLengthGreaterThan(size_t inLength); - - Node* nodeOfLength(size_t inLength); - size_t maximumLength() const; +inline void Span::clear() { + m_lengthNodeMap.clear(); + m_maximumLength = 0; +} - protected: - map m_lengthNodeMap; - size_t m_maximumLength; - }; - - inline Span::Span() - : m_maximumLength(0) - { - } - - inline void Span::clear() - { - m_lengthNodeMap.clear(); - m_maximumLength = 0; - } - - inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) - { - m_lengthNodeMap[inLength] = inNode; - if (inLength > m_maximumLength) { - m_maximumLength = inLength; - } - } - - inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) - { - if (inLength > m_maximumLength) { - return; - } - - size_t max = 0; - set removeSet; - for (map::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) { - if ((*i).first > inLength) { - removeSet.insert((*i).first); - } - else { - if ((*i).first > max) { - max = (*i).first; - } - } - } - - for (set::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) { - m_lengthNodeMap.erase(*i); - } - - m_maximumLength = max; - } - - inline Node* Span::nodeOfLength(size_t inLength) - { - map::iterator f = m_lengthNodeMap.find(inLength); - return f == m_lengthNodeMap.end() ? 0 : &(*f).second; - } - - inline size_t Span::maximumLength() const - { - return m_maximumLength; - } +inline void Span::insertNodeOfLength(const Node& node, size_t length) { + m_lengthNodeMap[length] = node; + if (length > m_maximumLength) { + m_maximumLength = length; } } +inline void Span::removeNodeOfLengthGreaterThan(size_t length) { + if (length > m_maximumLength) { + return; + } + + size_t max = 0; + std::set removeSet; + for (std::map::iterator i = m_lengthNodeMap.begin(), + e = m_lengthNodeMap.end(); + i != e; ++i) { + if ((*i).first > length) { + removeSet.insert((*i).first); + } else { + if ((*i).first > max) { + max = (*i).first; + } + } + } + + for (std::set::iterator i = removeSet.begin(), e = removeSet.end(); + i != e; ++i) { + m_lengthNodeMap.erase(*i); + } + + m_maximumLength = max; +} + +inline Node* Span::nodeOfLength(size_t length) { + std::map::iterator f = m_lengthNodeMap.find(length); + return f == m_lengthNodeMap.end() ? 0 : &(*f).second; +} + +inline size_t Span::maximumLength() const { return m_maximumLength; } +} // namespace Gramambular +} // namespace Taiyan + #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Unigram.h b/Source/Modules/LanguageParsers/Gramambular/Unigram.h index 2f4a5b88..b05e55f6 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Unigram.h +++ b/Source/Modules/LanguageParsers/Gramambular/Unigram.h @@ -17,80 +17,75 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef Unigram_h -#define Unigram_h +#ifndef UNIGRAM_H_ +#define UNIGRAM_H_ #include + #include "KeyValuePair.h" namespace Taiyan { - namespace Gramambular { - class Unigram { - public: - Unigram(); +namespace Gramambular { - KeyValuePair keyValue; - double score; - - bool operator==(const Unigram& inAnother) const; - bool operator<(const Unigram& inAnother) const; - - static bool ScoreCompare(const Unigram& a, const Unigram& b); - }; +class Unigram { +public: + Unigram(); + + KeyValuePair keyValue; + double score; + + bool operator==(const Unigram& another) const; + bool operator<(const Unigram& another) const; + + static bool ScoreCompare(const Unigram& a, const Unigram& b); +}; - inline ostream& operator<<(ostream& inStream, const Unigram& inGram) - { - streamsize p = inStream.precision(); - inStream.precision(6); - inStream << "(" << inGram.keyValue << "," << inGram.score << ")"; - inStream.precision(p); - return inStream; - } - - inline ostream& operator<<(ostream& inStream, const vector& inGrams) - { - inStream << "[" << inGrams.size() << "]=>{"; - - size_t index = 0; - - for (vector::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { - inStream << index << "=>"; - inStream << *gi; - if (gi + 1 != inGrams.end()) { - inStream << ","; - } - } - - inStream << "}"; - return inStream; - } - - inline Unigram::Unigram() - : score(0.0) - { - } - - inline bool Unigram::operator==(const Unigram& inAnother) const - { - return keyValue == inAnother.keyValue && score == inAnother.score; - } - - inline bool Unigram::operator<(const Unigram& inAnother) const - { - if (keyValue < inAnother.keyValue) { - return true; - } - else if (keyValue == inAnother.keyValue) { - return score < inAnother.score; - } - return false; - } - - inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) - { - return a.score > b.score; - } - } +inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) { + std::streamsize p = stream.precision(); + stream.precision(6); + stream << "(" << gram.keyValue << "," << gram.score << ")"; + stream.precision(p); + return stream; } +inline std::ostream& operator<<(std::ostream& stream, + const std::vector& grams) { + stream << "[" << grams.size() << "]=>{"; + + size_t index = 0; + + for (std::vector::const_iterator gi = grams.begin(); + gi != grams.end(); ++gi, ++index) { + stream << index << "=>"; + stream << *gi; + if (gi + 1 != grams.end()) { + stream << ","; + } + } + + stream << "}"; + return stream; +} + +inline Unigram::Unigram() : score(0.0) {} + +inline bool Unigram::operator==(const Unigram& another) const { + return keyValue == another.keyValue && score == another.score; +} + +inline bool Unigram::operator<(const Unigram& another) const { + if (keyValue < another.keyValue) { + return true; + } else if (keyValue == another.keyValue) { + return score < another.score; + } + return false; +} + +inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) { + return a.score > b.score; +} +} // namespace Gramambular +} // namespace Taiyan + #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Walker.h b/Source/Modules/LanguageParsers/Gramambular/Walker.h index 06c2be3d..fdb035d6 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Walker.h +++ b/Source/Modules/LanguageParsers/Gramambular/Walker.h @@ -17,67 +17,69 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef Walker_h -#define Walker_h +#ifndef WALKER_H_ +#define WALKER_H_ #include +#include + #include "Grid.h" namespace Taiyan { - namespace Gramambular { - using namespace std; +namespace Gramambular { - class Walker { - public: - Walker(Grid* inGrid); - const vector reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0); - - protected: - Grid* m_grid; - }; - - inline Walker::Walker(Grid* inGrid) - : m_grid(inGrid) - { +class Walker { +public: + explicit Walker(Grid* inGrid); + const std::vector reverseWalk(size_t location, + double accumulatedScore = 0.0); + +protected: + Grid* m_grid; +}; + +inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {} + +inline const std::vector Walker::reverseWalk( + size_t location, double accumulatedScore) { + if (!location || location > m_grid->width()) { + return std::vector(); + } + + std::vector > paths; + + std::vector nodes = m_grid->nodesEndingAt(location); + + for (std::vector::iterator ni = nodes.begin(); ni != nodes.end(); + ++ni) { + if (!(*ni).node) { + continue; } - inline const vector Walker::reverseWalk(size_t inLocation, double inAccumulatedScore) - { - if (!inLocation || inLocation > m_grid->width()) { - return vector(); - } - - vector > paths; - - vector nodes = m_grid->nodesEndingAt(inLocation); - - for (vector::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) { - if (!(*ni).node) { - continue; - } - - (*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); - - vector path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); - path.insert(path.begin(), *ni); - - paths.push_back(path); - } - - if (!paths.size()) { - return vector(); - } - - vector* result = &*(paths.begin()); - for (vector >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) { - if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { - result = &*pi; - } - } - - return *result; + (*ni).accumulatedScore = accumulatedScore + (*ni).node->score(); + + std::vector path = + reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore); + path.insert(path.begin(), *ni); + + paths.push_back(path); + } + + if (!paths.size()) { + return std::vector(); + } + + std::vector* result = &*(paths.begin()); + for (std::vector >::iterator pi = paths.begin(); + pi != paths.end(); ++pi) { + if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { + result = &*pi; } } + + return *result; } +} // namespace Gramambular +} // namespace Taiyan #endif diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index 449538b2..578eea6b 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -50,6 +50,7 @@ 5BD05C6827B2BBEF004C4F1D /* Content.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6327B2BBEF004C4F1D /* Content.swift */; }; 5BD05C6927B2BBEF004C4F1D /* WindowController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */; }; 5BD05C6A27B2BBEF004C4F1D /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */; }; + 5BDC5CAB27C2873D00E1CCE2 /* Grid.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5BDC5CAA27C2873D00E1CCE2 /* Grid.mm */; }; 5BDCBB2E27B4E67A00D0CC59 /* vChewingPhraseEditor.app in Resources */ = {isa = PBXBuildFile; fileRef = 5BD05BB827B2A429004C4F1D /* vChewingPhraseEditor.app */; }; 5BE78BD927B3775B005EA1BE /* ctlAboutWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BE78BD827B37750005EA1BE /* ctlAboutWindow.swift */; }; 5BE78BDD27B3776D005EA1BE /* frmAboutWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 5BE78BDA27B37764005EA1BE /* frmAboutWindow.xib */; }; @@ -195,6 +196,7 @@ 5BD05C6327B2BBEF004C4F1D /* Content.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Content.swift; sourceTree = ""; }; 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = WindowController.swift; sourceTree = ""; }; 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = ""; }; + 5BDC5CAA27C2873D00E1CCE2 /* Grid.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = Grid.mm; sourceTree = ""; }; 5BDCBB4227B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/MainMenu.strings"; sourceTree = ""; }; 5BDCBB4327B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/frmAboutWindow.strings"; sourceTree = ""; }; 5BDCBB4527B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "Source/WindowNIBs/zh-Hant.lproj/frmPrefWindow.strings"; sourceTree = ""; }; @@ -663,6 +665,7 @@ 6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */, 6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */, 6A0D4F1715FC0EB100ABF4B3 /* Grid.h */, + 5BDC5CAA27C2873D00E1CCE2 /* Grid.mm */, 6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */, 6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */, 6A0D4F1A15FC0EB100ABF4B3 /* Node.h */, @@ -943,6 +946,7 @@ D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */, 5B62A33627AE795800A19448 /* PreferencesModule.swift in Sources */, 5B62A33827AE79CD00A19448 /* NSStringUtils.swift in Sources */, + 5BDC5CAB27C2873D00E1CCE2 /* Grid.mm in Sources */, 5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */, 5B62A34927AE7CD900A19448 /* TooltipController.swift in Sources */, 6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */,