From 305c874003da9a8b5d16caf19b58273c698fe0d8 Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Sat, 19 Feb 2022 08:59:45 -0800 Subject: [PATCH 1/8] Supply a CMake project and a trivial unit test --- Source/Engine/Gramambular/CMakeLists.txt | 31 +++++++++++++++++++ Source/Engine/Gramambular/GramambularTest.cpp | 28 +++++++++++++++++ Source/Engine/Gramambular/Grid.cpp | 0 3 files changed, 59 insertions(+) create mode 100644 Source/Engine/Gramambular/CMakeLists.txt create mode 100644 Source/Engine/Gramambular/GramambularTest.cpp create mode 100644 Source/Engine/Gramambular/Grid.cpp diff --git a/Source/Engine/Gramambular/CMakeLists.txt b/Source/Engine/Gramambular/CMakeLists.txt new file mode 100644 index 00000000..31f660cd --- /dev/null +++ b/Source/Engine/Gramambular/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required(VERSION 3.17) +project(Gramambular) + +set(CMAKE_CXX_STANDARD 17) + +add_library(GramambularLib Bigram.h BlockReadingBuilder.h Gramambular.h Grid.h Grid.cpp KeyValuePair.h LanguageModel.h Node.h NodeAnchor.h Span.h Unigram.h Walker.h) + +# Let CMake fetch Google Test for us. +# https://github.com/google/googletest/tree/main/googletest#incorporating-into-an-existing-cmake-project +include(FetchContent) + +FetchContent_Declare( + googletest + # Specify the commit you depend on and update it regularly. + URL https://github.com/google/googletest/archive/609281088cfefc76f9d0ce82e1ff6c30cc3591e5.zip +) +# For Windows: Prevent overriding the parent project's compiler/linker settings +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) + +# Test target declarations. +add_executable(GramambularTest GramambularTest.cpp) +target_link_libraries(GramambularTest gtest_main GramambularLib) +include(GoogleTest) +gtest_discover_tests(GramambularTest) + +add_custom_target( + runTest + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/GramambularTest +) +add_dependencies(runTest GramambularTest) diff --git a/Source/Engine/Gramambular/GramambularTest.cpp b/Source/Engine/Gramambular/GramambularTest.cpp new file mode 100644 index 00000000..5868382a --- /dev/null +++ b/Source/Engine/Gramambular/GramambularTest.cpp @@ -0,0 +1,28 @@ +// Copyright (c) 2022 and onwards Lukhnos Liu +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "gtest/gtest.h" + +TEST(GramambularTest, Trivial) { + ASSERT_EQ(1, 1); +} diff --git a/Source/Engine/Gramambular/Grid.cpp b/Source/Engine/Gramambular/Grid.cpp new file mode 100644 index 00000000..e69de29b From d4d8d140047f97b4ee73be5fc778b9c33496ac72 Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Sat, 19 Feb 2022 09:25:18 -0800 Subject: [PATCH 2/8] Import the test code from Formosana --- Source/Engine/Gramambular/GramambularTest.cpp | 216 +++++++++++++++++- 1 file changed, 214 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Gramambular/GramambularTest.cpp b/Source/Engine/Gramambular/GramambularTest.cpp index 5868382a..d9ea65a3 100644 --- a/Source/Engine/Gramambular/GramambularTest.cpp +++ b/Source/Engine/Gramambular/GramambularTest.cpp @@ -22,7 +22,219 @@ // OTHER DEALINGS IN THE SOFTWARE. #include "gtest/gtest.h" +#include +#include +#include +#include +#include +#include +#include "Gramambular.h" -TEST(GramambularTest, Trivial) { - ASSERT_EQ(1, 1); +const char* SampleData = R"( +# +# The sample is from libtabe (http://sourceforge.net/projects/libtabe/) +# last updated in 2002. The project was originally initiated by +# Pai-Hsiang Hsiao in 1999. +# +# Libtabe is a frequency table of Taiwanese Mandarin words. The database +# itself is, according to the tar file, released under the BSD License. +# + +ㄙ 絲 -9.495858 +ㄙ 思 -9.006414 +ㄙ 私 -99.000000 +ㄙ 斯 -8.091803 +ㄙ 司 -99.000000 +ㄙ 嘶 -13.513987 +ㄙ 撕 -12.259095 +ㄍㄠ 高 -7.171551 +ㄎㄜ 顆 -10.574273 +ㄎㄜ 棵 -11.504072 +ㄎㄜ 刻 -10.450457 +ㄎㄜ 科 -7.171052 +ㄎㄜ 柯 -99.000000 +ㄍㄠ 膏 -11.928720 +ㄍㄠ 篙 -13.624335 +ㄍㄠ 糕 -12.390804 +ㄉㄜ˙ 的 -3.516024 +ㄉㄧˊ 的 -3.516024 +ㄉㄧˋ 的 -3.516024 +ㄓㄨㄥ 中 -5.809297 +ㄉㄜ˙ 得 -7.427179 +ㄍㄨㄥ 共 -8.381971 +ㄍㄨㄥ 供 -8.501463 +ㄐㄧˋ 既 -99.000000 +ㄐㄧㄣ 今 -8.034095 +ㄍㄨㄥ 紅 -8.858181 +ㄐㄧˋ 際 -7.608341 +ㄐㄧˋ 季 -99.000000 +ㄐㄧㄣ 金 -7.290109 +ㄐㄧˋ 騎 -10.939895 +ㄓㄨㄥ 終 -99.000000 +ㄐㄧˋ 記 -99.000000 +ㄐㄧˋ 寄 -99.000000 +ㄐㄧㄣ 斤 -99.000000 +ㄐㄧˋ 繼 -9.715317 +ㄐㄧˋ 計 -7.926683 +ㄐㄧˋ 暨 -8.373022 +ㄓㄨㄥ 鐘 -9.877580 +ㄐㄧㄣ 禁 -10.711079 +ㄍㄨㄥ 公 -7.877973 +ㄍㄨㄥ 工 -7.822167 +ㄍㄨㄥ 攻 -99.000000 +ㄍㄨㄥ 功 -99.000000 +ㄍㄨㄥ 宮 -99.000000 +ㄓㄨㄥ 鍾 -9.685671 +ㄐㄧˋ 繫 -10.425662 +ㄍㄨㄥ 弓 -99.000000 +ㄍㄨㄥ 恭 -99.000000 +ㄐㄧˋ 劑 -8.888722 +ㄐㄧˋ 祭 -10.204425 +ㄐㄧㄣ 浸 -11.378321 +ㄓㄨㄥ 盅 -99.000000 +ㄐㄧˋ 忌 -99.000000 +ㄐㄧˋ 技 -8.450826 +ㄐㄧㄣ 筋 -11.074890 +ㄍㄨㄥ 躬 -99.000000 +ㄐㄧˋ 冀 -12.045357 +ㄓㄨㄥ 忠 -99.000000 +ㄐㄧˋ 妓 -99.000000 +ㄐㄧˋ 濟 -9.517568 +ㄐㄧˋ 薊 -12.021587 +ㄐㄧㄣ 巾 -99.000000 +ㄐㄧㄣ 襟 -12.784206 +ㄋㄧㄢˊ 年 -6.086515 +ㄐㄧㄤˇ 講 -9.164384 +ㄐㄧㄤˇ 獎 -8.690941 +ㄐㄧㄤˇ 蔣 -10.127828 +ㄋㄧㄢˊ 黏 -11.336864 +ㄋㄧㄢˊ 粘 -11.285740 +ㄐㄧㄤˇ 槳 -12.492933 +ㄍㄨㄥㄙ 公司 -6.299461 +ㄎㄜㄐㄧˋ 科技 -6.736613 +ㄐㄧˋㄍㄨㄥ 濟公 -13.336653 +ㄐㄧㄤˇㄐㄧㄣ 獎金 -10.344678 +ㄋㄧㄢˊㄓㄨㄥ 年終 -11.668947 +ㄋㄧㄢˊㄓㄨㄥ 年中 -11.373044 +ㄍㄠㄎㄜㄐㄧˋ 高科技 -9.842421 +)"; + +using namespace std; +using namespace Formosa::Gramambular; + +class SimpleLM : public LanguageModel +{ + public: + SimpleLM(const char* input, bool swapKeyValue = false) + { + stringstream sstream(input); + while (sstream.good()) { + string line; + getline(sstream, line); + + if (!line.size() || (line.size() && line[0] == '#')) { + continue; + } + + stringstream linestream(line); + string col0; + string col1; + string col2; + linestream >> col0; + linestream >> col1; + linestream >> col2; + + Unigram u; + + if (swapKeyValue) { + u.keyValue.key = col1; + u.keyValue.value = col0; + } + else { + u.keyValue.key = col0; + u.keyValue.value = col1; + } + + u.score = atof(col2.c_str()); + + m_db[u.keyValue.key].push_back(u); + } + } + + const vector bigramsForKeys(const string &preceedingKey, const string& key) override + { + return vector(); + } + + const vector unigramsForKey(const string &key) override + { + map >::const_iterator f = m_db.find(key); + return f == m_db.end() ? vector() : (*f).second; + } + + bool hasUnigramsForKey(const string& key) override + { + map >::const_iterator f = m_db.find(key); + return f != m_db.end(); + } + + protected: + map > m_db; +}; + +TEST(GramambularTest, InputTest) { + SimpleLM lm(SampleData); + + BlockReadingBuilder builder(&lm); + builder.insertReadingAtCursor("ㄍㄠ"); + builder.insertReadingAtCursor("ㄐㄧˋ"); + builder.setCursorIndex(1); + builder.insertReadingAtCursor("ㄎㄜ"); + builder.setCursorIndex(0); + builder.deleteReadingAfterCursor(); + builder.insertReadingAtCursor("ㄍㄠ"); + builder.setCursorIndex(builder.length()); + builder.insertReadingAtCursor("ㄍㄨㄥ"); + builder.insertReadingAtCursor("ㄙ"); + builder.insertReadingAtCursor("ㄉㄜ˙"); + builder.insertReadingAtCursor("ㄋㄧㄢˊ"); + builder.insertReadingAtCursor("ㄓㄨㄥ"); + builder.insertReadingAtCursor("ㄐㄧㄤˇ"); + builder.insertReadingAtCursor("ㄐㄧㄣ"); + + Walker walker(&builder.grid()); + + vector walked = walker.reverseWalk(builder.grid().width(), 0.0); + reverse(walked.begin(), walked.end()); + + vector composed; + for (vector::iterator wi = walked.begin() ; wi != walked.end() ; ++wi) { + composed.push_back((*wi).node->currentKeyValue().value); + } + ASSERT_EQ(composed, (vector{"高科技", "公司", "的", "年中", "獎金"})); +} + +TEST(GramambularTest, WordSegmentationTest) { + SimpleLM lm2(SampleData, true); + BlockReadingBuilder builder2(&lm2); + builder2.insertReadingAtCursor("高"); + builder2.insertReadingAtCursor("科"); + builder2.insertReadingAtCursor("技"); + builder2.insertReadingAtCursor("公"); + builder2.insertReadingAtCursor("司"); + builder2.insertReadingAtCursor("的"); + builder2.insertReadingAtCursor("年"); + builder2.insertReadingAtCursor("終"); + builder2.insertReadingAtCursor("獎"); + builder2.insertReadingAtCursor("金"); + Walker walker2(&builder2.grid()); + + vector walked = walker2.reverseWalk(builder2.grid().width(), 0.0); + reverse(walked.begin(), walked.end()); + + vector segmented; + for (vector::iterator wi = walked.begin(); wi != walked.end(); ++wi) { + segmented.push_back((*wi).node->currentKeyValue().key); + } + ASSERT_EQ(segmented, (vector{"高科技", "公司", "的", "年終", "獎金"})); } From d3302ef70a2a825f84506897b589c330692a79ae Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Sat, 19 Feb 2022 09:28:04 -0800 Subject: [PATCH 3/8] Reformat Gramambular with clang-format --- Source/Engine/Gramambular/Bigram.h | 124 +++-- .../Engine/Gramambular/BlockReadingBuilder.h | 349 +++++++------- Source/Engine/Gramambular/GramambularTest.cpp | 35 +- Source/Engine/Gramambular/Grid.h | 445 +++++++++--------- Source/Engine/Gramambular/KeyValuePair.h | 58 ++- Source/Engine/Gramambular/LanguageModel.h | 27 +- Source/Engine/Gramambular/Node.h | 355 +++++++------- Source/Engine/Gramambular/NodeAnchor.h | 79 ++-- Source/Engine/Gramambular/Span.h | 133 +++--- Source/Engine/Gramambular/Unigram.h | 121 +++-- Source/Engine/Gramambular/Walker.h | 100 ++-- 11 files changed, 887 insertions(+), 939 deletions(-) diff --git a/Source/Engine/Gramambular/Bigram.h b/Source/Engine/Gramambular/Bigram.h index 42ac9033..750db4c1 100644 --- a/Source/Engine/Gramambular/Bigram.h +++ b/Source/Engine/Gramambular/Bigram.h @@ -33,74 +33,68 @@ #include "KeyValuePair.h" namespace Formosa { - namespace Gramambular { - class Bigram { - public: - Bigram(); - - KeyValuePair preceedingKeyValue; - KeyValuePair keyValue; - double score; - - bool operator==(const Bigram& inAnother) const; - bool operator<(const Bigram& inAnother) const; - }; +namespace Gramambular { +class Bigram { + public: + Bigram(); - inline ostream& operator<<(ostream& inStream, const Bigram& inGram) - { - streamsize p = inStream.precision(); - inStream.precision(6); - inStream << "(" << inGram.keyValue << "|" <& inGrams) - { - inStream << "[" << inGrams.size() << "]=>{"; - - size_t index = 0; - - for (vector::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { - inStream << index << "=>"; - inStream << *gi; - if (gi + 1 != inGrams.end()) { - inStream << ","; - } - } - - inStream << "}"; - return inStream; - } - - inline Bigram::Bigram() - : score(0.0) - { - } - - inline bool Bigram::operator==(const Bigram& inAnother) const - { - return preceedingKeyValue == inAnother.preceedingKeyValue && keyValue == inAnother.keyValue && score == inAnother.score; - } - - inline bool Bigram::operator<(const Bigram& inAnother) const - { - if (preceedingKeyValue < inAnother.preceedingKeyValue) { - return true; - } - else if (preceedingKeyValue == inAnother.preceedingKeyValue) { - if (keyValue < inAnother.keyValue) { - return true; - } - else if (keyValue == inAnother.keyValue) { - return score < inAnother.score; - } - return false; - } + bool operator==(const Bigram& inAnother) const; + bool operator<(const Bigram& inAnother) const; +}; - return false; - } - } +inline ostream& operator<<(ostream& inStream, const Bigram& inGram) { + streamsize p = inStream.precision(); + inStream.precision(6); + inStream << "(" << inGram.keyValue << "|" << inGram.preceedingKeyValue << "," + << inGram.score << ")"; + inStream.precision(p); + return inStream; } +inline ostream& operator<<(ostream& inStream, const vector& inGrams) { + inStream << "[" << inGrams.size() << "]=>{"; + + size_t index = 0; + + for (vector::const_iterator gi = inGrams.begin(); gi != inGrams.end(); + ++gi, ++index) { + inStream << index << "=>"; + inStream << *gi; + if (gi + 1 != inGrams.end()) { + inStream << ","; + } + } + + inStream << "}"; + return inStream; +} + +inline Bigram::Bigram() : score(0.0) {} + +inline bool Bigram::operator==(const Bigram& inAnother) const { + return preceedingKeyValue == inAnother.preceedingKeyValue && + keyValue == inAnother.keyValue && score == inAnother.score; +} + +inline bool Bigram::operator<(const Bigram& inAnother) const { + if (preceedingKeyValue < inAnother.preceedingKeyValue) { + return true; + } else if (preceedingKeyValue == inAnother.preceedingKeyValue) { + if (keyValue < inAnother.keyValue) { + return true; + } else if (keyValue == inAnother.keyValue) { + return score < inAnother.score; + } + return false; + } + + return false; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index 8c503fcc..219e6a5a 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -29,198 +29,185 @@ #define BlockReadingBuilder_h #include + #include "Grid.h" #include "LanguageModel.h" namespace Formosa { - namespace Gramambular { - using namespace std; - - class BlockReadingBuilder { - public: - BlockReadingBuilder(LanguageModel *inLM); - void clear(); - - size_t length() const; - size_t cursorIndex() const; - void setCursorIndex(size_t inNewIndex); - void insertReadingAtCursor(const string& inReading); - bool deleteReadingBeforeCursor(); // backspace - bool deleteReadingAfterCursor(); // delete - - bool removeHeadReadings(size_t count); - - void setJoinSeparator(const string& separator); - const string joinSeparator() const; +namespace Gramambular { +using namespace std; - vector readings() const; +class BlockReadingBuilder { + public: + BlockReadingBuilder(LanguageModel* inLM); + void clear(); - Grid& grid(); - - protected: - void build(); - - static const string Join(vector::const_iterator begin, vector::const_iterator end, const string& separator); - - //最多使用六個字組成一個詞 - static const size_t MaximumBuildSpanLength = 6; - - size_t m_cursorIndex; - vector m_readings; - - Grid m_grid; - LanguageModel *m_LM; - string m_joinSeparator; - }; - - inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM) - : m_LM(inLM) - , m_cursorIndex(0) - { - } - - inline void BlockReadingBuilder::clear() - { - m_cursorIndex = 0; - m_readings.clear(); - m_grid.clear(); - } - - inline size_t BlockReadingBuilder::length() const - { - return m_readings.size(); - } - - inline size_t BlockReadingBuilder::cursorIndex() const - { - return m_cursorIndex; - } + size_t length() const; + size_t cursorIndex() const; + void setCursorIndex(size_t inNewIndex); + void insertReadingAtCursor(const string& inReading); + bool deleteReadingBeforeCursor(); // backspace + bool deleteReadingAfterCursor(); // delete - inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) - { - m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex; - } - - inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading) - { - m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); - - m_grid.expandGridByOneAtLocation(m_cursorIndex); - build(); - m_cursorIndex++; - } + bool removeHeadReadings(size_t count); - inline vector BlockReadingBuilder::readings() const - { - return m_readings; - } - - inline bool BlockReadingBuilder::deleteReadingBeforeCursor() - { - if (!m_cursorIndex) { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex); - m_cursorIndex--; - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; - } - - inline bool BlockReadingBuilder::deleteReadingAfterCursor() - { - if (m_cursorIndex == m_readings.size()) { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1); - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; - } - - inline bool BlockReadingBuilder::removeHeadReadings(size_t count) - { - if (count > length()) { - return false; - } - - for (size_t i = 0; i < count; i++) { - if (m_cursorIndex) { - m_cursorIndex--; - } - m_readings.erase(m_readings.begin(), m_readings.begin() + 1); - m_grid.shrinkGridByOneAtLocation(0); - build(); - } - - return true; - } - - inline void BlockReadingBuilder::setJoinSeparator(const string& separator) - { - m_joinSeparator = separator; - } - - inline const string BlockReadingBuilder::joinSeparator() const - { - return m_joinSeparator; - } + void setJoinSeparator(const string& separator); + const string joinSeparator() const; - inline Grid& BlockReadingBuilder::grid() - { - return m_grid; - } + vector readings() const; - inline void BlockReadingBuilder::build() - { - if (!m_LM) { - return; - } - - size_t begin = 0; - size_t end = m_cursorIndex + MaximumBuildSpanLength; - - if (m_cursorIndex < MaximumBuildSpanLength) { - begin = 0; - } - else { - begin = m_cursorIndex - MaximumBuildSpanLength; - } - - if (end > m_readings.size()) { - end = m_readings.size(); - } - - for (size_t p = begin ; p < end ; p++) { - for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) { - string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); - if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { - vector unigrams = m_LM->unigramsForKey(combinedReading); + Grid& grid(); - if (unigrams.size() > 0) { - Node n(combinedReading, unigrams, vector()); - m_grid.insertNode(n, p, q); - } - } - } - } - } - - inline const string BlockReadingBuilder::Join(vector::const_iterator begin, vector::const_iterator end, const string& separator) - { - string result; - for (vector::const_iterator iter = begin ; iter != end ; ) { - result += *iter; - ++iter; - if (iter != end) { - result += separator; - } - } - return result; - } - } + protected: + void build(); + + static const string Join(vector::const_iterator begin, + vector::const_iterator end, + const string& separator); + + //最多使用六個字組成一個詞 + static const size_t MaximumBuildSpanLength = 6; + + size_t m_cursorIndex; + vector m_readings; + + Grid m_grid; + LanguageModel* m_LM; + string m_joinSeparator; +}; + +inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* inLM) + : m_LM(inLM), m_cursorIndex(0) {} + +inline void BlockReadingBuilder::clear() { + m_cursorIndex = 0; + m_readings.clear(); + m_grid.clear(); } +inline size_t BlockReadingBuilder::length() const { return m_readings.size(); } + +inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; } + +inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) { + m_cursorIndex = + inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex; +} + +inline void BlockReadingBuilder::insertReadingAtCursor( + const string& inReading) { + m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); + + m_grid.expandGridByOneAtLocation(m_cursorIndex); + build(); + m_cursorIndex++; +} + +inline vector BlockReadingBuilder::readings() const { + return m_readings; +} + +inline bool BlockReadingBuilder::deleteReadingBeforeCursor() { + if (!m_cursorIndex) { + return false; + } + + m_readings.erase(m_readings.begin() + m_cursorIndex - 1, + m_readings.begin() + m_cursorIndex); + m_cursorIndex--; + m_grid.shrinkGridByOneAtLocation(m_cursorIndex); + build(); + return true; +} + +inline bool BlockReadingBuilder::deleteReadingAfterCursor() { + if (m_cursorIndex == m_readings.size()) { + return false; + } + + m_readings.erase(m_readings.begin() + m_cursorIndex, + m_readings.begin() + m_cursorIndex + 1); + m_grid.shrinkGridByOneAtLocation(m_cursorIndex); + build(); + return true; +} + +inline bool BlockReadingBuilder::removeHeadReadings(size_t count) { + if (count > length()) { + return false; + } + + for (size_t i = 0; i < count; i++) { + if (m_cursorIndex) { + m_cursorIndex--; + } + m_readings.erase(m_readings.begin(), m_readings.begin() + 1); + m_grid.shrinkGridByOneAtLocation(0); + build(); + } + + return true; +} + +inline void BlockReadingBuilder::setJoinSeparator(const string& separator) { + m_joinSeparator = separator; +} + +inline const string BlockReadingBuilder::joinSeparator() const { + return m_joinSeparator; +} + +inline Grid& BlockReadingBuilder::grid() { return m_grid; } + +inline void BlockReadingBuilder::build() { + if (!m_LM) { + return; + } + + size_t begin = 0; + size_t end = m_cursorIndex + MaximumBuildSpanLength; + + if (m_cursorIndex < MaximumBuildSpanLength) { + begin = 0; + } else { + begin = m_cursorIndex - MaximumBuildSpanLength; + } + + if (end > m_readings.size()) { + end = m_readings.size(); + } + + for (size_t p = begin; p < end; p++) { + for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) { + string combinedReading = Join( + m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); + if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, + combinedReading)) { + vector unigrams = m_LM->unigramsForKey(combinedReading); + + if (unigrams.size() > 0) { + Node n(combinedReading, unigrams, vector()); + m_grid.insertNode(n, p, q); + } + } + } + } +} + +inline const string BlockReadingBuilder::Join( + vector::const_iterator begin, vector::const_iterator end, + const string& separator) { + string result; + for (vector::const_iterator iter = begin; iter != end;) { + result += *iter; + ++iter; + if (iter != end) { + result += separator; + } + } + return result; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/GramambularTest.cpp b/Source/Engine/Gramambular/GramambularTest.cpp index d9ea65a3..8f0e008d 100644 --- a/Source/Engine/Gramambular/GramambularTest.cpp +++ b/Source/Engine/Gramambular/GramambularTest.cpp @@ -21,14 +21,15 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#include "gtest/gtest.h" #include +#include #include #include -#include -#include #include +#include + #include "Gramambular.h" +#include "gtest/gtest.h" const char* SampleData = R"( # @@ -122,11 +123,9 @@ const char* SampleData = R"( using namespace std; using namespace Formosa::Gramambular; -class SimpleLM : public LanguageModel -{ +class SimpleLM : public LanguageModel { public: - SimpleLM(const char* input, bool swapKeyValue = false) - { + SimpleLM(const char* input, bool swapKeyValue = false) { stringstream sstream(input); while (sstream.good()) { string line; @@ -149,8 +148,7 @@ class SimpleLM : public LanguageModel if (swapKeyValue) { u.keyValue.key = col1; u.keyValue.value = col0; - } - else { + } else { u.keyValue.key = col0; u.keyValue.value = col1; } @@ -161,19 +159,17 @@ class SimpleLM : public LanguageModel } } - const vector bigramsForKeys(const string &preceedingKey, const string& key) override - { + const vector bigramsForKeys(const string& preceedingKey, + const string& key) override { return vector(); } - const vector unigramsForKey(const string &key) override - { + const vector unigramsForKey(const string& key) override { map >::const_iterator f = m_db.find(key); return f == m_db.end() ? vector() : (*f).second; } - bool hasUnigramsForKey(const string& key) override - { + bool hasUnigramsForKey(const string& key) override { map >::const_iterator f = m_db.find(key); return f != m_db.end(); } @@ -208,7 +204,8 @@ TEST(GramambularTest, InputTest) { reverse(walked.begin(), walked.end()); vector composed; - for (vector::iterator wi = walked.begin() ; wi != walked.end() ; ++wi) { + for (vector::iterator wi = walked.begin(); wi != walked.end(); + ++wi) { composed.push_back((*wi).node->currentKeyValue().value); } ASSERT_EQ(composed, (vector{"高科技", "公司", "的", "年中", "獎金"})); @@ -233,8 +230,10 @@ TEST(GramambularTest, WordSegmentationTest) { reverse(walked.begin(), walked.end()); vector segmented; - for (vector::iterator wi = walked.begin(); wi != walked.end(); ++wi) { + for (vector::iterator wi = walked.begin(); wi != walked.end(); + ++wi) { segmented.push_back((*wi).node->currentKeyValue().key); } - ASSERT_EQ(segmented, (vector{"高科技", "公司", "的", "年終", "獎金"})); + ASSERT_EQ(segmented, + (vector{"高科技", "公司", "的", "年終", "獎金"})); } diff --git a/Source/Engine/Gramambular/Grid.h b/Source/Engine/Gramambular/Grid.h index ccceacc1..75c9c52c 100644 --- a/Source/Engine/Gramambular/Grid.h +++ b/Source/Engine/Gramambular/Grid.h @@ -29,244 +29,243 @@ #define Grid_h #include + #include "NodeAnchor.h" #include "Span.h" namespace Formosa { - namespace Gramambular { - - class Grid { - public: - void clear(); - void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength); - bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey); +namespace Gramambular { - void expandGridByOneAtLocation(size_t inLocation); - void shrinkGridByOneAtLocation(size_t inLocation); +class Grid { + public: + void clear(); + void insertNode(const Node& inNode, size_t inLocation, + size_t inSpanningLength); + bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, + size_t inSpanningLength, + const string& inKey); - size_t width() const; - vector nodesEndingAt(size_t inLocation); - vector nodesCrossingOrEndingAt(size_t inLocation); + void expandGridByOneAtLocation(size_t inLocation); + void shrinkGridByOneAtLocation(size_t inLocation); - // "Freeze" the node with the unigram that represents the selected candidate value. - // After this, the node that contains the unigram will always be evaluated to that - // unigram, while all other overlapping nodes will be reset to their initial state - // (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.) - NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); + size_t width() const; + vector nodesEndingAt(size_t inLocation); + vector nodesCrossingOrEndingAt(size_t inLocation); - // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only - // boost the unigram that represents the value with an overriding score. This - // has the same side effect as fixNodeSelectedCandidate, which is that all other - // overlapping nodes will be reset to their initial state. - void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); - - const string dumpDOT(); - - protected: - vector m_spans; - }; - - inline void Grid::clear() - { - m_spans.clear(); - } - - inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength) - { - if (inLocation >= m_spans.size()) { - size_t diff = inLocation - m_spans.size() + 1; - - for (size_t i = 0 ; i < diff ; i++) { - m_spans.push_back(Span()); - } - } + // "Freeze" the node with the unigram that represents the selected candidate + // value. After this, the node that contains the unigram will always be + // evaluated to that unigram, while all other overlapping nodes will be reset + // to their initial state (that is, if any of those nodes were "frozen" or + // fixed, they will be unfrozen.) + NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); - m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); - } + // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, + // only boost the unigram that represents the value with an overriding score. + // This has the same side effect as fixNodeSelectedCandidate, which is that + // all other overlapping nodes will be reset to their initial state. + void overrideNodeScoreForSelectedCandidate(size_t location, + const string& value, + float overridingScore); - inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey) - { - if (inLocation > m_spans.size()) { - return false; - } - - const Node *n = m_spans[inLocation].nodeOfLength(inSpanningLength); - if (!n) { - return false; - } - - return inKey == n->key(); - } + const string dumpDOT(); - inline void Grid::expandGridByOneAtLocation(size_t inLocation) - { - if (!inLocation || inLocation == m_spans.size()) { - m_spans.insert(m_spans.begin() + inLocation, Span()); - } - else { - m_spans.insert(m_spans.begin() + inLocation, Span()); - for (size_t i = 0 ; i < inLocation ; i++) { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); - } - } - } - - inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) - { - if (inLocation >= m_spans.size()) { - return; - } - - m_spans.erase(m_spans.begin() + inLocation); - for (size_t i = 0 ; i < inLocation ; i++) { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); - } - } + protected: + vector m_spans; +}; - inline size_t Grid::width() const - { - return m_spans.size(); - } - - inline vector Grid::nodesEndingAt(size_t inLocation) - { - vector result; - - if (m_spans.size() && inLocation <= m_spans.size()) { - for (size_t i = 0 ; i < inLocation ; i++) { - Span& span = m_spans[i]; - if (i + span.maximumLength() >= inLocation) { - Node *np = span.nodeOfLength(inLocation - i); - if (np) { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = inLocation - i; - - result.push_back(na); - } - } - } - } - - return result; - } +inline void Grid::clear() { m_spans.clear(); } - inline vector Grid::nodesCrossingOrEndingAt(size_t inLocation) - { - vector result; - - if (m_spans.size() && inLocation <= m_spans.size()) { - for (size_t i = 0 ; i < inLocation ; i++) { - Span& span = m_spans[i]; - - if (i + span.maximumLength() >= inLocation) { +inline void Grid::insertNode(const Node& inNode, size_t inLocation, + size_t inSpanningLength) { + if (inLocation >= m_spans.size()) { + size_t diff = inLocation - m_spans.size() + 1; - for (size_t j = 1, m = span.maximumLength(); j <= m ; j++) { - - if (i + j < inLocation) { - continue; - } - - Node *np = span.nodeOfLength(j); - if (np) { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = inLocation - i; - - result.push_back(na); - } - } - } - } - } - - return result; - } - - // For nodes found at the location, fix their currently-selected candidate using the supplied string value. - inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value) - { - vector nodes = nodesCrossingOrEndingAt(location); - NodeAnchor node; - for (auto nodeAnchor : nodes) { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == value) { - const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); - node = nodeAnchor; - break;; - } - } - } - return node; - } - - inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore) - { - vector nodes = nodesCrossingOrEndingAt(location); - for (auto nodeAnchor : nodes) { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == value) { - const_cast(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); - break; - } - } - } - } - - inline const string Grid::dumpDOT() - { - stringstream sst; - sst << "digraph {" << endl; - sst << "graph [ rankdir=LR ];" << endl; - sst << "BOS;" << endl; - - for (size_t p = 0 ; p < m_spans.size() ; p++) { - Span& span = m_spans[p]; - for (size_t ni = 0 ; ni <= span.maximumLength() ; ni++) { - Node* np = span.nodeOfLength(ni); - if (np) { - if (!p) { - sst << "BOS -> " << np->currentKeyValue().value << ";" << endl; - } - - sst << np->currentKeyValue().value << ";" << endl; - - if (p + ni < m_spans.size()) { - Span& dstSpan = m_spans[p+ni]; - for (size_t q = 0 ; q <= dstSpan.maximumLength() ; q++) { - Node *dn = dstSpan.nodeOfLength(q); - if (dn) { - sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl; - } - } - } - - if (p + ni == m_spans.size()) { - sst << np->currentKeyValue().value << " -> " << "EOS;" << endl; - } - } - } - } - - sst << "EOS;" << endl; - sst << "}"; - return sst.str(); - } + for (size_t i = 0; i < diff; i++) { + m_spans.push_back(Span()); } + } + + m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); } +inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey( + size_t inLocation, size_t inSpanningLength, const string& inKey) { + if (inLocation > m_spans.size()) { + return false; + } + + const Node* n = m_spans[inLocation].nodeOfLength(inSpanningLength); + if (!n) { + return false; + } + + return inKey == n->key(); +} + +inline void Grid::expandGridByOneAtLocation(size_t inLocation) { + if (!inLocation || inLocation == m_spans.size()) { + m_spans.insert(m_spans.begin() + inLocation, Span()); + } else { + m_spans.insert(m_spans.begin() + inLocation, Span()); + for (size_t i = 0; i < inLocation; i++) { + // zaps overlapping spans + m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); + } + } +} + +inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) { + if (inLocation >= m_spans.size()) { + return; + } + + m_spans.erase(m_spans.begin() + inLocation); + for (size_t i = 0; i < inLocation; i++) { + // zaps overlapping spans + m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); + } +} + +inline size_t Grid::width() const { return m_spans.size(); } + +inline vector Grid::nodesEndingAt(size_t inLocation) { + vector result; + + if (m_spans.size() && inLocation <= m_spans.size()) { + for (size_t i = 0; i < inLocation; i++) { + Span& span = m_spans[i]; + if (i + span.maximumLength() >= inLocation) { + Node* np = span.nodeOfLength(inLocation - i); + if (np) { + NodeAnchor na; + na.node = np; + na.location = i; + na.spanningLength = inLocation - i; + + result.push_back(na); + } + } + } + } + + return result; +} + +inline vector Grid::nodesCrossingOrEndingAt(size_t inLocation) { + vector result; + + if (m_spans.size() && inLocation <= m_spans.size()) { + for (size_t i = 0; i < inLocation; i++) { + Span& span = m_spans[i]; + + if (i + span.maximumLength() >= inLocation) { + for (size_t j = 1, m = span.maximumLength(); j <= m; j++) { + if (i + j < inLocation) { + continue; + } + + Node* np = span.nodeOfLength(j); + if (np) { + NodeAnchor na; + na.node = np; + na.location = i; + na.spanningLength = inLocation - i; + + result.push_back(na); + } + } + } + } + } + + return result; +} + +// For nodes found at the location, fix their currently-selected candidate using +// the supplied string value. +inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, + const string& value) { + vector nodes = nodesCrossingOrEndingAt(location); + NodeAnchor node; + for (auto nodeAnchor : nodes) { + auto candidates = nodeAnchor.node->candidates(); + + // Reset the candidate-fixed state of every node at the location. + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == value) { + const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); + node = nodeAnchor; + break; + ; + } + } + } + return node; +} + +inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, + const string& value, + float overridingScore) { + vector nodes = nodesCrossingOrEndingAt(location); + for (auto nodeAnchor : nodes) { + auto candidates = nodeAnchor.node->candidates(); + + // Reset the candidate-fixed state of every node at the location. + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == value) { + const_cast(nodeAnchor.node) + ->selectFloatingCandidateAtIndex(i, overridingScore); + break; + } + } + } +} + +inline const string Grid::dumpDOT() { + stringstream sst; + sst << "digraph {" << endl; + sst << "graph [ rankdir=LR ];" << endl; + sst << "BOS;" << endl; + + for (size_t p = 0; p < m_spans.size(); p++) { + Span& span = m_spans[p]; + for (size_t ni = 0; ni <= span.maximumLength(); ni++) { + Node* np = span.nodeOfLength(ni); + if (np) { + if (!p) { + sst << "BOS -> " << np->currentKeyValue().value << ";" << endl; + } + + sst << np->currentKeyValue().value << ";" << endl; + + if (p + ni < m_spans.size()) { + Span& dstSpan = m_spans[p + ni]; + for (size_t q = 0; q <= dstSpan.maximumLength(); q++) { + Node* dn = dstSpan.nodeOfLength(q); + if (dn) { + sst << np->currentKeyValue().value << " -> " + << dn->currentKeyValue().value << ";" << endl; + } + } + } + + if (p + ni == m_spans.size()) { + sst << np->currentKeyValue().value << " -> " + << "EOS;" << endl; + } + } + } + } + + sst << "EOS;" << endl; + sst << "}"; + return sst.str(); +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/KeyValuePair.h b/Source/Engine/Gramambular/KeyValuePair.h index 0abbb891..ac4395de 100644 --- a/Source/Engine/Gramambular/KeyValuePair.h +++ b/Source/Engine/Gramambular/KeyValuePair.h @@ -32,40 +32,36 @@ #include namespace Formosa { - namespace Gramambular { - using namespace std; - - class KeyValuePair { - public: - string key; - string value; +namespace Gramambular { +using namespace std; - bool operator==(const KeyValuePair& inAnother) const; - bool operator<(const KeyValuePair& inAnother) const; - }; +class KeyValuePair { + public: + string key; + string value; - inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) - { - inStream << "(" << inPair.key << "," << inPair.value << ")"; - return inStream; - } - - inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const - { - return key == inAnother.key && value == inAnother.value; - } + bool operator==(const KeyValuePair& inAnother) const; + bool operator<(const KeyValuePair& inAnother) const; +}; - inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const - { - if (key < inAnother.key) { - return true; - } - else if (key == inAnother.key) { - return value < inAnother.value; - } - return false; - } - } +inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) { + inStream << "(" << inPair.key << "," << inPair.value << ")"; + return inStream; } +inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const { + return key == inAnother.key && value == inAnother.value; +} + +inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const { + if (key < inAnother.key) { + return true; + } else if (key == inAnother.key) { + return value < inAnother.value; + } + return false; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/LanguageModel.h b/Source/Engine/Gramambular/LanguageModel.h index 65331b37..8c41a529 100644 --- a/Source/Engine/Gramambular/LanguageModel.h +++ b/Source/Engine/Gramambular/LanguageModel.h @@ -29,24 +29,25 @@ #define LanguageModel_h #include + #include "Bigram.h" #include "Unigram.h" namespace Formosa { - namespace Gramambular { - - using namespace std; - - class LanguageModel { - public: - virtual ~LanguageModel() {} +namespace Gramambular { - virtual const vector bigramsForKeys(const string &preceedingKey, const string& key) = 0; - virtual const vector unigramsForKey(const string &key) = 0; - virtual bool hasUnigramsForKey(const string& key) = 0; - }; - } -} +using namespace std; +class LanguageModel { + public: + virtual ~LanguageModel() {} + + virtual const vector bigramsForKeys(const string& preceedingKey, + const string& key) = 0; + virtual const vector unigramsForKey(const string& key) = 0; + virtual bool hasUnigramsForKey(const string& key) = 0; +}; +} // namespace Gramambular +} // namespace Formosa #endif diff --git a/Source/Engine/Gramambular/Node.h b/Source/Engine/Gramambular/Node.h index a877d27c..9c15ce53 100644 --- a/Source/Engine/Gramambular/Node.h +++ b/Source/Engine/Gramambular/Node.h @@ -30,202 +30,191 @@ #include #include + #include "LanguageModel.h" namespace Formosa { - namespace Gramambular { - using namespace std; +namespace Gramambular { +using namespace std; - class Node { - public: - Node(); - Node(const string& inKey, const vector& inUnigrams, const vector& inBigrams); - - void primeNodeWithPreceedingKeyValues(const vector& inKeyValues); - - bool isCandidateFixed() const; - const vector& candidates() const; - void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); - void resetCandidate(); - void selectFloatingCandidateAtIndex(size_t index, double score); - - const string& key() const; - double score() const; - double scoreForCandidate(string &candidate) const; - const KeyValuePair currentKeyValue() const; - double highestUnigramScore() const; - - protected: - const LanguageModel* m_LM; - - string m_key; - double m_score; - - vector m_unigrams; - vector m_candidates; - map m_valueUnigramIndexMap; - map > m_preceedingGramBigramMap; - - bool m_candidateFixed; - size_t m_selectedUnigramIndex; - - friend ostream& operator<<(ostream& inStream, const Node& inNode); - }; +class Node { + public: + Node(); + Node(const string& inKey, const vector& inUnigrams, + const vector& inBigrams); - inline ostream& operator<<(ostream& inStream, const Node& inNode) - { - inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") - << ",selected:" << inNode.m_selectedUnigramIndex - << "," << inNode.m_unigrams << ")"; - return inStream; - } + void primeNodeWithPreceedingKeyValues( + const vector& inKeyValues); - inline Node::Node() - : m_candidateFixed(false) - , m_selectedUnigramIndex(0) - , m_score(0.0) - { - } + bool isCandidateFixed() const; + const vector& candidates() const; + void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); + void resetCandidate(); + void selectFloatingCandidateAtIndex(size_t index, double score); - inline Node::Node(const string& inKey, const vector& inUnigrams, const vector& inBigrams) - : m_key(inKey) - , m_unigrams(inUnigrams) - , m_candidateFixed(false) - , m_selectedUnigramIndex(0) - , m_score(0.0) - { - stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); - - if (m_unigrams.size()) { - m_score = m_unigrams[0].score; - } - - size_t i = 0; - for (vector::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) { - m_valueUnigramIndexMap[(*ui).keyValue.value] = i; - i++; - - m_candidates.push_back((*ui).keyValue); - } - - for (vector::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) { - m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); - } - } - - inline void Node::primeNodeWithPreceedingKeyValues(const vector& inKeyValues) - { - size_t newIndex = m_selectedUnigramIndex; - double max = m_score; + const string& key() const; + double score() const; + double scoreForCandidate(string& candidate) const; + const KeyValuePair currentKeyValue() const; + double highestUnigramScore() const; - if (!isCandidateFixed()) { - for (vector::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) { - map >::const_iterator f = m_preceedingGramBigramMap.find(*kvi); - if (f != m_preceedingGramBigramMap.end()) { - const vector& bigrams = (*f).second; - - for (vector::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) { - const Bigram& bigram = *bi; - if (bigram.score > max) { - map::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value); - if (uf != m_valueUnigramIndexMap.end()) { - newIndex = (*uf).second; - max = bigram.score; - } - } - } - } - } - } + protected: + const LanguageModel* m_LM; - if (m_score != max) { - m_score = max; - } - - if (newIndex != m_selectedUnigramIndex) { - m_selectedUnigramIndex = newIndex; - } - } - - inline bool Node::isCandidateFixed() const - { - return m_candidateFixed; - } - - inline const vector& Node::candidates() const - { - return m_candidates; - } + string m_key; + double m_score; - inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) - { - if (inIndex >= m_unigrams.size()) { - m_selectedUnigramIndex = 0; - } - else { - m_selectedUnigramIndex = inIndex; - } - - m_candidateFixed = inFix; - m_score = 99; - } + vector m_unigrams; + vector m_candidates; + map m_valueUnigramIndexMap; + map > m_preceedingGramBigramMap; - inline void Node::resetCandidate() - { - m_selectedUnigramIndex = 0; - m_candidateFixed = 0; - if (m_unigrams.size()) { - m_score = m_unigrams[0].score; - } - } + bool m_candidateFixed; + size_t m_selectedUnigramIndex; - inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { - if (index >= m_unigrams.size()) { - m_selectedUnigramIndex = 0; - } else { - m_selectedUnigramIndex = index; - } - m_candidateFixed = false; - m_score = score; - } - - inline const string& Node::key() const - { - return m_key; - } - - inline double Node::score() const - { - return m_score; - } + friend ostream& operator<<(ostream& inStream, const Node& inNode); +}; - inline double Node::scoreForCandidate(string &candidate) const - { - for (auto unigram : m_unigrams) { - if (unigram.keyValue.value == candidate) { - return unigram.score; - } - } - return 0.0; - } - - inline double Node::highestUnigramScore() const { - if (m_unigrams.empty()) { - return 0.0; - } - return m_unigrams[0].score; - } - - inline const KeyValuePair Node::currentKeyValue() const - { - if(m_selectedUnigramIndex >= m_unigrams.size()) { - return KeyValuePair(); - } - else { - return m_candidates[m_selectedUnigramIndex]; - } - } - } +inline ostream& operator<<(ostream& inStream, const Node& inNode) { + inStream << "(node,key:" << inNode.m_key + << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") + << ",selected:" << inNode.m_selectedUnigramIndex << "," + << inNode.m_unigrams << ")"; + return inStream; } +inline Node::Node() + : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {} + +inline Node::Node(const string& inKey, const vector& inUnigrams, + const vector& inBigrams) + : m_key(inKey), + m_unigrams(inUnigrams), + m_candidateFixed(false), + m_selectedUnigramIndex(0), + m_score(0.0) { + stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); + + if (m_unigrams.size()) { + m_score = m_unigrams[0].score; + } + + size_t i = 0; + for (vector::const_iterator ui = m_unigrams.begin(); + ui != m_unigrams.end(); ++ui) { + m_valueUnigramIndexMap[(*ui).keyValue.value] = i; + i++; + + m_candidates.push_back((*ui).keyValue); + } + + for (vector::const_iterator bi = inBigrams.begin(); + bi != inBigrams.end(); ++bi) { + m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); + } +} + +inline void Node::primeNodeWithPreceedingKeyValues( + const vector& inKeyValues) { + size_t newIndex = m_selectedUnigramIndex; + double max = m_score; + + if (!isCandidateFixed()) { + for (vector::const_iterator kvi = inKeyValues.begin(); + kvi != inKeyValues.end(); ++kvi) { + map >::const_iterator f = + m_preceedingGramBigramMap.find(*kvi); + if (f != m_preceedingGramBigramMap.end()) { + const vector& bigrams = (*f).second; + + for (vector::const_iterator bi = bigrams.begin(); + bi != bigrams.end(); ++bi) { + const Bigram& bigram = *bi; + if (bigram.score > max) { + map::const_iterator uf = + m_valueUnigramIndexMap.find((*bi).keyValue.value); + if (uf != m_valueUnigramIndexMap.end()) { + newIndex = (*uf).second; + max = bigram.score; + } + } + } + } + } + } + + if (m_score != max) { + m_score = max; + } + + if (newIndex != m_selectedUnigramIndex) { + m_selectedUnigramIndex = newIndex; + } +} + +inline bool Node::isCandidateFixed() const { return m_candidateFixed; } + +inline const vector& Node::candidates() const { + return m_candidates; +} + +inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) { + if (inIndex >= m_unigrams.size()) { + m_selectedUnigramIndex = 0; + } else { + m_selectedUnigramIndex = inIndex; + } + + m_candidateFixed = inFix; + m_score = 99; +} + +inline void Node::resetCandidate() { + m_selectedUnigramIndex = 0; + m_candidateFixed = 0; + if (m_unigrams.size()) { + m_score = m_unigrams[0].score; + } +} + +inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { + if (index >= m_unigrams.size()) { + m_selectedUnigramIndex = 0; + } else { + m_selectedUnigramIndex = index; + } + m_candidateFixed = false; + m_score = score; +} + +inline const string& Node::key() const { return m_key; } + +inline double Node::score() const { return m_score; } + +inline double Node::scoreForCandidate(string& candidate) const { + for (auto unigram : m_unigrams) { + if (unigram.keyValue.value == candidate) { + return unigram.score; + } + } + return 0.0; +} + +inline double Node::highestUnigramScore() const { + if (m_unigrams.empty()) { + return 0.0; + } + return m_unigrams[0].score; +} + +inline const KeyValuePair Node::currentKeyValue() const { + if (m_selectedUnigramIndex >= m_unigrams.size()) { + return KeyValuePair(); + } else { + return m_candidates[m_selectedUnigramIndex]; + } +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/NodeAnchor.h b/Source/Engine/Gramambular/NodeAnchor.h index 62e5e12e..3fa7c7a3 100644 --- a/Source/Engine/Gramambular/NodeAnchor.h +++ b/Source/Engine/Gramambular/NodeAnchor.h @@ -31,49 +31,44 @@ #include "Node.h" namespace Formosa { - namespace Gramambular { - class NodeAnchor { - public: - NodeAnchor(); - const Node *node; - size_t location; - size_t spanningLength; - double accumulatedScore; - }; - - inline NodeAnchor::NodeAnchor() - : node(0) - , location(0) - , spanningLength(0) - , accumulatedScore(0.0) - { - } +namespace Gramambular { +class NodeAnchor { + public: + NodeAnchor(); + const Node* node; + size_t location; + size_t spanningLength; + double accumulatedScore; +}; - inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) - { - inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),"; - if (inAnchor.node) { - inStream << *(inAnchor.node); - } - else { - inStream << "null"; - } - inStream << "}"; - return inStream; - } - - inline ostream& operator<<(ostream& inStream, const vector& inAnchor) - { - for (vector::const_iterator i = inAnchor.begin() ; i != inAnchor.end() ; ++i) { - inStream << *i; - if (i + 1 != inAnchor.end()) { - inStream << "<-"; - } - } - - return inStream; - } - } +inline NodeAnchor::NodeAnchor() + : node(0), location(0), spanningLength(0), accumulatedScore(0.0) {} + +inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) { + inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength + << "),"; + if (inAnchor.node) { + inStream << *(inAnchor.node); + } else { + inStream << "null"; + } + inStream << "}"; + return inStream; } +inline ostream& operator<<(ostream& inStream, + const vector& inAnchor) { + for (vector::const_iterator i = inAnchor.begin(); + i != inAnchor.end(); ++i) { + inStream << *i; + if (i + 1 != inAnchor.end()) { + inStream << "<-"; + } + } + + return inStream; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/Span.h b/Source/Engine/Gramambular/Span.h index 87cb6563..319ef738 100644 --- a/Source/Engine/Gramambular/Span.h +++ b/Source/Engine/Gramambular/Span.h @@ -31,82 +31,75 @@ #include #include #include + #include "Node.h" namespace Formosa { - namespace Gramambular { - class Span { - public: - Span(); +namespace Gramambular { +class Span { + public: + Span(); - void clear(); - void insertNodeOfLength(const Node& inNode, size_t inLength); - void removeNodeOfLengthGreaterThan(size_t inLength); - - Node* nodeOfLength(size_t inLength); - size_t maximumLength() const; + void clear(); + void insertNodeOfLength(const Node& inNode, size_t inLength); + void removeNodeOfLengthGreaterThan(size_t inLength); - protected: - map m_lengthNodeMap; - size_t m_maximumLength; - }; - - inline Span::Span() - : m_maximumLength(0) - { - } - - inline void Span::clear() - { - m_lengthNodeMap.clear(); - m_maximumLength = 0; - } - - inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) - { - m_lengthNodeMap[inLength] = inNode; - if (inLength > m_maximumLength) { - m_maximumLength = inLength; - } - } - - inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) - { - if (inLength > m_maximumLength) { - return; - } - - size_t max = 0; - set removeSet; - for (map::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) { - if ((*i).first > inLength) { - removeSet.insert((*i).first); - } - else { - if ((*i).first > max) { - max = (*i).first; - } - } - } - - for (set::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) { - m_lengthNodeMap.erase(*i); - } + Node* nodeOfLength(size_t inLength); + size_t maximumLength() const; - m_maximumLength = max; - } - - inline Node* Span::nodeOfLength(size_t inLength) - { - map::iterator f = m_lengthNodeMap.find(inLength); - return f == m_lengthNodeMap.end() ? 0 : &(*f).second; - } - - inline size_t Span::maximumLength() const - { - return m_maximumLength; - } - } + protected: + map m_lengthNodeMap; + size_t m_maximumLength; +}; + +inline Span::Span() : m_maximumLength(0) {} + +inline void Span::clear() { + m_lengthNodeMap.clear(); + m_maximumLength = 0; } +inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) { + m_lengthNodeMap[inLength] = inNode; + if (inLength > m_maximumLength) { + m_maximumLength = inLength; + } +} + +inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) { + if (inLength > m_maximumLength) { + return; + } + + size_t max = 0; + set removeSet; + for (map::iterator i = m_lengthNodeMap.begin(), + e = m_lengthNodeMap.end(); + i != e; ++i) { + if ((*i).first > inLength) { + removeSet.insert((*i).first); + } else { + if ((*i).first > max) { + max = (*i).first; + } + } + } + + for (set::iterator i = removeSet.begin(), e = removeSet.end(); i != e; + ++i) { + m_lengthNodeMap.erase(*i); + } + + m_maximumLength = max; +} + +inline Node* Span::nodeOfLength(size_t inLength) { + map::iterator f = m_lengthNodeMap.find(inLength); + return f == m_lengthNodeMap.end() ? 0 : &(*f).second; +} + +inline size_t Span::maximumLength() const { return m_maximumLength; } +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/Unigram.h b/Source/Engine/Gramambular/Unigram.h index 5af28502..4aa0833e 100644 --- a/Source/Engine/Gramambular/Unigram.h +++ b/Source/Engine/Gramambular/Unigram.h @@ -29,76 +29,69 @@ #define Unigram_h #include + #include "KeyValuePair.h" namespace Formosa { - namespace Gramambular { - class Unigram { - public: - Unigram(); +namespace Gramambular { +class Unigram { + public: + Unigram(); - KeyValuePair keyValue; - double score; - - bool operator==(const Unigram& inAnother) const; - bool operator<(const Unigram& inAnother) const; - - static bool ScoreCompare(const Unigram& a, const Unigram& b); - }; + KeyValuePair keyValue; + double score; - inline ostream& operator<<(ostream& inStream, const Unigram& inGram) - { - streamsize p = inStream.precision(); - inStream.precision(6); - inStream << "(" << inGram.keyValue << "," << inGram.score << ")"; - inStream.precision(p); - return inStream; - } - - inline ostream& operator<<(ostream& inStream, const vector& inGrams) - { - inStream << "[" << inGrams.size() << "]=>{"; - - size_t index = 0; - - for (vector::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { - inStream << index << "=>"; - inStream << *gi; - if (gi + 1 != inGrams.end()) { - inStream << ","; - } - } - - inStream << "}"; - return inStream; - } - - inline Unigram::Unigram() - : score(0.0) - { - } - - inline bool Unigram::operator==(const Unigram& inAnother) const - { - return keyValue == inAnother.keyValue && score == inAnother.score; - } - - inline bool Unigram::operator<(const Unigram& inAnother) const - { - if (keyValue < inAnother.keyValue) { - return true; - } - else if (keyValue == inAnother.keyValue) { - return score < inAnother.score; - } - return false; - } + bool operator==(const Unigram& inAnother) const; + bool operator<(const Unigram& inAnother) const; - inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) - { - return a.score > b.score; - } - } + static bool ScoreCompare(const Unigram& a, const Unigram& b); +}; + +inline ostream& operator<<(ostream& inStream, const Unigram& inGram) { + streamsize p = inStream.precision(); + inStream.precision(6); + inStream << "(" << inGram.keyValue << "," << inGram.score << ")"; + inStream.precision(p); + return inStream; } +inline ostream& operator<<(ostream& inStream, const vector& inGrams) { + inStream << "[" << inGrams.size() << "]=>{"; + + size_t index = 0; + + for (vector::const_iterator gi = inGrams.begin(); + gi != inGrams.end(); ++gi, ++index) { + inStream << index << "=>"; + inStream << *gi; + if (gi + 1 != inGrams.end()) { + inStream << ","; + } + } + + inStream << "}"; + return inStream; +} + +inline Unigram::Unigram() : score(0.0) {} + +inline bool Unigram::operator==(const Unigram& inAnother) const { + return keyValue == inAnother.keyValue && score == inAnother.score; +} + +inline bool Unigram::operator<(const Unigram& inAnother) const { + if (keyValue < inAnother.keyValue) { + return true; + } else if (keyValue == inAnother.keyValue) { + return score < inAnother.score; + } + return false; +} + +inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) { + return a.score > b.score; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/Walker.h b/Source/Engine/Gramambular/Walker.h index c40ffbf1..1f57fa0d 100644 --- a/Source/Engine/Gramambular/Walker.h +++ b/Source/Engine/Gramambular/Walker.h @@ -29,63 +29,65 @@ #define Walker_h #include + #include "Grid.h" namespace Formosa { - namespace Gramambular { - using namespace std; +namespace Gramambular { +using namespace std; - class Walker { - public: - Walker(Grid* inGrid); - const vector reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0); - - protected: - Grid* m_grid; - }; - - inline Walker::Walker(Grid* inGrid) - : m_grid(inGrid) - { - } - - inline const vector Walker::reverseWalk(size_t inLocation, double inAccumulatedScore) - { - if (!inLocation || inLocation > m_grid->width()) { - return vector(); - } - - vector > paths; +class Walker { + public: + Walker(Grid* inGrid); + const vector reverseWalk(size_t inLocation, + double inAccumulatedScore = 0.0); - vector nodes = m_grid->nodesEndingAt(inLocation); - - for (vector::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) { - if (!(*ni).node) { - continue; - } + protected: + Grid* m_grid; +}; - (*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); +inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {} - vector path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); - path.insert(path.begin(), *ni); - - paths.push_back(path); - } - - if (!paths.size()) { - return vector(); - } - - vector* result = &*(paths.begin()); - for (vector >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) { - if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { - result = &*pi; - } - } - - return *result; - } +inline const vector Walker::reverseWalk(size_t inLocation, + double inAccumulatedScore) { + if (!inLocation || inLocation > m_grid->width()) { + return vector(); + } + + vector > paths; + + vector nodes = m_grid->nodesEndingAt(inLocation); + + for (vector::iterator ni = nodes.begin(); ni != nodes.end(); + ++ni) { + if (!(*ni).node) { + continue; } + + (*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); + + vector path = + reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); + path.insert(path.begin(), *ni); + + paths.push_back(path); + } + + if (!paths.size()) { + return vector(); + } + + vector* result = &*(paths.begin()); + for (vector >::iterator pi = paths.begin(); + pi != paths.end(); ++pi) { + if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { + result = &*pi; + } + } + + return *result; } +} // namespace Gramambular +} // namespace Formosa #endif From 13609f41f526de9c6bc5dab163c9bf08d5e2f1c0 Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Sat, 19 Feb 2022 09:35:48 -0800 Subject: [PATCH 4/8] Remove all the "using namespace" usage --- Source/Engine/Gramambular/Bigram.h | 11 ++-- .../Engine/Gramambular/BlockReadingBuilder.h | 45 ++++++------- Source/Engine/Gramambular/GramambularTest.cpp | 64 +++++++++++-------- Source/Engine/Gramambular/Grid.h | 57 +++++++++-------- Source/Engine/Gramambular/KeyValuePair.h | 8 +-- Source/Engine/Gramambular/LanguageModel.h | 10 ++- Source/Engine/Gramambular/Node.h | 54 ++++++++-------- Source/Engine/Gramambular/NodeAnchor.h | 9 +-- Source/Engine/Gramambular/Span.h | 14 ++-- Source/Engine/Gramambular/Unigram.h | 9 +-- Source/Engine/Gramambular/Walker.h | 25 ++++---- 11 files changed, 158 insertions(+), 148 deletions(-) diff --git a/Source/Engine/Gramambular/Bigram.h b/Source/Engine/Gramambular/Bigram.h index 750db4c1..caa46de2 100644 --- a/Source/Engine/Gramambular/Bigram.h +++ b/Source/Engine/Gramambular/Bigram.h @@ -46,8 +46,8 @@ class Bigram { bool operator<(const Bigram& inAnother) const; }; -inline ostream& operator<<(ostream& inStream, const Bigram& inGram) { - streamsize p = inStream.precision(); +inline std::ostream& operator<<(std::ostream& inStream, const Bigram& inGram) { + std::streamsize p = inStream.precision(); inStream.precision(6); inStream << "(" << inGram.keyValue << "|" << inGram.preceedingKeyValue << "," << inGram.score << ")"; @@ -55,13 +55,14 @@ inline ostream& operator<<(ostream& inStream, const Bigram& inGram) { return inStream; } -inline ostream& operator<<(ostream& inStream, const vector& inGrams) { +inline std::ostream& operator<<(std::ostream& inStream, + const std::vector& inGrams) { inStream << "[" << inGrams.size() << "]=>{"; size_t index = 0; - for (vector::const_iterator gi = inGrams.begin(); gi != inGrams.end(); - ++gi, ++index) { + for (std::vector::const_iterator gi = inGrams.begin(); + gi != inGrams.end(); ++gi, ++index) { inStream << index << "=>"; inStream << *gi; if (gi + 1 != inGrams.end()) { diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index 219e6a5a..56c335ca 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -35,7 +35,6 @@ namespace Formosa { namespace Gramambular { -using namespace std; class BlockReadingBuilder { public: @@ -45,35 +44,35 @@ class BlockReadingBuilder { size_t length() const; size_t cursorIndex() const; void setCursorIndex(size_t inNewIndex); - void insertReadingAtCursor(const string& inReading); + void insertReadingAtCursor(const std::string& inReading); bool deleteReadingBeforeCursor(); // backspace bool deleteReadingAfterCursor(); // delete bool removeHeadReadings(size_t count); - void setJoinSeparator(const string& separator); - const string joinSeparator() const; + void setJoinSeparator(const std::string& separator); + const std::string joinSeparator() const; - vector readings() const; + std::vector readings() const; Grid& grid(); protected: void build(); - static const string Join(vector::const_iterator begin, - vector::const_iterator end, - const string& separator); + static const std::string Join(std::vector::const_iterator begin, + std::vector::const_iterator end, + const std::string& separator); //最多使用六個字組成一個詞 static const size_t MaximumBuildSpanLength = 6; size_t m_cursorIndex; - vector m_readings; + std::vector m_readings; Grid m_grid; LanguageModel* m_LM; - string m_joinSeparator; + std::string m_joinSeparator; }; inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* inLM) @@ -95,7 +94,7 @@ inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) { } inline void BlockReadingBuilder::insertReadingAtCursor( - const string& inReading) { + const std::string& inReading) { m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); m_grid.expandGridByOneAtLocation(m_cursorIndex); @@ -103,7 +102,7 @@ inline void BlockReadingBuilder::insertReadingAtCursor( m_cursorIndex++; } -inline vector BlockReadingBuilder::readings() const { +inline std::vector BlockReadingBuilder::readings() const { return m_readings; } @@ -149,11 +148,12 @@ inline bool BlockReadingBuilder::removeHeadReadings(size_t count) { return true; } -inline void BlockReadingBuilder::setJoinSeparator(const string& separator) { +inline void BlockReadingBuilder::setJoinSeparator( + const std::string& separator) { m_joinSeparator = separator; } -inline const string BlockReadingBuilder::joinSeparator() const { +inline const std::string BlockReadingBuilder::joinSeparator() const { return m_joinSeparator; } @@ -179,14 +179,14 @@ inline void BlockReadingBuilder::build() { for (size_t p = begin; p < end; p++) { for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) { - string combinedReading = Join( + std::string combinedReading = Join( m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { - vector unigrams = m_LM->unigramsForKey(combinedReading); + std::vector unigrams = m_LM->unigramsForKey(combinedReading); if (unigrams.size() > 0) { - Node n(combinedReading, unigrams, vector()); + Node n(combinedReading, unigrams, std::vector()); m_grid.insertNode(n, p, q); } } @@ -194,11 +194,12 @@ inline void BlockReadingBuilder::build() { } } -inline const string BlockReadingBuilder::Join( - vector::const_iterator begin, vector::const_iterator end, - const string& separator) { - string result; - for (vector::const_iterator iter = begin; iter != end;) { +inline const std::string BlockReadingBuilder::Join( + std::vector::const_iterator begin, + std::vector::const_iterator end, + const std::string& separator) { + std::string result; + for (std::vector::const_iterator iter = begin; iter != end;) { result += *iter; ++iter; if (iter != end) { diff --git a/Source/Engine/Gramambular/GramambularTest.cpp b/Source/Engine/Gramambular/GramambularTest.cpp index 8f0e008d..ca6b5d32 100644 --- a/Source/Engine/Gramambular/GramambularTest.cpp +++ b/Source/Engine/Gramambular/GramambularTest.cpp @@ -31,6 +31,9 @@ #include "Gramambular.h" #include "gtest/gtest.h" +namespace Formosa { +namespace Gramambular { + const char* SampleData = R"( # # The sample is from libtabe (http://sourceforge.net/projects/libtabe/) @@ -120,25 +123,22 @@ const char* SampleData = R"( ㄍㄠㄎㄜㄐㄧˋ 高科技 -9.842421 )"; -using namespace std; -using namespace Formosa::Gramambular; - class SimpleLM : public LanguageModel { public: SimpleLM(const char* input, bool swapKeyValue = false) { - stringstream sstream(input); + std::stringstream sstream(input); while (sstream.good()) { - string line; + std::string line; getline(sstream, line); if (!line.size() || (line.size() && line[0] == '#')) { continue; } - stringstream linestream(line); - string col0; - string col1; - string col2; + std::stringstream linestream(line); + std::string col0; + std::string col1; + std::string col2; linestream >> col0; linestream >> col1; linestream >> col2; @@ -159,23 +159,25 @@ class SimpleLM : public LanguageModel { } } - const vector bigramsForKeys(const string& preceedingKey, - const string& key) override { - return vector(); + const std::vector bigramsForKeys(const std::string& preceedingKey, + const std::string& key) override { + return std::vector(); } - const vector unigramsForKey(const string& key) override { - map >::const_iterator f = m_db.find(key); - return f == m_db.end() ? vector() : (*f).second; + const std::vector unigramsForKey(const std::string& key) override { + std::map >::const_iterator f = + m_db.find(key); + return f == m_db.end() ? std::vector() : (*f).second; } - bool hasUnigramsForKey(const string& key) override { - map >::const_iterator f = m_db.find(key); + bool hasUnigramsForKey(const std::string& key) override { + std::map >::const_iterator f = + m_db.find(key); return f != m_db.end(); } protected: - map > m_db; + std::map > m_db; }; TEST(GramambularTest, InputTest) { @@ -200,15 +202,17 @@ TEST(GramambularTest, InputTest) { Walker walker(&builder.grid()); - vector walked = walker.reverseWalk(builder.grid().width(), 0.0); + std::vector walked = + walker.reverseWalk(builder.grid().width(), 0.0); reverse(walked.begin(), walked.end()); - vector composed; - for (vector::iterator wi = walked.begin(); wi != walked.end(); - ++wi) { + std::vector composed; + for (std::vector::iterator wi = walked.begin(); + wi != walked.end(); ++wi) { composed.push_back((*wi).node->currentKeyValue().value); } - ASSERT_EQ(composed, (vector{"高科技", "公司", "的", "年中", "獎金"})); + ASSERT_EQ(composed, + (std::vector{"高科技", "公司", "的", "年中", "獎金"})); } TEST(GramambularTest, WordSegmentationTest) { @@ -226,14 +230,18 @@ TEST(GramambularTest, WordSegmentationTest) { builder2.insertReadingAtCursor("金"); Walker walker2(&builder2.grid()); - vector walked = walker2.reverseWalk(builder2.grid().width(), 0.0); + std::vector walked = + walker2.reverseWalk(builder2.grid().width(), 0.0); reverse(walked.begin(), walked.end()); - vector segmented; - for (vector::iterator wi = walked.begin(); wi != walked.end(); - ++wi) { + std::vector segmented; + for (std::vector::iterator wi = walked.begin(); + wi != walked.end(); ++wi) { segmented.push_back((*wi).node->currentKeyValue().key); } ASSERT_EQ(segmented, - (vector{"高科技", "公司", "的", "年終", "獎金"})); + (std::vector{"高科技", "公司", "的", "年終", "獎金"})); } + +} // namespace Gramambular +} // namespace Formosa diff --git a/Source/Engine/Gramambular/Grid.h b/Source/Engine/Gramambular/Grid.h index 75c9c52c..80e0dada 100644 --- a/Source/Engine/Gramambular/Grid.h +++ b/Source/Engine/Gramambular/Grid.h @@ -43,34 +43,35 @@ class Grid { size_t inSpanningLength); bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, - const string& inKey); + const std::string& inKey); void expandGridByOneAtLocation(size_t inLocation); void shrinkGridByOneAtLocation(size_t inLocation); size_t width() const; - vector nodesEndingAt(size_t inLocation); - vector nodesCrossingOrEndingAt(size_t inLocation); + std::vector nodesEndingAt(size_t inLocation); + std::vector nodesCrossingOrEndingAt(size_t inLocation); // "Freeze" the node with the unigram that represents the selected candidate // value. After this, the node that contains the unigram will always be // evaluated to that unigram, while all other overlapping nodes will be reset // to their initial state (that is, if any of those nodes were "frozen" or // fixed, they will be unfrozen.) - NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); + NodeAnchor fixNodeSelectedCandidate(size_t location, + const std::string& value); // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, // only boost the unigram that represents the value with an overriding score. // This has the same side effect as fixNodeSelectedCandidate, which is that // all other overlapping nodes will be reset to their initial state. void overrideNodeScoreForSelectedCandidate(size_t location, - const string& value, + const std::string& value, float overridingScore); - const string dumpDOT(); + const std::string dumpDOT(); protected: - vector m_spans; + std::vector m_spans; }; inline void Grid::clear() { m_spans.clear(); } @@ -89,7 +90,7 @@ inline void Grid::insertNode(const Node& inNode, size_t inLocation, } inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey( - size_t inLocation, size_t inSpanningLength, const string& inKey) { + size_t inLocation, size_t inSpanningLength, const std::string& inKey) { if (inLocation > m_spans.size()) { return false; } @@ -128,8 +129,8 @@ inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) { inline size_t Grid::width() const { return m_spans.size(); } -inline vector Grid::nodesEndingAt(size_t inLocation) { - vector result; +inline std::vector Grid::nodesEndingAt(size_t inLocation) { + std::vector result; if (m_spans.size() && inLocation <= m_spans.size()) { for (size_t i = 0; i < inLocation; i++) { @@ -151,8 +152,9 @@ inline vector Grid::nodesEndingAt(size_t inLocation) { return result; } -inline vector Grid::nodesCrossingOrEndingAt(size_t inLocation) { - vector result; +inline std::vector Grid::nodesCrossingOrEndingAt( + size_t inLocation) { + std::vector result; if (m_spans.size() && inLocation <= m_spans.size()) { for (size_t i = 0; i < inLocation; i++) { @@ -184,8 +186,8 @@ inline vector Grid::nodesCrossingOrEndingAt(size_t inLocation) { // For nodes found at the location, fix their currently-selected candidate using // the supplied string value. inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, - const string& value) { - vector nodes = nodesCrossingOrEndingAt(location); + const std::string& value) { + std::vector nodes = nodesCrossingOrEndingAt(location); NodeAnchor node; for (auto nodeAnchor : nodes) { auto candidates = nodeAnchor.node->candidates(); @@ -205,10 +207,9 @@ inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, return node; } -inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, - const string& value, - float overridingScore) { - vector nodes = nodesCrossingOrEndingAt(location); +inline void Grid::overrideNodeScoreForSelectedCandidate( + size_t location, const std::string& value, float overridingScore) { + std::vector nodes = nodesCrossingOrEndingAt(location); for (auto nodeAnchor : nodes) { auto candidates = nodeAnchor.node->candidates(); @@ -225,11 +226,11 @@ inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, } } -inline const string Grid::dumpDOT() { - stringstream sst; - sst << "digraph {" << endl; - sst << "graph [ rankdir=LR ];" << endl; - sst << "BOS;" << endl; +inline const std::string Grid::dumpDOT() { + std::stringstream sst; + sst << "digraph {" << std::endl; + sst << "graph [ rankdir=LR ];" << std::endl; + sst << "BOS;" << std::endl; for (size_t p = 0; p < m_spans.size(); p++) { Span& span = m_spans[p]; @@ -237,10 +238,10 @@ inline const string Grid::dumpDOT() { Node* np = span.nodeOfLength(ni); if (np) { if (!p) { - sst << "BOS -> " << np->currentKeyValue().value << ";" << endl; + sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl; } - sst << np->currentKeyValue().value << ";" << endl; + sst << np->currentKeyValue().value << ";" << std::endl; if (p + ni < m_spans.size()) { Span& dstSpan = m_spans[p + ni]; @@ -248,20 +249,20 @@ inline const string Grid::dumpDOT() { Node* dn = dstSpan.nodeOfLength(q); if (dn) { sst << np->currentKeyValue().value << " -> " - << dn->currentKeyValue().value << ";" << endl; + << dn->currentKeyValue().value << ";" << std::endl; } } } if (p + ni == m_spans.size()) { sst << np->currentKeyValue().value << " -> " - << "EOS;" << endl; + << "EOS;" << std::endl; } } } } - sst << "EOS;" << endl; + sst << "EOS;" << std::endl; sst << "}"; return sst.str(); } diff --git a/Source/Engine/Gramambular/KeyValuePair.h b/Source/Engine/Gramambular/KeyValuePair.h index ac4395de..5c4f6b61 100644 --- a/Source/Engine/Gramambular/KeyValuePair.h +++ b/Source/Engine/Gramambular/KeyValuePair.h @@ -33,18 +33,18 @@ namespace Formosa { namespace Gramambular { -using namespace std; class KeyValuePair { public: - string key; - string value; + std::string key; + std::string value; bool operator==(const KeyValuePair& inAnother) const; bool operator<(const KeyValuePair& inAnother) const; }; -inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) { +inline std::ostream& operator<<(std::ostream& inStream, + const KeyValuePair& inPair) { inStream << "(" << inPair.key << "," << inPair.value << ")"; return inStream; } diff --git a/Source/Engine/Gramambular/LanguageModel.h b/Source/Engine/Gramambular/LanguageModel.h index 8c41a529..79ae6ee2 100644 --- a/Source/Engine/Gramambular/LanguageModel.h +++ b/Source/Engine/Gramambular/LanguageModel.h @@ -36,16 +36,14 @@ namespace Formosa { namespace Gramambular { -using namespace std; - class LanguageModel { public: virtual ~LanguageModel() {} - virtual const vector bigramsForKeys(const string& preceedingKey, - const string& key) = 0; - virtual const vector unigramsForKey(const string& key) = 0; - virtual bool hasUnigramsForKey(const string& key) = 0; + virtual const std::vector bigramsForKeys( + const std::string& preceedingKey, const std::string& key) = 0; + virtual const std::vector unigramsForKey(const std::string& key) = 0; + virtual bool hasUnigramsForKey(const std::string& key) = 0; }; } // namespace Gramambular } // namespace Formosa diff --git a/Source/Engine/Gramambular/Node.h b/Source/Engine/Gramambular/Node.h index 9c15ce53..fe4690d9 100644 --- a/Source/Engine/Gramambular/Node.h +++ b/Source/Engine/Gramambular/Node.h @@ -35,47 +35,46 @@ namespace Formosa { namespace Gramambular { -using namespace std; class Node { public: Node(); - Node(const string& inKey, const vector& inUnigrams, - const vector& inBigrams); + Node(const std::string& inKey, const std::vector& inUnigrams, + const std::vector& inBigrams); void primeNodeWithPreceedingKeyValues( - const vector& inKeyValues); + const std::vector& inKeyValues); bool isCandidateFixed() const; - const vector& candidates() const; + const std::vector& candidates() const; void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); void resetCandidate(); void selectFloatingCandidateAtIndex(size_t index, double score); - const string& key() const; + const std::string& key() const; double score() const; - double scoreForCandidate(string& candidate) const; + double scoreForCandidate(std::string& candidate) const; const KeyValuePair currentKeyValue() const; double highestUnigramScore() const; protected: const LanguageModel* m_LM; - string m_key; + std::string m_key; double m_score; - vector m_unigrams; - vector m_candidates; - map m_valueUnigramIndexMap; - map > m_preceedingGramBigramMap; + std::vector m_unigrams; + std::vector m_candidates; + std::map m_valueUnigramIndexMap; + std::map > m_preceedingGramBigramMap; bool m_candidateFixed; size_t m_selectedUnigramIndex; - friend ostream& operator<<(ostream& inStream, const Node& inNode); + friend std::ostream& operator<<(std::ostream& inStream, const Node& inNode); }; -inline ostream& operator<<(ostream& inStream, const Node& inNode) { +inline std::ostream& operator<<(std::ostream& inStream, const Node& inNode) { inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") << ",selected:" << inNode.m_selectedUnigramIndex << "," @@ -86,8 +85,9 @@ inline ostream& operator<<(ostream& inStream, const Node& inNode) { inline Node::Node() : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {} -inline Node::Node(const string& inKey, const vector& inUnigrams, - const vector& inBigrams) +inline Node::Node(const std::string& inKey, + const std::vector& inUnigrams, + const std::vector& inBigrams) : m_key(inKey), m_unigrams(inUnigrams), m_candidateFixed(false), @@ -100,7 +100,7 @@ inline Node::Node(const string& inKey, const vector& inUnigrams, } size_t i = 0; - for (vector::const_iterator ui = m_unigrams.begin(); + for (std::vector::const_iterator ui = m_unigrams.begin(); ui != m_unigrams.end(); ++ui) { m_valueUnigramIndexMap[(*ui).keyValue.value] = i; i++; @@ -108,30 +108,30 @@ inline Node::Node(const string& inKey, const vector& inUnigrams, m_candidates.push_back((*ui).keyValue); } - for (vector::const_iterator bi = inBigrams.begin(); + for (std::vector::const_iterator bi = inBigrams.begin(); bi != inBigrams.end(); ++bi) { m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); } } inline void Node::primeNodeWithPreceedingKeyValues( - const vector& inKeyValues) { + const std::vector& inKeyValues) { size_t newIndex = m_selectedUnigramIndex; double max = m_score; if (!isCandidateFixed()) { - for (vector::const_iterator kvi = inKeyValues.begin(); + for (std::vector::const_iterator kvi = inKeyValues.begin(); kvi != inKeyValues.end(); ++kvi) { - map >::const_iterator f = + std::map >::const_iterator f = m_preceedingGramBigramMap.find(*kvi); if (f != m_preceedingGramBigramMap.end()) { - const vector& bigrams = (*f).second; + const std::vector& bigrams = (*f).second; - for (vector::const_iterator bi = bigrams.begin(); + for (std::vector::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi) { const Bigram& bigram = *bi; if (bigram.score > max) { - map::const_iterator uf = + std::map::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value); if (uf != m_valueUnigramIndexMap.end()) { newIndex = (*uf).second; @@ -154,7 +154,7 @@ inline void Node::primeNodeWithPreceedingKeyValues( inline bool Node::isCandidateFixed() const { return m_candidateFixed; } -inline const vector& Node::candidates() const { +inline const std::vector& Node::candidates() const { return m_candidates; } @@ -187,11 +187,11 @@ inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { m_score = score; } -inline const string& Node::key() const { return m_key; } +inline const std::string& Node::key() const { return m_key; } inline double Node::score() const { return m_score; } -inline double Node::scoreForCandidate(string& candidate) const { +inline double Node::scoreForCandidate(std::string& candidate) const { for (auto unigram : m_unigrams) { if (unigram.keyValue.value == candidate) { return unigram.score; diff --git a/Source/Engine/Gramambular/NodeAnchor.h b/Source/Engine/Gramambular/NodeAnchor.h index 3fa7c7a3..2ddef073 100644 --- a/Source/Engine/Gramambular/NodeAnchor.h +++ b/Source/Engine/Gramambular/NodeAnchor.h @@ -44,7 +44,8 @@ class NodeAnchor { inline NodeAnchor::NodeAnchor() : node(0), location(0), spanningLength(0), accumulatedScore(0.0) {} -inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) { +inline std::ostream& operator<<(std::ostream& inStream, + const NodeAnchor& inAnchor) { inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),"; if (inAnchor.node) { @@ -56,9 +57,9 @@ inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) { return inStream; } -inline ostream& operator<<(ostream& inStream, - const vector& inAnchor) { - for (vector::const_iterator i = inAnchor.begin(); +inline std::ostream& operator<<(std::ostream& inStream, + const std::vector& inAnchor) { + for (std::vector::const_iterator i = inAnchor.begin(); i != inAnchor.end(); ++i) { inStream << *i; if (i + 1 != inAnchor.end()) { diff --git a/Source/Engine/Gramambular/Span.h b/Source/Engine/Gramambular/Span.h index 319ef738..c733886b 100644 --- a/Source/Engine/Gramambular/Span.h +++ b/Source/Engine/Gramambular/Span.h @@ -48,7 +48,7 @@ class Span { size_t maximumLength() const; protected: - map m_lengthNodeMap; + std::map m_lengthNodeMap; size_t m_maximumLength; }; @@ -72,9 +72,9 @@ inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) { } size_t max = 0; - set removeSet; - for (map::iterator i = m_lengthNodeMap.begin(), - e = m_lengthNodeMap.end(); + std::set removeSet; + for (std::map::iterator i = m_lengthNodeMap.begin(), + e = m_lengthNodeMap.end(); i != e; ++i) { if ((*i).first > inLength) { removeSet.insert((*i).first); @@ -85,8 +85,8 @@ inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) { } } - for (set::iterator i = removeSet.begin(), e = removeSet.end(); i != e; - ++i) { + for (std::set::iterator i = removeSet.begin(), e = removeSet.end(); + i != e; ++i) { m_lengthNodeMap.erase(*i); } @@ -94,7 +94,7 @@ inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) { } inline Node* Span::nodeOfLength(size_t inLength) { - map::iterator f = m_lengthNodeMap.find(inLength); + std::map::iterator f = m_lengthNodeMap.find(inLength); return f == m_lengthNodeMap.end() ? 0 : &(*f).second; } diff --git a/Source/Engine/Gramambular/Unigram.h b/Source/Engine/Gramambular/Unigram.h index 4aa0833e..c40322e7 100644 --- a/Source/Engine/Gramambular/Unigram.h +++ b/Source/Engine/Gramambular/Unigram.h @@ -47,20 +47,21 @@ class Unigram { static bool ScoreCompare(const Unigram& a, const Unigram& b); }; -inline ostream& operator<<(ostream& inStream, const Unigram& inGram) { - streamsize p = inStream.precision(); +inline std::ostream& operator<<(std::ostream& inStream, const Unigram& inGram) { + std::streamsize p = inStream.precision(); inStream.precision(6); inStream << "(" << inGram.keyValue << "," << inGram.score << ")"; inStream.precision(p); return inStream; } -inline ostream& operator<<(ostream& inStream, const vector& inGrams) { +inline std::ostream& operator<<(std::ostream& inStream, + const std::vector& inGrams) { inStream << "[" << inGrams.size() << "]=>{"; size_t index = 0; - for (vector::const_iterator gi = inGrams.begin(); + for (std::vector::const_iterator gi = inGrams.begin(); gi != inGrams.end(); ++gi, ++index) { inStream << index << "=>"; inStream << *gi; diff --git a/Source/Engine/Gramambular/Walker.h b/Source/Engine/Gramambular/Walker.h index 1f57fa0d..3c383105 100644 --- a/Source/Engine/Gramambular/Walker.h +++ b/Source/Engine/Gramambular/Walker.h @@ -34,13 +34,12 @@ namespace Formosa { namespace Gramambular { -using namespace std; class Walker { public: Walker(Grid* inGrid); - const vector reverseWalk(size_t inLocation, - double inAccumulatedScore = 0.0); + const std::vector reverseWalk(size_t inLocation, + double inAccumulatedScore = 0.0); protected: Grid* m_grid; @@ -48,17 +47,17 @@ class Walker { inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {} -inline const vector Walker::reverseWalk(size_t inLocation, - double inAccumulatedScore) { +inline const std::vector Walker::reverseWalk( + size_t inLocation, double inAccumulatedScore) { if (!inLocation || inLocation > m_grid->width()) { - return vector(); + return std::vector(); } - vector > paths; + std::vector > paths; - vector nodes = m_grid->nodesEndingAt(inLocation); + std::vector nodes = m_grid->nodesEndingAt(inLocation); - for (vector::iterator ni = nodes.begin(); ni != nodes.end(); + for (std::vector::iterator ni = nodes.begin(); ni != nodes.end(); ++ni) { if (!(*ni).node) { continue; @@ -66,7 +65,7 @@ inline const vector Walker::reverseWalk(size_t inLocation, (*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); - vector path = + std::vector path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); path.insert(path.begin(), *ni); @@ -74,11 +73,11 @@ inline const vector Walker::reverseWalk(size_t inLocation, } if (!paths.size()) { - return vector(); + return std::vector(); } - vector* result = &*(paths.begin()); - for (vector >::iterator pi = paths.begin(); + std::vector* result = &*(paths.begin()); + for (std::vector >::iterator pi = paths.begin(); pi != paths.end(); ++pi) { if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { result = &*pi; From 26ad5fd5eac64b78ac624778c8711aee5697822a Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Sat, 19 Feb 2022 09:56:10 -0800 Subject: [PATCH 5/8] Move dumpDOT to Grid.cpp --- McBopomofo.xcodeproj/project.pbxproj | 8 ++- Source/Engine/Gramambular/Grid.cpp | 74 ++++++++++++++++++++++++++++ Source/Engine/Gramambular/Grid.h | 42 +--------------- 3 files changed, 81 insertions(+), 43 deletions(-) diff --git a/McBopomofo.xcodeproj/project.pbxproj b/McBopomofo.xcodeproj/project.pbxproj index 69cae58f..619bed32 100644 --- a/McBopomofo.xcodeproj/project.pbxproj +++ b/McBopomofo.xcodeproj/project.pbxproj @@ -20,6 +20,7 @@ 6A6ED16C2797650A0012872E /* template-data.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6A6ED1652797650A0012872E /* template-data.txt */; }; 6A6ED16D2797650A0012872E /* template-exclude-phrases-plain-bpmf.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6A6ED1672797650A0012872E /* template-exclude-phrases-plain-bpmf.txt */; }; 6A6ED16E2797650A0012872E /* template-exclude-phrases.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6A6ED1692797650A0012872E /* template-exclude-phrases.txt */; }; + 6A74B14927C16845001988F4 /* Grid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6A74B14827C16845001988F4 /* Grid.cpp */; }; 6ACA41FA15FC1D9000935EF6 /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EA15FC1D9000935EF6 /* InfoPlist.strings */; }; 6ACA41FB15FC1D9000935EF6 /* License.rtf in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EC15FC1D9000935EF6 /* License.rtf */; }; 6ACA41FC15FC1D9000935EF6 /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EE15FC1D9000935EF6 /* Localizable.strings */; }; @@ -59,8 +60,8 @@ D485D3B92796A8A000657FF3 /* PreferencesTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D485D3B82796A8A000657FF3 /* PreferencesTests.swift */; }; D485D3C02796CE3200657FF3 /* VersionUpdateTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D485D3BF2796CE3200657FF3 /* VersionUpdateTests.swift */; }; D4A13D5A27A59F0B003BE359 /* InputMethodController.swift in Sources */ = {isa = PBXBuildFile; fileRef = D4A13D5927A59D5C003BE359 /* InputMethodController.swift */; }; - D4C9CAB127AAC9690058DFEA /* NSStringUtils in Frameworks */ = {isa = PBXBuildFile; productRef = D4C9CAB027AAC9690058DFEA /* NSStringUtils */; }; D4A8E43627A9E982002F7A07 /* KeyHandlerPlainBopomofoTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D4A8E43527A9E982002F7A07 /* KeyHandlerPlainBopomofoTests.swift */; }; + D4C9CAB127AAC9690058DFEA /* NSStringUtils in Frameworks */ = {isa = PBXBuildFile; productRef = D4C9CAB027AAC9690058DFEA /* NSStringUtils */; }; D4E33D8A27A838CF006DB1CF /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = D4E33D8827A838CF006DB1CF /* Localizable.strings */; }; D4E33D8F27A838F0006DB1CF /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = D4E33D8D27A838F0006DB1CF /* InfoPlist.strings */; }; D4E569DC27A34D0E00AC2CEF /* KeyHandler.mm in Sources */ = {isa = PBXBuildFile; fileRef = D4E569DB27A34CC100AC2CEF /* KeyHandler.mm */; }; @@ -131,6 +132,7 @@ 6A6ED170279765140012872E /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/template-exclude-phrases-plain-bpmf.txt"; sourceTree = ""; }; 6A6ED171279765170012872E /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/template-exclude-phrases.txt"; sourceTree = ""; }; 6A6ED1722797651A0012872E /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/template-phrases-replacement.txt"; sourceTree = ""; }; + 6A74B14827C16845001988F4 /* Grid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Grid.cpp; path = Grid.cpp; sourceTree = ""; }; 6A93050C279877FF00D370DA /* McBopomofoInstaller-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofoInstaller-Bridging-Header.h"; sourceTree = ""; }; 6ACA41CB15FC1D7500935EF6 /* McBopomofoInstaller.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = McBopomofoInstaller.app; sourceTree = BUILT_PRODUCTS_DIR; }; 6ACA41EB15FC1D9000935EF6 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = ""; }; @@ -186,8 +188,8 @@ D485D3BF2796CE3200657FF3 /* VersionUpdateTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VersionUpdateTests.swift; sourceTree = ""; }; D495583A27A5C6C4006ADE1C /* LanguageModelManager+Privates.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "LanguageModelManager+Privates.h"; sourceTree = ""; }; D4A13D5927A59D5C003BE359 /* InputMethodController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputMethodController.swift; sourceTree = ""; }; - D4C9CAAF27AAC8EC0058DFEA /* NSStringUtils */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = NSStringUtils; path = Packages/NSStringUtils; sourceTree = ""; }; D4A8E43527A9E982002F7A07 /* KeyHandlerPlainBopomofoTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeyHandlerPlainBopomofoTests.swift; sourceTree = ""; }; + D4C9CAAF27AAC8EC0058DFEA /* NSStringUtils */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = NSStringUtils; path = Packages/NSStringUtils; sourceTree = ""; }; D4E33D8927A838CF006DB1CF /* Base */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = Base; path = Base.lproj/Localizable.strings; sourceTree = ""; }; D4E33D8B27A838D5006DB1CF /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = ""; }; D4E33D8C27A838D8006DB1CF /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/Localizable.strings"; sourceTree = ""; }; @@ -329,6 +331,7 @@ 6A0D4F1415FC0EB100ABF4B3 /* Bigram.h */, 6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */, 6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */, + 6A74B14827C16845001988F4 /* Grid.cpp */, 6A0D4F1715FC0EB100ABF4B3 /* Grid.h */, 6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */, 6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */, @@ -669,6 +672,7 @@ D41355DE278EA3ED005E5CBD /* UserPhrasesLM.cpp in Sources */, 6ACC3D3F27914F2400F1B140 /* KeyValueBlobReader.cpp in Sources */, D41355D8278D74B5005E5CBD /* LanguageModelManager.mm in Sources */, + 6A74B14927C16845001988F4 /* Grid.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Source/Engine/Gramambular/Grid.cpp b/Source/Engine/Gramambular/Grid.cpp index e69de29b..2019e139 100644 --- a/Source/Engine/Gramambular/Grid.cpp +++ b/Source/Engine/Gramambular/Grid.cpp @@ -0,0 +1,74 @@ +// Copyright (c) 2007 and onwards Lukhnos Liu +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "Grid.h" + +#include +#include + +namespace Formosa { +namespace Gramambular { + +std::string Grid::dumpDOT() { + std::stringstream sst; + sst << "digraph {" << std::endl; + sst << "graph [ rankdir=LR ];" << std::endl; + sst << "BOS;" << std::endl; + + for (unsigned long p = 0; p < m_spans.size(); p++) { + Span& span = m_spans[p]; + for (unsigned long ni = 0; ni <= span.maximumLength(); ni++) { + Node* np = span.nodeOfLength(ni); + if (np) { + if (!p) { + sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl; + } + + sst << np->currentKeyValue().value << ";" << std::endl; + + if (p + ni < m_spans.size()) { + Span& dstSpan = m_spans[p + ni]; + for (unsigned long q = 0; q <= dstSpan.maximumLength(); q++) { + Node* dn = dstSpan.nodeOfLength(q); + if (dn) { + sst << np->currentKeyValue().value << " -> " + << dn->currentKeyValue().value << ";" << std::endl; + } + } + } + + if (p + ni == m_spans.size()) { + sst << np->currentKeyValue().value << " -> " + << "EOS;" << std::endl; + } + } + } + } + + sst << "EOS;" << std::endl; + sst << "}"; + return sst.str(); +} + +} // namespace Gramambular +} // namespace Formosa diff --git a/Source/Engine/Gramambular/Grid.h b/Source/Engine/Gramambular/Grid.h index 80e0dada..eb42659a 100644 --- a/Source/Engine/Gramambular/Grid.h +++ b/Source/Engine/Gramambular/Grid.h @@ -68,7 +68,7 @@ class Grid { const std::string& value, float overridingScore); - const std::string dumpDOT(); + std::string dumpDOT(); protected: std::vector m_spans; @@ -226,46 +226,6 @@ inline void Grid::overrideNodeScoreForSelectedCandidate( } } -inline const std::string Grid::dumpDOT() { - std::stringstream sst; - sst << "digraph {" << std::endl; - sst << "graph [ rankdir=LR ];" << std::endl; - sst << "BOS;" << std::endl; - - for (size_t p = 0; p < m_spans.size(); p++) { - Span& span = m_spans[p]; - for (size_t ni = 0; ni <= span.maximumLength(); ni++) { - Node* np = span.nodeOfLength(ni); - if (np) { - if (!p) { - sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl; - } - - sst << np->currentKeyValue().value << ";" << std::endl; - - if (p + ni < m_spans.size()) { - Span& dstSpan = m_spans[p + ni]; - for (size_t q = 0; q <= dstSpan.maximumLength(); q++) { - Node* dn = dstSpan.nodeOfLength(q); - if (dn) { - sst << np->currentKeyValue().value << " -> " - << dn->currentKeyValue().value << ";" << std::endl; - } - } - } - - if (p + ni == m_spans.size()) { - sst << np->currentKeyValue().value << " -> " - << "EOS;" << std::endl; - } - } - } - } - - sst << "EOS;" << std::endl; - sst << "}"; - return sst.str(); -} } // namespace Gramambular } // namespace Formosa From e8926284926e173bb2d666c614fc8f5cceecdbff Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Sat, 19 Feb 2022 10:13:26 -0800 Subject: [PATCH 6/8] Remove all the "inFoo" in-param style We don't use out-params in Gramambular, but even for them the best practices have been converging on using "T& param" for required output param and "T* param" for optional output param. At any rate the prefix was never necessary, and hence the removal. --- Source/Engine/Gramambular/Bigram.h | 58 ++++++------- .../Engine/Gramambular/BlockReadingBuilder.h | 21 +++-- Source/Engine/Gramambular/Grid.h | 82 +++++++++---------- Source/Engine/Gramambular/KeyValuePair.h | 24 +++--- Source/Engine/Gramambular/Node.h | 49 ++++++----- Source/Engine/Gramambular/NodeAnchor.h | 49 +++++------ Source/Engine/Gramambular/Span.h | 30 +++---- Source/Engine/Gramambular/Unigram.h | 51 ++++++------ Source/Engine/Gramambular/Walker.h | 14 ++-- 9 files changed, 183 insertions(+), 195 deletions(-) diff --git a/Source/Engine/Gramambular/Bigram.h b/Source/Engine/Gramambular/Bigram.h index caa46de2..1b30fc96 100644 --- a/Source/Engine/Gramambular/Bigram.h +++ b/Source/Engine/Gramambular/Bigram.h @@ -42,53 +42,53 @@ class Bigram { KeyValuePair keyValue; double score; - bool operator==(const Bigram& inAnother) const; - bool operator<(const Bigram& inAnother) const; + bool operator==(const Bigram& another) const; + bool operator<(const Bigram& another) const; }; -inline std::ostream& operator<<(std::ostream& inStream, const Bigram& inGram) { - std::streamsize p = inStream.precision(); - inStream.precision(6); - inStream << "(" << inGram.keyValue << "|" << inGram.preceedingKeyValue << "," - << inGram.score << ")"; - inStream.precision(p); - return inStream; +inline std::ostream& operator<<(std::ostream& stream, const Bigram& gram) { + std::streamsize p = stream.precision(); + stream.precision(6); + stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," + << gram.score << ")"; + stream.precision(p); + return stream; } -inline std::ostream& operator<<(std::ostream& inStream, - const std::vector& inGrams) { - inStream << "[" << inGrams.size() << "]=>{"; +inline std::ostream& operator<<(std::ostream& stream, + const std::vector& grams) { + stream << "[" << grams.size() << "]=>{"; size_t index = 0; - for (std::vector::const_iterator gi = inGrams.begin(); - gi != inGrams.end(); ++gi, ++index) { - inStream << index << "=>"; - inStream << *gi; - if (gi + 1 != inGrams.end()) { - inStream << ","; + for (std::vector::const_iterator gi = grams.begin(); + gi != grams.end(); ++gi, ++index) { + stream << index << "=>"; + stream << *gi; + if (gi + 1 != grams.end()) { + stream << ","; } } - inStream << "}"; - return inStream; + stream << "}"; + return stream; } inline Bigram::Bigram() : score(0.0) {} -inline bool Bigram::operator==(const Bigram& inAnother) const { - return preceedingKeyValue == inAnother.preceedingKeyValue && - keyValue == inAnother.keyValue && score == inAnother.score; +inline bool Bigram::operator==(const Bigram& another) const { + return preceedingKeyValue == another.preceedingKeyValue && + keyValue == another.keyValue && score == another.score; } -inline bool Bigram::operator<(const Bigram& inAnother) const { - if (preceedingKeyValue < inAnother.preceedingKeyValue) { +inline bool Bigram::operator<(const Bigram& another) const { + if (preceedingKeyValue < another.preceedingKeyValue) { return true; - } else if (preceedingKeyValue == inAnother.preceedingKeyValue) { - if (keyValue < inAnother.keyValue) { + } else if (preceedingKeyValue == another.preceedingKeyValue) { + if (keyValue < another.keyValue) { return true; - } else if (keyValue == inAnother.keyValue) { - return score < inAnother.score; + } else if (keyValue == another.keyValue) { + return score < another.score; } return false; } diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index 56c335ca..dac88a12 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -38,13 +38,13 @@ namespace Gramambular { class BlockReadingBuilder { public: - BlockReadingBuilder(LanguageModel* inLM); + BlockReadingBuilder(LanguageModel* lm); void clear(); size_t length() const; size_t cursorIndex() const; - void setCursorIndex(size_t inNewIndex); - void insertReadingAtCursor(const std::string& inReading); + void setCursorIndex(size_t newIndex); + void insertReadingAtCursor(const std::string& reading); bool deleteReadingBeforeCursor(); // backspace bool deleteReadingAfterCursor(); // delete @@ -64,7 +64,7 @@ class BlockReadingBuilder { std::vector::const_iterator end, const std::string& separator); - //最多使用六個字組成一個詞 + // 最多使用六個字組成一個詞 static const size_t MaximumBuildSpanLength = 6; size_t m_cursorIndex; @@ -75,8 +75,8 @@ class BlockReadingBuilder { std::string m_joinSeparator; }; -inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* inLM) - : m_LM(inLM), m_cursorIndex(0) {} +inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* lm) + : m_LM(lm), m_cursorIndex(0) {} inline void BlockReadingBuilder::clear() { m_cursorIndex = 0; @@ -88,14 +88,13 @@ inline size_t BlockReadingBuilder::length() const { return m_readings.size(); } inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; } -inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) { - m_cursorIndex = - inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex; +inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) { + m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex; } inline void BlockReadingBuilder::insertReadingAtCursor( - const std::string& inReading) { - m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); + const std::string& reading) { + m_readings.insert(m_readings.begin() + m_cursorIndex, reading); m_grid.expandGridByOneAtLocation(m_cursorIndex); build(); diff --git a/Source/Engine/Gramambular/Grid.h b/Source/Engine/Gramambular/Grid.h index eb42659a..2653074a 100644 --- a/Source/Engine/Gramambular/Grid.h +++ b/Source/Engine/Gramambular/Grid.h @@ -39,18 +39,17 @@ namespace Gramambular { class Grid { public: void clear(); - void insertNode(const Node& inNode, size_t inLocation, - size_t inSpanningLength); - bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, - size_t inSpanningLength, - const std::string& inKey); + void insertNode(const Node& node, size_t location, size_t spanningLength); + bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, + size_t spanningLength, + const std::string& key); - void expandGridByOneAtLocation(size_t inLocation); - void shrinkGridByOneAtLocation(size_t inLocation); + void expandGridByOneAtLocation(size_t location); + void shrinkGridByOneAtLocation(size_t location); size_t width() const; - std::vector nodesEndingAt(size_t inLocation); - std::vector nodesCrossingOrEndingAt(size_t inLocation); + std::vector nodesEndingAt(size_t location); + std::vector nodesCrossingOrEndingAt(size_t location); // "Freeze" the node with the unigram that represents the selected candidate // value. After this, the node that contains the unigram will always be @@ -76,72 +75,72 @@ class Grid { inline void Grid::clear() { m_spans.clear(); } -inline void Grid::insertNode(const Node& inNode, size_t inLocation, - size_t inSpanningLength) { - if (inLocation >= m_spans.size()) { - size_t diff = inLocation - m_spans.size() + 1; +inline void Grid::insertNode(const Node& node, size_t location, + size_t spanningLength) { + if (location >= m_spans.size()) { + size_t diff = location - m_spans.size() + 1; for (size_t i = 0; i < diff; i++) { m_spans.push_back(Span()); } } - m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); + m_spans[location].insertNodeOfLength(node, spanningLength); } inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey( - size_t inLocation, size_t inSpanningLength, const std::string& inKey) { - if (inLocation > m_spans.size()) { + size_t location, size_t spanningLength, const std::string& key) { + if (location > m_spans.size()) { return false; } - const Node* n = m_spans[inLocation].nodeOfLength(inSpanningLength); + const Node* n = m_spans[location].nodeOfLength(spanningLength); if (!n) { return false; } - return inKey == n->key(); + return key == n->key(); } -inline void Grid::expandGridByOneAtLocation(size_t inLocation) { - if (!inLocation || inLocation == m_spans.size()) { - m_spans.insert(m_spans.begin() + inLocation, Span()); +inline void Grid::expandGridByOneAtLocation(size_t location) { + if (!location || location == m_spans.size()) { + m_spans.insert(m_spans.begin() + location, Span()); } else { - m_spans.insert(m_spans.begin() + inLocation, Span()); - for (size_t i = 0; i < inLocation; i++) { + m_spans.insert(m_spans.begin() + location, Span()); + for (size_t i = 0; i < location; i++) { // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); + m_spans[i].removeNodeOfLengthGreaterThan(location - i); } } } -inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) { - if (inLocation >= m_spans.size()) { +inline void Grid::shrinkGridByOneAtLocation(size_t location) { + if (location >= m_spans.size()) { return; } - m_spans.erase(m_spans.begin() + inLocation); - for (size_t i = 0; i < inLocation; i++) { + m_spans.erase(m_spans.begin() + location); + for (size_t i = 0; i < location; i++) { // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); + m_spans[i].removeNodeOfLengthGreaterThan(location - i); } } inline size_t Grid::width() const { return m_spans.size(); } -inline std::vector Grid::nodesEndingAt(size_t inLocation) { +inline std::vector Grid::nodesEndingAt(size_t location) { std::vector result; - if (m_spans.size() && inLocation <= m_spans.size()) { - for (size_t i = 0; i < inLocation; i++) { + if (m_spans.size() && location <= m_spans.size()) { + for (size_t i = 0; i < location; i++) { Span& span = m_spans[i]; - if (i + span.maximumLength() >= inLocation) { - Node* np = span.nodeOfLength(inLocation - i); + if (i + span.maximumLength() >= location) { + Node* np = span.nodeOfLength(location - i); if (np) { NodeAnchor na; na.node = np; na.location = i; - na.spanningLength = inLocation - i; + na.spanningLength = location - i; result.push_back(na); } @@ -152,17 +151,16 @@ inline std::vector Grid::nodesEndingAt(size_t inLocation) { return result; } -inline std::vector Grid::nodesCrossingOrEndingAt( - size_t inLocation) { +inline std::vector Grid::nodesCrossingOrEndingAt(size_t location) { std::vector result; - if (m_spans.size() && inLocation <= m_spans.size()) { - for (size_t i = 0; i < inLocation; i++) { + if (m_spans.size() && location <= m_spans.size()) { + for (size_t i = 0; i < location; i++) { Span& span = m_spans[i]; - if (i + span.maximumLength() >= inLocation) { + if (i + span.maximumLength() >= location) { for (size_t j = 1, m = span.maximumLength(); j <= m; j++) { - if (i + j < inLocation) { + if (i + j < location) { continue; } @@ -171,7 +169,7 @@ inline std::vector Grid::nodesCrossingOrEndingAt( NodeAnchor na; na.node = np; na.location = i; - na.spanningLength = inLocation - i; + na.spanningLength = location - i; result.push_back(na); } diff --git a/Source/Engine/Gramambular/KeyValuePair.h b/Source/Engine/Gramambular/KeyValuePair.h index 5c4f6b61..1059dfdc 100644 --- a/Source/Engine/Gramambular/KeyValuePair.h +++ b/Source/Engine/Gramambular/KeyValuePair.h @@ -39,25 +39,25 @@ class KeyValuePair { std::string key; std::string value; - bool operator==(const KeyValuePair& inAnother) const; - bool operator<(const KeyValuePair& inAnother) const; + bool operator==(const KeyValuePair& another) const; + bool operator<(const KeyValuePair& another) const; }; -inline std::ostream& operator<<(std::ostream& inStream, - const KeyValuePair& inPair) { - inStream << "(" << inPair.key << "," << inPair.value << ")"; - return inStream; +inline std::ostream& operator<<(std::ostream& stream, + const KeyValuePair& pair) { + stream << "(" << pair.key << "," << pair.value << ")"; + return stream; } -inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const { - return key == inAnother.key && value == inAnother.value; +inline bool KeyValuePair::operator==(const KeyValuePair& another) const { + return key == another.key && value == another.value; } -inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const { - if (key < inAnother.key) { +inline bool KeyValuePair::operator<(const KeyValuePair& another) const { + if (key < another.key) { return true; - } else if (key == inAnother.key) { - return value < inAnother.value; + } else if (key == another.key) { + return value < another.value; } return false; } diff --git a/Source/Engine/Gramambular/Node.h b/Source/Engine/Gramambular/Node.h index fe4690d9..a442c77f 100644 --- a/Source/Engine/Gramambular/Node.h +++ b/Source/Engine/Gramambular/Node.h @@ -39,15 +39,15 @@ namespace Gramambular { class Node { public: Node(); - Node(const std::string& inKey, const std::vector& inUnigrams, - const std::vector& inBigrams); + Node(const std::string& key, const std::vector& unigrams, + const std::vector& bigrams); void primeNodeWithPreceedingKeyValues( - const std::vector& inKeyValues); + const std::vector& keyValues); bool isCandidateFixed() const; const std::vector& candidates() const; - void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); + void selectCandidateAtIndex(size_t index = 0, bool fix = true); void resetCandidate(); void selectFloatingCandidateAtIndex(size_t index, double score); @@ -71,25 +71,24 @@ class Node { bool m_candidateFixed; size_t m_selectedUnigramIndex; - friend std::ostream& operator<<(std::ostream& inStream, const Node& inNode); + friend std::ostream& operator<<(std::ostream& stream, const Node& node); }; -inline std::ostream& operator<<(std::ostream& inStream, const Node& inNode) { - inStream << "(node,key:" << inNode.m_key - << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") - << ",selected:" << inNode.m_selectedUnigramIndex << "," - << inNode.m_unigrams << ")"; - return inStream; +inline std::ostream& operator<<(std::ostream& stream, const Node& node) { + stream << "(node,key:" << node.m_key + << ",fixed:" << (node.m_candidateFixed ? "true" : "false") + << ",selected:" << node.m_selectedUnigramIndex << "," + << node.m_unigrams << ")"; + return stream; } inline Node::Node() : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {} -inline Node::Node(const std::string& inKey, - const std::vector& inUnigrams, - const std::vector& inBigrams) - : m_key(inKey), - m_unigrams(inUnigrams), +inline Node::Node(const std::string& key, const std::vector& unigrams, + const std::vector& bigrams) + : m_key(key), + m_unigrams(unigrams), m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) { @@ -108,20 +107,20 @@ inline Node::Node(const std::string& inKey, m_candidates.push_back((*ui).keyValue); } - for (std::vector::const_iterator bi = inBigrams.begin(); - bi != inBigrams.end(); ++bi) { + for (std::vector::const_iterator bi = bigrams.begin(); + bi != bigrams.end(); ++bi) { m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); } } inline void Node::primeNodeWithPreceedingKeyValues( - const std::vector& inKeyValues) { + const std::vector& keyValues) { size_t newIndex = m_selectedUnigramIndex; double max = m_score; if (!isCandidateFixed()) { - for (std::vector::const_iterator kvi = inKeyValues.begin(); - kvi != inKeyValues.end(); ++kvi) { + for (std::vector::const_iterator kvi = keyValues.begin(); + kvi != keyValues.end(); ++kvi) { std::map >::const_iterator f = m_preceedingGramBigramMap.find(*kvi); if (f != m_preceedingGramBigramMap.end()) { @@ -158,14 +157,14 @@ inline const std::vector& Node::candidates() const { return m_candidates; } -inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) { - if (inIndex >= m_unigrams.size()) { +inline void Node::selectCandidateAtIndex(size_t index, bool fix) { + if (index >= m_unigrams.size()) { m_selectedUnigramIndex = 0; } else { - m_selectedUnigramIndex = inIndex; + m_selectedUnigramIndex = index; } - m_candidateFixed = inFix; + m_candidateFixed = fix; m_score = 99; } diff --git a/Source/Engine/Gramambular/NodeAnchor.h b/Source/Engine/Gramambular/NodeAnchor.h index 2ddef073..f2b4dfc8 100644 --- a/Source/Engine/Gramambular/NodeAnchor.h +++ b/Source/Engine/Gramambular/NodeAnchor.h @@ -32,42 +32,37 @@ namespace Formosa { namespace Gramambular { -class NodeAnchor { - public: - NodeAnchor(); - const Node* node; - size_t location; - size_t spanningLength; - double accumulatedScore; + +struct NodeAnchor { + const Node* node = nullptr; + size_t location = 0; + size_t spanningLength = 0; + double accumulatedScore = 0.0; }; -inline NodeAnchor::NodeAnchor() - : node(0), location(0), spanningLength(0), accumulatedScore(0.0) {} - -inline std::ostream& operator<<(std::ostream& inStream, - const NodeAnchor& inAnchor) { - inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength - << "),"; - if (inAnchor.node) { - inStream << *(inAnchor.node); +inline std::ostream& operator<<(std::ostream& stream, + const NodeAnchor& anchor) { + stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),"; + if (anchor.node) { + stream << *(anchor.node); } else { - inStream << "null"; + stream << "null"; } - inStream << "}"; - return inStream; + stream << "}"; + return stream; } -inline std::ostream& operator<<(std::ostream& inStream, - const std::vector& inAnchor) { - for (std::vector::const_iterator i = inAnchor.begin(); - i != inAnchor.end(); ++i) { - inStream << *i; - if (i + 1 != inAnchor.end()) { - inStream << "<-"; +inline std::ostream& operator<<(std::ostream& stream, + const std::vector& anchor) { + for (std::vector::const_iterator i = anchor.begin(); + i != anchor.end(); ++i) { + stream << *i; + if (i + 1 != anchor.end()) { + stream << "<-"; } } - return inStream; + return stream; } } // namespace Gramambular } // namespace Formosa diff --git a/Source/Engine/Gramambular/Span.h b/Source/Engine/Gramambular/Span.h index c733886b..795c2a48 100644 --- a/Source/Engine/Gramambular/Span.h +++ b/Source/Engine/Gramambular/Span.h @@ -38,36 +38,32 @@ namespace Formosa { namespace Gramambular { class Span { public: - Span(); - void clear(); - void insertNodeOfLength(const Node& inNode, size_t inLength); - void removeNodeOfLengthGreaterThan(size_t inLength); + void insertNodeOfLength(const Node& node, size_t length); + void removeNodeOfLengthGreaterThan(size_t length); - Node* nodeOfLength(size_t inLength); + Node* nodeOfLength(size_t length); size_t maximumLength() const; protected: std::map m_lengthNodeMap; - size_t m_maximumLength; + size_t m_maximumLength = 0; }; -inline Span::Span() : m_maximumLength(0) {} - inline void Span::clear() { m_lengthNodeMap.clear(); m_maximumLength = 0; } -inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) { - m_lengthNodeMap[inLength] = inNode; - if (inLength > m_maximumLength) { - m_maximumLength = inLength; +inline void Span::insertNodeOfLength(const Node& node, size_t length) { + m_lengthNodeMap[length] = node; + if (length > m_maximumLength) { + m_maximumLength = length; } } -inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) { - if (inLength > m_maximumLength) { +inline void Span::removeNodeOfLengthGreaterThan(size_t length) { + if (length > m_maximumLength) { return; } @@ -76,7 +72,7 @@ inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) { for (std::map::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end(); i != e; ++i) { - if ((*i).first > inLength) { + if ((*i).first > length) { removeSet.insert((*i).first); } else { if ((*i).first > max) { @@ -93,8 +89,8 @@ inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) { m_maximumLength = max; } -inline Node* Span::nodeOfLength(size_t inLength) { - std::map::iterator f = m_lengthNodeMap.find(inLength); +inline Node* Span::nodeOfLength(size_t length) { + std::map::iterator f = m_lengthNodeMap.find(length); return f == m_lengthNodeMap.end() ? 0 : &(*f).second; } diff --git a/Source/Engine/Gramambular/Unigram.h b/Source/Engine/Gramambular/Unigram.h index c40322e7..df09cd0c 100644 --- a/Source/Engine/Gramambular/Unigram.h +++ b/Source/Engine/Gramambular/Unigram.h @@ -34,6 +34,7 @@ namespace Formosa { namespace Gramambular { + class Unigram { public: Unigram(); @@ -41,50 +42,50 @@ class Unigram { KeyValuePair keyValue; double score; - bool operator==(const Unigram& inAnother) const; - bool operator<(const Unigram& inAnother) const; + bool operator==(const Unigram& another) const; + bool operator<(const Unigram& another) const; static bool ScoreCompare(const Unigram& a, const Unigram& b); }; -inline std::ostream& operator<<(std::ostream& inStream, const Unigram& inGram) { - std::streamsize p = inStream.precision(); - inStream.precision(6); - inStream << "(" << inGram.keyValue << "," << inGram.score << ")"; - inStream.precision(p); - return inStream; +inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) { + std::streamsize p = stream.precision(); + stream.precision(6); + stream << "(" << gram.keyValue << "," << gram.score << ")"; + stream.precision(p); + return stream; } -inline std::ostream& operator<<(std::ostream& inStream, - const std::vector& inGrams) { - inStream << "[" << inGrams.size() << "]=>{"; +inline std::ostream& operator<<(std::ostream& stream, + const std::vector& grams) { + stream << "[" << grams.size() << "]=>{"; size_t index = 0; - for (std::vector::const_iterator gi = inGrams.begin(); - gi != inGrams.end(); ++gi, ++index) { - inStream << index << "=>"; - inStream << *gi; - if (gi + 1 != inGrams.end()) { - inStream << ","; + for (std::vector::const_iterator gi = grams.begin(); + gi != grams.end(); ++gi, ++index) { + stream << index << "=>"; + stream << *gi; + if (gi + 1 != grams.end()) { + stream << ","; } } - inStream << "}"; - return inStream; + stream << "}"; + return stream; } inline Unigram::Unigram() : score(0.0) {} -inline bool Unigram::operator==(const Unigram& inAnother) const { - return keyValue == inAnother.keyValue && score == inAnother.score; +inline bool Unigram::operator==(const Unigram& another) const { + return keyValue == another.keyValue && score == another.score; } -inline bool Unigram::operator<(const Unigram& inAnother) const { - if (keyValue < inAnother.keyValue) { +inline bool Unigram::operator<(const Unigram& another) const { + if (keyValue < another.keyValue) { return true; - } else if (keyValue == inAnother.keyValue) { - return score < inAnother.score; + } else if (keyValue == another.keyValue) { + return score < another.score; } return false; } diff --git a/Source/Engine/Gramambular/Walker.h b/Source/Engine/Gramambular/Walker.h index 3c383105..b35d2322 100644 --- a/Source/Engine/Gramambular/Walker.h +++ b/Source/Engine/Gramambular/Walker.h @@ -38,8 +38,8 @@ namespace Gramambular { class Walker { public: Walker(Grid* inGrid); - const std::vector reverseWalk(size_t inLocation, - double inAccumulatedScore = 0.0); + const std::vector reverseWalk(size_t location, + double accumulatedScore = 0.0); protected: Grid* m_grid; @@ -48,14 +48,14 @@ class Walker { inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {} inline const std::vector Walker::reverseWalk( - size_t inLocation, double inAccumulatedScore) { - if (!inLocation || inLocation > m_grid->width()) { + size_t location, double accumulatedScore) { + if (!location || location > m_grid->width()) { return std::vector(); } std::vector > paths; - std::vector nodes = m_grid->nodesEndingAt(inLocation); + std::vector nodes = m_grid->nodesEndingAt(location); for (std::vector::iterator ni = nodes.begin(); ni != nodes.end(); ++ni) { @@ -63,10 +63,10 @@ inline const std::vector Walker::reverseWalk( continue; } - (*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); + (*ni).accumulatedScore = accumulatedScore + (*ni).node->score(); std::vector path = - reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); + reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore); path.insert(path.begin(), *ni); paths.push_back(path); From ea477d6c5bfbffc5b3847f03e09c96cf386777fa Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Sat, 19 Feb 2022 10:22:24 -0800 Subject: [PATCH 7/8] Fix cpplint --- Source/Engine/Gramambular/Bigram.h | 4 ++-- Source/Engine/Gramambular/BlockReadingBuilder.h | 7 ++++--- Source/Engine/Gramambular/Gramambular.h | 4 ++-- Source/Engine/Gramambular/Grid.cpp | 6 +++--- Source/Engine/Gramambular/Grid.h | 7 ++++--- Source/Engine/Gramambular/KeyValuePair.h | 4 ++-- Source/Engine/Gramambular/LanguageModel.h | 5 +++-- Source/Engine/Gramambular/Node.h | 10 ++++++---- Source/Engine/Gramambular/NodeAnchor.h | 6 ++++-- Source/Engine/Gramambular/Span.h | 4 ++-- Source/Engine/Gramambular/Unigram.h | 4 ++-- Source/Engine/Gramambular/Walker.h | 7 ++++--- 12 files changed, 38 insertions(+), 30 deletions(-) diff --git a/Source/Engine/Gramambular/Bigram.h b/Source/Engine/Gramambular/Bigram.h index 1b30fc96..51b257d2 100644 --- a/Source/Engine/Gramambular/Bigram.h +++ b/Source/Engine/Gramambular/Bigram.h @@ -25,8 +25,8 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef Bigram_h -#define Bigram_h +#ifndef BIGRAM_H_ +#define BIGRAM_H_ #include diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index dac88a12..5183df32 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -25,9 +25,10 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef BlockReadingBuilder_h -#define BlockReadingBuilder_h +#ifndef BLOCKREADINGBUILDER_H_ +#define BLOCKREADINGBUILDER_H_ +#include #include #include "Grid.h" @@ -38,7 +39,7 @@ namespace Gramambular { class BlockReadingBuilder { public: - BlockReadingBuilder(LanguageModel* lm); + explicit BlockReadingBuilder(LanguageModel* lm); void clear(); size_t length() const; diff --git a/Source/Engine/Gramambular/Gramambular.h b/Source/Engine/Gramambular/Gramambular.h index 1036ff70..ceaf7eab 100644 --- a/Source/Engine/Gramambular/Gramambular.h +++ b/Source/Engine/Gramambular/Gramambular.h @@ -25,8 +25,8 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef Gramambular_h -#define Gramambular_h +#ifndef GRAMAMBULAR_H_ +#define GRAMAMBULAR_H_ #include "Bigram.h" #include "BlockReadingBuilder.h" diff --git a/Source/Engine/Gramambular/Grid.cpp b/Source/Engine/Gramambular/Grid.cpp index 2019e139..55011175 100644 --- a/Source/Engine/Gramambular/Grid.cpp +++ b/Source/Engine/Gramambular/Grid.cpp @@ -35,9 +35,9 @@ std::string Grid::dumpDOT() { sst << "graph [ rankdir=LR ];" << std::endl; sst << "BOS;" << std::endl; - for (unsigned long p = 0; p < m_spans.size(); p++) { + for (size_t p = 0; p < m_spans.size(); p++) { Span& span = m_spans[p]; - for (unsigned long ni = 0; ni <= span.maximumLength(); ni++) { + for (size_t ni = 0; ni <= span.maximumLength(); ni++) { Node* np = span.nodeOfLength(ni); if (np) { if (!p) { @@ -48,7 +48,7 @@ std::string Grid::dumpDOT() { if (p + ni < m_spans.size()) { Span& dstSpan = m_spans[p + ni]; - for (unsigned long q = 0; q <= dstSpan.maximumLength(); q++) { + for (size_t q = 0; q <= dstSpan.maximumLength(); q++) { Node* dn = dstSpan.nodeOfLength(q); if (dn) { sst << np->currentKeyValue().value << " -> " diff --git a/Source/Engine/Gramambular/Grid.h b/Source/Engine/Gramambular/Grid.h index 2653074a..b11add0a 100644 --- a/Source/Engine/Gramambular/Grid.h +++ b/Source/Engine/Gramambular/Grid.h @@ -25,10 +25,12 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef Grid_h -#define Grid_h +#ifndef GRID_H_ +#define GRID_H_ #include +#include +#include #include "NodeAnchor.h" #include "Span.h" @@ -198,7 +200,6 @@ inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); node = nodeAnchor; break; - ; } } } diff --git a/Source/Engine/Gramambular/KeyValuePair.h b/Source/Engine/Gramambular/KeyValuePair.h index 1059dfdc..ba33668a 100644 --- a/Source/Engine/Gramambular/KeyValuePair.h +++ b/Source/Engine/Gramambular/KeyValuePair.h @@ -25,8 +25,8 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef KeyValuePair_h -#define KeyValuePair_h +#ifndef KEYVALUEPAIR_H_ +#define KEYVALUEPAIR_H_ #include #include diff --git a/Source/Engine/Gramambular/LanguageModel.h b/Source/Engine/Gramambular/LanguageModel.h index 79ae6ee2..39b19823 100644 --- a/Source/Engine/Gramambular/LanguageModel.h +++ b/Source/Engine/Gramambular/LanguageModel.h @@ -25,9 +25,10 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef LanguageModel_h -#define LanguageModel_h +#ifndef LANGUAGEMODEL_H_ +#define LANGUAGEMODEL_H_ +#include #include #include "Bigram.h" diff --git a/Source/Engine/Gramambular/Node.h b/Source/Engine/Gramambular/Node.h index a442c77f..3059c64b 100644 --- a/Source/Engine/Gramambular/Node.h +++ b/Source/Engine/Gramambular/Node.h @@ -25,10 +25,12 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef Node_h -#define Node_h +#ifndef NODE_H_ +#define NODE_H_ #include +#include +#include #include #include "LanguageModel.h" @@ -53,7 +55,7 @@ class Node { const std::string& key() const; double score() const; - double scoreForCandidate(std::string& candidate) const; + double scoreForCandidate(const std::string& candidate) const; const KeyValuePair currentKeyValue() const; double highestUnigramScore() const; @@ -190,7 +192,7 @@ inline const std::string& Node::key() const { return m_key; } inline double Node::score() const { return m_score; } -inline double Node::scoreForCandidate(std::string& candidate) const { +inline double Node::scoreForCandidate(const std::string& candidate) const { for (auto unigram : m_unigrams) { if (unigram.keyValue.value == candidate) { return unigram.score; diff --git a/Source/Engine/Gramambular/NodeAnchor.h b/Source/Engine/Gramambular/NodeAnchor.h index f2b4dfc8..3f81b4c2 100644 --- a/Source/Engine/Gramambular/NodeAnchor.h +++ b/Source/Engine/Gramambular/NodeAnchor.h @@ -25,8 +25,10 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef NodeAnchor_h -#define NodeAnchor_h +#ifndef NODEANCHOR_H_ +#define NODEANCHOR_H_ + +#include #include "Node.h" diff --git a/Source/Engine/Gramambular/Span.h b/Source/Engine/Gramambular/Span.h index 795c2a48..aa1cf38e 100644 --- a/Source/Engine/Gramambular/Span.h +++ b/Source/Engine/Gramambular/Span.h @@ -25,8 +25,8 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef Span_h -#define Span_h +#ifndef SPAN_H_ +#define SPAN_H_ #include #include diff --git a/Source/Engine/Gramambular/Unigram.h b/Source/Engine/Gramambular/Unigram.h index df09cd0c..6cd546b3 100644 --- a/Source/Engine/Gramambular/Unigram.h +++ b/Source/Engine/Gramambular/Unigram.h @@ -25,8 +25,8 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef Unigram_h -#define Unigram_h +#ifndef UNIGRAM_H_ +#define UNIGRAM_H_ #include diff --git a/Source/Engine/Gramambular/Walker.h b/Source/Engine/Gramambular/Walker.h index b35d2322..972214e3 100644 --- a/Source/Engine/Gramambular/Walker.h +++ b/Source/Engine/Gramambular/Walker.h @@ -25,10 +25,11 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#ifndef Walker_h -#define Walker_h +#ifndef WALKER_H_ +#define WALKER_H_ #include +#include #include "Grid.h" @@ -37,7 +38,7 @@ namespace Gramambular { class Walker { public: - Walker(Grid* inGrid); + explicit Walker(Grid* inGrid); const std::vector reverseWalk(size_t location, double accumulatedScore = 0.0); From 154c83bbd6c6cf33d34160262150b03aa843b062 Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Sat, 19 Feb 2022 10:24:39 -0800 Subject: [PATCH 8/8] Run GramambularTest as part of the CI This means we now have test coverage for every package in McBopomofo. --- .../workflows/continuous-integration-workflow-xcode-12.yml | 6 ++++++ .../continuous-integration-workflow-xcode-latest.yml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/continuous-integration-workflow-xcode-12.yml b/.github/workflows/continuous-integration-workflow-xcode-12.yml index f8f96779..ca993d6b 100644 --- a/.github/workflows/continuous-integration-workflow-xcode-12.yml +++ b/.github/workflows/continuous-integration-workflow-xcode-12.yml @@ -24,6 +24,12 @@ jobs: - name: Run MandarinTest run: make runTest working-directory: Source/Engine/Mandarin/build + - name: Build GramambularTest + run: cmake -S . -B build + working-directory: Source/Engine/Gramambular + - name: Run GramambularTest + run: make runTest + working-directory: Source/Engine/Gramambular/build - name: Test McBopomofo App Bundle run: xcodebuild -scheme McBopomofo -configuration Debug test - name: Test CandidateUI diff --git a/.github/workflows/continuous-integration-workflow-xcode-latest.yml b/.github/workflows/continuous-integration-workflow-xcode-latest.yml index a03bf5f8..5222a931 100644 --- a/.github/workflows/continuous-integration-workflow-xcode-latest.yml +++ b/.github/workflows/continuous-integration-workflow-xcode-latest.yml @@ -24,6 +24,12 @@ jobs: - name: Run MandarinTest run: make runTest working-directory: Source/Engine/Mandarin/build + - name: Build GramambularTest + run: cmake -S . -B build + working-directory: Source/Engine/Gramambular + - name: Run GramambularTest + run: make runTest + working-directory: Source/Engine/Gramambular/build - name: Test McBopomofo App Bundle run: xcodebuild -scheme McBopomofo -configuration Debug test - name: Test CandidateUI