diff --git a/Source/Engine/Gramambular/Bigram.h b/Source/Engine/Gramambular/Bigram.h index 42ac9033..750db4c1 100644 --- a/Source/Engine/Gramambular/Bigram.h +++ b/Source/Engine/Gramambular/Bigram.h @@ -33,74 +33,68 @@ #include "KeyValuePair.h" namespace Formosa { - namespace Gramambular { - class Bigram { - public: - Bigram(); - - KeyValuePair preceedingKeyValue; - KeyValuePair keyValue; - double score; - - bool operator==(const Bigram& inAnother) const; - bool operator<(const Bigram& inAnother) const; - }; +namespace Gramambular { +class Bigram { + public: + Bigram(); - inline ostream& operator<<(ostream& inStream, const Bigram& inGram) - { - streamsize p = inStream.precision(); - inStream.precision(6); - inStream << "(" << inGram.keyValue << "|" <& inGrams) - { - inStream << "[" << inGrams.size() << "]=>{"; - - size_t index = 0; - - for (vector::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { - inStream << index << "=>"; - inStream << *gi; - if (gi + 1 != inGrams.end()) { - inStream << ","; - } - } - - inStream << "}"; - return inStream; - } - - inline Bigram::Bigram() - : score(0.0) - { - } - - inline bool Bigram::operator==(const Bigram& inAnother) const - { - return preceedingKeyValue == inAnother.preceedingKeyValue && keyValue == inAnother.keyValue && score == inAnother.score; - } - - inline bool Bigram::operator<(const Bigram& inAnother) const - { - if (preceedingKeyValue < inAnother.preceedingKeyValue) { - return true; - } - else if (preceedingKeyValue == inAnother.preceedingKeyValue) { - if (keyValue < inAnother.keyValue) { - return true; - } - else if (keyValue == inAnother.keyValue) { - return score < inAnother.score; - } - return false; - } + bool operator==(const Bigram& inAnother) const; + bool operator<(const Bigram& inAnother) const; +}; - return false; - } - } +inline ostream& operator<<(ostream& inStream, const Bigram& inGram) { + streamsize p = inStream.precision(); + inStream.precision(6); + inStream << "(" << inGram.keyValue << "|" << inGram.preceedingKeyValue << "," + << inGram.score << ")"; + inStream.precision(p); + return inStream; } +inline ostream& operator<<(ostream& inStream, const vector& inGrams) { + inStream << "[" << inGrams.size() << "]=>{"; + + size_t index = 0; + + for (vector::const_iterator gi = inGrams.begin(); gi != inGrams.end(); + ++gi, ++index) { + inStream << index << "=>"; + inStream << *gi; + if (gi + 1 != inGrams.end()) { + inStream << ","; + } + } + + inStream << "}"; + return inStream; +} + +inline Bigram::Bigram() : score(0.0) {} + +inline bool Bigram::operator==(const Bigram& inAnother) const { + return preceedingKeyValue == inAnother.preceedingKeyValue && + keyValue == inAnother.keyValue && score == inAnother.score; +} + +inline bool Bigram::operator<(const Bigram& inAnother) const { + if (preceedingKeyValue < inAnother.preceedingKeyValue) { + return true; + } else if (preceedingKeyValue == inAnother.preceedingKeyValue) { + if (keyValue < inAnother.keyValue) { + return true; + } else if (keyValue == inAnother.keyValue) { + return score < inAnother.score; + } + return false; + } + + return false; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/BlockReadingBuilder.h b/Source/Engine/Gramambular/BlockReadingBuilder.h index 8c503fcc..219e6a5a 100644 --- a/Source/Engine/Gramambular/BlockReadingBuilder.h +++ b/Source/Engine/Gramambular/BlockReadingBuilder.h @@ -29,198 +29,185 @@ #define BlockReadingBuilder_h #include + #include "Grid.h" #include "LanguageModel.h" namespace Formosa { - namespace Gramambular { - using namespace std; - - class BlockReadingBuilder { - public: - BlockReadingBuilder(LanguageModel *inLM); - void clear(); - - size_t length() const; - size_t cursorIndex() const; - void setCursorIndex(size_t inNewIndex); - void insertReadingAtCursor(const string& inReading); - bool deleteReadingBeforeCursor(); // backspace - bool deleteReadingAfterCursor(); // delete - - bool removeHeadReadings(size_t count); - - void setJoinSeparator(const string& separator); - const string joinSeparator() const; +namespace Gramambular { +using namespace std; - vector readings() const; +class BlockReadingBuilder { + public: + BlockReadingBuilder(LanguageModel* inLM); + void clear(); - Grid& grid(); - - protected: - void build(); - - static const string Join(vector::const_iterator begin, vector::const_iterator end, const string& separator); - - //最多使用六個字組成一個詞 - static const size_t MaximumBuildSpanLength = 6; - - size_t m_cursorIndex; - vector m_readings; - - Grid m_grid; - LanguageModel *m_LM; - string m_joinSeparator; - }; - - inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM) - : m_LM(inLM) - , m_cursorIndex(0) - { - } - - inline void BlockReadingBuilder::clear() - { - m_cursorIndex = 0; - m_readings.clear(); - m_grid.clear(); - } - - inline size_t BlockReadingBuilder::length() const - { - return m_readings.size(); - } - - inline size_t BlockReadingBuilder::cursorIndex() const - { - return m_cursorIndex; - } + size_t length() const; + size_t cursorIndex() const; + void setCursorIndex(size_t inNewIndex); + void insertReadingAtCursor(const string& inReading); + bool deleteReadingBeforeCursor(); // backspace + bool deleteReadingAfterCursor(); // delete - inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) - { - m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex; - } - - inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading) - { - m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); - - m_grid.expandGridByOneAtLocation(m_cursorIndex); - build(); - m_cursorIndex++; - } + bool removeHeadReadings(size_t count); - inline vector BlockReadingBuilder::readings() const - { - return m_readings; - } - - inline bool BlockReadingBuilder::deleteReadingBeforeCursor() - { - if (!m_cursorIndex) { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex); - m_cursorIndex--; - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; - } - - inline bool BlockReadingBuilder::deleteReadingAfterCursor() - { - if (m_cursorIndex == m_readings.size()) { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1); - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; - } - - inline bool BlockReadingBuilder::removeHeadReadings(size_t count) - { - if (count > length()) { - return false; - } - - for (size_t i = 0; i < count; i++) { - if (m_cursorIndex) { - m_cursorIndex--; - } - m_readings.erase(m_readings.begin(), m_readings.begin() + 1); - m_grid.shrinkGridByOneAtLocation(0); - build(); - } - - return true; - } - - inline void BlockReadingBuilder::setJoinSeparator(const string& separator) - { - m_joinSeparator = separator; - } - - inline const string BlockReadingBuilder::joinSeparator() const - { - return m_joinSeparator; - } + void setJoinSeparator(const string& separator); + const string joinSeparator() const; - inline Grid& BlockReadingBuilder::grid() - { - return m_grid; - } + vector readings() const; - inline void BlockReadingBuilder::build() - { - if (!m_LM) { - return; - } - - size_t begin = 0; - size_t end = m_cursorIndex + MaximumBuildSpanLength; - - if (m_cursorIndex < MaximumBuildSpanLength) { - begin = 0; - } - else { - begin = m_cursorIndex - MaximumBuildSpanLength; - } - - if (end > m_readings.size()) { - end = m_readings.size(); - } - - for (size_t p = begin ; p < end ; p++) { - for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) { - string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); - if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { - vector unigrams = m_LM->unigramsForKey(combinedReading); + Grid& grid(); - if (unigrams.size() > 0) { - Node n(combinedReading, unigrams, vector()); - m_grid.insertNode(n, p, q); - } - } - } - } - } - - inline const string BlockReadingBuilder::Join(vector::const_iterator begin, vector::const_iterator end, const string& separator) - { - string result; - for (vector::const_iterator iter = begin ; iter != end ; ) { - result += *iter; - ++iter; - if (iter != end) { - result += separator; - } - } - return result; - } - } + protected: + void build(); + + static const string Join(vector::const_iterator begin, + vector::const_iterator end, + const string& separator); + + //最多使用六個字組成一個詞 + static const size_t MaximumBuildSpanLength = 6; + + size_t m_cursorIndex; + vector m_readings; + + Grid m_grid; + LanguageModel* m_LM; + string m_joinSeparator; +}; + +inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* inLM) + : m_LM(inLM), m_cursorIndex(0) {} + +inline void BlockReadingBuilder::clear() { + m_cursorIndex = 0; + m_readings.clear(); + m_grid.clear(); } +inline size_t BlockReadingBuilder::length() const { return m_readings.size(); } + +inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; } + +inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) { + m_cursorIndex = + inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex; +} + +inline void BlockReadingBuilder::insertReadingAtCursor( + const string& inReading) { + m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); + + m_grid.expandGridByOneAtLocation(m_cursorIndex); + build(); + m_cursorIndex++; +} + +inline vector BlockReadingBuilder::readings() const { + return m_readings; +} + +inline bool BlockReadingBuilder::deleteReadingBeforeCursor() { + if (!m_cursorIndex) { + return false; + } + + m_readings.erase(m_readings.begin() + m_cursorIndex - 1, + m_readings.begin() + m_cursorIndex); + m_cursorIndex--; + m_grid.shrinkGridByOneAtLocation(m_cursorIndex); + build(); + return true; +} + +inline bool BlockReadingBuilder::deleteReadingAfterCursor() { + if (m_cursorIndex == m_readings.size()) { + return false; + } + + m_readings.erase(m_readings.begin() + m_cursorIndex, + m_readings.begin() + m_cursorIndex + 1); + m_grid.shrinkGridByOneAtLocation(m_cursorIndex); + build(); + return true; +} + +inline bool BlockReadingBuilder::removeHeadReadings(size_t count) { + if (count > length()) { + return false; + } + + for (size_t i = 0; i < count; i++) { + if (m_cursorIndex) { + m_cursorIndex--; + } + m_readings.erase(m_readings.begin(), m_readings.begin() + 1); + m_grid.shrinkGridByOneAtLocation(0); + build(); + } + + return true; +} + +inline void BlockReadingBuilder::setJoinSeparator(const string& separator) { + m_joinSeparator = separator; +} + +inline const string BlockReadingBuilder::joinSeparator() const { + return m_joinSeparator; +} + +inline Grid& BlockReadingBuilder::grid() { return m_grid; } + +inline void BlockReadingBuilder::build() { + if (!m_LM) { + return; + } + + size_t begin = 0; + size_t end = m_cursorIndex + MaximumBuildSpanLength; + + if (m_cursorIndex < MaximumBuildSpanLength) { + begin = 0; + } else { + begin = m_cursorIndex - MaximumBuildSpanLength; + } + + if (end > m_readings.size()) { + end = m_readings.size(); + } + + for (size_t p = begin; p < end; p++) { + for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) { + string combinedReading = Join( + m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); + if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, + combinedReading)) { + vector unigrams = m_LM->unigramsForKey(combinedReading); + + if (unigrams.size() > 0) { + Node n(combinedReading, unigrams, vector()); + m_grid.insertNode(n, p, q); + } + } + } + } +} + +inline const string BlockReadingBuilder::Join( + vector::const_iterator begin, vector::const_iterator end, + const string& separator) { + string result; + for (vector::const_iterator iter = begin; iter != end;) { + result += *iter; + ++iter; + if (iter != end) { + result += separator; + } + } + return result; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/GramambularTest.cpp b/Source/Engine/Gramambular/GramambularTest.cpp index d9ea65a3..8f0e008d 100644 --- a/Source/Engine/Gramambular/GramambularTest.cpp +++ b/Source/Engine/Gramambular/GramambularTest.cpp @@ -21,14 +21,15 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#include "gtest/gtest.h" #include +#include #include #include -#include -#include #include +#include + #include "Gramambular.h" +#include "gtest/gtest.h" const char* SampleData = R"( # @@ -122,11 +123,9 @@ const char* SampleData = R"( using namespace std; using namespace Formosa::Gramambular; -class SimpleLM : public LanguageModel -{ +class SimpleLM : public LanguageModel { public: - SimpleLM(const char* input, bool swapKeyValue = false) - { + SimpleLM(const char* input, bool swapKeyValue = false) { stringstream sstream(input); while (sstream.good()) { string line; @@ -149,8 +148,7 @@ class SimpleLM : public LanguageModel if (swapKeyValue) { u.keyValue.key = col1; u.keyValue.value = col0; - } - else { + } else { u.keyValue.key = col0; u.keyValue.value = col1; } @@ -161,19 +159,17 @@ class SimpleLM : public LanguageModel } } - const vector bigramsForKeys(const string &preceedingKey, const string& key) override - { + const vector bigramsForKeys(const string& preceedingKey, + const string& key) override { return vector(); } - const vector unigramsForKey(const string &key) override - { + const vector unigramsForKey(const string& key) override { map >::const_iterator f = m_db.find(key); return f == m_db.end() ? vector() : (*f).second; } - bool hasUnigramsForKey(const string& key) override - { + bool hasUnigramsForKey(const string& key) override { map >::const_iterator f = m_db.find(key); return f != m_db.end(); } @@ -208,7 +204,8 @@ TEST(GramambularTest, InputTest) { reverse(walked.begin(), walked.end()); vector composed; - for (vector::iterator wi = walked.begin() ; wi != walked.end() ; ++wi) { + for (vector::iterator wi = walked.begin(); wi != walked.end(); + ++wi) { composed.push_back((*wi).node->currentKeyValue().value); } ASSERT_EQ(composed, (vector{"高科技", "公司", "的", "年中", "獎金"})); @@ -233,8 +230,10 @@ TEST(GramambularTest, WordSegmentationTest) { reverse(walked.begin(), walked.end()); vector segmented; - for (vector::iterator wi = walked.begin(); wi != walked.end(); ++wi) { + for (vector::iterator wi = walked.begin(); wi != walked.end(); + ++wi) { segmented.push_back((*wi).node->currentKeyValue().key); } - ASSERT_EQ(segmented, (vector{"高科技", "公司", "的", "年終", "獎金"})); + ASSERT_EQ(segmented, + (vector{"高科技", "公司", "的", "年終", "獎金"})); } diff --git a/Source/Engine/Gramambular/Grid.h b/Source/Engine/Gramambular/Grid.h index ccceacc1..75c9c52c 100644 --- a/Source/Engine/Gramambular/Grid.h +++ b/Source/Engine/Gramambular/Grid.h @@ -29,244 +29,243 @@ #define Grid_h #include + #include "NodeAnchor.h" #include "Span.h" namespace Formosa { - namespace Gramambular { - - class Grid { - public: - void clear(); - void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength); - bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey); +namespace Gramambular { - void expandGridByOneAtLocation(size_t inLocation); - void shrinkGridByOneAtLocation(size_t inLocation); +class Grid { + public: + void clear(); + void insertNode(const Node& inNode, size_t inLocation, + size_t inSpanningLength); + bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, + size_t inSpanningLength, + const string& inKey); - size_t width() const; - vector nodesEndingAt(size_t inLocation); - vector nodesCrossingOrEndingAt(size_t inLocation); + void expandGridByOneAtLocation(size_t inLocation); + void shrinkGridByOneAtLocation(size_t inLocation); - // "Freeze" the node with the unigram that represents the selected candidate value. - // After this, the node that contains the unigram will always be evaluated to that - // unigram, while all other overlapping nodes will be reset to their initial state - // (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.) - NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); + size_t width() const; + vector nodesEndingAt(size_t inLocation); + vector nodesCrossingOrEndingAt(size_t inLocation); - // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only - // boost the unigram that represents the value with an overriding score. This - // has the same side effect as fixNodeSelectedCandidate, which is that all other - // overlapping nodes will be reset to their initial state. - void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); - - const string dumpDOT(); - - protected: - vector m_spans; - }; - - inline void Grid::clear() - { - m_spans.clear(); - } - - inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength) - { - if (inLocation >= m_spans.size()) { - size_t diff = inLocation - m_spans.size() + 1; - - for (size_t i = 0 ; i < diff ; i++) { - m_spans.push_back(Span()); - } - } + // "Freeze" the node with the unigram that represents the selected candidate + // value. After this, the node that contains the unigram will always be + // evaluated to that unigram, while all other overlapping nodes will be reset + // to their initial state (that is, if any of those nodes were "frozen" or + // fixed, they will be unfrozen.) + NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); - m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); - } + // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, + // only boost the unigram that represents the value with an overriding score. + // This has the same side effect as fixNodeSelectedCandidate, which is that + // all other overlapping nodes will be reset to their initial state. + void overrideNodeScoreForSelectedCandidate(size_t location, + const string& value, + float overridingScore); - inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey) - { - if (inLocation > m_spans.size()) { - return false; - } - - const Node *n = m_spans[inLocation].nodeOfLength(inSpanningLength); - if (!n) { - return false; - } - - return inKey == n->key(); - } + const string dumpDOT(); - inline void Grid::expandGridByOneAtLocation(size_t inLocation) - { - if (!inLocation || inLocation == m_spans.size()) { - m_spans.insert(m_spans.begin() + inLocation, Span()); - } - else { - m_spans.insert(m_spans.begin() + inLocation, Span()); - for (size_t i = 0 ; i < inLocation ; i++) { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); - } - } - } - - inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) - { - if (inLocation >= m_spans.size()) { - return; - } - - m_spans.erase(m_spans.begin() + inLocation); - for (size_t i = 0 ; i < inLocation ; i++) { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); - } - } + protected: + vector m_spans; +}; - inline size_t Grid::width() const - { - return m_spans.size(); - } - - inline vector Grid::nodesEndingAt(size_t inLocation) - { - vector result; - - if (m_spans.size() && inLocation <= m_spans.size()) { - for (size_t i = 0 ; i < inLocation ; i++) { - Span& span = m_spans[i]; - if (i + span.maximumLength() >= inLocation) { - Node *np = span.nodeOfLength(inLocation - i); - if (np) { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = inLocation - i; - - result.push_back(na); - } - } - } - } - - return result; - } +inline void Grid::clear() { m_spans.clear(); } - inline vector Grid::nodesCrossingOrEndingAt(size_t inLocation) - { - vector result; - - if (m_spans.size() && inLocation <= m_spans.size()) { - for (size_t i = 0 ; i < inLocation ; i++) { - Span& span = m_spans[i]; - - if (i + span.maximumLength() >= inLocation) { +inline void Grid::insertNode(const Node& inNode, size_t inLocation, + size_t inSpanningLength) { + if (inLocation >= m_spans.size()) { + size_t diff = inLocation - m_spans.size() + 1; - for (size_t j = 1, m = span.maximumLength(); j <= m ; j++) { - - if (i + j < inLocation) { - continue; - } - - Node *np = span.nodeOfLength(j); - if (np) { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = inLocation - i; - - result.push_back(na); - } - } - } - } - } - - return result; - } - - // For nodes found at the location, fix their currently-selected candidate using the supplied string value. - inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value) - { - vector nodes = nodesCrossingOrEndingAt(location); - NodeAnchor node; - for (auto nodeAnchor : nodes) { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == value) { - const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); - node = nodeAnchor; - break;; - } - } - } - return node; - } - - inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore) - { - vector nodes = nodesCrossingOrEndingAt(location); - for (auto nodeAnchor : nodes) { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == value) { - const_cast(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); - break; - } - } - } - } - - inline const string Grid::dumpDOT() - { - stringstream sst; - sst << "digraph {" << endl; - sst << "graph [ rankdir=LR ];" << endl; - sst << "BOS;" << endl; - - for (size_t p = 0 ; p < m_spans.size() ; p++) { - Span& span = m_spans[p]; - for (size_t ni = 0 ; ni <= span.maximumLength() ; ni++) { - Node* np = span.nodeOfLength(ni); - if (np) { - if (!p) { - sst << "BOS -> " << np->currentKeyValue().value << ";" << endl; - } - - sst << np->currentKeyValue().value << ";" << endl; - - if (p + ni < m_spans.size()) { - Span& dstSpan = m_spans[p+ni]; - for (size_t q = 0 ; q <= dstSpan.maximumLength() ; q++) { - Node *dn = dstSpan.nodeOfLength(q); - if (dn) { - sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl; - } - } - } - - if (p + ni == m_spans.size()) { - sst << np->currentKeyValue().value << " -> " << "EOS;" << endl; - } - } - } - } - - sst << "EOS;" << endl; - sst << "}"; - return sst.str(); - } + for (size_t i = 0; i < diff; i++) { + m_spans.push_back(Span()); } + } + + m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); } +inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey( + size_t inLocation, size_t inSpanningLength, const string& inKey) { + if (inLocation > m_spans.size()) { + return false; + } + + const Node* n = m_spans[inLocation].nodeOfLength(inSpanningLength); + if (!n) { + return false; + } + + return inKey == n->key(); +} + +inline void Grid::expandGridByOneAtLocation(size_t inLocation) { + if (!inLocation || inLocation == m_spans.size()) { + m_spans.insert(m_spans.begin() + inLocation, Span()); + } else { + m_spans.insert(m_spans.begin() + inLocation, Span()); + for (size_t i = 0; i < inLocation; i++) { + // zaps overlapping spans + m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); + } + } +} + +inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) { + if (inLocation >= m_spans.size()) { + return; + } + + m_spans.erase(m_spans.begin() + inLocation); + for (size_t i = 0; i < inLocation; i++) { + // zaps overlapping spans + m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); + } +} + +inline size_t Grid::width() const { return m_spans.size(); } + +inline vector Grid::nodesEndingAt(size_t inLocation) { + vector result; + + if (m_spans.size() && inLocation <= m_spans.size()) { + for (size_t i = 0; i < inLocation; i++) { + Span& span = m_spans[i]; + if (i + span.maximumLength() >= inLocation) { + Node* np = span.nodeOfLength(inLocation - i); + if (np) { + NodeAnchor na; + na.node = np; + na.location = i; + na.spanningLength = inLocation - i; + + result.push_back(na); + } + } + } + } + + return result; +} + +inline vector Grid::nodesCrossingOrEndingAt(size_t inLocation) { + vector result; + + if (m_spans.size() && inLocation <= m_spans.size()) { + for (size_t i = 0; i < inLocation; i++) { + Span& span = m_spans[i]; + + if (i + span.maximumLength() >= inLocation) { + for (size_t j = 1, m = span.maximumLength(); j <= m; j++) { + if (i + j < inLocation) { + continue; + } + + Node* np = span.nodeOfLength(j); + if (np) { + NodeAnchor na; + na.node = np; + na.location = i; + na.spanningLength = inLocation - i; + + result.push_back(na); + } + } + } + } + } + + return result; +} + +// For nodes found at the location, fix their currently-selected candidate using +// the supplied string value. +inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, + const string& value) { + vector nodes = nodesCrossingOrEndingAt(location); + NodeAnchor node; + for (auto nodeAnchor : nodes) { + auto candidates = nodeAnchor.node->candidates(); + + // Reset the candidate-fixed state of every node at the location. + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == value) { + const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); + node = nodeAnchor; + break; + ; + } + } + } + return node; +} + +inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, + const string& value, + float overridingScore) { + vector nodes = nodesCrossingOrEndingAt(location); + for (auto nodeAnchor : nodes) { + auto candidates = nodeAnchor.node->candidates(); + + // Reset the candidate-fixed state of every node at the location. + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) { + if (candidates[i].value == value) { + const_cast(nodeAnchor.node) + ->selectFloatingCandidateAtIndex(i, overridingScore); + break; + } + } + } +} + +inline const string Grid::dumpDOT() { + stringstream sst; + sst << "digraph {" << endl; + sst << "graph [ rankdir=LR ];" << endl; + sst << "BOS;" << endl; + + for (size_t p = 0; p < m_spans.size(); p++) { + Span& span = m_spans[p]; + for (size_t ni = 0; ni <= span.maximumLength(); ni++) { + Node* np = span.nodeOfLength(ni); + if (np) { + if (!p) { + sst << "BOS -> " << np->currentKeyValue().value << ";" << endl; + } + + sst << np->currentKeyValue().value << ";" << endl; + + if (p + ni < m_spans.size()) { + Span& dstSpan = m_spans[p + ni]; + for (size_t q = 0; q <= dstSpan.maximumLength(); q++) { + Node* dn = dstSpan.nodeOfLength(q); + if (dn) { + sst << np->currentKeyValue().value << " -> " + << dn->currentKeyValue().value << ";" << endl; + } + } + } + + if (p + ni == m_spans.size()) { + sst << np->currentKeyValue().value << " -> " + << "EOS;" << endl; + } + } + } + } + + sst << "EOS;" << endl; + sst << "}"; + return sst.str(); +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/KeyValuePair.h b/Source/Engine/Gramambular/KeyValuePair.h index 0abbb891..ac4395de 100644 --- a/Source/Engine/Gramambular/KeyValuePair.h +++ b/Source/Engine/Gramambular/KeyValuePair.h @@ -32,40 +32,36 @@ #include namespace Formosa { - namespace Gramambular { - using namespace std; - - class KeyValuePair { - public: - string key; - string value; +namespace Gramambular { +using namespace std; - bool operator==(const KeyValuePair& inAnother) const; - bool operator<(const KeyValuePair& inAnother) const; - }; +class KeyValuePair { + public: + string key; + string value; - inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) - { - inStream << "(" << inPair.key << "," << inPair.value << ")"; - return inStream; - } - - inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const - { - return key == inAnother.key && value == inAnother.value; - } + bool operator==(const KeyValuePair& inAnother) const; + bool operator<(const KeyValuePair& inAnother) const; +}; - inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const - { - if (key < inAnother.key) { - return true; - } - else if (key == inAnother.key) { - return value < inAnother.value; - } - return false; - } - } +inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) { + inStream << "(" << inPair.key << "," << inPair.value << ")"; + return inStream; } +inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const { + return key == inAnother.key && value == inAnother.value; +} + +inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const { + if (key < inAnother.key) { + return true; + } else if (key == inAnother.key) { + return value < inAnother.value; + } + return false; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/LanguageModel.h b/Source/Engine/Gramambular/LanguageModel.h index 65331b37..8c41a529 100644 --- a/Source/Engine/Gramambular/LanguageModel.h +++ b/Source/Engine/Gramambular/LanguageModel.h @@ -29,24 +29,25 @@ #define LanguageModel_h #include + #include "Bigram.h" #include "Unigram.h" namespace Formosa { - namespace Gramambular { - - using namespace std; - - class LanguageModel { - public: - virtual ~LanguageModel() {} +namespace Gramambular { - virtual const vector bigramsForKeys(const string &preceedingKey, const string& key) = 0; - virtual const vector unigramsForKey(const string &key) = 0; - virtual bool hasUnigramsForKey(const string& key) = 0; - }; - } -} +using namespace std; +class LanguageModel { + public: + virtual ~LanguageModel() {} + + virtual const vector bigramsForKeys(const string& preceedingKey, + const string& key) = 0; + virtual const vector unigramsForKey(const string& key) = 0; + virtual bool hasUnigramsForKey(const string& key) = 0; +}; +} // namespace Gramambular +} // namespace Formosa #endif diff --git a/Source/Engine/Gramambular/Node.h b/Source/Engine/Gramambular/Node.h index a877d27c..9c15ce53 100644 --- a/Source/Engine/Gramambular/Node.h +++ b/Source/Engine/Gramambular/Node.h @@ -30,202 +30,191 @@ #include #include + #include "LanguageModel.h" namespace Formosa { - namespace Gramambular { - using namespace std; +namespace Gramambular { +using namespace std; - class Node { - public: - Node(); - Node(const string& inKey, const vector& inUnigrams, const vector& inBigrams); - - void primeNodeWithPreceedingKeyValues(const vector& inKeyValues); - - bool isCandidateFixed() const; - const vector& candidates() const; - void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); - void resetCandidate(); - void selectFloatingCandidateAtIndex(size_t index, double score); - - const string& key() const; - double score() const; - double scoreForCandidate(string &candidate) const; - const KeyValuePair currentKeyValue() const; - double highestUnigramScore() const; - - protected: - const LanguageModel* m_LM; - - string m_key; - double m_score; - - vector m_unigrams; - vector m_candidates; - map m_valueUnigramIndexMap; - map > m_preceedingGramBigramMap; - - bool m_candidateFixed; - size_t m_selectedUnigramIndex; - - friend ostream& operator<<(ostream& inStream, const Node& inNode); - }; +class Node { + public: + Node(); + Node(const string& inKey, const vector& inUnigrams, + const vector& inBigrams); - inline ostream& operator<<(ostream& inStream, const Node& inNode) - { - inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") - << ",selected:" << inNode.m_selectedUnigramIndex - << "," << inNode.m_unigrams << ")"; - return inStream; - } + void primeNodeWithPreceedingKeyValues( + const vector& inKeyValues); - inline Node::Node() - : m_candidateFixed(false) - , m_selectedUnigramIndex(0) - , m_score(0.0) - { - } + bool isCandidateFixed() const; + const vector& candidates() const; + void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); + void resetCandidate(); + void selectFloatingCandidateAtIndex(size_t index, double score); - inline Node::Node(const string& inKey, const vector& inUnigrams, const vector& inBigrams) - : m_key(inKey) - , m_unigrams(inUnigrams) - , m_candidateFixed(false) - , m_selectedUnigramIndex(0) - , m_score(0.0) - { - stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); - - if (m_unigrams.size()) { - m_score = m_unigrams[0].score; - } - - size_t i = 0; - for (vector::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) { - m_valueUnigramIndexMap[(*ui).keyValue.value] = i; - i++; - - m_candidates.push_back((*ui).keyValue); - } - - for (vector::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) { - m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); - } - } - - inline void Node::primeNodeWithPreceedingKeyValues(const vector& inKeyValues) - { - size_t newIndex = m_selectedUnigramIndex; - double max = m_score; + const string& key() const; + double score() const; + double scoreForCandidate(string& candidate) const; + const KeyValuePair currentKeyValue() const; + double highestUnigramScore() const; - if (!isCandidateFixed()) { - for (vector::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) { - map >::const_iterator f = m_preceedingGramBigramMap.find(*kvi); - if (f != m_preceedingGramBigramMap.end()) { - const vector& bigrams = (*f).second; - - for (vector::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) { - const Bigram& bigram = *bi; - if (bigram.score > max) { - map::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value); - if (uf != m_valueUnigramIndexMap.end()) { - newIndex = (*uf).second; - max = bigram.score; - } - } - } - } - } - } + protected: + const LanguageModel* m_LM; - if (m_score != max) { - m_score = max; - } - - if (newIndex != m_selectedUnigramIndex) { - m_selectedUnigramIndex = newIndex; - } - } - - inline bool Node::isCandidateFixed() const - { - return m_candidateFixed; - } - - inline const vector& Node::candidates() const - { - return m_candidates; - } + string m_key; + double m_score; - inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) - { - if (inIndex >= m_unigrams.size()) { - m_selectedUnigramIndex = 0; - } - else { - m_selectedUnigramIndex = inIndex; - } - - m_candidateFixed = inFix; - m_score = 99; - } + vector m_unigrams; + vector m_candidates; + map m_valueUnigramIndexMap; + map > m_preceedingGramBigramMap; - inline void Node::resetCandidate() - { - m_selectedUnigramIndex = 0; - m_candidateFixed = 0; - if (m_unigrams.size()) { - m_score = m_unigrams[0].score; - } - } + bool m_candidateFixed; + size_t m_selectedUnigramIndex; - inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { - if (index >= m_unigrams.size()) { - m_selectedUnigramIndex = 0; - } else { - m_selectedUnigramIndex = index; - } - m_candidateFixed = false; - m_score = score; - } - - inline const string& Node::key() const - { - return m_key; - } - - inline double Node::score() const - { - return m_score; - } + friend ostream& operator<<(ostream& inStream, const Node& inNode); +}; - inline double Node::scoreForCandidate(string &candidate) const - { - for (auto unigram : m_unigrams) { - if (unigram.keyValue.value == candidate) { - return unigram.score; - } - } - return 0.0; - } - - inline double Node::highestUnigramScore() const { - if (m_unigrams.empty()) { - return 0.0; - } - return m_unigrams[0].score; - } - - inline const KeyValuePair Node::currentKeyValue() const - { - if(m_selectedUnigramIndex >= m_unigrams.size()) { - return KeyValuePair(); - } - else { - return m_candidates[m_selectedUnigramIndex]; - } - } - } +inline ostream& operator<<(ostream& inStream, const Node& inNode) { + inStream << "(node,key:" << inNode.m_key + << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") + << ",selected:" << inNode.m_selectedUnigramIndex << "," + << inNode.m_unigrams << ")"; + return inStream; } +inline Node::Node() + : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {} + +inline Node::Node(const string& inKey, const vector& inUnigrams, + const vector& inBigrams) + : m_key(inKey), + m_unigrams(inUnigrams), + m_candidateFixed(false), + m_selectedUnigramIndex(0), + m_score(0.0) { + stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); + + if (m_unigrams.size()) { + m_score = m_unigrams[0].score; + } + + size_t i = 0; + for (vector::const_iterator ui = m_unigrams.begin(); + ui != m_unigrams.end(); ++ui) { + m_valueUnigramIndexMap[(*ui).keyValue.value] = i; + i++; + + m_candidates.push_back((*ui).keyValue); + } + + for (vector::const_iterator bi = inBigrams.begin(); + bi != inBigrams.end(); ++bi) { + m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); + } +} + +inline void Node::primeNodeWithPreceedingKeyValues( + const vector& inKeyValues) { + size_t newIndex = m_selectedUnigramIndex; + double max = m_score; + + if (!isCandidateFixed()) { + for (vector::const_iterator kvi = inKeyValues.begin(); + kvi != inKeyValues.end(); ++kvi) { + map >::const_iterator f = + m_preceedingGramBigramMap.find(*kvi); + if (f != m_preceedingGramBigramMap.end()) { + const vector& bigrams = (*f).second; + + for (vector::const_iterator bi = bigrams.begin(); + bi != bigrams.end(); ++bi) { + const Bigram& bigram = *bi; + if (bigram.score > max) { + map::const_iterator uf = + m_valueUnigramIndexMap.find((*bi).keyValue.value); + if (uf != m_valueUnigramIndexMap.end()) { + newIndex = (*uf).second; + max = bigram.score; + } + } + } + } + } + } + + if (m_score != max) { + m_score = max; + } + + if (newIndex != m_selectedUnigramIndex) { + m_selectedUnigramIndex = newIndex; + } +} + +inline bool Node::isCandidateFixed() const { return m_candidateFixed; } + +inline const vector& Node::candidates() const { + return m_candidates; +} + +inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) { + if (inIndex >= m_unigrams.size()) { + m_selectedUnigramIndex = 0; + } else { + m_selectedUnigramIndex = inIndex; + } + + m_candidateFixed = inFix; + m_score = 99; +} + +inline void Node::resetCandidate() { + m_selectedUnigramIndex = 0; + m_candidateFixed = 0; + if (m_unigrams.size()) { + m_score = m_unigrams[0].score; + } +} + +inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { + if (index >= m_unigrams.size()) { + m_selectedUnigramIndex = 0; + } else { + m_selectedUnigramIndex = index; + } + m_candidateFixed = false; + m_score = score; +} + +inline const string& Node::key() const { return m_key; } + +inline double Node::score() const { return m_score; } + +inline double Node::scoreForCandidate(string& candidate) const { + for (auto unigram : m_unigrams) { + if (unigram.keyValue.value == candidate) { + return unigram.score; + } + } + return 0.0; +} + +inline double Node::highestUnigramScore() const { + if (m_unigrams.empty()) { + return 0.0; + } + return m_unigrams[0].score; +} + +inline const KeyValuePair Node::currentKeyValue() const { + if (m_selectedUnigramIndex >= m_unigrams.size()) { + return KeyValuePair(); + } else { + return m_candidates[m_selectedUnigramIndex]; + } +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/NodeAnchor.h b/Source/Engine/Gramambular/NodeAnchor.h index 62e5e12e..3fa7c7a3 100644 --- a/Source/Engine/Gramambular/NodeAnchor.h +++ b/Source/Engine/Gramambular/NodeAnchor.h @@ -31,49 +31,44 @@ #include "Node.h" namespace Formosa { - namespace Gramambular { - class NodeAnchor { - public: - NodeAnchor(); - const Node *node; - size_t location; - size_t spanningLength; - double accumulatedScore; - }; - - inline NodeAnchor::NodeAnchor() - : node(0) - , location(0) - , spanningLength(0) - , accumulatedScore(0.0) - { - } +namespace Gramambular { +class NodeAnchor { + public: + NodeAnchor(); + const Node* node; + size_t location; + size_t spanningLength; + double accumulatedScore; +}; - inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) - { - inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),"; - if (inAnchor.node) { - inStream << *(inAnchor.node); - } - else { - inStream << "null"; - } - inStream << "}"; - return inStream; - } - - inline ostream& operator<<(ostream& inStream, const vector& inAnchor) - { - for (vector::const_iterator i = inAnchor.begin() ; i != inAnchor.end() ; ++i) { - inStream << *i; - if (i + 1 != inAnchor.end()) { - inStream << "<-"; - } - } - - return inStream; - } - } +inline NodeAnchor::NodeAnchor() + : node(0), location(0), spanningLength(0), accumulatedScore(0.0) {} + +inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) { + inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength + << "),"; + if (inAnchor.node) { + inStream << *(inAnchor.node); + } else { + inStream << "null"; + } + inStream << "}"; + return inStream; } +inline ostream& operator<<(ostream& inStream, + const vector& inAnchor) { + for (vector::const_iterator i = inAnchor.begin(); + i != inAnchor.end(); ++i) { + inStream << *i; + if (i + 1 != inAnchor.end()) { + inStream << "<-"; + } + } + + return inStream; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/Span.h b/Source/Engine/Gramambular/Span.h index 87cb6563..319ef738 100644 --- a/Source/Engine/Gramambular/Span.h +++ b/Source/Engine/Gramambular/Span.h @@ -31,82 +31,75 @@ #include #include #include + #include "Node.h" namespace Formosa { - namespace Gramambular { - class Span { - public: - Span(); +namespace Gramambular { +class Span { + public: + Span(); - void clear(); - void insertNodeOfLength(const Node& inNode, size_t inLength); - void removeNodeOfLengthGreaterThan(size_t inLength); - - Node* nodeOfLength(size_t inLength); - size_t maximumLength() const; + void clear(); + void insertNodeOfLength(const Node& inNode, size_t inLength); + void removeNodeOfLengthGreaterThan(size_t inLength); - protected: - map m_lengthNodeMap; - size_t m_maximumLength; - }; - - inline Span::Span() - : m_maximumLength(0) - { - } - - inline void Span::clear() - { - m_lengthNodeMap.clear(); - m_maximumLength = 0; - } - - inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) - { - m_lengthNodeMap[inLength] = inNode; - if (inLength > m_maximumLength) { - m_maximumLength = inLength; - } - } - - inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) - { - if (inLength > m_maximumLength) { - return; - } - - size_t max = 0; - set removeSet; - for (map::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) { - if ((*i).first > inLength) { - removeSet.insert((*i).first); - } - else { - if ((*i).first > max) { - max = (*i).first; - } - } - } - - for (set::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) { - m_lengthNodeMap.erase(*i); - } + Node* nodeOfLength(size_t inLength); + size_t maximumLength() const; - m_maximumLength = max; - } - - inline Node* Span::nodeOfLength(size_t inLength) - { - map::iterator f = m_lengthNodeMap.find(inLength); - return f == m_lengthNodeMap.end() ? 0 : &(*f).second; - } - - inline size_t Span::maximumLength() const - { - return m_maximumLength; - } - } + protected: + map m_lengthNodeMap; + size_t m_maximumLength; +}; + +inline Span::Span() : m_maximumLength(0) {} + +inline void Span::clear() { + m_lengthNodeMap.clear(); + m_maximumLength = 0; } +inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) { + m_lengthNodeMap[inLength] = inNode; + if (inLength > m_maximumLength) { + m_maximumLength = inLength; + } +} + +inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) { + if (inLength > m_maximumLength) { + return; + } + + size_t max = 0; + set removeSet; + for (map::iterator i = m_lengthNodeMap.begin(), + e = m_lengthNodeMap.end(); + i != e; ++i) { + if ((*i).first > inLength) { + removeSet.insert((*i).first); + } else { + if ((*i).first > max) { + max = (*i).first; + } + } + } + + for (set::iterator i = removeSet.begin(), e = removeSet.end(); i != e; + ++i) { + m_lengthNodeMap.erase(*i); + } + + m_maximumLength = max; +} + +inline Node* Span::nodeOfLength(size_t inLength) { + map::iterator f = m_lengthNodeMap.find(inLength); + return f == m_lengthNodeMap.end() ? 0 : &(*f).second; +} + +inline size_t Span::maximumLength() const { return m_maximumLength; } +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/Unigram.h b/Source/Engine/Gramambular/Unigram.h index 5af28502..4aa0833e 100644 --- a/Source/Engine/Gramambular/Unigram.h +++ b/Source/Engine/Gramambular/Unigram.h @@ -29,76 +29,69 @@ #define Unigram_h #include + #include "KeyValuePair.h" namespace Formosa { - namespace Gramambular { - class Unigram { - public: - Unigram(); +namespace Gramambular { +class Unigram { + public: + Unigram(); - KeyValuePair keyValue; - double score; - - bool operator==(const Unigram& inAnother) const; - bool operator<(const Unigram& inAnother) const; - - static bool ScoreCompare(const Unigram& a, const Unigram& b); - }; + KeyValuePair keyValue; + double score; - inline ostream& operator<<(ostream& inStream, const Unigram& inGram) - { - streamsize p = inStream.precision(); - inStream.precision(6); - inStream << "(" << inGram.keyValue << "," << inGram.score << ")"; - inStream.precision(p); - return inStream; - } - - inline ostream& operator<<(ostream& inStream, const vector& inGrams) - { - inStream << "[" << inGrams.size() << "]=>{"; - - size_t index = 0; - - for (vector::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { - inStream << index << "=>"; - inStream << *gi; - if (gi + 1 != inGrams.end()) { - inStream << ","; - } - } - - inStream << "}"; - return inStream; - } - - inline Unigram::Unigram() - : score(0.0) - { - } - - inline bool Unigram::operator==(const Unigram& inAnother) const - { - return keyValue == inAnother.keyValue && score == inAnother.score; - } - - inline bool Unigram::operator<(const Unigram& inAnother) const - { - if (keyValue < inAnother.keyValue) { - return true; - } - else if (keyValue == inAnother.keyValue) { - return score < inAnother.score; - } - return false; - } + bool operator==(const Unigram& inAnother) const; + bool operator<(const Unigram& inAnother) const; - inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) - { - return a.score > b.score; - } - } + static bool ScoreCompare(const Unigram& a, const Unigram& b); +}; + +inline ostream& operator<<(ostream& inStream, const Unigram& inGram) { + streamsize p = inStream.precision(); + inStream.precision(6); + inStream << "(" << inGram.keyValue << "," << inGram.score << ")"; + inStream.precision(p); + return inStream; } +inline ostream& operator<<(ostream& inStream, const vector& inGrams) { + inStream << "[" << inGrams.size() << "]=>{"; + + size_t index = 0; + + for (vector::const_iterator gi = inGrams.begin(); + gi != inGrams.end(); ++gi, ++index) { + inStream << index << "=>"; + inStream << *gi; + if (gi + 1 != inGrams.end()) { + inStream << ","; + } + } + + inStream << "}"; + return inStream; +} + +inline Unigram::Unigram() : score(0.0) {} + +inline bool Unigram::operator==(const Unigram& inAnother) const { + return keyValue == inAnother.keyValue && score == inAnother.score; +} + +inline bool Unigram::operator<(const Unigram& inAnother) const { + if (keyValue < inAnother.keyValue) { + return true; + } else if (keyValue == inAnother.keyValue) { + return score < inAnother.score; + } + return false; +} + +inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) { + return a.score > b.score; +} +} // namespace Gramambular +} // namespace Formosa + #endif diff --git a/Source/Engine/Gramambular/Walker.h b/Source/Engine/Gramambular/Walker.h index c40ffbf1..1f57fa0d 100644 --- a/Source/Engine/Gramambular/Walker.h +++ b/Source/Engine/Gramambular/Walker.h @@ -29,63 +29,65 @@ #define Walker_h #include + #include "Grid.h" namespace Formosa { - namespace Gramambular { - using namespace std; +namespace Gramambular { +using namespace std; - class Walker { - public: - Walker(Grid* inGrid); - const vector reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0); - - protected: - Grid* m_grid; - }; - - inline Walker::Walker(Grid* inGrid) - : m_grid(inGrid) - { - } - - inline const vector Walker::reverseWalk(size_t inLocation, double inAccumulatedScore) - { - if (!inLocation || inLocation > m_grid->width()) { - return vector(); - } - - vector > paths; +class Walker { + public: + Walker(Grid* inGrid); + const vector reverseWalk(size_t inLocation, + double inAccumulatedScore = 0.0); - vector nodes = m_grid->nodesEndingAt(inLocation); - - for (vector::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) { - if (!(*ni).node) { - continue; - } + protected: + Grid* m_grid; +}; - (*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); +inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {} - vector path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); - path.insert(path.begin(), *ni); - - paths.push_back(path); - } - - if (!paths.size()) { - return vector(); - } - - vector* result = &*(paths.begin()); - for (vector >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) { - if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { - result = &*pi; - } - } - - return *result; - } +inline const vector Walker::reverseWalk(size_t inLocation, + double inAccumulatedScore) { + if (!inLocation || inLocation > m_grid->width()) { + return vector(); + } + + vector > paths; + + vector nodes = m_grid->nodesEndingAt(inLocation); + + for (vector::iterator ni = nodes.begin(); ni != nodes.end(); + ++ni) { + if (!(*ni).node) { + continue; } + + (*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); + + vector path = + reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); + path.insert(path.begin(), *ni); + + paths.push_back(path); + } + + if (!paths.size()) { + return vector(); + } + + vector* result = &*(paths.begin()); + for (vector >::iterator pi = paths.begin(); + pi != paths.end(); ++pi) { + if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { + result = &*pi; + } + } + + return *result; } +} // namespace Gramambular +} // namespace Formosa #endif