Lukhnos: Gramambular // Modernization.

This commit is contained in:
ShikiSuen 2022-02-20 22:33:46 +08:00
parent 3903ac79a7
commit a2cf0bfdf1
13 changed files with 926 additions and 945 deletions

View File

@ -17,82 +17,77 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Bigram_h #ifndef BIGRAM_H_
#define Bigram_h #define BIGRAM_H_
#include <vector> #include <vector>
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class Bigram { class Bigram {
public: public:
Bigram(); Bigram();
KeyValuePair preceedingKeyValue; KeyValuePair preceedingKeyValue;
KeyValuePair keyValue; KeyValuePair keyValue;
double score; double score;
bool operator==(const Bigram& inAnother) const; bool operator==(const Bigram& another) const;
bool operator<(const Bigram& inAnother) const; bool operator<(const Bigram& another) const;
}; };
inline ostream& operator<<(ostream& inStream, const Bigram& inGram) inline std::ostream& operator<<(std::ostream& stream, const Bigram& gram) {
{ std::streamsize p = stream.precision();
streamsize p = inStream.precision(); stream.precision(6);
inStream.precision(6); stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << ","
inStream << "(" << inGram.keyValue << "|" <<inGram.preceedingKeyValue << "," << inGram.score << ")"; << gram.score << ")";
inStream.precision(p); stream.precision(p);
return inStream; return stream;
} }
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams) inline std::ostream& operator<<(std::ostream& stream,
{ const std::vector<Bigram>& grams) {
inStream << "[" << inGrams.size() << "]=>{"; stream << "[" << grams.size() << "]=>{";
size_t index = 0; size_t index = 0;
for (vector<Bigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { for (std::vector<Bigram>::const_iterator gi = grams.begin();
inStream << index << "=>"; gi != grams.end(); ++gi, ++index) {
inStream << *gi; stream << index << "=>";
if (gi + 1 != inGrams.end()) { stream << *gi;
inStream << ","; if (gi + 1 != grams.end()) {
stream << ",";
} }
} }
inStream << "}"; stream << "}";
return inStream; return stream;
}
inline Bigram::Bigram()
: score(0.0)
{
}
inline bool Bigram::operator==(const Bigram& inAnother) const
{
return preceedingKeyValue == inAnother.preceedingKeyValue && keyValue == inAnother.keyValue && score == inAnother.score;
}
inline bool Bigram::operator<(const Bigram& inAnother) const
{
if (preceedingKeyValue < inAnother.preceedingKeyValue) {
return true;
}
else if (preceedingKeyValue == inAnother.preceedingKeyValue) {
if (keyValue < inAnother.keyValue) {
return true;
}
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score;
}
return false;
}
return false;
}
}
} }
// Creates a bigram whose score starts at zero; the two key-value pair members
// are default-constructed.
inline Bigram::Bigram()
    : score(0.0) {
}
// Two bigrams are equal when their preceding key-value pair, their own
// key-value pair, and their score all compare equal (checked in that order).
inline bool Bigram::operator==(const Bigram& another) const {
  if (!(preceedingKeyValue == another.preceedingKeyValue)) {
    return false;
  }
  if (!(keyValue == another.keyValue)) {
    return false;
  }
  return score == another.score;
}
// Orders bigrams lexicographically: first by the preceding key-value pair,
// then by the bigram's own key-value pair, and finally by score.
inline bool Bigram::operator<(const Bigram& another) const {
  // The preceding key-value pair is the most significant field.
  if (preceedingKeyValue < another.preceedingKeyValue) {
    return true;
  }
  if (!(preceedingKeyValue == another.preceedingKeyValue)) {
    return false;
  }

  // Preceding pairs are equal; fall back to this bigram's own pair.
  if (keyValue < another.keyValue) {
    return true;
  }
  if (!(keyValue == another.keyValue)) {
    return false;
  }

  // Everything else is equal, so the score decides.
  return score < another.score;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,123 +17,113 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef BlockReadingBuilder_h #ifndef BLOCKREADINGBUILDER_H_
#define BlockReadingBuilder_h #define BLOCKREADINGBUILDER_H_
#include <string>
#include <vector> #include <vector>
#include "Grid.h" #include "Grid.h"
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std;
class BlockReadingBuilder { class BlockReadingBuilder {
public: public:
BlockReadingBuilder(LanguageModel *inLM); explicit BlockReadingBuilder(LanguageModel* lm);
void clear(); void clear();
size_t length() const; size_t length() const;
size_t cursorIndex() const; size_t cursorIndex() const;
void setCursorIndex(size_t inNewIndex); void setCursorIndex(size_t newIndex);
void insertReadingAtCursor(const string& inReading); void insertReadingAtCursor(const std::string& reading);
bool deleteReadingBeforeCursor(); // backspace bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete bool deleteReadingAfterCursor(); // delete
bool removeHeadReadings(size_t count); bool removeHeadReadings(size_t count);
void setJoinSeparator(const string& separator); void setJoinSeparator(const std::string& separator);
const string joinSeparator() const; const std::string joinSeparator() const;
vector<string> readings() const; std::vector<std::string> readings() const;
Grid& grid(); Grid& grid();
protected: protected:
void build(); void build();
static const string Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator); static const std::string Join(std::vector<std::string>::const_iterator begin,
std::vector<std::string>::const_iterator end,
const std::string& separator);
//最多使用六個字組成一個詞 // 最多使用六個字組成一個詞
static const size_t MaximumBuildSpanLength = 6; static const size_t MaximumBuildSpanLength = 6;
size_t m_cursorIndex; size_t m_cursorIndex;
vector<string> m_readings; std::vector<std::string> m_readings;
Grid m_grid; Grid m_grid;
LanguageModel *m_LM; LanguageModel* m_LM;
string m_joinSeparator; std::string m_joinSeparator;
}; };
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM) inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* lm)
: m_LM(inLM) : m_LM(lm), m_cursorIndex(0) {}
, m_cursorIndex(0)
{
}
inline void BlockReadingBuilder::clear() inline void BlockReadingBuilder::clear() {
{
m_cursorIndex = 0; m_cursorIndex = 0;
m_readings.clear(); m_readings.clear();
m_grid.clear(); m_grid.clear();
} }
inline size_t BlockReadingBuilder::length() const inline size_t BlockReadingBuilder::length() const { return m_readings.size(); }
{
return m_readings.size();
}
inline size_t BlockReadingBuilder::cursorIndex() const inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; }
{
return m_cursorIndex;
}
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) {
{ m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex;
m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex; }
}
inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading) inline void BlockReadingBuilder::insertReadingAtCursor(
{ const std::string& reading) {
m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); m_readings.insert(m_readings.begin() + m_cursorIndex, reading);
m_grid.expandGridByOneAtLocation(m_cursorIndex); m_grid.expandGridByOneAtLocation(m_cursorIndex);
build(); build();
m_cursorIndex++; m_cursorIndex++;
} }
inline vector<string> BlockReadingBuilder::readings() const inline std::vector<std::string> BlockReadingBuilder::readings() const {
{
return m_readings; return m_readings;
} }
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() inline bool BlockReadingBuilder::deleteReadingBeforeCursor() {
{
if (!m_cursorIndex) { if (!m_cursorIndex) {
return false; return false;
} }
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex); m_readings.erase(m_readings.begin() + m_cursorIndex - 1,
m_readings.begin() + m_cursorIndex);
m_cursorIndex--; m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex); m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build(); build();
return true; return true;
} }
inline bool BlockReadingBuilder::deleteReadingAfterCursor() inline bool BlockReadingBuilder::deleteReadingAfterCursor() {
{
if (m_cursorIndex == m_readings.size()) { if (m_cursorIndex == m_readings.size()) {
return false; return false;
} }
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1); m_readings.erase(m_readings.begin() + m_cursorIndex,
m_readings.begin() + m_cursorIndex + 1);
m_grid.shrinkGridByOneAtLocation(m_cursorIndex); m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build(); build();
return true; return true;
} }
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
{
if (count > length()) { if (count > length()) {
return false; return false;
} }
@ -148,25 +138,20 @@ namespace Taiyan {
} }
return true; return true;
} }
inline void BlockReadingBuilder::setJoinSeparator(const string& separator) inline void BlockReadingBuilder::setJoinSeparator(
{ const std::string& separator) {
m_joinSeparator = separator; m_joinSeparator = separator;
} }
inline const string BlockReadingBuilder::joinSeparator() const inline const std::string BlockReadingBuilder::joinSeparator() const {
{
return m_joinSeparator; return m_joinSeparator;
} }
inline Grid& BlockReadingBuilder::grid() inline Grid& BlockReadingBuilder::grid() { return m_grid; }
{
return m_grid;
}
inline void BlockReadingBuilder::build() inline void BlockReadingBuilder::build() {
{
if (!m_LM) { if (!m_LM) {
return; return;
} }
@ -176,8 +161,7 @@ namespace Taiyan {
if (m_cursorIndex < MaximumBuildSpanLength) { if (m_cursorIndex < MaximumBuildSpanLength) {
begin = 0; begin = 0;
} } else {
else {
begin = m_cursorIndex - MaximumBuildSpanLength; begin = m_cursorIndex - MaximumBuildSpanLength;
} }
@ -185,25 +169,29 @@ namespace Taiyan {
end = m_readings.size(); end = m_readings.size();
} }
for (size_t p = begin ; p < end ; p++) { for (size_t p = begin; p < end; p++) {
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) { for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); std::string combinedReading = Join(
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading); if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q,
combinedReading)) {
std::vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) { if (unigrams.size() > 0) {
Node n(combinedReading, unigrams, vector<Bigram>()); Node n(combinedReading, unigrams, std::vector<Bigram>());
m_grid.insertNode(n, p, q); m_grid.insertNode(n, p, q);
} }
} }
} }
} }
} }
inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator) inline const std::string BlockReadingBuilder::Join(
{ std::vector<std::string>::const_iterator begin,
string result; std::vector<std::string>::const_iterator end,
for (vector<string>::const_iterator iter = begin ; iter != end ; ) { const std::string& separator) {
std::string result;
for (std::vector<std::string>::const_iterator iter = begin; iter != end;) {
result += *iter; result += *iter;
++iter; ++iter;
if (iter != end) { if (iter != end) {
@ -211,8 +199,8 @@ namespace Taiyan {
} }
} }
return result; return result;
}
}
} }
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,8 +17,8 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Gramambular_h #ifndef GRAMAMBULAR_H_
#define Gramambular_h #define GRAMAMBULAR_H_
#include "Bigram.h" #include "Bigram.h"
#include "BlockReadingBuilder.h" #include "BlockReadingBuilder.h"

View File

@ -17,125 +17,124 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Grid_h #ifndef GRID_H_
#define Grid_h #define GRID_H_
#include <map> #include <map>
#include <string>
#include <vector>
#include "NodeAnchor.h" #include "NodeAnchor.h"
#include "Span.h" #include "Span.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class Grid { class Grid {
public: public:
void clear(); void clear();
void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength); void insertNode(const Node& node, size_t location, size_t spanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey); bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location,
size_t spanningLength,
const std::string& key);
void expandGridByOneAtLocation(size_t inLocation); void expandGridByOneAtLocation(size_t location);
void shrinkGridByOneAtLocation(size_t inLocation); void shrinkGridByOneAtLocation(size_t location);
size_t width() const; size_t width() const;
vector<NodeAnchor> nodesEndingAt(size_t inLocation); std::vector<NodeAnchor> nodesEndingAt(size_t location);
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation); std::vector<NodeAnchor> nodesCrossingOrEndingAt(size_t location);
// "Freeze" the node with the unigram that represents the selected candidate value. // "Freeze" the node with the unigram that represents the selected candidate
// After this, the node that contains the unigram will always be evaluated to that // value. After this, the node that contains the unigram will always be
// unigram, while all other overlapping nodes will be reset to their initial state // evaluated to that unigram, while all other overlapping nodes will be reset
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.) // to their initial state (that is, if any of those nodes were "frozen" or
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); // fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location,
const std::string& value);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
// boost the unigram that represents the value with an overriding score. This // only boost the unigram that represents the value with an overriding score.
// has the same side effect as fixNodeSelectedCandidate, which is that all other // This has the same side effect as fixNodeSelectedCandidate, which is that
// overlapping nodes will be reset to their initial state. // all other overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); void overrideNodeScoreForSelectedCandidate(size_t location,
const std::string& value,
float overridingScore);
const string dumpDOT(); std::string dumpDOT();
protected: protected:
vector<Span> m_spans; std::vector<Span> m_spans;
}; };
inline void Grid::clear() inline void Grid::clear() { m_spans.clear(); }
{
m_spans.clear();
}
inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength) inline void Grid::insertNode(const Node& node, size_t location,
{ size_t spanningLength) {
if (inLocation >= m_spans.size()) { if (location >= m_spans.size()) {
size_t diff = inLocation - m_spans.size() + 1; size_t diff = location - m_spans.size() + 1;
for (size_t i = 0 ; i < diff ; i++) { for (size_t i = 0; i < diff; i++) {
m_spans.push_back(Span()); m_spans.push_back(Span());
} }
} }
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); m_spans[location].insertNodeOfLength(node, spanningLength);
} }
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey) inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(
{ size_t location, size_t spanningLength, const std::string& key) {
if (inLocation > m_spans.size()) { if (location > m_spans.size()) {
return false; return false;
} }
const Node *n = m_spans[inLocation].nodeOfLength(inSpanningLength); const Node* n = m_spans[location].nodeOfLength(spanningLength);
if (!n) { if (!n) {
return false; return false;
} }
return inKey == n->key(); return key == n->key();
} }
inline void Grid::expandGridByOneAtLocation(size_t inLocation) inline void Grid::expandGridByOneAtLocation(size_t location) {
{ if (!location || location == m_spans.size()) {
if (!inLocation || inLocation == m_spans.size()) { m_spans.insert(m_spans.begin() + location, Span());
m_spans.insert(m_spans.begin() + inLocation, Span()); } else {
} m_spans.insert(m_spans.begin() + location, Span());
else { for (size_t i = 0; i < location; i++) {
m_spans.insert(m_spans.begin() + inLocation, Span());
for (size_t i = 0 ; i < inLocation ; i++) {
// zaps overlapping spans // zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); m_spans[i].removeNodeOfLengthGreaterThan(location - i);
}
} }
} }
}
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) inline void Grid::shrinkGridByOneAtLocation(size_t location) {
{ if (location >= m_spans.size()) {
if (inLocation >= m_spans.size()) {
return; return;
} }
m_spans.erase(m_spans.begin() + inLocation); m_spans.erase(m_spans.begin() + location);
for (size_t i = 0 ; i < inLocation ; i++) { for (size_t i = 0; i < location; i++) {
// zaps overlapping spans // zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i); m_spans[i].removeNodeOfLengthGreaterThan(location - i);
}
} }
}
inline size_t Grid::width() const inline size_t Grid::width() const { return m_spans.size(); }
{
return m_spans.size();
}
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation) inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location) {
{ std::vector<NodeAnchor> result;
vector<NodeAnchor> result;
if (m_spans.size() && inLocation <= m_spans.size()) { if (m_spans.size() && location <= m_spans.size()) {
for (size_t i = 0 ; i < inLocation ; i++) { for (size_t i = 0; i < location; i++) {
Span& span = m_spans[i]; Span& span = m_spans[i];
if (i + span.maximumLength() >= inLocation) { if (i + span.maximumLength() >= location) {
Node *np = span.nodeOfLength(inLocation - i); Node* np = span.nodeOfLength(location - i);
if (np) { if (np) {
NodeAnchor na; NodeAnchor na;
na.node = np; na.node = np;
na.location = i; na.location = i;
na.spanningLength = inLocation - i; na.spanningLength = location - i;
result.push_back(na); result.push_back(na);
} }
@ -144,30 +143,27 @@ namespace Taiyan {
} }
return result; return result;
} }
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation) inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location) {
{ std::vector<NodeAnchor> result;
vector<NodeAnchor> result;
if (m_spans.size() && inLocation <= m_spans.size()) { if (m_spans.size() && location <= m_spans.size()) {
for (size_t i = 0 ; i < inLocation ; i++) { for (size_t i = 0; i < location; i++) {
Span& span = m_spans[i]; Span& span = m_spans[i];
if (i + span.maximumLength() >= inLocation) { if (i + span.maximumLength() >= location) {
for (size_t j = 1, m = span.maximumLength(); j <= m; j++) {
for (size_t j = 1, m = span.maximumLength(); j <= m ; j++) { if (i + j < location) {
if (i + j < inLocation) {
continue; continue;
} }
Node *np = span.nodeOfLength(j); Node* np = span.nodeOfLength(j);
if (np) { if (np) {
NodeAnchor na; NodeAnchor na;
na.node = np; na.node = np;
na.location = i; na.location = i;
na.spanningLength = inLocation - i; na.spanningLength = location - i;
result.push_back(na); result.push_back(na);
} }
@ -177,12 +173,13 @@ namespace Taiyan {
} }
return result; return result;
} }
// For nodes found at the location, fix their currently-selected candidate using the supplied string value. // For nodes found at the location, fix their currently-selected candidate using
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value) // the supplied string value.
{ inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location); const std::string& value) {
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
NodeAnchor node; NodeAnchor node;
for (auto nodeAnchor : nodes) { for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates(); auto candidates = nodeAnchor.node->candidates();
@ -194,16 +191,16 @@ namespace Taiyan {
if (candidates[i].value == value) { if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i); const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i);
node = nodeAnchor; node = nodeAnchor;
break;; break;
} }
} }
} }
return node; return node;
} }
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore) inline void Grid::overrideNodeScoreForSelectedCandidate(
{ size_t location, const std::string& value, float overridingScore) {
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location); std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) { for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates(); auto candidates = nodeAnchor.node->candidates();
@ -212,53 +209,15 @@ namespace Taiyan {
for (size_t i = 0, c = candidates.size(); i < c; ++i) { for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) { if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); const_cast<Node*>(nodeAnchor.node)
->selectFloatingCandidateAtIndex(i, overridingScore);
break; break;
} }
} }
} }
}
inline const string Grid::dumpDOT()
{
stringstream sst;
sst << "digraph {" << endl;
sst << "graph [ rankdir=LR ];" << endl;
sst << "BOS;" << endl;
for (size_t p = 0 ; p < m_spans.size() ; p++) {
Span& span = m_spans[p];
for (size_t ni = 0 ; ni <= span.maximumLength() ; ni++) {
Node* np = span.nodeOfLength(ni);
if (np) {
if (!p) {
sst << "BOS -> " << np->currentKeyValue().value << ";" << endl;
}
sst << np->currentKeyValue().value << ";" << endl;
if (p + ni < m_spans.size()) {
Span& dstSpan = m_spans[p+ni];
for (size_t q = 0 ; q <= dstSpan.maximumLength() ; q++) {
Node *dn = dstSpan.nodeOfLength(q);
if (dn) {
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl;
}
}
}
if (p + ni == m_spans.size()) {
sst << np->currentKeyValue().value << " -> " << "EOS;" << endl;
}
}
}
}
sst << "EOS;" << endl;
sst << "}";
return sst.str();
}
}
} }
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -0,0 +1,70 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "Grid.h"
#include <iostream>
#include <sstream>
#include <string>
namespace Taiyan {
namespace Gramambular {
// Renders the grid as a Graphviz DOT digraph and returns it as a string.
//
// Every node found in a span is emitted as a vertex named after the value of
// its current key-value pair. Nodes stored in the first span get an edge from
// the "BOS" vertex, nodes that end exactly at the grid's width get an edge to
// the "EOS" vertex, and every node gets an edge to each node stored in the
// span located where it ends.
std::string Grid::dumpDOT() {
  // Lines end with '\n' instead of std::endl: the stream is an in-memory
  // buffer, so flushing it after every line serves no purpose.
  std::ostringstream sst;
  sst << "digraph {\n";
  sst << "graph [ rankdir=LR ];\n";
  sst << "BOS;\n";

  for (size_t p = 0; p < m_spans.size(); p++) {
    Span& span = m_spans[p];
    for (size_t ni = 0; ni <= span.maximumLength(); ni++) {
      Node* np = span.nodeOfLength(ni);
      if (!np) {
        continue;
      }

      // currentKeyValue() returns its pair by value, so fetch the vertex name
      // once instead of copying the pair again for every edge.
      const std::string value = np->currentKeyValue().value;

      if (!p) {
        sst << "BOS -> " << value << ";\n";
      }
      sst << value << ";\n";

      // Link this node to every node stored in the span where it ends.
      if (p + ni < m_spans.size()) {
        Span& dstSpan = m_spans[p + ni];
        for (size_t q = 0; q <= dstSpan.maximumLength(); q++) {
          Node* dn = dstSpan.nodeOfLength(q);
          if (dn) {
            sst << value << " -> " << dn->currentKeyValue().value << ";\n";
          }
        }
      }

      // A node that reaches the end of the grid is linked to EOS.
      if (p + ni == m_spans.size()) {
        sst << value << " -> EOS;\n";
      }
    }
  }

  sst << "EOS;\n";
  sst << "}";
  return sst.str();
}
} // namespace Gramambular
} // namespace Taiyan

View File

@ -17,47 +17,43 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef KeyValuePair_h #ifndef KEYVALUEPAIR_H_
#define KeyValuePair_h #define KEYVALUEPAIR_H_
#include <ostream> #include <ostream>
#include <string> #include <string>
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std;
class KeyValuePair { class KeyValuePair {
public: public:
string key; std::string key;
string value; std::string value;
bool operator==(const KeyValuePair& inAnother) const; bool operator==(const KeyValuePair& another) const;
bool operator<(const KeyValuePair& inAnother) const; bool operator<(const KeyValuePair& another) const;
}; };
inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) inline std::ostream& operator<<(std::ostream& stream,
{ const KeyValuePair& pair) {
inStream << "(" << inPair.key << "," << inPair.value << ")"; stream << "(" << pair.key << "," << pair.value << ")";
return inStream; return stream;
}
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const
{
return key == inAnother.key && value == inAnother.value;
}
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const
{
if (key < inAnother.key) {
return true;
}
else if (key == inAnother.key) {
return value < inAnother.value;
}
return false;
}
}
} }
// Two pairs are equal when both their keys and their values match.
inline bool KeyValuePair::operator==(const KeyValuePair& another) const {
  if (key != another.key) {
    return false;
  }
  return value == another.value;
}
// Orders pairs lexicographically: by key first, and by value when the keys are
// equal.
inline bool KeyValuePair::operator<(const KeyValuePair& another) const {
  const int keyOrder = key.compare(another.key);
  if (keyOrder != 0) {
    // Keys differ, so they alone decide the ordering.
    return keyOrder < 0;
  }
  return value < another.value;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,28 +17,28 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef LanguageModel_h #ifndef LANGUAGEMODEL_H_
#define LanguageModel_h #define LANGUAGEMODEL_H_
#include <string>
#include <vector> #include <vector>
#include "Bigram.h" #include "Bigram.h"
#include "Unigram.h" #include "Unigram.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std; class LanguageModel {
public:
class LanguageModel {
public:
virtual ~LanguageModel() {} virtual ~LanguageModel() {}
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0; virtual const std::vector<Bigram> bigramsForKeys(
virtual const vector<Unigram> unigramsForKey(const string &key) = 0; const std::string& preceedingKey, const std::string& key) = 0;
virtual bool hasUnigramsForKey(const string& key) = 0; virtual const std::vector<Unigram> unigramsForKey(const std::string& key) = 0;
}; virtual bool hasUnigramsForKey(const std::string& key) = 0;
} };
} } // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,75 +17,75 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Node_h #ifndef NODE_H_
#define Node_h #define NODE_H_
#include <limits> #include <limits>
#include <map>
#include <string>
#include <vector> #include <vector>
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std;
class Node { class Node {
public: public:
Node(); Node();
Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams); Node(const std::string& key, const std::vector<Unigram>& unigrams,
const std::vector<Bigram>& bigrams);
void primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues); void primeNodeWithPreceedingKeyValues(
const std::vector<KeyValuePair>& keyValues);
bool isCandidateFixed() const; bool isCandidateFixed() const;
const vector<KeyValuePair>& candidates() const; const std::vector<KeyValuePair>& candidates() const;
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); void selectCandidateAtIndex(size_t index = 0, bool fix = true);
void resetCandidate(); void resetCandidate();
void selectFloatingCandidateAtIndex(size_t index, double score); void selectFloatingCandidateAtIndex(size_t index, double score);
const string& key() const; const std::string& key() const;
double score() const; double score() const;
// double scoreForCandidate(string &candidate) const; // Prevents the override model to remember symbols with scode -X or lower. double scoreForCandidate(const std::string& candidate) const;
const KeyValuePair currentKeyValue() const; const KeyValuePair currentKeyValue() const;
double highestUnigramScore() const; double highestUnigramScore() const;
protected: protected:
const LanguageModel* m_LM; const LanguageModel* m_LM;
string m_key; std::string m_key;
double m_score; double m_score;
vector<Unigram> m_unigrams; std::vector<Unigram> m_unigrams;
vector<KeyValuePair> m_candidates; std::vector<KeyValuePair> m_candidates;
map<string, size_t> m_valueUnigramIndexMap; std::map<std::string, size_t> m_valueUnigramIndexMap;
map<KeyValuePair, vector<Bigram> > m_preceedingGramBigramMap; std::map<KeyValuePair, std::vector<Bigram> > m_preceedingGramBigramMap;
bool m_candidateFixed; bool m_candidateFixed;
size_t m_selectedUnigramIndex; size_t m_selectedUnigramIndex;
friend ostream& operator<<(ostream& inStream, const Node& inNode); friend std::ostream& operator<<(std::ostream& stream, const Node& node);
}; };
inline ostream& operator<<(ostream& inStream, const Node& inNode) inline std::ostream& operator<<(std::ostream& stream, const Node& node) {
{ stream << "(node,key:" << node.m_key
inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") << ",fixed:" << (node.m_candidateFixed ? "true" : "false")
<< ",selected:" << inNode.m_selectedUnigramIndex << ",selected:" << node.m_selectedUnigramIndex << ","
<< "," << inNode.m_unigrams << ")"; << node.m_unigrams << ")";
return inStream; return stream;
} }
inline Node::Node() inline Node::Node()
: m_candidateFixed(false) : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {}
, m_selectedUnigramIndex(0)
, m_score(0.0)
{
}
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams) inline Node::Node(const std::string& key, const std::vector<Unigram>& unigrams,
: m_key(inKey) const std::vector<Bigram>& bigrams)
, m_unigrams(inUnigrams) : m_key(key),
, m_candidateFixed(false) m_unigrams(unigrams),
, m_selectedUnigramIndex(0) m_candidateFixed(false),
, m_score(0.0) m_selectedUnigramIndex(0),
{ m_score(0.0) {
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
if (m_unigrams.size()) { if (m_unigrams.size()) {
@ -93,33 +93,39 @@ namespace Taiyan {
} }
size_t i = 0; size_t i = 0;
for (vector<Unigram>::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) { for (std::vector<Unigram>::const_iterator ui = m_unigrams.begin();
ui != m_unigrams.end(); ++ui) {
m_valueUnigramIndexMap[(*ui).keyValue.value] = i; m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
i++; i++;
m_candidates.push_back((*ui).keyValue); m_candidates.push_back((*ui).keyValue);
} }
for (vector<Bigram>::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) { for (std::vector<Bigram>::const_iterator bi = bigrams.begin();
bi != bigrams.end(); ++bi) {
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
} }
} }
inline void Node::primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues) inline void Node::primeNodeWithPreceedingKeyValues(
{ const std::vector<KeyValuePair>& keyValues) {
size_t newIndex = m_selectedUnigramIndex; size_t newIndex = m_selectedUnigramIndex;
double max = m_score; double max = m_score;
if (!isCandidateFixed()) { if (!isCandidateFixed()) {
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) { for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin();
map<KeyValuePair, vector<Bigram> >::const_iterator f = m_preceedingGramBigramMap.find(*kvi); kvi != keyValues.end(); ++kvi) {
std::map<KeyValuePair, std::vector<Bigram> >::const_iterator f =
m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) { if (f != m_preceedingGramBigramMap.end()) {
const vector<Bigram>& bigrams = (*f).second; const std::vector<Bigram>& bigrams = (*f).second;
for (vector<Bigram>::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) { for (std::vector<Bigram>::const_iterator bi = bigrams.begin();
bi != bigrams.end(); ++bi) {
const Bigram& bigram = *bi; const Bigram& bigram = *bi;
if (bigram.score > max) { if (bigram.score > max) {
map<string, size_t>::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value); std::map<std::string, size_t>::const_iterator uf =
m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) { if (uf != m_valueUnigramIndexMap.end()) {
newIndex = (*uf).second; newIndex = (*uf).second;
max = bigram.score; max = bigram.score;
@ -137,41 +143,34 @@ namespace Taiyan {
if (newIndex != m_selectedUnigramIndex) { if (newIndex != m_selectedUnigramIndex) {
m_selectedUnigramIndex = newIndex; m_selectedUnigramIndex = newIndex;
} }
} }
inline bool Node::isCandidateFixed() const inline bool Node::isCandidateFixed() const { return m_candidateFixed; }
{
return m_candidateFixed;
}
inline const vector<KeyValuePair>& Node::candidates() const inline const std::vector<KeyValuePair>& Node::candidates() const {
{
return m_candidates; return m_candidates;
} }
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) inline void Node::selectCandidateAtIndex(size_t index, bool fix) {
{ if (index >= m_unigrams.size()) {
if (inIndex >= m_unigrams.size()) {
m_selectedUnigramIndex = 0; m_selectedUnigramIndex = 0;
} } else {
else { m_selectedUnigramIndex = index;
m_selectedUnigramIndex = inIndex;
} }
m_candidateFixed = inFix; m_candidateFixed = fix;
m_score = 99; m_score = 99;
} }
inline void Node::resetCandidate() inline void Node::resetCandidate() {
{
m_selectedUnigramIndex = 0; m_selectedUnigramIndex = 0;
m_candidateFixed = 0; m_candidateFixed = 0;
if (m_unigrams.size()) { if (m_unigrams.size()) {
m_score = m_unigrams[0].score; m_score = m_unigrams[0].score;
} }
} }
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
if (index >= m_unigrams.size()) { if (index >= m_unigrams.size()) {
m_selectedUnigramIndex = 0; m_selectedUnigramIndex = 0;
} else { } else {
@ -179,46 +178,37 @@ namespace Taiyan {
} }
m_candidateFixed = false; m_candidateFixed = false;
m_score = score; m_score = score;
} }
inline const string& Node::key() const inline const std::string& Node::key() const { return m_key; }
{
return m_key;
}
inline double Node::score() const inline double Node::score() const { return m_score; }
{
return m_score;
}
// Prevents the override model to remember symbols with scode -X or lower. // Prevents the override model to remember symbols with scode -X or lower.
// inline double Node::scoreForCandidate(string &candidate) const //inline double Node::scoreForCandidate(const std::string& candidate) const {
// {
// for (auto unigram : m_unigrams) { // for (auto unigram : m_unigrams) {
// if (unigram.keyValue.value == candidate) { // if (unigram.keyValue.value == candidate) {
// return unigram.score; // return unigram.score;
// } // }
// } // }
// return 0.0; // return 0.0;
// } //}
inline double Node::highestUnigramScore() const { inline double Node::highestUnigramScore() const {
if (m_unigrams.empty()) { if (m_unigrams.empty()) {
return 0.0; return 0.0;
} }
return m_unigrams[0].score; return m_unigrams[0].score;
}
inline const KeyValuePair Node::currentKeyValue() const
{
if(m_selectedUnigramIndex >= m_unigrams.size()) {
return KeyValuePair();
}
else {
return m_candidates[m_selectedUnigramIndex];
}
}
}
} }
inline const KeyValuePair Node::currentKeyValue() const {
if (m_selectedUnigramIndex >= m_unigrams.size()) {
return KeyValuePair();
} else {
return m_candidates[m_selectedUnigramIndex];
}
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,55 +17,48 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef NodeAnchor_h #ifndef NODEANCHOR_H_
#define NodeAnchor_h #define NODEANCHOR_H_
#include <vector>
#include "Node.h" #include "Node.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class NodeAnchor {
public:
NodeAnchor();
const Node *node;
size_t location;
size_t spanningLength;
double accumulatedScore;
};
inline NodeAnchor::NodeAnchor() struct NodeAnchor {
: node(0) const Node* node = nullptr;
, location(0) size_t location = 0;
, spanningLength(0) size_t spanningLength = 0;
, accumulatedScore(0.0) double accumulatedScore = 0.0;
{ };
}
inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) inline std::ostream& operator<<(std::ostream& stream,
{ const NodeAnchor& anchor) {
inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),"; stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),";
if (inAnchor.node) { if (anchor.node) {
inStream << *(inAnchor.node); stream << *(anchor.node);
} } else {
else { stream << "null";
inStream << "null";
}
inStream << "}";
return inStream;
}
inline ostream& operator<<(ostream& inStream, const vector<NodeAnchor>& inAnchor)
{
for (vector<NodeAnchor>::const_iterator i = inAnchor.begin() ; i != inAnchor.end() ; ++i) {
inStream << *i;
if (i + 1 != inAnchor.end()) {
inStream << "<-";
}
}
return inStream;
}
} }
stream << "}";
return stream;
} }
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<NodeAnchor>& anchor) {
for (std::vector<NodeAnchor>::const_iterator i = anchor.begin();
i != anchor.end(); ++i) {
stream << *i;
if (i + 1 != anchor.end()) {
stream << "<-";
}
}
return stream;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,88 +17,77 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Span_h #ifndef SPAN_H_
#define Span_h #define SPAN_H_
#include <map> #include <map>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include "Node.h" #include "Node.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class Span { class Span {
public: public:
Span();
void clear(); void clear();
void insertNodeOfLength(const Node& inNode, size_t inLength); void insertNodeOfLength(const Node& node, size_t length);
void removeNodeOfLengthGreaterThan(size_t inLength); void removeNodeOfLengthGreaterThan(size_t length);
Node* nodeOfLength(size_t inLength); Node* nodeOfLength(size_t length);
size_t maximumLength() const; size_t maximumLength() const;
protected: protected:
map<size_t, Node> m_lengthNodeMap; std::map<size_t, Node> m_lengthNodeMap;
size_t m_maximumLength; size_t m_maximumLength = 0;
}; };
inline Span::Span() inline void Span::clear() {
: m_maximumLength(0)
{
}
inline void Span::clear()
{
m_lengthNodeMap.clear(); m_lengthNodeMap.clear();
m_maximumLength = 0; m_maximumLength = 0;
} }
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) inline void Span::insertNodeOfLength(const Node& node, size_t length) {
{ m_lengthNodeMap[length] = node;
m_lengthNodeMap[inLength] = inNode; if (length > m_maximumLength) {
if (inLength > m_maximumLength) { m_maximumLength = length;
m_maximumLength = inLength;
}
} }
}
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) inline void Span::removeNodeOfLengthGreaterThan(size_t length) {
{ if (length > m_maximumLength) {
if (inLength > m_maximumLength) {
return; return;
} }
size_t max = 0; size_t max = 0;
set<size_t> removeSet; std::set<size_t> removeSet;
for (map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) { for (std::map<size_t, Node>::iterator i = m_lengthNodeMap.begin(),
if ((*i).first > inLength) { e = m_lengthNodeMap.end();
i != e; ++i) {
if ((*i).first > length) {
removeSet.insert((*i).first); removeSet.insert((*i).first);
} } else {
else {
if ((*i).first > max) { if ((*i).first > max) {
max = (*i).first; max = (*i).first;
} }
} }
} }
for (set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) { for (std::set<size_t>::iterator i = removeSet.begin(), e = removeSet.end();
i != e; ++i) {
m_lengthNodeMap.erase(*i); m_lengthNodeMap.erase(*i);
} }
m_maximumLength = max; m_maximumLength = max;
}
inline Node* Span::nodeOfLength(size_t inLength)
{
map<size_t, Node>::iterator f = m_lengthNodeMap.find(inLength);
return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
}
inline size_t Span::maximumLength() const
{
return m_maximumLength;
}
}
} }
inline Node* Span::nodeOfLength(size_t length) {
std::map<size_t, Node>::iterator f = m_lengthNodeMap.find(length);
return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
}
inline size_t Span::maximumLength() const { return m_maximumLength; }
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,80 +17,75 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Unigram_h #ifndef UNIGRAM_H_
#define Unigram_h #define UNIGRAM_H_
#include <vector> #include <vector>
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class Unigram {
public: class Unigram {
public:
Unigram(); Unigram();
KeyValuePair keyValue; KeyValuePair keyValue;
double score; double score;
bool operator==(const Unigram& inAnother) const; bool operator==(const Unigram& another) const;
bool operator<(const Unigram& inAnother) const; bool operator<(const Unigram& another) const;
static bool ScoreCompare(const Unigram& a, const Unigram& b); static bool ScoreCompare(const Unigram& a, const Unigram& b);
}; };
inline ostream& operator<<(ostream& inStream, const Unigram& inGram) inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) {
{ std::streamsize p = stream.precision();
streamsize p = inStream.precision(); stream.precision(6);
inStream.precision(6); stream << "(" << gram.keyValue << "," << gram.score << ")";
inStream << "(" << inGram.keyValue << "," << inGram.score << ")"; stream.precision(p);
inStream.precision(p); return stream;
return inStream; }
}
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams) inline std::ostream& operator<<(std::ostream& stream,
{ const std::vector<Unigram>& grams) {
inStream << "[" << inGrams.size() << "]=>{"; stream << "[" << grams.size() << "]=>{";
size_t index = 0; size_t index = 0;
for (vector<Unigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { for (std::vector<Unigram>::const_iterator gi = grams.begin();
inStream << index << "=>"; gi != grams.end(); ++gi, ++index) {
inStream << *gi; stream << index << "=>";
if (gi + 1 != inGrams.end()) { stream << *gi;
inStream << ","; if (gi + 1 != grams.end()) {
stream << ",";
} }
} }
inStream << "}"; stream << "}";
return inStream; return stream;
}
inline Unigram::Unigram()
: score(0.0)
{
}
inline bool Unigram::operator==(const Unigram& inAnother) const
{
return keyValue == inAnother.keyValue && score == inAnother.score;
}
inline bool Unigram::operator<(const Unigram& inAnother) const
{
if (keyValue < inAnother.keyValue) {
return true;
}
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score;
}
return false;
}
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b)
{
return a.score > b.score;
}
}
} }
inline Unigram::Unigram() : score(0.0) {}
inline bool Unigram::operator==(const Unigram& another) const {
return keyValue == another.keyValue && score == another.score;
}
inline bool Unigram::operator<(const Unigram& another) const {
if (keyValue < another.keyValue) {
return true;
} else if (keyValue == another.keyValue) {
return score < another.score;
}
return false;
}
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) {
return a.score > b.score;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,67 +17,69 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Walker_h #ifndef WALKER_H_
#define Walker_h #define WALKER_H_
#include <algorithm> #include <algorithm>
#include <vector>
#include "Grid.h" #include "Grid.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std;
class Walker { class Walker {
public: public:
Walker(Grid* inGrid); explicit Walker(Grid* inGrid);
const vector<NodeAnchor> reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0); const std::vector<NodeAnchor> reverseWalk(size_t location,
double accumulatedScore = 0.0);
protected: protected:
Grid* m_grid; Grid* m_grid;
}; };
inline Walker::Walker(Grid* inGrid) inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {}
: m_grid(inGrid)
{ inline const std::vector<NodeAnchor> Walker::reverseWalk(
size_t location, double accumulatedScore) {
if (!location || location > m_grid->width()) {
return std::vector<NodeAnchor>();
} }
inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation, double inAccumulatedScore) std::vector<std::vector<NodeAnchor> > paths;
{
if (!inLocation || inLocation > m_grid->width()) {
return vector<NodeAnchor>();
}
vector<vector<NodeAnchor> > paths; std::vector<NodeAnchor> nodes = m_grid->nodesEndingAt(location);
vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation); for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end();
++ni) {
for (vector<NodeAnchor>::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) {
if (!(*ni).node) { if (!(*ni).node) {
continue; continue;
} }
(*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); (*ni).accumulatedScore = accumulatedScore + (*ni).node->score();
vector<NodeAnchor> path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); std::vector<NodeAnchor> path =
reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
path.insert(path.begin(), *ni); path.insert(path.begin(), *ni);
paths.push_back(path); paths.push_back(path);
} }
if (!paths.size()) { if (!paths.size()) {
return vector<NodeAnchor>(); return std::vector<NodeAnchor>();
} }
vector<NodeAnchor>* result = &*(paths.begin()); std::vector<NodeAnchor>* result = &*(paths.begin());
for (vector<vector<NodeAnchor> >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) { for (std::vector<std::vector<NodeAnchor> >::iterator pi = paths.begin();
pi != paths.end(); ++pi) {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
result = &*pi; result = &*pi;
} }
} }
return *result; return *result;
}
}
} }
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -50,6 +50,7 @@
5BD05C6827B2BBEF004C4F1D /* Content.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6327B2BBEF004C4F1D /* Content.swift */; }; 5BD05C6827B2BBEF004C4F1D /* Content.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6327B2BBEF004C4F1D /* Content.swift */; };
5BD05C6927B2BBEF004C4F1D /* WindowController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */; }; 5BD05C6927B2BBEF004C4F1D /* WindowController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */; };
5BD05C6A27B2BBEF004C4F1D /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */; }; 5BD05C6A27B2BBEF004C4F1D /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */; };
5BDC5CAB27C2873D00E1CCE2 /* Grid.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5BDC5CAA27C2873D00E1CCE2 /* Grid.mm */; };
5BDCBB2E27B4E67A00D0CC59 /* vChewingPhraseEditor.app in Resources */ = {isa = PBXBuildFile; fileRef = 5BD05BB827B2A429004C4F1D /* vChewingPhraseEditor.app */; }; 5BDCBB2E27B4E67A00D0CC59 /* vChewingPhraseEditor.app in Resources */ = {isa = PBXBuildFile; fileRef = 5BD05BB827B2A429004C4F1D /* vChewingPhraseEditor.app */; };
5BE78BD927B3775B005EA1BE /* ctlAboutWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BE78BD827B37750005EA1BE /* ctlAboutWindow.swift */; }; 5BE78BD927B3775B005EA1BE /* ctlAboutWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BE78BD827B37750005EA1BE /* ctlAboutWindow.swift */; };
5BE78BDD27B3776D005EA1BE /* frmAboutWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 5BE78BDA27B37764005EA1BE /* frmAboutWindow.xib */; }; 5BE78BDD27B3776D005EA1BE /* frmAboutWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 5BE78BDA27B37764005EA1BE /* frmAboutWindow.xib */; };
@ -195,6 +196,7 @@
5BD05C6327B2BBEF004C4F1D /* Content.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Content.swift; sourceTree = "<group>"; }; 5BD05C6327B2BBEF004C4F1D /* Content.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Content.swift; sourceTree = "<group>"; };
5BD05C6427B2BBEF004C4F1D /* WindowController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = WindowController.swift; sourceTree = "<group>"; }; 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = WindowController.swift; sourceTree = "<group>"; };
5BD05C6527B2BBEF004C4F1D /* ViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; }; 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
5BDC5CAA27C2873D00E1CCE2 /* Grid.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = Grid.mm; sourceTree = "<group>"; };
5BDCBB4227B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/MainMenu.strings"; sourceTree = "<group>"; }; 5BDCBB4227B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/MainMenu.strings"; sourceTree = "<group>"; };
5BDCBB4327B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/frmAboutWindow.strings"; sourceTree = "<group>"; }; 5BDCBB4327B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/frmAboutWindow.strings"; sourceTree = "<group>"; };
5BDCBB4527B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "Source/WindowNIBs/zh-Hant.lproj/frmPrefWindow.strings"; sourceTree = "<group>"; }; 5BDCBB4527B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "Source/WindowNIBs/zh-Hant.lproj/frmPrefWindow.strings"; sourceTree = "<group>"; };
@ -663,6 +665,7 @@
6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */, 6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */,
6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */, 6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */,
6A0D4F1715FC0EB100ABF4B3 /* Grid.h */, 6A0D4F1715FC0EB100ABF4B3 /* Grid.h */,
5BDC5CAA27C2873D00E1CCE2 /* Grid.mm */,
6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */, 6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */,
6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */, 6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */,
6A0D4F1A15FC0EB100ABF4B3 /* Node.h */, 6A0D4F1A15FC0EB100ABF4B3 /* Node.h */,
@ -943,6 +946,7 @@
D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */, D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */,
5B62A33627AE795800A19448 /* PreferencesModule.swift in Sources */, 5B62A33627AE795800A19448 /* PreferencesModule.swift in Sources */,
5B62A33827AE79CD00A19448 /* NSStringUtils.swift in Sources */, 5B62A33827AE79CD00A19448 /* NSStringUtils.swift in Sources */,
5BDC5CAB27C2873D00E1CCE2 /* Grid.mm in Sources */,
5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */, 5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */,
5B62A34927AE7CD900A19448 /* TooltipController.swift in Sources */, 5B62A34927AE7CD900A19448 /* TooltipController.swift in Sources */,
6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */, 6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */,