Lukhnos: Gramambular // Modernization.

Co-Authored-By: Lukhnos Liu <lukhnos@lukhnos.org>
This commit is contained in:
ShikiSuen 2022-02-20 22:33:46 +08:00
parent 256a20d93f
commit 8bb1ad0be3
13 changed files with 926 additions and 945 deletions

View File

@ -17,82 +17,77 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Bigram_h #ifndef BIGRAM_H_
#define Bigram_h #define BIGRAM_H_
#include <vector> #include <vector>
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class Bigram { class Bigram {
public: public:
Bigram(); Bigram();
KeyValuePair preceedingKeyValue; KeyValuePair preceedingKeyValue;
KeyValuePair keyValue; KeyValuePair keyValue;
double score; double score;
bool operator==(const Bigram& inAnother) const; bool operator==(const Bigram& another) const;
bool operator<(const Bigram& inAnother) const; bool operator<(const Bigram& another) const;
}; };
inline ostream& operator<<(ostream& inStream, const Bigram& inGram) inline std::ostream& operator<<(std::ostream& stream, const Bigram& gram) {
{ std::streamsize p = stream.precision();
streamsize p = inStream.precision(); stream.precision(6);
inStream.precision(6); stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << ","
inStream << "(" << inGram.keyValue << "|" <<inGram.preceedingKeyValue << "," << inGram.score << ")"; << gram.score << ")";
inStream.precision(p); stream.precision(p);
return inStream; return stream;
}
// Writes a list of bigrams as "[count]=>{0=>gram,1=>gram,...}", where each
// gram is printed with the single-Bigram stream operator.
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams)
{
    const size_t total = inGrams.size();
    inStream << "[" << total << "]=>{";
    for (size_t pos = 0; pos < total; ++pos) {
        // A comma precedes every entry except the first one.
        if (pos != 0) {
            inStream << ",";
        }
        inStream << pos << "=>" << inGrams[pos];
    }
    inStream << "}";
    return inStream;
}
// Default constructor: the score starts at 0.0; the two key-value members are
// default-constructed.
inline Bigram::Bigram() : score(0.0) {}
// Two bigrams are equal when the preceding key-value, the key-value and the
// score all compare equal. Comparison stops at the first mismatch.
inline bool Bigram::operator==(const Bigram& inAnother) const
{
    bool same = preceedingKeyValue == inAnother.preceedingKeyValue;
    same = same && keyValue == inAnother.keyValue;
    same = same && score == inAnother.score;
    return same;
}
// Orders bigrams lexicographically: first by the preceding key-value, then by
// the key-value, and finally by score.
inline bool Bigram::operator<(const Bigram& inAnother) const
{
    if (preceedingKeyValue < inAnother.preceedingKeyValue) {
        return true;
    }
    if (!(preceedingKeyValue == inAnother.preceedingKeyValue)) {
        return false;
    }

    // Preceding key-values tie; fall through to the key-value.
    if (keyValue < inAnother.keyValue) {
        return true;
    }
    if (!(keyValue == inAnother.keyValue)) {
        return false;
    }

    // Everything else ties; the score decides.
    return score < inAnother.score;
}
}
} }
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<Bigram>& grams) {
stream << "[" << grams.size() << "]=>{";
size_t index = 0;
for (std::vector<Bigram>::const_iterator gi = grams.begin();
gi != grams.end(); ++gi, ++index) {
stream << index << "=>";
stream << *gi;
if (gi + 1 != grams.end()) {
stream << ",";
}
}
stream << "}";
return stream;
}
// Creates a bigram whose score is initialised to zero.
inline Bigram::Bigram()
    : score(0.0) {
}
// Equality requires matching preceding key-value, key-value and score, checked
// in that order.
inline bool Bigram::operator==(const Bigram& another) const {
  if (!(preceedingKeyValue == another.preceedingKeyValue)) {
    return false;
  }
  if (!(keyValue == another.keyValue)) {
    return false;
  }
  return score == another.score;
}
// Strict ordering on (preceding key-value, key-value, score), compared
// lexicographically in that order.
inline bool Bigram::operator<(const Bigram& another) const {
  if (preceedingKeyValue < another.preceedingKeyValue) {
    return true;
  }
  if (preceedingKeyValue == another.preceedingKeyValue) {
    // Same context: order by key-value, then by score when those tie too.
    return keyValue < another.keyValue ||
           (keyValue == another.keyValue && score < another.score);
  }
  return false;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,202 +17,190 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef BlockReadingBuilder_h #ifndef BLOCKREADINGBUILDER_H_
#define BlockReadingBuilder_h #define BLOCKREADINGBUILDER_H_
#include <string>
#include <vector> #include <vector>
#include "Grid.h" #include "Grid.h"
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std;
class BlockReadingBuilder { class BlockReadingBuilder {
public: public:
BlockReadingBuilder(LanguageModel *inLM); explicit BlockReadingBuilder(LanguageModel* lm);
void clear(); void clear();
size_t length() const; size_t length() const;
size_t cursorIndex() const; size_t cursorIndex() const;
void setCursorIndex(size_t inNewIndex); void setCursorIndex(size_t newIndex);
void insertReadingAtCursor(const string& inReading); void insertReadingAtCursor(const std::string& reading);
bool deleteReadingBeforeCursor(); // backspace bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete bool deleteReadingAfterCursor(); // delete
bool removeHeadReadings(size_t count); bool removeHeadReadings(size_t count);
void setJoinSeparator(const string& separator); void setJoinSeparator(const std::string& separator);
const string joinSeparator() const; const std::string joinSeparator() const;
vector<string> readings() const; std::vector<std::string> readings() const;
Grid& grid(); Grid& grid();
protected: protected:
void build(); void build();
static const string Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator); static const std::string Join(std::vector<std::string>::const_iterator begin,
std::vector<std::string>::const_iterator end,
const std::string& separator);
//最多使用六個字組成一個詞 // 最多使用六個字組成一個詞
static const size_t MaximumBuildSpanLength = 6; static const size_t MaximumBuildSpanLength = 6;
size_t m_cursorIndex; size_t m_cursorIndex;
vector<string> m_readings; std::vector<std::string> m_readings;
Grid m_grid; Grid m_grid;
LanguageModel *m_LM; LanguageModel* m_LM;
string m_joinSeparator; std::string m_joinSeparator;
}; };
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM) inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* lm)
: m_LM(inLM) : m_LM(lm), m_cursorIndex(0) {}
, m_cursorIndex(0)
{
}
inline void BlockReadingBuilder::clear() inline void BlockReadingBuilder::clear() {
{ m_cursorIndex = 0;
m_cursorIndex = 0; m_readings.clear();
m_readings.clear(); m_grid.clear();
m_grid.clear(); }
}
inline size_t BlockReadingBuilder::length() const inline size_t BlockReadingBuilder::length() const { return m_readings.size(); }
{
return m_readings.size();
}
inline size_t BlockReadingBuilder::cursorIndex() const inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; }
{
return m_cursorIndex;
}
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) {
{ m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex;
m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex; }
}
inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading) inline void BlockReadingBuilder::insertReadingAtCursor(
{ const std::string& reading) {
m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); m_readings.insert(m_readings.begin() + m_cursorIndex, reading);
m_grid.expandGridByOneAtLocation(m_cursorIndex); m_grid.expandGridByOneAtLocation(m_cursorIndex);
build(); build();
m_cursorIndex++; m_cursorIndex++;
} }
inline vector<string> BlockReadingBuilder::readings() const inline std::vector<std::string> BlockReadingBuilder::readings() const {
{ return m_readings;
return m_readings; }
}
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() inline bool BlockReadingBuilder::deleteReadingBeforeCursor() {
{ if (!m_cursorIndex) {
if (!m_cursorIndex) { return false;
return false; }
}
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex); m_readings.erase(m_readings.begin() + m_cursorIndex - 1,
m_readings.begin() + m_cursorIndex);
m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::deleteReadingAfterCursor() {
if (m_cursorIndex == m_readings.size()) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex,
m_readings.begin() + m_cursorIndex + 1);
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
if (count > length()) {
return false;
}
for (size_t i = 0; i < count; i++) {
if (m_cursorIndex) {
m_cursorIndex--; m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
} }
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
m_grid.shrinkGridByOneAtLocation(0);
build();
}
inline bool BlockReadingBuilder::deleteReadingAfterCursor() return true;
{ }
if (m_cursorIndex == m_readings.size()) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1); inline void BlockReadingBuilder::setJoinSeparator(
m_grid.shrinkGridByOneAtLocation(m_cursorIndex); const std::string& separator) {
build(); m_joinSeparator = separator;
return true; }
}
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) inline const std::string BlockReadingBuilder::joinSeparator() const {
{ return m_joinSeparator;
if (count > length()) { }
return false;
}
for (size_t i = 0; i < count; i++) { inline Grid& BlockReadingBuilder::grid() { return m_grid; }
if (m_cursorIndex) {
m_cursorIndex--;
}
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
m_grid.shrinkGridByOneAtLocation(0);
build();
}
return true; inline void BlockReadingBuilder::build() {
} if (!m_LM) {
return;
}
inline void BlockReadingBuilder::setJoinSeparator(const string& separator) size_t begin = 0;
{ size_t end = m_cursorIndex + MaximumBuildSpanLength;
m_joinSeparator = separator;
}
inline const string BlockReadingBuilder::joinSeparator() const if (m_cursorIndex < MaximumBuildSpanLength) {
{ begin = 0;
return m_joinSeparator; } else {
} begin = m_cursorIndex - MaximumBuildSpanLength;
}
inline Grid& BlockReadingBuilder::grid() if (end > m_readings.size()) {
{ end = m_readings.size();
return m_grid; }
}
inline void BlockReadingBuilder::build() for (size_t p = begin; p < end; p++) {
{ for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) {
if (!m_LM) { std::string combinedReading = Join(
return; m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
} if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q,
combinedReading)) {
std::vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
size_t begin = 0; if (unigrams.size() > 0) {
size_t end = m_cursorIndex + MaximumBuildSpanLength; Node n(combinedReading, unigrams, std::vector<Bigram>());
m_grid.insertNode(n, p, q);
if (m_cursorIndex < MaximumBuildSpanLength) {
begin = 0;
}
else {
begin = m_cursorIndex - MaximumBuildSpanLength;
}
if (end > m_readings.size()) {
end = m_readings.size();
}
for (size_t p = begin ; p < end ; p++) {
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) {
Node n(combinedReading, unigrams, vector<Bigram>());
m_grid.insertNode(n, p, q);
}
}
} }
} }
} }
// Concatenates the strings in [begin, end), placing `separator` between
// adjacent elements. An empty range yields an empty string.
inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator)
{
    string joined;
    if (begin == end) {
        return joined;
    }

    vector<string>::const_iterator cursor = begin;
    joined += *cursor;
    for (++cursor; cursor != end; ++cursor) {
        joined += separator;
        joined += *cursor;
    }
    return joined;
}
} }
} }
// Joins the strings in [begin, end) into one string with `separator` inserted
// between consecutive elements (never before the first or after the last).
inline const std::string BlockReadingBuilder::Join(
    std::vector<std::string>::const_iterator begin,
    std::vector<std::string>::const_iterator end,
    const std::string& separator) {
  std::string joined;
  bool isFirst = true;
  for (std::vector<std::string>::const_iterator it = begin; it != end; ++it) {
    if (!isFirst) {
      joined += separator;
    }
    joined += *it;
    isFirst = false;
  }
  return joined;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,8 +17,8 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Gramambular_h #ifndef GRAMAMBULAR_H_
#define Gramambular_h #define GRAMAMBULAR_H_
#include "Bigram.h" #include "Bigram.h"
#include "BlockReadingBuilder.h" #include "BlockReadingBuilder.h"

View File

@ -17,248 +17,207 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Grid_h #ifndef GRID_H_
#define Grid_h #define GRID_H_
#include <map> #include <map>
#include <string>
#include <vector>
#include "NodeAnchor.h" #include "NodeAnchor.h"
#include "Span.h" #include "Span.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class Grid { class Grid {
public: public:
void clear(); void clear();
void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength); void insertNode(const Node& node, size_t location, size_t spanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey); bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location,
size_t spanningLength,
const std::string& key);
void expandGridByOneAtLocation(size_t inLocation); void expandGridByOneAtLocation(size_t location);
void shrinkGridByOneAtLocation(size_t inLocation); void shrinkGridByOneAtLocation(size_t location);
size_t width() const; size_t width() const;
vector<NodeAnchor> nodesEndingAt(size_t inLocation); std::vector<NodeAnchor> nodesEndingAt(size_t location);
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation); std::vector<NodeAnchor> nodesCrossingOrEndingAt(size_t location);
// "Freeze" the node with the unigram that represents the selected candidate value. // "Freeze" the node with the unigram that represents the selected candidate
// After this, the node that contains the unigram will always be evaluated to that // value. After this, the node that contains the unigram will always be
// unigram, while all other overlapping nodes will be reset to their initial state // evaluated to that unigram, while all other overlapping nodes will be reset
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.) // to their initial state (that is, if any of those nodes were "frozen" or
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); // fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location,
const std::string& value);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
// boost the unigram that represents the value with an overriding score. This // only boost the unigram that represents the value with an overriding score.
// has the same side effect as fixNodeSelectedCandidate, which is that all other // This has the same side effect as fixNodeSelectedCandidate, which is that
// overlapping nodes will be reset to their initial state. // all other overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); void overrideNodeScoreForSelectedCandidate(size_t location,
const std::string& value,
float overridingScore);
const string dumpDOT(); std::string dumpDOT();
protected: protected:
vector<Span> m_spans; std::vector<Span> m_spans;
}; };
inline void Grid::clear() inline void Grid::clear() { m_spans.clear(); }
{
m_spans.clear(); inline void Grid::insertNode(const Node& node, size_t location,
size_t spanningLength) {
if (location >= m_spans.size()) {
size_t diff = location - m_spans.size() + 1;
for (size_t i = 0; i < diff; i++) {
m_spans.push_back(Span());
} }
}
inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength) m_spans[location].insertNodeOfLength(node, spanningLength);
{ }
if (inLocation >= m_spans.size()) {
size_t diff = inLocation - m_spans.size() + 1;
for (size_t i = 0 ; i < diff ; i++) { inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(
m_spans.push_back(Span()); size_t location, size_t spanningLength, const std::string& key) {
} if (location > m_spans.size()) {
} return false;
}
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); const Node* n = m_spans[location].nodeOfLength(spanningLength);
} if (!n) {
return false;
}
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey) return key == n->key();
{ }
if (inLocation > m_spans.size()) {
return false;
}
const Node *n = m_spans[inLocation].nodeOfLength(inSpanningLength); inline void Grid::expandGridByOneAtLocation(size_t location) {
if (!n) { if (!location || location == m_spans.size()) {
return false; m_spans.insert(m_spans.begin() + location, Span());
} } else {
m_spans.insert(m_spans.begin() + location, Span());
return inKey == n->key(); for (size_t i = 0; i < location; i++) {
} // zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(location - i);
// Inserts one empty span at inLocation. When the insertion is strictly inside
// the grid (neither at the front nor at the end), every span before the
// insertion point is asked to drop nodes longer than its distance to it.
inline void Grid::expandGridByOneAtLocation(size_t inLocation)
{
    // Must be decided before the insert changes m_spans.size().
    const bool atEdge = (inLocation == 0) || (inLocation == m_spans.size());

    m_spans.insert(m_spans.begin() + inLocation, Span());
    if (atEdge) {
        return;
    }

    // zaps overlapping spans
    for (size_t idx = 0; idx < inLocation; ++idx) {
        m_spans[idx].removeNodeOfLengthGreaterThan(inLocation - idx);
    }
}
// Removes the span at inLocation (no-op when inLocation is out of range) and
// asks every earlier span to drop nodes longer than its distance to the
// removed slot.
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation)
{
    if (inLocation < m_spans.size()) {
        m_spans.erase(m_spans.begin() + inLocation);

        // zaps overlapping spans; `allowed` always equals inLocation - idx
        size_t allowed = inLocation;
        for (size_t idx = 0; idx < inLocation; ++idx, --allowed) {
            m_spans[idx].removeNodeOfLengthGreaterThan(allowed);
        }
    }
}
// Number of spans currently held by the grid.
inline size_t Grid::width() const { return m_spans.size(); }
// Collects anchors for every node that starts at some span before inLocation
// and whose length makes it end exactly at inLocation. Returns an empty list
// when the grid is empty or inLocation is past the grid's end.
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation)
{
    vector<NodeAnchor> anchors;
    if (m_spans.empty() || inLocation > m_spans.size()) {
        return anchors;
    }

    for (size_t start = 0; start < inLocation; ++start) {
        Span& current = m_spans[start];
        // Skip spans whose longest node cannot reach inLocation.
        if (start + current.maximumLength() < inLocation) {
            continue;
        }

        const size_t needed = inLocation - start;
        Node* found = current.nodeOfLength(needed);
        if (!found) {
            continue;
        }

        NodeAnchor anchor;
        anchor.node = found;
        anchor.location = start;
        anchor.spanningLength = needed;
        anchors.push_back(anchor);
    }
    return anchors;
}
// Collects anchors for every node that starts before inLocation and reaches
// inLocation or beyond. Note that the recorded spanningLength is the distance
// from the node's start to inLocation, not the node's own length.
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation)
{
    vector<NodeAnchor> anchors;
    if (m_spans.empty() || inLocation > m_spans.size()) {
        return anchors;
    }

    for (size_t start = 0; start < inLocation; ++start) {
        Span& current = m_spans[start];
        // Skip spans whose longest node cannot reach inLocation.
        if (start + current.maximumLength() < inLocation) {
            continue;
        }

        const size_t longest = current.maximumLength();
        // Lengths below (inLocation - start) end before inLocation, so begin
        // at the first length that reaches it.
        for (size_t len = inLocation - start; len <= longest; ++len) {
            Node* found = current.nodeOfLength(len);
            if (!found) {
                continue;
            }

            NodeAnchor anchor;
            anchor.node = found;
            anchor.location = start;
            anchor.spanningLength = inLocation - start;
            anchors.push_back(anchor);
        }
    }
    return anchors;
}
// Resets every node crossing or ending at `location`, then selects (via
// selectCandidateAtIndex) the first candidate whose value equals `value` in
// each node that has one. Returns the anchor of the last node in which a
// match was found, or a default-constructed NodeAnchor when none matched.
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value)
{
    vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
    NodeAnchor fixedAnchor;
    for (const auto& anchor : nodes) {
        Node* target = const_cast<Node*>(anchor.node);
        // Copy the candidate list before the node is reset.
        auto snapshot = anchor.node->candidates();

        // Reset the candidate-fixed state of every node at the location.
        target->resetCandidate();

        const size_t total = snapshot.size();
        size_t idx = 0;
        while (idx < total && !(snapshot[idx].value == value)) {
            ++idx;
        }
        if (idx < total) {
            target->selectCandidateAtIndex(idx);
            fixedAnchor = anchor;
        }
    }
    return fixedAnchor;
}
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore)
{
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
inline const string Grid::dumpDOT()
{
stringstream sst;
sst << "digraph {" << endl;
sst << "graph [ rankdir=LR ];" << endl;
sst << "BOS;" << endl;
for (size_t p = 0 ; p < m_spans.size() ; p++) {
Span& span = m_spans[p];
for (size_t ni = 0 ; ni <= span.maximumLength() ; ni++) {
Node* np = span.nodeOfLength(ni);
if (np) {
if (!p) {
sst << "BOS -> " << np->currentKeyValue().value << ";" << endl;
}
sst << np->currentKeyValue().value << ";" << endl;
if (p + ni < m_spans.size()) {
Span& dstSpan = m_spans[p+ni];
for (size_t q = 0 ; q <= dstSpan.maximumLength() ; q++) {
Node *dn = dstSpan.nodeOfLength(q);
if (dn) {
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl;
}
}
}
if (p + ni == m_spans.size()) {
sst << np->currentKeyValue().value << " -> " << "EOS;" << endl;
}
}
}
}
sst << "EOS;" << endl;
sst << "}";
return sst.str();
} }
} }
} }
// Erases the span at `location`; does nothing when `location` is not a valid
// span index. Afterwards each span in front of the erased slot is told to
// remove nodes longer than its distance to that slot.
inline void Grid::shrinkGridByOneAtLocation(size_t location) {
  const bool inRange = location < m_spans.size();
  if (inRange) {
    m_spans.erase(m_spans.begin() + location);

    // zaps overlapping spans
    size_t start = 0;
    while (start < location) {
      m_spans[start].removeNodeOfLengthGreaterThan(location - start);
      ++start;
    }
  }
}
// Returns how many spans the grid currently contains.
inline size_t Grid::width() const {
  return m_spans.size();
}
inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location) {
std::vector<NodeAnchor> result;
if (m_spans.size() && location <= m_spans.size()) {
for (size_t i = 0; i < location; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= location) {
Node* np = span.nodeOfLength(location - i);
if (np) {
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = location - i;
result.push_back(na);
}
}
}
}
return result;
}
inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location) {
std::vector<NodeAnchor> result;
if (m_spans.size() && location <= m_spans.size()) {
for (size_t i = 0; i < location; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= location) {
for (size_t j = 1, m = span.maximumLength(); j <= m; j++) {
if (i + j < location) {
continue;
}
Node* np = span.nodeOfLength(j);
if (np) {
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = location - i;
result.push_back(na);
}
}
}
}
}
return result;
}
// Every node crossing or ending at `location` is reset; in each node that
// offers a candidate equal to `value`, the first such candidate is selected
// with selectCandidateAtIndex. The anchor of the last node with a match is
// returned (a default-constructed NodeAnchor if there was no match).
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
                                                 const std::string& value) {
  std::vector<NodeAnchor> anchors = nodesCrossingOrEndingAt(location);
  NodeAnchor selected;
  for (size_t n = 0; n < anchors.size(); ++n) {
    NodeAnchor current = anchors[n];
    Node* mutableNode = const_cast<Node*>(current.node);
    // Copy taken before the reset below.
    auto candidates = current.node->candidates();

    // Reset the candidate-fixed state of every node at the location.
    mutableNode->resetCandidate();

    for (size_t i = 0; i != candidates.size(); ++i) {
      if (candidates[i].value != value) {
        continue;
      }
      mutableNode->selectCandidateAtIndex(i);
      selected = current;
      break;
    }
  }
  return selected;
}
inline void Grid::overrideNodeScoreForSelectedCandidate(
size_t location, const std::string& value, float overridingScore) {
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)
->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -0,0 +1,70 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "Grid.h"
#include <iostream>
#include <sstream>
#include <string>
namespace Taiyan {
namespace Gramambular {
// Renders the grid as Graphviz DOT text and returns it.
//
// The output contains a "BOS" vertex, one vertex per node found in each span
// (labelled with the node's current value), an edge from BOS to every node of
// the first span, an edge from each node to every node of the span located
// where that node ends, an edge to "EOS" for nodes ending exactly at the end
// of the grid, and finally the "EOS" vertex.
std::string Grid::dumpDOT() {
  std::stringstream sst;
  // Lines end with '\n' instead of std::endl: the text is identical, but the
  // in-memory stream is not flushed after every line.
  sst << "digraph {\n";
  sst << "graph [ rankdir=LR ];\n";
  sst << "BOS;\n";

  const size_t spanCount = m_spans.size();
  for (size_t p = 0; p < spanCount; p++) {
    Span& span = m_spans[p];
    for (size_t ni = 0; ni <= span.maximumLength(); ni++) {
      Node* np = span.nodeOfLength(ni);
      if (!np) {
        continue;
      }

      // currentKeyValue() returns by value, so fetch the label once per node
      // rather than once per emitted line.
      const std::string label = np->currentKeyValue().value;

      if (!p) {
        sst << "BOS -> " << label << ";\n";
      }
      sst << label << ";\n";

      // Connect this node to every node starting where it ends.
      if (p + ni < spanCount) {
        Span& dstSpan = m_spans[p + ni];
        for (size_t q = 0; q <= dstSpan.maximumLength(); q++) {
          Node* dn = dstSpan.nodeOfLength(q);
          if (dn) {
            sst << label << " -> " << dn->currentKeyValue().value << ";\n";
          }
        }
      }

      // A node that ends at the end of the grid links to EOS.
      if (p + ni == spanCount) {
        sst << label << " -> EOS;\n";
      }
    }
  }

  sst << "EOS;\n";
  sst << "}";
  return sst.str();
}
} // namespace Gramambular
} // namespace Taiyan

View File

@ -17,47 +17,43 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef KeyValuePair_h #ifndef KEYVALUEPAIR_H_
#define KeyValuePair_h #define KEYVALUEPAIR_H_
#include <ostream> #include <ostream>
#include <string> #include <string>
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std;
class KeyValuePair { class KeyValuePair {
public: public:
string key; std::string key;
string value; std::string value;
bool operator==(const KeyValuePair& inAnother) const; bool operator==(const KeyValuePair& another) const;
bool operator<(const KeyValuePair& inAnother) const; bool operator<(const KeyValuePair& another) const;
}; };
inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) inline std::ostream& operator<<(std::ostream& stream,
{ const KeyValuePair& pair) {
inStream << "(" << inPair.key << "," << inPair.value << ")"; stream << "(" << pair.key << "," << pair.value << ")";
return inStream; return stream;
}
// Pairs are equal when both the key and the value match; the value is only
// compared when the keys are equal.
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const
{
    if (key != inAnother.key) {
        return false;
    }
    return value == inAnother.value;
}
// Orders pairs by key; pairs with equal keys are ordered by value.
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const
{
    return (key == inAnother.key) ? (value < inAnother.value)
                                  : (key < inAnother.key);
}
}
} }
// True when the two pairs have the same key and the same value.
inline bool KeyValuePair::operator==(const KeyValuePair& another) const {
  const bool sameKey = key == another.key;
  return sameKey ? value == another.value : false;
}
// Lexicographic ordering on (key, value): the key decides unless both keys
// are equal, in which case the value decides.
inline bool KeyValuePair::operator<(const KeyValuePair& another) const {
  const int keyOrder = key.compare(another.key);
  if (keyOrder != 0) {
    return keyOrder < 0;
  }
  return value < another.value;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,28 +17,28 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef LanguageModel_h #ifndef LANGUAGEMODEL_H_
#define LanguageModel_h #define LANGUAGEMODEL_H_
#include <string>
#include <vector> #include <vector>
#include "Bigram.h" #include "Bigram.h"
#include "Unigram.h" #include "Unigram.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std; class LanguageModel {
public:
class LanguageModel { virtual ~LanguageModel() {}
public:
virtual ~LanguageModel() {}
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0;
virtual const vector<Unigram> unigramsForKey(const string &key) = 0;
virtual bool hasUnigramsForKey(const string& key) = 0;
};
}
}
virtual const std::vector<Bigram> bigramsForKeys(
const std::string& preceedingKey, const std::string& key) = 0;
virtual const std::vector<Unigram> unigramsForKey(const std::string& key) = 0;
virtual bool hasUnigramsForKey(const std::string& key) = 0;
};
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,208 +17,198 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Node_h #ifndef NODE_H_
#define Node_h #define NODE_H_
#include <limits> #include <limits>
#include <map>
#include <string>
#include <vector> #include <vector>
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std;
class Node { class Node {
public: public:
Node(); Node();
Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams); Node(const std::string& key, const std::vector<Unigram>& unigrams,
const std::vector<Bigram>& bigrams);
void primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues); void primeNodeWithPreceedingKeyValues(
const std::vector<KeyValuePair>& keyValues);
bool isCandidateFixed() const; bool isCandidateFixed() const;
const vector<KeyValuePair>& candidates() const; const std::vector<KeyValuePair>& candidates() const;
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); void selectCandidateAtIndex(size_t index = 0, bool fix = true);
void resetCandidate(); void resetCandidate();
void selectFloatingCandidateAtIndex(size_t index, double score); void selectFloatingCandidateAtIndex(size_t index, double score);
const string& key() const; const std::string& key() const;
double score() const; double score() const;
// double scoreForCandidate(string &candidate) const; // Prevents the override model from remembering symbols with score -X or lower. double scoreForCandidate(const std::string& candidate) const;
const KeyValuePair currentKeyValue() const; const KeyValuePair currentKeyValue() const;
double highestUnigramScore() const; double highestUnigramScore() const;
protected: protected:
const LanguageModel* m_LM; const LanguageModel* m_LM;
string m_key; std::string m_key;
double m_score; double m_score;
vector<Unigram> m_unigrams; std::vector<Unigram> m_unigrams;
vector<KeyValuePair> m_candidates; std::vector<KeyValuePair> m_candidates;
map<string, size_t> m_valueUnigramIndexMap; std::map<std::string, size_t> m_valueUnigramIndexMap;
map<KeyValuePair, vector<Bigram> > m_preceedingGramBigramMap; std::map<KeyValuePair, std::vector<Bigram> > m_preceedingGramBigramMap;
bool m_candidateFixed; bool m_candidateFixed;
size_t m_selectedUnigramIndex; size_t m_selectedUnigramIndex;
friend ostream& operator<<(ostream& inStream, const Node& inNode); friend std::ostream& operator<<(std::ostream& stream, const Node& node);
}; };
inline ostream& operator<<(ostream& inStream, const Node& inNode) inline std::ostream& operator<<(std::ostream& stream, const Node& node) {
{ stream << "(node,key:" << node.m_key
inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") << ",fixed:" << (node.m_candidateFixed ? "true" : "false")
<< ",selected:" << inNode.m_selectedUnigramIndex << ",selected:" << node.m_selectedUnigramIndex << ","
<< "," << inNode.m_unigrams << ")"; << node.m_unigrams << ")";
return inStream; return stream;
} }
inline Node::Node() inline Node::Node()
: m_candidateFixed(false) : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {}
, m_selectedUnigramIndex(0)
, m_score(0.0)
{
}
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams) inline Node::Node(const std::string& key, const std::vector<Unigram>& unigrams,
: m_key(inKey) const std::vector<Bigram>& bigrams)
, m_unigrams(inUnigrams) : m_key(key),
, m_candidateFixed(false) m_unigrams(unigrams),
, m_selectedUnigramIndex(0) m_candidateFixed(false),
, m_score(0.0) m_selectedUnigramIndex(0),
{ m_score(0.0) {
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
if (m_unigrams.size()) { if (m_unigrams.size()) {
m_score = m_unigrams[0].score; m_score = m_unigrams[0].score;
} }
size_t i = 0; size_t i = 0;
for (vector<Unigram>::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) { for (std::vector<Unigram>::const_iterator ui = m_unigrams.begin();
m_valueUnigramIndexMap[(*ui).keyValue.value] = i; ui != m_unigrams.end(); ++ui) {
i++; m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
i++;
m_candidates.push_back((*ui).keyValue); m_candidates.push_back((*ui).keyValue);
} }
for (vector<Bigram>::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) { for (std::vector<Bigram>::const_iterator bi = bigrams.begin();
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); bi != bigrams.end(); ++bi) {
} m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
} }
}
inline void Node::primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues) inline void Node::primeNodeWithPreceedingKeyValues(
{ const std::vector<KeyValuePair>& keyValues) {
size_t newIndex = m_selectedUnigramIndex; size_t newIndex = m_selectedUnigramIndex;
double max = m_score; double max = m_score;
if (!isCandidateFixed()) { if (!isCandidateFixed()) {
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) { for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin();
map<KeyValuePair, vector<Bigram> >::const_iterator f = m_preceedingGramBigramMap.find(*kvi); kvi != keyValues.end(); ++kvi) {
if (f != m_preceedingGramBigramMap.end()) { std::map<KeyValuePair, std::vector<Bigram> >::const_iterator f =
const vector<Bigram>& bigrams = (*f).second; m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) {
const std::vector<Bigram>& bigrams = (*f).second;
for (vector<Bigram>::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) { for (std::vector<Bigram>::const_iterator bi = bigrams.begin();
const Bigram& bigram = *bi; bi != bigrams.end(); ++bi) {
if (bigram.score > max) { const Bigram& bigram = *bi;
map<string, size_t>::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value); if (bigram.score > max) {
if (uf != m_valueUnigramIndexMap.end()) { std::map<std::string, size_t>::const_iterator uf =
newIndex = (*uf).second; m_valueUnigramIndexMap.find((*bi).keyValue.value);
max = bigram.score; if (uf != m_valueUnigramIndexMap.end()) {
} newIndex = (*uf).second;
} max = bigram.score;
} }
} }
} }
} }
if (m_score != max) {
m_score = max;
}
if (newIndex != m_selectedUnigramIndex) {
m_selectedUnigramIndex = newIndex;
}
} }
}
inline bool Node::isCandidateFixed() const if (m_score != max) {
{ m_score = max;
return m_candidateFixed; }
}
inline const vector<KeyValuePair>& Node::candidates() const if (newIndex != m_selectedUnigramIndex) {
{ m_selectedUnigramIndex = newIndex;
return m_candidates;
}
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix)
{
if (inIndex >= m_unigrams.size()) {
m_selectedUnigramIndex = 0;
}
else {
m_selectedUnigramIndex = inIndex;
}
m_candidateFixed = inFix;
m_score = 99;
}
inline void Node::resetCandidate()
{
m_selectedUnigramIndex = 0;
m_candidateFixed = 0;
if (m_unigrams.size()) {
m_score = m_unigrams[0].score;
}
}
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
if (index >= m_unigrams.size()) {
m_selectedUnigramIndex = 0;
} else {
m_selectedUnigramIndex = index;
}
m_candidateFixed = false;
m_score = score;
}
inline const string& Node::key() const
{
return m_key;
}
inline double Node::score() const
{
return m_score;
}
// Prevents the override model from remembering symbols with score -X or lower.
// inline double Node::scoreForCandidate(string &candidate) const
// {
// for (auto unigram : m_unigrams) {
// if (unigram.keyValue.value == candidate) {
// return unigram.score;
// }
// }
// return 0.0;
// }
inline double Node::highestUnigramScore() const {
if (m_unigrams.empty()) {
return 0.0;
}
return m_unigrams[0].score;
}
inline const KeyValuePair Node::currentKeyValue() const
{
if(m_selectedUnigramIndex >= m_unigrams.size()) {
return KeyValuePair();
}
else {
return m_candidates[m_selectedUnigramIndex];
}
}
} }
} }
// Returns the current value of the m_candidateFixed flag.
inline bool Node::isCandidateFixed() const {
  return m_candidateFixed;
}
// Gives read-only access to the node's candidate list (m_candidates).
inline const std::vector<KeyValuePair>& Node::candidates() const {
  const std::vector<KeyValuePair>& candidateList = m_candidates;
  return candidateList;
}
// Selects the unigram at `index` and sets the fixed flag to `fix`.
//
// An index at or beyond the number of unigrams falls back to index 0.
// The node's score is unconditionally overwritten with the constant 99.
inline void Node::selectCandidateAtIndex(size_t index, bool fix) {
  const bool indexInRange = index < m_unigrams.size();
  m_selectedUnigramIndex = indexInRange ? index : 0;
  m_candidateFixed = fix;
  m_score = 99;
}
// Resets the selection state: the selected index returns to 0 and the
// fixed flag is cleared.
//
// When the node has at least one unigram, the score is restored to the score
// of the first unigram; with no unigrams the score is left untouched.
inline void Node::resetCandidate() {
  m_selectedUnigramIndex = 0;
  // m_candidateFixed is a bool; use the bool literal, matching
  // selectFloatingCandidateAtIndex().
  m_candidateFixed = false;
  if (!m_unigrams.empty()) {
    m_score = m_unigrams[0].score;
  }
}
// Selects the unigram at `index` without fixing it, and assigns `score` as
// the node's score.
//
// An index at or beyond the number of unigrams falls back to index 0.
// The fixed flag is always cleared.
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
  const bool indexInRange = index < m_unigrams.size();
  m_selectedUnigramIndex = indexInRange ? index : 0;
  m_candidateFixed = false;
  m_score = score;
}
// Returns a reference to the key this node was constructed with (m_key).
inline const std::string& Node::key() const {
  return m_key;
}
// Returns the node's current score (m_score).
inline double Node::score() const {
  return m_score;
}
// Prevents the override model from remembering symbols with score -X or lower.
//inline double Node::scoreForCandidate(const std::string& candidate) const {
// for (auto unigram : m_unigrams) {
// if (unigram.keyValue.value == candidate) {
// return unigram.score;
// }
// }
// return 0.0;
//}
// Returns the score of the first unigram, or 0.0 when the node has no
// unigrams.
inline double Node::highestUnigramScore() const {
  return m_unigrams.empty() ? 0.0 : m_unigrams[0].score;
}
// Returns the candidate at the currently selected index.
//
// If the selected index is not a valid position in the unigram list, a
// default-constructed KeyValuePair is returned instead.
inline const KeyValuePair Node::currentKeyValue() const {
  if (m_selectedUnigramIndex < m_unigrams.size()) {
    return m_candidates[m_selectedUnigramIndex];
  }
  return KeyValuePair();
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,55 +17,48 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef NodeAnchor_h #ifndef NODEANCHOR_H_
#define NodeAnchor_h #define NODEANCHOR_H_
#include <vector>
#include "Node.h" #include "Node.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class NodeAnchor {
public:
NodeAnchor();
const Node *node;
size_t location;
size_t spanningLength;
double accumulatedScore;
};
inline NodeAnchor::NodeAnchor() struct NodeAnchor {
: node(0) const Node* node = nullptr;
, location(0) size_t location = 0;
, spanningLength(0) size_t spanningLength = 0;
, accumulatedScore(0.0) double accumulatedScore = 0.0;
{ };
}
inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) inline std::ostream& operator<<(std::ostream& stream,
{ const NodeAnchor& anchor) {
inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),"; stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),";
if (inAnchor.node) { if (anchor.node) {
inStream << *(inAnchor.node); stream << *(anchor.node);
} } else {
else { stream << "null";
inStream << "null";
}
inStream << "}";
return inStream;
}
inline ostream& operator<<(ostream& inStream, const vector<NodeAnchor>& inAnchor)
{
for (vector<NodeAnchor>::const_iterator i = inAnchor.begin() ; i != inAnchor.end() ; ++i) {
inStream << *i;
if (i + 1 != inAnchor.end()) {
inStream << "<-";
}
}
return inStream;
}
} }
stream << "}";
return stream;
} }
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<NodeAnchor>& anchor) {
for (std::vector<NodeAnchor>::const_iterator i = anchor.begin();
i != anchor.end(); ++i) {
stream << *i;
if (i + 1 != anchor.end()) {
stream << "<-";
}
}
return stream;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,88 +17,77 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Span_h #ifndef SPAN_H_
#define Span_h #define SPAN_H_
#include <map> #include <map>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include "Node.h" #include "Node.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class Span { class Span {
public: public:
Span(); void clear();
void insertNodeOfLength(const Node& node, size_t length);
void removeNodeOfLengthGreaterThan(size_t length);
void clear(); Node* nodeOfLength(size_t length);
void insertNodeOfLength(const Node& inNode, size_t inLength); size_t maximumLength() const;
void removeNodeOfLengthGreaterThan(size_t inLength);
Node* nodeOfLength(size_t inLength); protected:
size_t maximumLength() const; std::map<size_t, Node> m_lengthNodeMap;
size_t m_maximumLength = 0;
};
protected: inline void Span::clear() {
map<size_t, Node> m_lengthNodeMap; m_lengthNodeMap.clear();
size_t m_maximumLength; m_maximumLength = 0;
}; }
inline Span::Span() inline void Span::insertNodeOfLength(const Node& node, size_t length) {
: m_maximumLength(0) m_lengthNodeMap[length] = node;
{ if (length > m_maximumLength) {
} m_maximumLength = length;
inline void Span::clear()
{
m_lengthNodeMap.clear();
m_maximumLength = 0;
}
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength)
{
m_lengthNodeMap[inLength] = inNode;
if (inLength > m_maximumLength) {
m_maximumLength = inLength;
}
}
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength)
{
if (inLength > m_maximumLength) {
return;
}
size_t max = 0;
set<size_t> removeSet;
for (map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) {
if ((*i).first > inLength) {
removeSet.insert((*i).first);
}
else {
if ((*i).first > max) {
max = (*i).first;
}
}
}
for (set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) {
m_lengthNodeMap.erase(*i);
}
m_maximumLength = max;
}
inline Node* Span::nodeOfLength(size_t inLength)
{
map<size_t, Node>::iterator f = m_lengthNodeMap.find(inLength);
return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
}
inline size_t Span::maximumLength() const
{
return m_maximumLength;
}
} }
} }
// Removes every node whose length key is strictly greater than `length` and
// updates the cached maximum length.
//
// Does nothing when `length` is already greater than the current maximum.
// Afterwards the maximum is the largest remaining key, or 0 when no node
// remains.
inline void Span::removeNodeOfLengthGreaterThan(size_t length) {
  if (length > m_maximumLength) {
    return;
  }

  // std::map keeps keys ordered, so all keys > length form the tail range
  // starting at upper_bound(length); erase it in one call.
  m_lengthNodeMap.erase(m_lengthNodeMap.upper_bound(length),
                        m_lengthNodeMap.end());

  // The largest surviving key (if any) is the last element of the map.
  m_maximumLength =
      m_lengthNodeMap.empty() ? 0 : m_lengthNodeMap.rbegin()->first;
}
// Looks up the node stored under `length`.
//
// Returns a pointer to the node held inside the span's map, or nullptr when
// no node of that length exists.
inline Node* Span::nodeOfLength(size_t length) {
  std::map<size_t, Node>::iterator found = m_lengthNodeMap.find(length);
  if (found == m_lengthNodeMap.end()) {
    return nullptr;
  }
  return &found->second;
}
// Returns the cached maximum node length (m_maximumLength).
inline size_t Span::maximumLength() const {
  return m_maximumLength;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,80 +17,75 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Unigram_h #ifndef UNIGRAM_H_
#define Unigram_h #define UNIGRAM_H_
#include <vector> #include <vector>
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
class Unigram {
public:
Unigram();
KeyValuePair keyValue; class Unigram {
double score; public:
Unigram();
bool operator==(const Unigram& inAnother) const; KeyValuePair keyValue;
bool operator<(const Unigram& inAnother) const; double score;
static bool ScoreCompare(const Unigram& a, const Unigram& b); bool operator==(const Unigram& another) const;
}; bool operator<(const Unigram& another) const;
inline ostream& operator<<(ostream& inStream, const Unigram& inGram) static bool ScoreCompare(const Unigram& a, const Unigram& b);
{ };
streamsize p = inStream.precision();
inStream.precision(6);
inStream << "(" << inGram.keyValue << "," << inGram.score << ")";
inStream.precision(p);
return inStream;
}
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams) inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) {
{ std::streamsize p = stream.precision();
inStream << "[" << inGrams.size() << "]=>{"; stream.precision(6);
stream << "(" << gram.keyValue << "," << gram.score << ")";
size_t index = 0; stream.precision(p);
return stream;
for (vector<Unigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) {
inStream << index << "=>";
inStream << *gi;
if (gi + 1 != inGrams.end()) {
inStream << ",";
}
}
inStream << "}";
return inStream;
}
inline Unigram::Unigram()
: score(0.0)
{
}
inline bool Unigram::operator==(const Unigram& inAnother) const
{
return keyValue == inAnother.keyValue && score == inAnother.score;
}
inline bool Unigram::operator<(const Unigram& inAnother) const
{
if (keyValue < inAnother.keyValue) {
return true;
}
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score;
}
return false;
}
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b)
{
return a.score > b.score;
}
}
} }
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<Unigram>& grams) {
stream << "[" << grams.size() << "]=>{";
size_t index = 0;
for (std::vector<Unigram>::const_iterator gi = grams.begin();
gi != grams.end(); ++gi, ++index) {
stream << index << "=>";
stream << *gi;
if (gi + 1 != grams.end()) {
stream << ",";
}
}
stream << "}";
return stream;
}
// Default constructor: initializes the score to 0.0.
inline Unigram::Unigram()
    : score(0.0) {
}
// Two unigrams are equal when both their key-value pairs and their scores
// compare equal. The score is only compared when the key-value pairs match.
inline bool Unigram::operator==(const Unigram& another) const {
  if (!(keyValue == another.keyValue)) {
    return false;
  }
  return score == another.score;
}
// Orders unigrams by key-value pair first; when the key-value pairs compare
// equal, the one with the lower score sorts first.
inline bool Unigram::operator<(const Unigram& another) const {
  if (keyValue < another.keyValue) {
    return true;
  }
  // Not less by key-value: only a tie on key-value can still yield "less".
  return keyValue == another.keyValue && score < another.score;
}
// Comparator that places higher-scored unigrams first: returns true when
// `a` has a strictly greater score than `b`.
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) {
  return b.score < a.score;
}
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -17,67 +17,69 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABI
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef Walker_h #ifndef WALKER_H_
#define Walker_h #define WALKER_H_
#include <algorithm> #include <algorithm>
#include <vector>
#include "Grid.h" #include "Grid.h"
namespace Taiyan { namespace Taiyan {
namespace Gramambular { namespace Gramambular {
using namespace std;
class Walker { class Walker {
public: public:
Walker(Grid* inGrid); explicit Walker(Grid* inGrid);
const vector<NodeAnchor> reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0); const std::vector<NodeAnchor> reverseWalk(size_t location,
double accumulatedScore = 0.0);
protected: protected:
Grid* m_grid; Grid* m_grid;
}; };
inline Walker::Walker(Grid* inGrid) inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {}
: m_grid(inGrid)
{ inline const std::vector<NodeAnchor> Walker::reverseWalk(
size_t location, double accumulatedScore) {
if (!location || location > m_grid->width()) {
return std::vector<NodeAnchor>();
}
std::vector<std::vector<NodeAnchor> > paths;
std::vector<NodeAnchor> nodes = m_grid->nodesEndingAt(location);
for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end();
++ni) {
if (!(*ni).node) {
continue;
} }
inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation, double inAccumulatedScore) (*ni).accumulatedScore = accumulatedScore + (*ni).node->score();
{
if (!inLocation || inLocation > m_grid->width()) {
return vector<NodeAnchor>();
}
vector<vector<NodeAnchor> > paths; std::vector<NodeAnchor> path =
reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
path.insert(path.begin(), *ni);
vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation); paths.push_back(path);
}
for (vector<NodeAnchor>::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) { if (!paths.size()) {
if (!(*ni).node) { return std::vector<NodeAnchor>();
continue; }
}
(*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); std::vector<NodeAnchor>* result = &*(paths.begin());
for (std::vector<std::vector<NodeAnchor> >::iterator pi = paths.begin();
vector<NodeAnchor> path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); pi != paths.end(); ++pi) {
path.insert(path.begin(), *ni); if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
result = &*pi;
paths.push_back(path);
}
if (!paths.size()) {
return vector<NodeAnchor>();
}
vector<NodeAnchor>* result = &*(paths.begin());
for (vector<vector<NodeAnchor> >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
result = &*pi;
}
}
return *result;
} }
} }
return *result;
} }
} // namespace Gramambular
} // namespace Taiyan
#endif #endif

View File

@ -50,6 +50,7 @@
5BD05C6827B2BBEF004C4F1D /* Content.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6327B2BBEF004C4F1D /* Content.swift */; }; 5BD05C6827B2BBEF004C4F1D /* Content.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6327B2BBEF004C4F1D /* Content.swift */; };
5BD05C6927B2BBEF004C4F1D /* WindowController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */; }; 5BD05C6927B2BBEF004C4F1D /* WindowController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */; };
5BD05C6A27B2BBEF004C4F1D /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */; }; 5BD05C6A27B2BBEF004C4F1D /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */; };
5BDC5CAB27C2873D00E1CCE2 /* Grid.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5BDC5CAA27C2873D00E1CCE2 /* Grid.mm */; };
5BDCBB2E27B4E67A00D0CC59 /* vChewingPhraseEditor.app in Resources */ = {isa = PBXBuildFile; fileRef = 5BD05BB827B2A429004C4F1D /* vChewingPhraseEditor.app */; }; 5BDCBB2E27B4E67A00D0CC59 /* vChewingPhraseEditor.app in Resources */ = {isa = PBXBuildFile; fileRef = 5BD05BB827B2A429004C4F1D /* vChewingPhraseEditor.app */; };
5BE78BD927B3775B005EA1BE /* ctlAboutWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BE78BD827B37750005EA1BE /* ctlAboutWindow.swift */; }; 5BE78BD927B3775B005EA1BE /* ctlAboutWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BE78BD827B37750005EA1BE /* ctlAboutWindow.swift */; };
5BE78BDD27B3776D005EA1BE /* frmAboutWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 5BE78BDA27B37764005EA1BE /* frmAboutWindow.xib */; }; 5BE78BDD27B3776D005EA1BE /* frmAboutWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 5BE78BDA27B37764005EA1BE /* frmAboutWindow.xib */; };
@ -195,6 +196,7 @@
5BD05C6327B2BBEF004C4F1D /* Content.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Content.swift; sourceTree = "<group>"; }; 5BD05C6327B2BBEF004C4F1D /* Content.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Content.swift; sourceTree = "<group>"; };
5BD05C6427B2BBEF004C4F1D /* WindowController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = WindowController.swift; sourceTree = "<group>"; }; 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = WindowController.swift; sourceTree = "<group>"; };
5BD05C6527B2BBEF004C4F1D /* ViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; }; 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
5BDC5CAA27C2873D00E1CCE2 /* Grid.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = Grid.mm; sourceTree = "<group>"; };
5BDCBB4227B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/MainMenu.strings"; sourceTree = "<group>"; }; 5BDCBB4227B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/MainMenu.strings"; sourceTree = "<group>"; };
5BDCBB4327B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/frmAboutWindow.strings"; sourceTree = "<group>"; }; 5BDCBB4327B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/frmAboutWindow.strings"; sourceTree = "<group>"; };
5BDCBB4527B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "Source/WindowNIBs/zh-Hant.lproj/frmPrefWindow.strings"; sourceTree = "<group>"; }; 5BDCBB4527B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "Source/WindowNIBs/zh-Hant.lproj/frmPrefWindow.strings"; sourceTree = "<group>"; };
@ -663,6 +665,7 @@
6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */, 6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */,
6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */, 6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */,
6A0D4F1715FC0EB100ABF4B3 /* Grid.h */, 6A0D4F1715FC0EB100ABF4B3 /* Grid.h */,
5BDC5CAA27C2873D00E1CCE2 /* Grid.mm */,
6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */, 6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */,
6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */, 6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */,
6A0D4F1A15FC0EB100ABF4B3 /* Node.h */, 6A0D4F1A15FC0EB100ABF4B3 /* Node.h */,
@ -943,6 +946,7 @@
D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */, D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */,
5B62A33627AE795800A19448 /* PreferencesModule.swift in Sources */, 5B62A33627AE795800A19448 /* PreferencesModule.swift in Sources */,
5B62A33827AE79CD00A19448 /* NSStringUtils.swift in Sources */, 5B62A33827AE79CD00A19448 /* NSStringUtils.swift in Sources */,
5BDC5CAB27C2873D00E1CCE2 /* Grid.mm in Sources */,
5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */, 5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */,
5B62A34927AE7CD900A19448 /* TooltipController.swift in Sources */, 5B62A34927AE7CD900A19448 /* TooltipController.swift in Sources */,
6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */, 6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */,