Reformat Gramambular with clang-format

This commit is contained in:
Lukhnos Liu 2022-02-19 09:28:04 -08:00
parent d4d8d14004
commit d3302ef70a
11 changed files with 887 additions and 939 deletions

View File

@ -33,74 +33,68 @@
#include "KeyValuePair.h"
namespace Formosa {
namespace Gramambular {
class Bigram {
public:
Bigram();
KeyValuePair preceedingKeyValue;
KeyValuePair keyValue;
double score;
bool operator==(const Bigram& inAnother) const;
bool operator<(const Bigram& inAnother) const;
};
namespace Gramambular {
class Bigram {
public:
Bigram();
inline ostream& operator<<(ostream& inStream, const Bigram& inGram)
{
streamsize p = inStream.precision();
inStream.precision(6);
inStream << "(" << inGram.keyValue << "|" <<inGram.preceedingKeyValue << "," << inGram.score << ")";
inStream.precision(p);
return inStream;
}
KeyValuePair preceedingKeyValue;
KeyValuePair keyValue;
double score;
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams)
{
inStream << "[" << inGrams.size() << "]=>{";
size_t index = 0;
for (vector<Bigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) {
inStream << index << "=>";
inStream << *gi;
if (gi + 1 != inGrams.end()) {
inStream << ",";
}
}
inStream << "}";
return inStream;
}
inline Bigram::Bigram()
: score(0.0)
{
}
inline bool Bigram::operator==(const Bigram& inAnother) const
{
return preceedingKeyValue == inAnother.preceedingKeyValue && keyValue == inAnother.keyValue && score == inAnother.score;
}
inline bool Bigram::operator<(const Bigram& inAnother) const
{
if (preceedingKeyValue < inAnother.preceedingKeyValue) {
return true;
}
else if (preceedingKeyValue == inAnother.preceedingKeyValue) {
if (keyValue < inAnother.keyValue) {
return true;
}
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score;
}
return false;
}
bool operator==(const Bigram& inAnother) const;
bool operator<(const Bigram& inAnother) const;
};
return false;
}
}
// Writes a bigram as "(<keyValue>|<preceedingKeyValue>,<score>)". The stream
// precision is set to 6 while writing and restored to the caller's value
// before returning.
inline ostream& operator<<(ostream& inStream, const Bigram& inGram) {
  // precision(n) installs the new precision and hands back the previous one.
  const streamsize savedPrecision = inStream.precision(6);
  inStream << "(" << inGram.keyValue;
  inStream << "|" << inGram.preceedingKeyValue;
  inStream << "," << inGram.score << ")";
  inStream.precision(savedPrecision);
  return inStream;
}
// Writes a list of bigrams as "[<count>]=>{0=><bigram>,1=><bigram>,...}".
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams) {
  inStream << "[" << inGrams.size() << "]=>{";

  for (size_t i = 0, count = inGrams.size(); i < count; ++i) {
    // Separator goes between entries, never after the last one.
    if (i > 0) {
      inStream << ",";
    }
    inStream << i << "=>";
    inStream << inGrams[i];
  }

  inStream << "}";
  return inStream;
}
// Default constructor: the score starts at 0.0; both key-value pairs are
// default-constructed.
inline Bigram::Bigram()
    : score(0.0) {
}
// Two bigrams are equal when the preceding pair, the current pair and the
// score all compare equal.
inline bool Bigram::operator==(const Bigram& inAnother) const {
  if (!(preceedingKeyValue == inAnother.preceedingKeyValue)) {
    return false;
  }
  if (!(keyValue == inAnother.keyValue)) {
    return false;
  }
  return score == inAnother.score;
}
// Orders bigrams lexicographically: first by the preceding key-value pair,
// then by the current key-value pair, and finally by score.
inline bool Bigram::operator<(const Bigram& inAnother) const {
  if (preceedingKeyValue < inAnother.preceedingKeyValue) {
    return true;
  }
  if (!(preceedingKeyValue == inAnother.preceedingKeyValue)) {
    return false;
  }

  // Preceding pairs tie; fall through to the current pair.
  if (keyValue < inAnother.keyValue) {
    return true;
  }
  if (!(keyValue == inAnother.keyValue)) {
    return false;
  }

  // Both pairs tie; the score decides.
  return score < inAnother.score;
}
} // namespace Gramambular
} // namespace Formosa
#endif

View File

@ -29,198 +29,185 @@
#define BlockReadingBuilder_h
#include <vector>
#include "Grid.h"
#include "LanguageModel.h"
namespace Formosa {
namespace Gramambular {
using namespace std;
class BlockReadingBuilder {
public:
BlockReadingBuilder(LanguageModel *inLM);
void clear();
size_t length() const;
size_t cursorIndex() const;
void setCursorIndex(size_t inNewIndex);
void insertReadingAtCursor(const string& inReading);
bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete
bool removeHeadReadings(size_t count);
void setJoinSeparator(const string& separator);
const string joinSeparator() const;
namespace Gramambular {
using namespace std;
vector<string> readings() const;
class BlockReadingBuilder {
public:
BlockReadingBuilder(LanguageModel* inLM);
void clear();
Grid& grid();
protected:
void build();
static const string Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator);
//最多使用六個字組成一個詞
static const size_t MaximumBuildSpanLength = 6;
size_t m_cursorIndex;
vector<string> m_readings;
Grid m_grid;
LanguageModel *m_LM;
string m_joinSeparator;
};
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM)
: m_LM(inLM)
, m_cursorIndex(0)
{
}
inline void BlockReadingBuilder::clear()
{
m_cursorIndex = 0;
m_readings.clear();
m_grid.clear();
}
inline size_t BlockReadingBuilder::length() const
{
return m_readings.size();
}
inline size_t BlockReadingBuilder::cursorIndex() const
{
return m_cursorIndex;
}
size_t length() const;
size_t cursorIndex() const;
void setCursorIndex(size_t inNewIndex);
void insertReadingAtCursor(const string& inReading);
bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex)
{
m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex;
}
inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading)
{
m_readings.insert(m_readings.begin() + m_cursorIndex, inReading);
m_grid.expandGridByOneAtLocation(m_cursorIndex);
build();
m_cursorIndex++;
}
bool removeHeadReadings(size_t count);
inline vector<string> BlockReadingBuilder::readings() const
{
return m_readings;
}
inline bool BlockReadingBuilder::deleteReadingBeforeCursor()
{
if (!m_cursorIndex) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex);
m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::deleteReadingAfterCursor()
{
if (m_cursorIndex == m_readings.size()) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1);
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::removeHeadReadings(size_t count)
{
if (count > length()) {
return false;
}
for (size_t i = 0; i < count; i++) {
if (m_cursorIndex) {
m_cursorIndex--;
}
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
m_grid.shrinkGridByOneAtLocation(0);
build();
}
return true;
}
inline void BlockReadingBuilder::setJoinSeparator(const string& separator)
{
m_joinSeparator = separator;
}
inline const string BlockReadingBuilder::joinSeparator() const
{
return m_joinSeparator;
}
void setJoinSeparator(const string& separator);
const string joinSeparator() const;
inline Grid& BlockReadingBuilder::grid()
{
return m_grid;
}
vector<string> readings() const;
inline void BlockReadingBuilder::build()
{
if (!m_LM) {
return;
}
size_t begin = 0;
size_t end = m_cursorIndex + MaximumBuildSpanLength;
if (m_cursorIndex < MaximumBuildSpanLength) {
begin = 0;
}
else {
begin = m_cursorIndex - MaximumBuildSpanLength;
}
if (end > m_readings.size()) {
end = m_readings.size();
}
for (size_t p = begin ; p < end ; p++) {
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
Grid& grid();
if (unigrams.size() > 0) {
Node n(combinedReading, unigrams, vector<Bigram>());
m_grid.insertNode(n, p, q);
}
}
}
}
}
inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator)
{
string result;
for (vector<string>::const_iterator iter = begin ; iter != end ; ) {
result += *iter;
++iter;
if (iter != end) {
result += separator;
}
}
return result;
}
}
protected:
void build();
static const string Join(vector<string>::const_iterator begin,
vector<string>::const_iterator end,
const string& separator);
//最多使用六個字組成一個詞
static const size_t MaximumBuildSpanLength = 6;
size_t m_cursorIndex;
vector<string> m_readings;
Grid m_grid;
LanguageModel* m_LM;
string m_joinSeparator;
};
// Creates an empty builder that queries inLM for unigrams. The pointer is
// stored as given; build() is a no-op while it is null.
//
// The initializers are listed in member declaration order (m_cursorIndex is
// declared before m_LM), which is the order in which they actually run.
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* inLM)
    : m_cursorIndex(0), m_LM(inLM) {}
// Discards all readings and grid contents and moves the cursor back to 0.
inline void BlockReadingBuilder::clear() {
  m_readings.clear();
  m_grid.clear();
  m_cursorIndex = 0;
}
// Number of readings currently held by the builder.
inline size_t BlockReadingBuilder::length() const {
  const size_t readingCount = m_readings.size();
  return readingCount;
}
// Current cursor position within the readings.
inline size_t BlockReadingBuilder::cursorIndex() const {
  return m_cursorIndex;
}
// Moves the cursor to inNewIndex, clamped so that it never goes past the end
// of the readings.
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) {
  const size_t limit = m_readings.size();
  if (inNewIndex > limit) {
    m_cursorIndex = limit;
  } else {
    m_cursorIndex = inNewIndex;
  }
}
// Inserts inReading at the cursor, grows the grid by one span at that same
// position, rebuilds the nodes around the cursor, and then advances the
// cursor past the new reading.
inline void BlockReadingBuilder::insertReadingAtCursor(
    const string& inReading) {
  const size_t insertAt = m_cursorIndex;
  m_readings.insert(m_readings.begin() + insertAt, inReading);
  m_grid.expandGridByOneAtLocation(insertAt);

  // build() runs while the cursor still points at the inserted reading.
  build();
  ++m_cursorIndex;
}
// Returns a copy of the readings currently held by the builder.
inline vector<string> BlockReadingBuilder::readings() const {
  vector<string> snapshot(m_readings);
  return snapshot;
}
// Backspace: removes the reading immediately before the cursor, shrinks the
// grid at that position and rebuilds. Returns false (and changes nothing)
// when the cursor is already at the start.
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() {
  if (m_cursorIndex == 0) {
    return false;
  }

  // Step the cursor back first; it then indexes the reading to remove.
  --m_cursorIndex;
  m_readings.erase(m_readings.begin() + m_cursorIndex);
  m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
  build();
  return true;
}
// Delete: removes the reading at the cursor, shrinks the grid at that
// position and rebuilds. Returns false (and changes nothing) when the cursor
// is at the end of the readings.
inline bool BlockReadingBuilder::deleteReadingAfterCursor() {
  const bool cursorAtEnd = (m_cursorIndex == m_readings.size());
  if (cursorAtEnd) {
    return false;
  }

  m_readings.erase(m_readings.begin() + m_cursorIndex);
  m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
  build();
  return true;
}
// Removes the first `count` readings one at a time. For each removed reading
// the cursor is moved back by one (unless already 0), the grid is shrunk at
// location 0, and the grid is rebuilt. Returns false without changing
// anything when `count` exceeds the number of readings.
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
  if (count > length()) {
    return false;
  }

  for (size_t remaining = count; remaining > 0; --remaining) {
    if (m_cursorIndex > 0) {
      --m_cursorIndex;
    }
    m_readings.erase(m_readings.begin());
    m_grid.shrinkGridByOneAtLocation(0);
    build();
  }
  return true;
}
// Sets the string placed between readings when build() combines several of
// them into one lookup key.
inline void BlockReadingBuilder::setJoinSeparator(const string& separator) {
  m_joinSeparator.assign(separator);
}
// Returns a copy of the separator used when joining readings into a key.
inline const string BlockReadingBuilder::joinSeparator() const {
  const string separatorCopy(m_joinSeparator);
  return separatorCopy;
}
// Gives mutable access to the builder's grid.
inline Grid& BlockReadingBuilder::grid() {
  return m_grid;
}
// Populates the grid with nodes for the readings near the cursor.
//
// The window covers up to MaximumBuildSpanLength readings on either side of
// the cursor. For every start position p in the window and every span length
// q (1..MaximumBuildSpanLength) that stays inside the window, the readings
// [p, p+q) are joined into a key; if the grid has no node with that key at
// (p, q) and the language model has unigrams for it, a node is inserted.
// Does nothing when no language model is set.
inline void BlockReadingBuilder::build() {
  if (!m_LM) {
    return;
  }

  // Clamp the window start at 0 (avoids unsigned underflow) and the window
  // end at the number of readings.
  const size_t begin = m_cursorIndex < MaximumBuildSpanLength
                           ? 0
                           : m_cursorIndex - MaximumBuildSpanLength;
  size_t end = m_cursorIndex + MaximumBuildSpanLength;
  if (end > m_readings.size()) {
    end = m_readings.size();
  }

  for (size_t p = begin; p < end; p++) {
    for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) {
      string combinedReading = Join(
          m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);

      // A node for this exact key already exists here; nothing to add.
      if (m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q,
                                                            combinedReading)) {
        continue;
      }

      vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
      if (unigrams.empty()) {
        continue;
      }

      Node n(combinedReading, unigrams, vector<Bigram>());
      m_grid.insertNode(n, p, q);
    }
  }
}
// Concatenates the strings in [begin, end), placing `separator` between
// consecutive elements. An empty range yields an empty string.
inline const string BlockReadingBuilder::Join(
    vector<string>::const_iterator begin, vector<string>::const_iterator end,
    const string& separator) {
  string joined;
  if (begin == end) {
    return joined;
  }

  // First element has no leading separator; every later one does.
  vector<string>::const_iterator it = begin;
  joined += *it;
  for (++it; it != end; ++it) {
    joined += separator;
    joined += *it;
  }
  return joined;
}
} // namespace Gramambular
} // namespace Formosa
#endif

View File

@ -21,14 +21,15 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "gtest/gtest.h"
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <map>
#include <vector>
#include <cstdlib>
#include <sstream>
#include <vector>
#include "Gramambular.h"
#include "gtest/gtest.h"
const char* SampleData = R"(
#
@ -122,11 +123,9 @@ const char* SampleData = R"(
using namespace std;
using namespace Formosa::Gramambular;
class SimpleLM : public LanguageModel
{
class SimpleLM : public LanguageModel {
public:
SimpleLM(const char* input, bool swapKeyValue = false)
{
SimpleLM(const char* input, bool swapKeyValue = false) {
stringstream sstream(input);
while (sstream.good()) {
string line;
@ -149,8 +148,7 @@ class SimpleLM : public LanguageModel
if (swapKeyValue) {
u.keyValue.key = col1;
u.keyValue.value = col0;
}
else {
} else {
u.keyValue.key = col0;
u.keyValue.value = col1;
}
@ -161,19 +159,17 @@ class SimpleLM : public LanguageModel
}
}
const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) override
{
const vector<Bigram> bigramsForKeys(const string& preceedingKey,
const string& key) override {
return vector<Bigram>();
}
const vector<Unigram> unigramsForKey(const string &key) override
{
const vector<Unigram> unigramsForKey(const string& key) override {
map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
return f == m_db.end() ? vector<Unigram>() : (*f).second;
}
bool hasUnigramsForKey(const string& key) override
{
bool hasUnigramsForKey(const string& key) override {
map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
return f != m_db.end();
}
@ -208,7 +204,8 @@ TEST(GramambularTest, InputTest) {
reverse(walked.begin(), walked.end());
vector<string> composed;
for (vector<NodeAnchor>::iterator wi = walked.begin() ; wi != walked.end() ; ++wi) {
for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end();
++wi) {
composed.push_back((*wi).node->currentKeyValue().value);
}
ASSERT_EQ(composed, (vector<string>{"高科技", "公司", "", "年中", "獎金"}));
@ -233,8 +230,10 @@ TEST(GramambularTest, WordSegmentationTest) {
reverse(walked.begin(), walked.end());
vector<string> segmented;
for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end(); ++wi) {
for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end();
++wi) {
segmented.push_back((*wi).node->currentKeyValue().key);
}
ASSERT_EQ(segmented, (vector<string>{"高科技", "公司", "", "年終", "獎金"}));
ASSERT_EQ(segmented,
(vector<string>{"高科技", "公司", "", "年終", "獎金"}));
}

View File

@ -29,244 +29,243 @@
#define Grid_h
#include <map>
#include "NodeAnchor.h"
#include "Span.h"
namespace Formosa {
namespace Gramambular {
class Grid {
public:
void clear();
void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey);
namespace Gramambular {
void expandGridByOneAtLocation(size_t inLocation);
void shrinkGridByOneAtLocation(size_t inLocation);
class Grid {
public:
void clear();
void insertNode(const Node& inNode, size_t inLocation,
size_t inSpanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation,
size_t inSpanningLength,
const string& inKey);
size_t width() const;
vector<NodeAnchor> nodesEndingAt(size_t inLocation);
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
void expandGridByOneAtLocation(size_t inLocation);
void shrinkGridByOneAtLocation(size_t inLocation);
// "Freeze" the node with the unigram that represents the selected candidate value.
// After this, the node that contains the unigram will always be evaluated to that
// unigram, while all other overlapping nodes will be reset to their initial state
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value);
size_t width() const;
vector<NodeAnchor> nodesEndingAt(size_t inLocation);
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only
// boost the unigram that represents the value with an overriding score. This
// has the same side effect as fixNodeSelectedCandidate, which is that all other
// overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore);
const string dumpDOT();
protected:
vector<Span> m_spans;
};
inline void Grid::clear()
{
m_spans.clear();
}
inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength)
{
if (inLocation >= m_spans.size()) {
size_t diff = inLocation - m_spans.size() + 1;
for (size_t i = 0 ; i < diff ; i++) {
m_spans.push_back(Span());
}
}
// "Freeze" the node with the unigram that represents the selected candidate
// value. After this, the node that contains the unigram will always be
// evaluated to that unigram, while all other overlapping nodes will be reset
// to their initial state (that is, if any of those nodes were "frozen" or
// fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value);
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength);
}
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
// only boost the unigram that represents the value with an overriding score.
// This has the same side effect as fixNodeSelectedCandidate, which is that
// all other overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location,
const string& value,
float overridingScore);
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey)
{
if (inLocation > m_spans.size()) {
return false;
}
const Node *n = m_spans[inLocation].nodeOfLength(inSpanningLength);
if (!n) {
return false;
}
return inKey == n->key();
}
const string dumpDOT();
inline void Grid::expandGridByOneAtLocation(size_t inLocation)
{
if (!inLocation || inLocation == m_spans.size()) {
m_spans.insert(m_spans.begin() + inLocation, Span());
}
else {
m_spans.insert(m_spans.begin() + inLocation, Span());
for (size_t i = 0 ; i < inLocation ; i++) {
// zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i);
}
}
}
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation)
{
if (inLocation >= m_spans.size()) {
return;
}
m_spans.erase(m_spans.begin() + inLocation);
for (size_t i = 0 ; i < inLocation ; i++) {
// zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i);
}
}
protected:
vector<Span> m_spans;
};
inline size_t Grid::width() const
{
return m_spans.size();
}
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation)
{
vector<NodeAnchor> result;
if (m_spans.size() && inLocation <= m_spans.size()) {
for (size_t i = 0 ; i < inLocation ; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= inLocation) {
Node *np = span.nodeOfLength(inLocation - i);
if (np) {
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = inLocation - i;
result.push_back(na);
}
}
}
}
return result;
}
// Removes every span from the grid.
inline void Grid::clear() {
  m_spans.clear();
}
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation)
{
vector<NodeAnchor> result;
if (m_spans.size() && inLocation <= m_spans.size()) {
for (size_t i = 0 ; i < inLocation ; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= inLocation) {
inline void Grid::insertNode(const Node& inNode, size_t inLocation,
size_t inSpanningLength) {
if (inLocation >= m_spans.size()) {
size_t diff = inLocation - m_spans.size() + 1;
for (size_t j = 1, m = span.maximumLength(); j <= m ; j++) {
if (i + j < inLocation) {
continue;
}
Node *np = span.nodeOfLength(j);
if (np) {
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = inLocation - i;
result.push_back(na);
}
}
}
}
}
return result;
}
// For nodes found at the location, fix their currently-selected candidate using the supplied string value.
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value)
{
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
NodeAnchor node;
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i);
node = nodeAnchor;
break;;
}
}
}
return node;
}
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore)
{
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
inline const string Grid::dumpDOT()
{
stringstream sst;
sst << "digraph {" << endl;
sst << "graph [ rankdir=LR ];" << endl;
sst << "BOS;" << endl;
for (size_t p = 0 ; p < m_spans.size() ; p++) {
Span& span = m_spans[p];
for (size_t ni = 0 ; ni <= span.maximumLength() ; ni++) {
Node* np = span.nodeOfLength(ni);
if (np) {
if (!p) {
sst << "BOS -> " << np->currentKeyValue().value << ";" << endl;
}
sst << np->currentKeyValue().value << ";" << endl;
if (p + ni < m_spans.size()) {
Span& dstSpan = m_spans[p+ni];
for (size_t q = 0 ; q <= dstSpan.maximumLength() ; q++) {
Node *dn = dstSpan.nodeOfLength(q);
if (dn) {
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl;
}
}
}
if (p + ni == m_spans.size()) {
sst << np->currentKeyValue().value << " -> " << "EOS;" << endl;
}
}
}
}
sst << "EOS;" << endl;
sst << "}";
return sst.str();
}
for (size_t i = 0; i < diff; i++) {
m_spans.push_back(Span());
}
}
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength);
}
// Returns true when the span at inLocation holds a node of length
// inSpanningLength whose key equals inKey. Returns false when inLocation is
// not a valid span index or when the span has no node of that length.
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(
    size_t inLocation, size_t inSpanningLength, const string& inKey) {
  // Valid span indices are [0, size). inLocation == size must be rejected
  // too, otherwise m_spans[inLocation] reads one past the end.
  if (inLocation >= m_spans.size()) {
    return false;
  }

  const Node* n = m_spans[inLocation].nodeOfLength(inSpanningLength);
  if (!n) {
    return false;
  }

  return inKey == n->key();
}
// Inserts an empty span at inLocation. When the insertion point is strictly
// inside the grid, nodes in earlier spans that would reach past the
// insertion point are removed.
inline void Grid::expandGridByOneAtLocation(size_t inLocation) {
  // Decide before inserting: the comparison uses the pre-insertion size.
  const bool atEitherEnd = (inLocation == 0) || (inLocation == m_spans.size());

  m_spans.insert(m_spans.begin() + inLocation, Span());
  if (atEitherEnd) {
    return;
  }

  for (size_t s = 0; s < inLocation; s++) {
    // zaps overlapping spans
    m_spans[s].removeNodeOfLengthGreaterThan(inLocation - s);
  }
}
// Removes the span at inLocation and then removes, from every earlier span,
// the nodes that reached past that location. Does nothing when inLocation is
// not a valid span index.
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) {
  if (inLocation < m_spans.size()) {
    m_spans.erase(m_spans.begin() + inLocation);

    for (size_t s = 0; s < inLocation; s++) {
      // zaps overlapping spans
      m_spans[s].removeNodeOfLengthGreaterThan(inLocation - s);
    }
  }
}
// Number of spans in the grid.
inline size_t Grid::width() const {
  return m_spans.size();
}
// Collects anchors for every node that ends exactly at inLocation, i.e. a
// node starting at span i with length inLocation - i. Returns an empty list
// when the grid is empty or inLocation lies beyond the grid.
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation) {
  vector<NodeAnchor> result;
  if (m_spans.empty() || inLocation > m_spans.size()) {
    return result;
  }

  for (size_t start = 0; start < inLocation; start++) {
    Span& span = m_spans[start];

    // The longest node in this span is too short to reach inLocation.
    if (start + span.maximumLength() < inLocation) {
      continue;
    }

    const size_t neededLength = inLocation - start;
    Node* np = span.nodeOfLength(neededLength);
    if (!np) {
      continue;
    }

    NodeAnchor na;
    na.node = np;
    na.location = start;
    na.spanningLength = neededLength;
    result.push_back(na);
  }
  return result;
}
// Collects anchors for every node that starts before inLocation and either
// ends at it or extends past it. Returns an empty list when the grid is empty
// or inLocation lies beyond the grid.
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation) {
  vector<NodeAnchor> result;
  if (m_spans.empty() || inLocation > m_spans.size()) {
    return result;
  }

  for (size_t start = 0; start < inLocation; start++) {
    Span& span = m_spans[start];

    // Nothing in this span is long enough to reach inLocation.
    if (start + span.maximumLength() < inLocation) {
      continue;
    }

    // Lengths shorter than inLocation - start end before inLocation, so the
    // scan begins at the first length that reaches it.
    for (size_t len = inLocation - start, longest = span.maximumLength();
         len <= longest; len++) {
      Node* np = span.nodeOfLength(len);
      if (!np) {
        continue;
      }

      NodeAnchor na;
      na.node = np;
      na.location = start;
      // NOTE(review): this records the distance to inLocation, not the
      // node's own length `len`, even for nodes that cross inLocation --
      // confirm callers rely on that before changing it.
      na.spanningLength = inLocation - start;
      result.push_back(na);
    }
  }
  return result;
}
// For nodes found at the location, fix their currently-selected candidate using
// the supplied string value.
//
// Every node crossing or ending at `location` is first reset. A node that
// offers a candidate whose value equals `value` then has that candidate
// selected. Returns the anchor of the last node on which a candidate was
// selected, or a default-constructed NodeAnchor when no node offers `value`.
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
                                                 const string& value) {
  vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
  NodeAnchor node;
  for (const auto& nodeAnchor : nodes) {
    // candidates() hands back a reference; binding to it avoids copying the
    // whole candidate list for every node.
    const auto& candidates = nodeAnchor.node->candidates();

    // Reset the candidate-fixed state of every node at the location.
    const_cast<Node*>(nodeAnchor.node)->resetCandidate();

    for (size_t i = 0, c = candidates.size(); i < c; ++i) {
      if (candidates[i].value == value) {
        const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i);
        node = nodeAnchor;
        break;
      }
    }
  }
  return node;
}
// Boosts, rather than freezes, the candidate whose value equals `value`.
//
// Every node crossing or ending at `location` is first reset. A node that
// offers a candidate with the given value then has that candidate selected as
// a floating (non-fixed) selection carrying `overridingScore`.
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location,
                                                        const string& value,
                                                        float overridingScore) {
  vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
  for (const auto& nodeAnchor : nodes) {
    // candidates() hands back a reference; binding to it avoids copying the
    // whole candidate list for every node.
    const auto& candidates = nodeAnchor.node->candidates();

    // Reset the candidate-fixed state of every node at the location.
    const_cast<Node*>(nodeAnchor.node)->resetCandidate();

    for (size_t i = 0, c = candidates.size(); i < c; ++i) {
      if (candidates[i].value == value) {
        const_cast<Node*>(nodeAnchor.node)
            ->selectFloatingCandidateAtIndex(i, overridingScore);
        break;
      }
    }
  }
}
// Renders the grid as a Graphviz "digraph" in DOT syntax. Each node is
// labelled with the value of its current key-value pair. Nodes in the first
// span get an edge from BOS, each node gets an edge to every node in the span
// where it ends, and nodes ending exactly at the end of the grid get an edge
// to EOS.
inline const string Grid::dumpDOT() {
  stringstream sst;
  sst << "digraph {" << endl;
  sst << "graph [ rankdir=LR ];" << endl;
  sst << "BOS;" << endl;

  for (size_t p = 0; p < m_spans.size(); p++) {
    Span& span = m_spans[p];
    for (size_t ni = 0; ni <= span.maximumLength(); ni++) {
      Node* np = span.nodeOfLength(ni);
      if (!np) {
        continue;
      }

      const string label = np->currentKeyValue().value;
      if (p == 0) {
        sst << "BOS -> " << label << ";" << endl;
      }
      sst << label << ";" << endl;

      // Index of the span where this node ends.
      const size_t next = p + ni;
      if (next < m_spans.size()) {
        Span& dstSpan = m_spans[next];
        for (size_t q = 0; q <= dstSpan.maximumLength(); q++) {
          Node* dn = dstSpan.nodeOfLength(q);
          if (dn) {
            sst << label << " -> " << dn->currentKeyValue().value << ";"
                << endl;
          }
        }
      } else if (next == m_spans.size()) {
        sst << label << " -> "
            << "EOS;" << endl;
      }
    }
  }

  sst << "EOS;" << endl;
  sst << "}";
  return sst.str();
}
} // namespace Gramambular
} // namespace Formosa
#endif

View File

@ -32,40 +32,36 @@
#include <string>
namespace Formosa {
namespace Gramambular {
using namespace std;
class KeyValuePair {
public:
string key;
string value;
namespace Gramambular {
using namespace std;
bool operator==(const KeyValuePair& inAnother) const;
bool operator<(const KeyValuePair& inAnother) const;
};
class KeyValuePair {
public:
string key;
string value;
inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair)
{
inStream << "(" << inPair.key << "," << inPair.value << ")";
return inStream;
}
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const
{
return key == inAnother.key && value == inAnother.value;
}
bool operator==(const KeyValuePair& inAnother) const;
bool operator<(const KeyValuePair& inAnother) const;
};
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const
{
if (key < inAnother.key) {
return true;
}
else if (key == inAnother.key) {
return value < inAnother.value;
}
return false;
}
}
// Writes a key-value pair as "(<key>,<value>)".
inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) {
  inStream << "(" << inPair.key;
  inStream << "," << inPair.value;
  inStream << ")";
  return inStream;
}
// Two pairs are equal when both the key and the value match.
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const {
  if (!(key == inAnother.key)) {
    return false;
  }
  return value == inAnother.value;
}
// Orders pairs by key first; pairs with equal keys are ordered by value.
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const {
  if (key < inAnother.key) {
    return true;
  }
  if (!(key == inAnother.key)) {
    return false;
  }
  // Keys tie; the value decides.
  return value < inAnother.value;
}
} // namespace Gramambular
} // namespace Formosa
#endif

View File

@ -29,24 +29,25 @@
#define LanguageModel_h
#include <vector>
#include "Bigram.h"
#include "Unigram.h"
namespace Formosa {
namespace Gramambular {
using namespace std;
class LanguageModel {
public:
virtual ~LanguageModel() {}
namespace Gramambular {
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0;
virtual const vector<Unigram> unigramsForKey(const string &key) = 0;
virtual bool hasUnigramsForKey(const string& key) = 0;
};
}
}
using namespace std;
// Abstract interface through which the grid builder obtains n-gram data.
// All lookups are pure virtual; concrete models supply the storage.
class LanguageModel {
public:
// Virtual so that implementations can be destroyed through a base pointer.
virtual ~LanguageModel() {}
// Returns the bigrams for `key` given the preceding key `preceedingKey`.
// NOTE(review): presumably an empty vector means "no data" -- confirm
// against the concrete implementations.
virtual const vector<Bigram> bigramsForKeys(const string& preceedingKey,
const string& key) = 0;
// Returns the unigrams stored for `key`. BlockReadingBuilder::build() treats
// an empty result as "no node for this key".
virtual const vector<Unigram> unigramsForKey(const string& key) = 0;
// Reports whether any unigram exists for `key`.
virtual bool hasUnigramsForKey(const string& key) = 0;
};
} // namespace Gramambular
} // namespace Formosa
#endif

View File

@ -30,202 +30,191 @@
#include <limits>
#include <vector>
#include "LanguageModel.h"
namespace Formosa {
namespace Gramambular {
using namespace std;
namespace Gramambular {
using namespace std;
class Node {
public:
Node();
Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams);
void primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues);
bool isCandidateFixed() const;
const vector<KeyValuePair>& candidates() const;
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true);
void resetCandidate();
void selectFloatingCandidateAtIndex(size_t index, double score);
const string& key() const;
double score() const;
double scoreForCandidate(string &candidate) const;
const KeyValuePair currentKeyValue() const;
double highestUnigramScore() const;
protected:
const LanguageModel* m_LM;
string m_key;
double m_score;
vector<Unigram> m_unigrams;
vector<KeyValuePair> m_candidates;
map<string, size_t> m_valueUnigramIndexMap;
map<KeyValuePair, vector<Bigram> > m_preceedingGramBigramMap;
bool m_candidateFixed;
size_t m_selectedUnigramIndex;
friend ostream& operator<<(ostream& inStream, const Node& inNode);
};
class Node {
public:
Node();
Node(const string& inKey, const vector<Unigram>& inUnigrams,
const vector<Bigram>& inBigrams);
inline ostream& operator<<(ostream& inStream, const Node& inNode)
{
inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false")
<< ",selected:" << inNode.m_selectedUnigramIndex
<< "," << inNode.m_unigrams << ")";
return inStream;
}
void primeNodeWithPreceedingKeyValues(
const vector<KeyValuePair>& inKeyValues);
inline Node::Node()
: m_candidateFixed(false)
, m_selectedUnigramIndex(0)
, m_score(0.0)
{
}
bool isCandidateFixed() const;
const vector<KeyValuePair>& candidates() const;
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true);
void resetCandidate();
void selectFloatingCandidateAtIndex(size_t index, double score);
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams)
: m_key(inKey)
, m_unigrams(inUnigrams)
, m_candidateFixed(false)
, m_selectedUnigramIndex(0)
, m_score(0.0)
{
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
if (m_unigrams.size()) {
m_score = m_unigrams[0].score;
}
size_t i = 0;
for (vector<Unigram>::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) {
m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
i++;
m_candidates.push_back((*ui).keyValue);
}
for (vector<Bigram>::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) {
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
}
}
// Re-selects this node's candidate from the bigrams registered for any of
// inKeyValues: a bigram whose score beats the running maximum (initially the
// node's current score) and whose value is one of this node's unigram values
// becomes the new selection. Nothing changes when the candidate is fixed.
inline void Node::primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues)
{
size_t newIndex = m_selectedUnigramIndex;
double max = m_score;
// NOTE(review): the five declarations below look like class-member lines
// interleaved by the diff rendering rather than part of this function; kept verbatim.
const string& key() const;
double score() const;
double scoreForCandidate(string& candidate) const;
const KeyValuePair currentKeyValue() const;
double highestUnigramScore() const;
if (!isCandidateFixed()) {
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) {
// Look up the bigrams that follow this preceding key-value, if any.
map<KeyValuePair, vector<Bigram> >::const_iterator f = m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) {
const vector<Bigram>& bigrams = (*f).second;
for (vector<Bigram>::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) {
const Bigram& bigram = *bi;
if (bigram.score > max) {
// Only accept the bigram when its value maps to a known unigram index.
map<string, size_t>::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) {
newIndex = (*uf).second;
max = bigram.score;
}
}
}
}
}
}
// NOTE(review): the two lines below also look like interleaved class-member
// lines from the diff rendering; kept verbatim.
protected:
const LanguageModel* m_LM;
// Commit the winning score and index (no-ops when nothing better was found).
if (m_score != max) {
m_score = max;
}
if (newIndex != m_selectedUnigramIndex) {
m_selectedUnigramIndex = newIndex;
}
}
// Returns the fixed flag; while it is set, primeNodeWithPreceedingKeyValues
// leaves the current selection alone.
inline bool Node::isCandidateFixed() const { return m_candidateFixed; }
// Returns the candidate key-values, in the same order as the sorted unigrams.
inline const vector<KeyValuePair>& Node::candidates() const { return m_candidates; }
string m_key;
double m_score;
// Selects the unigram at inIndex (index 0 when inIndex is out of range),
// stores inFix as the fixed flag, and overrides the node score with 99.
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix)
{
    m_selectedUnigramIndex = (inIndex < m_unigrams.size()) ? inIndex : 0;
    m_candidateFixed = inFix;
    m_score = 99;
}
vector<Unigram> m_unigrams;
vector<KeyValuePair> m_candidates;
map<string, size_t> m_valueUnigramIndexMap;
map<KeyValuePair, vector<Bigram> > m_preceedingGramBigramMap;
// Goes back to the first unigram, clears the fixed flag, and restores that
// unigram's score. With no unigrams the score is left unchanged.
inline void Node::resetCandidate()
{
    m_selectedUnigramIndex = 0;
    m_candidateFixed = false;
    if (!m_unigrams.empty()) {
        m_score = m_unigrams.front().score;
    }
}
bool m_candidateFixed;
size_t m_selectedUnigramIndex;
// Selects the unigram at index (index 0 when out of range) without fixing it,
// and sets the node score to the caller-supplied value.
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score)
{
    m_selectedUnigramIndex = (index < m_unigrams.size()) ? index : 0;
    m_candidateFixed = false;
    m_score = score;
}
// Returns the key this node was constructed with.
inline const string& Node::key() const { return m_key; }
// Returns the node's current score.
inline double Node::score() const { return m_score; }
friend ostream& operator<<(ostream& inStream, const Node& inNode);
};
// Returns the score of the first unigram whose value equals candidate, or 0.0
// when no unigram matches.
inline double Node::scoreForCandidate(string &candidate) const
{
    // Iterate by const reference: the original copied every Unigram
    // (including its strings) just to compare one field.
    for (const auto& unigram : m_unigrams) {
        if (unigram.keyValue.value == candidate) {
            return unigram.score;
        }
    }
    return 0.0;
}
// Returns the score of the first unigram in sorted order, or 0.0 when the
// node has no unigrams.
inline double Node::highestUnigramScore() const
{
    return m_unigrams.empty() ? 0.0 : m_unigrams[0].score;
}
// Returns the currently selected candidate, or a default-constructed
// KeyValuePair when the selected index is past the end of the unigrams.
inline const KeyValuePair Node::currentKeyValue() const
{
    if (m_selectedUnigramIndex < m_unigrams.size()) {
        return m_candidates[m_selectedUnigramIndex];
    }
    return KeyValuePair();
}
}
// Writes a one-line debug form of the node: key, fixed flag, selected index
// and the unigram list.
inline ostream& operator<<(ostream& inStream, const Node& inNode) {
  const char* fixedText = inNode.m_candidateFixed ? "true" : "false";
  inStream << "(node,key:" << inNode.m_key;
  inStream << ",fixed:" << fixedText;
  inStream << ",selected:" << inNode.m_selectedUnigramIndex << ",";
  inStream << inNode.m_unigrams << ")";
  return inStream;
}
// Default constructor: unfixed, first index selected, score 0.
inline Node::Node()
    : m_candidateFixed(false),
      m_selectedUnigramIndex(0),
      m_score(0.0) {
}
// Builds a node for inKey. The unigrams are copied and stably ordered with
// Unigram::ScoreCompare; the bigrams are grouped by their preceding key-value.
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams,
                  const vector<Bigram>& inBigrams)
    : m_key(inKey),
      m_unigrams(inUnigrams),
      m_candidateFixed(false),
      m_selectedUnigramIndex(0),
      m_score(0.0) {
  stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);

  // The first unigram after sorting supplies the initial score.
  if (!m_unigrams.empty()) {
    m_score = m_unigrams.front().score;
  }

  // Record each unigram's position by value and expose it as a candidate.
  for (size_t position = 0; position < m_unigrams.size(); ++position) {
    const KeyValuePair& pair = m_unigrams[position].keyValue;
    m_valueUnigramIndexMap[pair.value] = position;
    m_candidates.push_back(pair);
  }

  for (const Bigram& bigram : inBigrams) {
    m_preceedingGramBigramMap[bigram.preceedingKeyValue].push_back(bigram);
  }
}
inline void Node::primeNodeWithPreceedingKeyValues(
const vector<KeyValuePair>& inKeyValues) {
size_t newIndex = m_selectedUnigramIndex;
double max = m_score;
if (!isCandidateFixed()) {
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin();
kvi != inKeyValues.end(); ++kvi) {
map<KeyValuePair, vector<Bigram> >::const_iterator f =
m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) {
const vector<Bigram>& bigrams = (*f).second;
for (vector<Bigram>::const_iterator bi = bigrams.begin();
bi != bigrams.end(); ++bi) {
const Bigram& bigram = *bi;
if (bigram.score > max) {
map<string, size_t>::const_iterator uf =
m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) {
newIndex = (*uf).second;
max = bigram.score;
}
}
}
}
}
}
if (m_score != max) {
m_score = max;
}
if (newIndex != m_selectedUnigramIndex) {
m_selectedUnigramIndex = newIndex;
}
}
// Returns the fixed flag; while it is set, primeNodeWithPreceedingKeyValues
// leaves the current selection alone.
inline bool Node::isCandidateFixed() const {
  return m_candidateFixed;
}
// Returns the candidate key-values, in the same order as the sorted unigrams.
inline const vector<KeyValuePair>& Node::candidates() const { return m_candidates; }
// Selects the unigram at inIndex (index 0 when inIndex is out of range),
// stores inFix as the fixed flag, and overrides the node score with 99.
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) {
  m_selectedUnigramIndex = (inIndex < m_unigrams.size()) ? inIndex : 0;
  m_candidateFixed = inFix;
  m_score = 99;
}
// Goes back to the first unigram, clears the fixed flag, and restores that
// unigram's score. With no unigrams the score is left unchanged.
inline void Node::resetCandidate() {
  m_selectedUnigramIndex = 0;
  m_candidateFixed = false;
  if (!m_unigrams.empty()) {
    m_score = m_unigrams.front().score;
  }
}
// Selects the unigram at index (index 0 when out of range) without fixing it,
// and sets the node score to the caller-supplied value.
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
  m_selectedUnigramIndex = (index < m_unigrams.size()) ? index : 0;
  m_candidateFixed = false;
  m_score = score;
}
// Returns the key this node was constructed with.
inline const string& Node::key() const {
  return m_key;
}
// Returns the node's current score.
inline double Node::score() const {
  return m_score;
}
// Returns the score of the first unigram whose value equals candidate, or 0.0
// when no unigram matches.
inline double Node::scoreForCandidate(string& candidate) const {
  // Iterate by const reference: the original copied every Unigram
  // (including its strings) just to compare one field.
  for (const auto& unigram : m_unigrams) {
    if (unigram.keyValue.value == candidate) {
      return unigram.score;
    }
  }
  return 0.0;
}
// Returns the score of the first unigram in sorted order, or 0.0 when the
// node has no unigrams.
inline double Node::highestUnigramScore() const {
  return m_unigrams.empty() ? 0.0 : m_unigrams[0].score;
}
// Returns the currently selected candidate, or a default-constructed
// KeyValuePair when the selected index is past the end of the unigrams.
inline const KeyValuePair Node::currentKeyValue() const {
  if (m_selectedUnigramIndex < m_unigrams.size()) {
    return m_candidates[m_selectedUnigramIndex];
  }
  return KeyValuePair();
}
} // namespace Gramambular
} // namespace Formosa
#endif

View File

@ -31,49 +31,44 @@
#include "Node.h"
namespace Formosa {
namespace Gramambular {
// A reference to a grid node plus the bookkeeping Walker::reverseWalk uses
// while assembling a path.
class NodeAnchor {
public:
NodeAnchor();
// Non-owning pointer to the node; may be null (operator<< prints "null").
const Node *node;
// Printed as the first number of "@(location,spanningLength)".
// NOTE(review): presumably the node's start position in the grid — confirm against Grid.
size_t location;
// Length subtracted from the walk location when reverseWalk recurses.
size_t spanningLength;
// Running score; reverseWalk sets it to the incoming total plus node->score().
double accumulatedScore;
};
// Default constructor: null node, zero location/length, zero score.
inline NodeAnchor::NodeAnchor()
    : node(nullptr), location(0), spanningLength(0), accumulatedScore(0.0)
{
}
namespace Gramambular {
// A reference to a grid node plus the bookkeeping Walker::reverseWalk uses
// while assembling a path.
class NodeAnchor {
public:
NodeAnchor();
// Non-owning pointer to the node; may be null (operator<< prints "null").
const Node* node;
// Printed as the first number of "@(location,spanningLength)".
// NOTE(review): presumably the node's start position in the grid — confirm against Grid.
size_t location;
// Length subtracted from the walk location when reverseWalk recurses.
size_t spanningLength;
// Running score; reverseWalk sets it to the incoming total plus node->score().
double accumulatedScore;
};
// Writes "{@(location,spanningLength),<node>}" where <node> is the node's own
// stream form, or "null" when the anchor has no node.
inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor)
{
    inStream << "{@(" << inAnchor.location;
    inStream << "," << inAnchor.spanningLength << "),";
    if (!inAnchor.node) {
        inStream << "null";
    } else {
        inStream << *(inAnchor.node);
    }
    inStream << "}";
    return inStream;
}
// Writes the anchors in order, separated by "<-".
inline ostream& operator<<(ostream& inStream, const vector<NodeAnchor>& inAnchor)
{
    bool needSeparator = false;
    for (const NodeAnchor& anchor : inAnchor) {
        if (needSeparator) {
            inStream << "<-";
        }
        inStream << anchor;
        needSeparator = true;
    }
    return inStream;
}
}
// Default constructor: null node, zero location/length, zero score.
inline NodeAnchor::NodeAnchor()
    : node(nullptr),
      location(0),
      spanningLength(0),
      accumulatedScore(0.0) {
}
// Writes "{@(location,spanningLength),<node>}" where <node> is the node's own
// stream form, or "null" when the anchor has no node.
inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) {
  inStream << "{@(" << inAnchor.location;
  inStream << "," << inAnchor.spanningLength << "),";
  if (!inAnchor.node) {
    inStream << "null";
  } else {
    inStream << *(inAnchor.node);
  }
  inStream << "}";
  return inStream;
}
// Writes the anchors in order, separated by "<-".
inline ostream& operator<<(ostream& inStream,
                           const vector<NodeAnchor>& inAnchor) {
  bool needSeparator = false;
  for (const NodeAnchor& anchor : inAnchor) {
    if (needSeparator) {
      inStream << "<-";
    }
    inStream << anchor;
    needSeparator = true;
  }
  return inStream;
}
} // namespace Gramambular
} // namespace Formosa
#endif

View File

@ -31,82 +31,75 @@
#include <map>
#include <set>
#include <sstream>
#include "Node.h"
namespace Formosa {
namespace Gramambular {
class Span {
public:
Span();
namespace Gramambular {
class Span {
public:
Span();
void clear();
void insertNodeOfLength(const Node& inNode, size_t inLength);
void removeNodeOfLengthGreaterThan(size_t inLength);
Node* nodeOfLength(size_t inLength);
size_t maximumLength() const;
void clear();
void insertNodeOfLength(const Node& inNode, size_t inLength);
void removeNodeOfLengthGreaterThan(size_t inLength);
protected:
map<size_t, Node> m_lengthNodeMap;
size_t m_maximumLength;
};
// Default constructor: no nodes, maximum length 0.
inline Span::Span() : m_maximumLength(0) {}
// Drops every node and resets the recorded maximum length to 0.
inline void Span::clear()
{
    m_maximumLength = 0;
    m_lengthNodeMap.clear();
}
// Stores a copy of inNode under inLength (replacing any node already there)
// and raises the recorded maximum length when inLength exceeds it.
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength)
{
    m_lengthNodeMap[inLength] = inNode;
    if (m_maximumLength < inLength) {
        m_maximumLength = inLength;
    }
}
// Removes every node stored under a length greater than inLength and sets the
// recorded maximum to the largest remaining length (0 when none remain).
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength)
{
// Nothing stored is longer than the recorded maximum, so there is nothing to remove.
if (inLength > m_maximumLength) {
return;
}
size_t max = 0;
// Keys are collected first and erased afterwards, so the map is not modified
// while it is being iterated.
set<size_t> removeSet;
for (map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) {
if ((*i).first > inLength) {
removeSet.insert((*i).first);
}
else {
// Track the largest length that survives.
if ((*i).first > max) {
max = (*i).first;
}
}
}
for (set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) {
m_lengthNodeMap.erase(*i);
}
// NOTE(review): the two declarations below look like class-member lines
// interleaved by the diff rendering rather than part of this function; kept verbatim.
Node* nodeOfLength(size_t inLength);
size_t maximumLength() const;
m_maximumLength = max;
}
// Returns a pointer to the node stored under inLength, or null when there is
// none. The pointer refers to the element held inside the span's map.
inline Node* Span::nodeOfLength(size_t inLength)
{
    map<size_t, Node>::iterator found = m_lengthNodeMap.find(inLength);
    if (found == m_lengthNodeMap.end()) {
        return nullptr;
    }
    return &found->second;
}
// Returns the recorded maximum node length (0 for an empty span).
inline size_t Span::maximumLength() const { return m_maximumLength; }
}
protected:
map<size_t, Node> m_lengthNodeMap;
size_t m_maximumLength;
};
// Default constructor: no nodes, maximum length 0.
inline Span::Span()
    : m_maximumLength(0) {
}
// Drops every node and resets the recorded maximum length to 0.
inline void Span::clear() {
  m_maximumLength = 0;
  m_lengthNodeMap.clear();
}
// Stores a copy of inNode under inLength (replacing any node already there)
// and raises the recorded maximum length when inLength exceeds it.
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) {
  m_lengthNodeMap[inLength] = inNode;
  if (m_maximumLength < inLength) {
    m_maximumLength = inLength;
  }
}
// Removes every node stored under a length greater than inLength and sets the
// recorded maximum to the largest remaining length (0 when none remain).
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) {
  // Nothing stored is longer than the recorded maximum, so there is nothing
  // to remove.
  if (inLength > m_maximumLength) {
    return;
  }

  // The map is ordered by length, so everything to drop sits in the tail
  // starting at the first key strictly greater than inLength. This replaces
  // the earlier scan that collected keys into a temporary set and erased them
  // one at a time.
  m_lengthNodeMap.erase(m_lengthNodeMap.upper_bound(inLength),
                        m_lengthNodeMap.end());

  // The largest surviving key is now the last element.
  m_maximumLength =
      m_lengthNodeMap.empty() ? 0 : m_lengthNodeMap.rbegin()->first;
}
// Returns a pointer to the node stored under inLength, or null when there is
// none. The pointer refers to the element held inside the span's map.
inline Node* Span::nodeOfLength(size_t inLength) {
  map<size_t, Node>::iterator found = m_lengthNodeMap.find(inLength);
  if (found == m_lengthNodeMap.end()) {
    return nullptr;
  }
  return &found->second;
}
// Returns the recorded maximum node length (0 for an empty span).
inline size_t Span::maximumLength() const {
  return m_maximumLength;
}
} // namespace Gramambular
} // namespace Formosa
#endif

View File

@ -29,76 +29,69 @@
#define Unigram_h
#include <vector>
#include "KeyValuePair.h"
namespace Formosa {
namespace Gramambular {
class Unigram {
public:
Unigram();
namespace Gramambular {
class Unigram {
public:
Unigram();
KeyValuePair keyValue;
double score;
bool operator==(const Unigram& inAnother) const;
bool operator<(const Unigram& inAnother) const;
static bool ScoreCompare(const Unigram& a, const Unigram& b);
};
KeyValuePair keyValue;
double score;
// Writes "(keyValue,score)" with precision 6, then restores the stream's
// previous precision.
inline ostream& operator<<(ostream& inStream, const Unigram& inGram)
{
    // precision(n) installs the new value and hands back the old one.
    const streamsize savedPrecision = inStream.precision(6);
    inStream << "(" << inGram.keyValue;
    inStream << "," << inGram.score << ")";
    inStream.precision(savedPrecision);
    return inStream;
}
// Writes "[count]=>{0=>gram,1=>gram,...}" for the given unigrams.
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams)
{
    inStream << "[" << inGrams.size() << "]=>{";
    for (size_t position = 0; position < inGrams.size(); ++position) {
        if (position > 0) {
            inStream << ",";
        }
        inStream << position << "=>";
        inStream << inGrams[position];
    }
    inStream << "}";
    return inStream;
}
// Default constructor: default key-value, score 0.
inline Unigram::Unigram() : score(0.0) {}
// Two unigrams are equal when both the key-value and the score are equal.
inline bool Unigram::operator==(const Unigram& inAnother) const
{
    if (!(keyValue == inAnother.keyValue)) {
        return false;
    }
    return score == inAnother.score;
}
// Orders by key-value first; unigrams with equal key-values are ordered by
// ascending score.
inline bool Unigram::operator<(const Unigram& inAnother) const
{
    if (keyValue < inAnother.keyValue) {
        return true;
    }
    return keyValue == inAnother.keyValue && score < inAnother.score;
}
bool operator==(const Unigram& inAnother) const;
bool operator<(const Unigram& inAnother) const;
// Comparator that places higher scores first (a before b when a scores more).
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b)
{
    return b.score < a.score;
}
}
static bool ScoreCompare(const Unigram& a, const Unigram& b);
};
// Writes "(keyValue,score)" with precision 6, then restores the stream's
// previous precision.
inline ostream& operator<<(ostream& inStream, const Unigram& inGram) {
  // precision(n) installs the new value and hands back the old one.
  const streamsize savedPrecision = inStream.precision(6);
  inStream << "(" << inGram.keyValue;
  inStream << "," << inGram.score << ")";
  inStream.precision(savedPrecision);
  return inStream;
}
// Writes "[count]=>{0=>gram,1=>gram,...}" for the given unigrams.
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams) {
  inStream << "[" << inGrams.size() << "]=>{";
  for (size_t position = 0; position < inGrams.size(); ++position) {
    if (position > 0) {
      inStream << ",";
    }
    inStream << position << "=>";
    inStream << inGrams[position];
  }
  inStream << "}";
  return inStream;
}
// Default constructor: default key-value, score 0.
inline Unigram::Unigram()
    : score(0.0) {
}
// Two unigrams are equal when both the key-value and the score are equal.
inline bool Unigram::operator==(const Unigram& inAnother) const {
  if (!(keyValue == inAnother.keyValue)) {
    return false;
  }
  return score == inAnother.score;
}
// Orders by key-value first; unigrams with equal key-values are ordered by
// ascending score.
inline bool Unigram::operator<(const Unigram& inAnother) const {
  if (keyValue < inAnother.keyValue) {
    return true;
  }
  return keyValue == inAnother.keyValue && score < inAnother.score;
}
// Comparator that places higher scores first (a before b when a scores more).
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) {
  return b.score < a.score;
}
} // namespace Gramambular
} // namespace Formosa
#endif

View File

@ -29,63 +29,65 @@
#define Walker_h
#include <algorithm>
#include "Grid.h"
namespace Formosa {
namespace Gramambular {
using namespace std;
namespace Gramambular {
using namespace std;
// Finds a highest-scoring path through a Grid by walking it backwards.
class Walker {
public:
// Keeps the grid pointer as given; reverseWalk dereferences it on every call.
Walker(Grid* inGrid);
// Returns the best path ending at inLocation. inAccumulatedScore is the score
// already gathered by the caller and defaults to 0.0 for a fresh walk.
const vector<NodeAnchor> reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0);
protected:
// Grid being walked. NOTE(review): presumably not owned by the walker — confirm.
Grid* m_grid;
};
// Stores the grid pointer for later walks.
inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {}
// Walks the grid backwards from inLocation. For every node ending there it
// recurses to the location just before that node, prepends the node to the
// returned path, and finally returns the path whose last anchor carries the
// highest accumulated score. Returns an empty vector when inLocation is 0,
// lies beyond the grid width, or no node ends there.
inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation, double inAccumulatedScore)
{
if (!inLocation || inLocation > m_grid->width()) {
return vector<NodeAnchor>();
}
vector<vector<NodeAnchor> > paths;
// NOTE(review): the class declaration lines below look like content
// interleaved by the diff rendering rather than part of this function; kept verbatim.
class Walker {
public:
Walker(Grid* inGrid);
const vector<NodeAnchor> reverseWalk(size_t inLocation,
double inAccumulatedScore = 0.0);
vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation);
for (vector<NodeAnchor>::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) {
// Anchors without a node contribute no path.
if (!(*ni).node) {
continue;
}
// NOTE(review): interleaved class-member lines from the diff rendering; kept verbatim.
protected:
Grid* m_grid;
};
// Fold this node's score into the running total before recursing.
(*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score();
// NOTE(review): interleaved constructor definition from the diff rendering; kept verbatim.
inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {}
vector<NodeAnchor> path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore);
// The current anchor goes in front, so the path's back() is the anchor
// reached last by the recursion and holds the full accumulated score.
path.insert(path.begin(), *ni);
paths.push_back(path);
}
if (!paths.size()) {
return vector<NodeAnchor>();
}
// Keep the first path with the strictly highest final accumulated score.
vector<NodeAnchor>* result = &*(paths.begin());
for (vector<vector<NodeAnchor> >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
result = &*pi;
}
}
return *result;
}
// Walks the grid backwards from inLocation. For every node ending there it
// recurses to the location just before that node, prepends the node to the
// returned path, and finally returns the path whose last anchor carries the
// highest accumulated score. Returns an empty vector when inLocation is 0,
// lies beyond the grid width, or no node ends there.
inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation,
                                                    double inAccumulatedScore) {
  if (inLocation == 0 || inLocation > m_grid->width()) {
    return vector<NodeAnchor>();
  }

  vector<vector<NodeAnchor> > candidatePaths;
  vector<NodeAnchor> endingAnchors = m_grid->nodesEndingAt(inLocation);

  for (NodeAnchor& anchor : endingAnchors) {
    // Anchors without a node contribute no path.
    if (!anchor.node) {
      continue;
    }

    // Fold this node's score into the running total before recursing.
    anchor.accumulatedScore = inAccumulatedScore + anchor.node->score();

    vector<NodeAnchor> walked = reverseWalk(
        inLocation - anchor.spanningLength, anchor.accumulatedScore);
    // The current anchor goes in front, so back() is the anchor reached last
    // by the recursion and holds the full accumulated score.
    walked.insert(walked.begin(), anchor);
    candidatePaths.push_back(walked);
  }

  if (candidatePaths.empty()) {
    return vector<NodeAnchor>();
  }

  // Keep the first path with the strictly highest final accumulated score.
  size_t bestPath = 0;
  for (size_t k = 1; k < candidatePaths.size(); ++k) {
    if (candidatePaths[k].back().accumulatedScore >
        candidatePaths[bestPath].back().accumulatedScore) {
      bestPath = k;
    }
  }
  return candidatePaths[bestPath];
}
} // namespace Gramambular
} // namespace Formosa
#endif