Reformat Gramambular with clang-format

This commit is contained in:
Lukhnos Liu 2022-02-19 09:28:04 -08:00
parent d4d8d14004
commit d3302ef70a
11 changed files with 887 additions and 939 deletions

View File

@ -33,74 +33,68 @@
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class Bigram { class Bigram {
public: public:
Bigram(); Bigram();
KeyValuePair preceedingKeyValue;
KeyValuePair keyValue;
double score;
bool operator==(const Bigram& inAnother) const;
bool operator<(const Bigram& inAnother) const;
};
inline ostream& operator<<(ostream& inStream, const Bigram& inGram) KeyValuePair preceedingKeyValue;
{ KeyValuePair keyValue;
streamsize p = inStream.precision(); double score;
inStream.precision(6);
inStream << "(" << inGram.keyValue << "|" <<inGram.preceedingKeyValue << "," << inGram.score << ")";
inStream.precision(p);
return inStream;
}
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams) bool operator==(const Bigram& inAnother) const;
{ bool operator<(const Bigram& inAnother) const;
inStream << "[" << inGrams.size() << "]=>{"; };
size_t index = 0;
for (vector<Bigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) {
inStream << index << "=>";
inStream << *gi;
if (gi + 1 != inGrams.end()) {
inStream << ",";
}
}
inStream << "}";
return inStream;
}
inline Bigram::Bigram()
: score(0.0)
{
}
inline bool Bigram::operator==(const Bigram& inAnother) const
{
return preceedingKeyValue == inAnother.preceedingKeyValue && keyValue == inAnother.keyValue && score == inAnother.score;
}
inline bool Bigram::operator<(const Bigram& inAnother) const
{
if (preceedingKeyValue < inAnother.preceedingKeyValue) {
return true;
}
else if (preceedingKeyValue == inAnother.preceedingKeyValue) {
if (keyValue < inAnother.keyValue) {
return true;
}
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score;
}
return false;
}
return false; inline ostream& operator<<(ostream& inStream, const Bigram& inGram) {
} streamsize p = inStream.precision();
} inStream.precision(6);
inStream << "(" << inGram.keyValue << "|" << inGram.preceedingKeyValue << ","
<< inGram.score << ")";
inStream.precision(p);
return inStream;
} }
// Writes a vector of bigrams as "[count]=>{0=>gram,1=>gram,...}". Each element
// is printed through the single-Bigram operator<<.
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams) {
  const size_t total = inGrams.size();
  inStream << "[" << total << "]=>{";
  for (size_t position = 0; position < total; ++position) {
    // Commas separate entries; none is written after the last one.
    if (position != 0) {
      inStream << ",";
    }
    inStream << position << "=>";
    inStream << inGrams[position];
  }
  inStream << "}";
  return inStream;
}
// Default constructor: the score starts at zero. The two key-value members are
// not listed in the initializer list and are therefore default-constructed.
inline Bigram::Bigram()
    : score(0.0) {
}
// Two bigrams are equal when the preceding key-value pair, the key-value pair
// and the score all match. The score uses exact floating-point equality.
inline bool Bigram::operator==(const Bigram& inAnother) const {
  if (!(preceedingKeyValue == inAnother.preceedingKeyValue)) {
    return false;
  }
  if (!(keyValue == inAnother.keyValue)) {
    return false;
  }
  return score == inAnother.score;
}
// Orders bigrams lexicographically: first by the preceding key-value pair,
// then by the key-value pair, and finally by score.
inline bool Bigram::operator<(const Bigram& inAnother) const {
  // First criterion: the preceding key-value pair.
  if (preceedingKeyValue < inAnother.preceedingKeyValue) {
    return true;
  }
  if (!(preceedingKeyValue == inAnother.preceedingKeyValue)) {
    return false;
  }

  // Tie on the preceding pair: fall through to the key-value pair.
  if (keyValue < inAnother.keyValue) {
    return true;
  }
  if (!(keyValue == inAnother.keyValue)) {
    return false;
  }

  // Tie on both pairs: the score decides.
  return score < inAnother.score;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -29,198 +29,185 @@
#define BlockReadingBuilder_h #define BlockReadingBuilder_h
#include <vector> #include <vector>
#include "Grid.h" #include "Grid.h"
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std; using namespace std;
class BlockReadingBuilder {
public:
BlockReadingBuilder(LanguageModel *inLM);
void clear();
size_t length() const;
size_t cursorIndex() const;
void setCursorIndex(size_t inNewIndex);
void insertReadingAtCursor(const string& inReading);
bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete
bool removeHeadReadings(size_t count);
void setJoinSeparator(const string& separator);
const string joinSeparator() const;
vector<string> readings() const; class BlockReadingBuilder {
public:
BlockReadingBuilder(LanguageModel* inLM);
void clear();
Grid& grid(); size_t length() const;
size_t cursorIndex() const;
protected: void setCursorIndex(size_t inNewIndex);
void build(); void insertReadingAtCursor(const string& inReading);
bool deleteReadingBeforeCursor(); // backspace
static const string Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator); bool deleteReadingAfterCursor(); // delete
//最多使用六個字組成一個詞
static const size_t MaximumBuildSpanLength = 6;
size_t m_cursorIndex;
vector<string> m_readings;
Grid m_grid;
LanguageModel *m_LM;
string m_joinSeparator;
};
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM)
: m_LM(inLM)
, m_cursorIndex(0)
{
}
inline void BlockReadingBuilder::clear()
{
m_cursorIndex = 0;
m_readings.clear();
m_grid.clear();
}
inline size_t BlockReadingBuilder::length() const
{
return m_readings.size();
}
inline size_t BlockReadingBuilder::cursorIndex() const
{
return m_cursorIndex;
}
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) bool removeHeadReadings(size_t count);
{
m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex;
}
inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading)
{
m_readings.insert(m_readings.begin() + m_cursorIndex, inReading);
m_grid.expandGridByOneAtLocation(m_cursorIndex);
build();
m_cursorIndex++;
}
inline vector<string> BlockReadingBuilder::readings() const void setJoinSeparator(const string& separator);
{ const string joinSeparator() const;
return m_readings;
}
inline bool BlockReadingBuilder::deleteReadingBeforeCursor()
{
if (!m_cursorIndex) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex);
m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::deleteReadingAfterCursor()
{
if (m_cursorIndex == m_readings.size()) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1);
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::removeHeadReadings(size_t count)
{
if (count > length()) {
return false;
}
for (size_t i = 0; i < count; i++) {
if (m_cursorIndex) {
m_cursorIndex--;
}
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
m_grid.shrinkGridByOneAtLocation(0);
build();
}
return true;
}
inline void BlockReadingBuilder::setJoinSeparator(const string& separator)
{
m_joinSeparator = separator;
}
inline const string BlockReadingBuilder::joinSeparator() const
{
return m_joinSeparator;
}
inline Grid& BlockReadingBuilder::grid() vector<string> readings() const;
{
return m_grid;
}
inline void BlockReadingBuilder::build() Grid& grid();
{
if (!m_LM) {
return;
}
size_t begin = 0;
size_t end = m_cursorIndex + MaximumBuildSpanLength;
if (m_cursorIndex < MaximumBuildSpanLength) {
begin = 0;
}
else {
begin = m_cursorIndex - MaximumBuildSpanLength;
}
if (end > m_readings.size()) {
end = m_readings.size();
}
for (size_t p = begin ; p < end ; p++) {
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) { protected:
Node n(combinedReading, unigrams, vector<Bigram>()); void build();
m_grid.insertNode(n, p, q);
} static const string Join(vector<string>::const_iterator begin,
} vector<string>::const_iterator end,
} const string& separator);
}
} //最多使用六個字組成一個詞
static const size_t MaximumBuildSpanLength = 6;
inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator)
{ size_t m_cursorIndex;
string result; vector<string> m_readings;
for (vector<string>::const_iterator iter = begin ; iter != end ; ) {
result += *iter; Grid m_grid;
++iter; LanguageModel* m_LM;
if (iter != end) { string m_joinSeparator;
result += separator; };
}
} inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* inLM)
return result; : m_LM(inLM), m_cursorIndex(0) {}
}
} inline void BlockReadingBuilder::clear() {
m_cursorIndex = 0;
m_readings.clear();
m_grid.clear();
} }
// Returns the number of readings currently held by the builder.
inline size_t BlockReadingBuilder::length() const {
  return m_readings.size();
}
// Returns the current cursor position within the readings.
inline size_t BlockReadingBuilder::cursorIndex() const {
  return m_cursorIndex;
}
// Moves the cursor to inNewIndex, clamping it to the number of readings so the
// cursor never points past the end.
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) {
  const size_t upperBound = m_readings.size();
  if (inNewIndex > upperBound) {
    m_cursorIndex = upperBound;
  } else {
    m_cursorIndex = inNewIndex;
  }
}
// Inserts inReading at the cursor, expands the grid by one at the same
// location, calls build(), and then advances the cursor by one.
inline void BlockReadingBuilder::insertReadingAtCursor(
    const string& inReading) {
  const size_t insertAt = m_cursorIndex;
  m_readings.insert(m_readings.begin() + insertAt, inReading);
  m_grid.expandGridByOneAtLocation(insertAt);
  // build() reads m_cursorIndex, so the cursor is advanced only afterwards.
  build();
  ++m_cursorIndex;
}
// Returns a copy of the readings held by the builder.
inline vector<string> BlockReadingBuilder::readings() const {
  vector<string> snapshot(m_readings);
  return snapshot;
}
// Backspace: removes the reading immediately before the cursor, moves the
// cursor back by one, shrinks the grid at the new cursor location and calls
// build(). Returns false (and does nothing) when the cursor is at position 0.
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() {
  if (m_cursorIndex == 0) {
    return false;
  }

  // After the decrement the cursor index is exactly the element to remove.
  --m_cursorIndex;
  m_readings.erase(m_readings.begin() + m_cursorIndex);
  m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
  build();
  return true;
}
// Forward delete: removes the reading at the cursor, shrinks the grid at that
// location and calls build(). The cursor itself does not move. Returns false
// (and does nothing) when the cursor is already at the end of the readings.
inline bool BlockReadingBuilder::deleteReadingAfterCursor() {
  if (m_readings.size() == m_cursorIndex) {
    return false;
  }

  m_readings.erase(m_readings.begin() + m_cursorIndex);
  m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
  build();
  return true;
}
// Removes `count` readings from the front. For every removed reading the
// cursor moves back by one (stopping at 0), the grid is shrunk at location 0
// and build() is called. Returns false without changing anything when `count`
// exceeds the number of readings.
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
  if (length() < count) {
    return false;
  }

  size_t remaining = count;
  while (remaining > 0) {
    if (m_cursorIndex != 0) {
      --m_cursorIndex;
    }
    m_readings.erase(m_readings.begin());
    m_grid.shrinkGridByOneAtLocation(0);
    build();
    --remaining;
  }

  return true;
}
// Sets the separator that build() passes to Join() when combining readings.
inline void BlockReadingBuilder::setJoinSeparator(const string& separator) {
  m_joinSeparator.assign(separator);
}
// Returns a copy of the separator currently used to join readings.
inline const string BlockReadingBuilder::joinSeparator() const {
  const string separatorCopy(m_joinSeparator);
  return separatorCopy;
}
// Gives mutable access to the builder's grid.
inline Grid& BlockReadingBuilder::grid() {
  return m_grid;
}
inline void BlockReadingBuilder::build() {
if (!m_LM) {
return;
}
size_t begin = 0;
size_t end = m_cursorIndex + MaximumBuildSpanLength;
if (m_cursorIndex < MaximumBuildSpanLength) {
begin = 0;
} else {
begin = m_cursorIndex - MaximumBuildSpanLength;
}
if (end > m_readings.size()) {
end = m_readings.size();
}
for (size_t p = begin; p < end; p++) {
for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) {
string combinedReading = Join(
m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q,
combinedReading)) {
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) {
Node n(combinedReading, unigrams, vector<Bigram>());
m_grid.insertNode(n, p, q);
}
}
}
}
}
// Concatenates the strings in [begin, end), placing `separator` between
// consecutive elements (never before the first or after the last). An empty
// range yields an empty string.
inline const string BlockReadingBuilder::Join(
    vector<string>::const_iterator begin, vector<string>::const_iterator end,
    const string& separator) {
  string joined;
  bool needSeparator = false;
  for (vector<string>::const_iterator it = begin; it != end; ++it) {
    if (needSeparator) {
      joined += separator;
    }
    joined += *it;
    needSeparator = true;
  }
  return joined;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -21,14 +21,15 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include "gtest/gtest.h"
#include <algorithm> #include <algorithm>
#include <cstdlib>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <vector>
#include <cstdlib>
#include <sstream> #include <sstream>
#include <vector>
#include "Gramambular.h" #include "Gramambular.h"
#include "gtest/gtest.h"
const char* SampleData = R"( const char* SampleData = R"(
# #
@ -122,11 +123,9 @@ const char* SampleData = R"(
using namespace std; using namespace std;
using namespace Formosa::Gramambular; using namespace Formosa::Gramambular;
class SimpleLM : public LanguageModel class SimpleLM : public LanguageModel {
{
public: public:
SimpleLM(const char* input, bool swapKeyValue = false) SimpleLM(const char* input, bool swapKeyValue = false) {
{
stringstream sstream(input); stringstream sstream(input);
while (sstream.good()) { while (sstream.good()) {
string line; string line;
@ -149,8 +148,7 @@ class SimpleLM : public LanguageModel
if (swapKeyValue) { if (swapKeyValue) {
u.keyValue.key = col1; u.keyValue.key = col1;
u.keyValue.value = col0; u.keyValue.value = col0;
} } else {
else {
u.keyValue.key = col0; u.keyValue.key = col0;
u.keyValue.value = col1; u.keyValue.value = col1;
} }
@ -161,19 +159,17 @@ class SimpleLM : public LanguageModel
} }
} }
const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) override const vector<Bigram> bigramsForKeys(const string& preceedingKey,
{ const string& key) override {
return vector<Bigram>(); return vector<Bigram>();
} }
const vector<Unigram> unigramsForKey(const string &key) override const vector<Unigram> unigramsForKey(const string& key) override {
{
map<string, vector<Unigram> >::const_iterator f = m_db.find(key); map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
return f == m_db.end() ? vector<Unigram>() : (*f).second; return f == m_db.end() ? vector<Unigram>() : (*f).second;
} }
bool hasUnigramsForKey(const string& key) override bool hasUnigramsForKey(const string& key) override {
{
map<string, vector<Unigram> >::const_iterator f = m_db.find(key); map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
return f != m_db.end(); return f != m_db.end();
} }
@ -208,7 +204,8 @@ TEST(GramambularTest, InputTest) {
reverse(walked.begin(), walked.end()); reverse(walked.begin(), walked.end());
vector<string> composed; vector<string> composed;
for (vector<NodeAnchor>::iterator wi = walked.begin() ; wi != walked.end() ; ++wi) { for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end();
++wi) {
composed.push_back((*wi).node->currentKeyValue().value); composed.push_back((*wi).node->currentKeyValue().value);
} }
ASSERT_EQ(composed, (vector<string>{"高科技", "公司", "", "年中", "獎金"})); ASSERT_EQ(composed, (vector<string>{"高科技", "公司", "", "年中", "獎金"}));
@ -233,8 +230,10 @@ TEST(GramambularTest, WordSegmentationTest) {
reverse(walked.begin(), walked.end()); reverse(walked.begin(), walked.end());
vector<string> segmented; vector<string> segmented;
for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end(); ++wi) { for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end();
++wi) {
segmented.push_back((*wi).node->currentKeyValue().key); segmented.push_back((*wi).node->currentKeyValue().key);
} }
ASSERT_EQ(segmented, (vector<string>{"高科技", "公司", "", "年終", "獎金"})); ASSERT_EQ(segmented,
(vector<string>{"高科技", "公司", "", "年終", "獎金"}));
} }

View File

@ -29,244 +29,243 @@
#define Grid_h #define Grid_h
#include <map> #include <map>
#include "NodeAnchor.h" #include "NodeAnchor.h"
#include "Span.h" #include "Span.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class Grid {
public:
void clear();
void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey);
void expandGridByOneAtLocation(size_t inLocation); class Grid {
void shrinkGridByOneAtLocation(size_t inLocation); public:
void clear();
void insertNode(const Node& inNode, size_t inLocation,
size_t inSpanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation,
size_t inSpanningLength,
const string& inKey);
size_t width() const; void expandGridByOneAtLocation(size_t inLocation);
vector<NodeAnchor> nodesEndingAt(size_t inLocation); void shrinkGridByOneAtLocation(size_t inLocation);
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
// "Freeze" the node with the unigram that represents the selected candidate value. size_t width() const;
// After this, the node that contains the unigram will always be evaluated to that vector<NodeAnchor> nodesEndingAt(size_t inLocation);
// unigram, while all other overlapping nodes will be reset to their initial state vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only // "Freeze" the node with the unigram that represents the selected candidate
// boost the unigram that represents the value with an overriding score. This // value. After this, the node that contains the unigram will always be
// has the same side effect as fixNodeSelectedCandidate, which is that all other // evaluated to that unigram, while all other overlapping nodes will be reset
// overlapping nodes will be reset to their initial state. // to their initial state (that is, if any of those nodes were "frozen" or
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); // fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value);
const string dumpDOT();
protected:
vector<Span> m_spans;
};
inline void Grid::clear()
{
m_spans.clear();
}
inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength)
{
if (inLocation >= m_spans.size()) {
size_t diff = inLocation - m_spans.size() + 1;
for (size_t i = 0 ; i < diff ; i++) {
m_spans.push_back(Span());
}
}
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
} // only boost the unigram that represents the value with an overriding score.
// This has the same side effect as fixNodeSelectedCandidate, which is that
// all other overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location,
const string& value,
float overridingScore);
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey) const string dumpDOT();
{
if (inLocation > m_spans.size()) {
return false;
}
const Node *n = m_spans[inLocation].nodeOfLength(inSpanningLength);
if (!n) {
return false;
}
return inKey == n->key();
}
inline void Grid::expandGridByOneAtLocation(size_t inLocation) protected:
{ vector<Span> m_spans;
if (!inLocation || inLocation == m_spans.size()) { };
m_spans.insert(m_spans.begin() + inLocation, Span());
}
else {
m_spans.insert(m_spans.begin() + inLocation, Span());
for (size_t i = 0 ; i < inLocation ; i++) {
// zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i);
}
}
}
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation)
{
if (inLocation >= m_spans.size()) {
return;
}
m_spans.erase(m_spans.begin() + inLocation);
for (size_t i = 0 ; i < inLocation ; i++) {
// zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i);
}
}
inline size_t Grid::width() const inline void Grid::clear() { m_spans.clear(); }
{
return m_spans.size();
}
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation)
{
vector<NodeAnchor> result;
if (m_spans.size() && inLocation <= m_spans.size()) {
for (size_t i = 0 ; i < inLocation ; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= inLocation) {
Node *np = span.nodeOfLength(inLocation - i);
if (np) {
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = inLocation - i;
result.push_back(na);
}
}
}
}
return result;
}
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation) inline void Grid::insertNode(const Node& inNode, size_t inLocation,
{ size_t inSpanningLength) {
vector<NodeAnchor> result; if (inLocation >= m_spans.size()) {
size_t diff = inLocation - m_spans.size() + 1;
if (m_spans.size() && inLocation <= m_spans.size()) {
for (size_t i = 0 ; i < inLocation ; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= inLocation) {
for (size_t j = 1, m = span.maximumLength(); j <= m ; j++) { for (size_t i = 0; i < diff; i++) {
m_spans.push_back(Span());
if (i + j < inLocation) {
continue;
}
Node *np = span.nodeOfLength(j);
if (np) {
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = inLocation - i;
result.push_back(na);
}
}
}
}
}
return result;
}
// For nodes found at the location, fix their currently-selected candidate using the supplied string value.
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value)
{
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
NodeAnchor node;
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i);
node = nodeAnchor;
break;;
}
}
}
return node;
}
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore)
{
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
inline const string Grid::dumpDOT()
{
stringstream sst;
sst << "digraph {" << endl;
sst << "graph [ rankdir=LR ];" << endl;
sst << "BOS;" << endl;
for (size_t p = 0 ; p < m_spans.size() ; p++) {
Span& span = m_spans[p];
for (size_t ni = 0 ; ni <= span.maximumLength() ; ni++) {
Node* np = span.nodeOfLength(ni);
if (np) {
if (!p) {
sst << "BOS -> " << np->currentKeyValue().value << ";" << endl;
}
sst << np->currentKeyValue().value << ";" << endl;
if (p + ni < m_spans.size()) {
Span& dstSpan = m_spans[p+ni];
for (size_t q = 0 ; q <= dstSpan.maximumLength() ; q++) {
Node *dn = dstSpan.nodeOfLength(q);
if (dn) {
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl;
}
}
}
if (p + ni == m_spans.size()) {
sst << np->currentKeyValue().value << " -> " << "EOS;" << endl;
}
}
}
}
sst << "EOS;" << endl;
sst << "}";
return sst.str();
}
} }
}
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength);
} }
// Returns true when the span at inLocation holds a node of length
// inSpanningLength whose key equals inKey; false otherwise.
//
// A location at or beyond the grid width has no span and is rejected up front.
// The check must be `>=`: with `>` a location equal to m_spans.size() would
// fall through and index one element past the end of the vector.
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(
    size_t inLocation, size_t inSpanningLength, const string& inKey) {
  if (inLocation >= m_spans.size()) {
    return false;
  }

  const Node* n = m_spans[inLocation].nodeOfLength(inSpanningLength);
  if (!n) {
    return false;
  }

  return inKey == n->key();
}
// Inserts an empty span at inLocation. When the insertion is neither at the
// front nor at the end, every span before inLocation drops the nodes that
// would reach past the insertion point.
inline void Grid::expandGridByOneAtLocation(size_t inLocation) {
  // Evaluate before inserting: the comparison needs the pre-insert width.
  const bool atEitherEnd = !inLocation || inLocation == m_spans.size();

  m_spans.insert(m_spans.begin() + inLocation, Span());
  if (atEitherEnd) {
    return;
  }

  for (size_t start = 0; start < inLocation; ++start) {
    // zaps overlapping spans
    m_spans[start].removeNodeOfLengthGreaterThan(inLocation - start);
  }
}
// Removes the span at inLocation and, for every span before it, drops the
// nodes that would reach past the removal point. Out-of-range locations are
// ignored.
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) {
  if (inLocation < m_spans.size()) {
    m_spans.erase(m_spans.begin() + inLocation);

    for (size_t start = 0; start < inLocation; ++start) {
      // zaps overlapping spans
      m_spans[start].removeNodeOfLengthGreaterThan(inLocation - start);
    }
  }
}
// Returns the number of spans in the grid.
inline size_t Grid::width() const {
  return m_spans.size();
}
// Collects an anchor for every node that ends exactly at inLocation, i.e. a
// node starting at some i < inLocation with length inLocation - i. Returns an
// empty vector for an empty grid or a location beyond the grid width.
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation) {
  vector<NodeAnchor> anchors;

  if (!m_spans.size() || inLocation > m_spans.size()) {
    return anchors;
  }

  for (size_t start = 0; start < inLocation; ++start) {
    Span& candidateSpan = m_spans[start];

    // The span's longest node must at least reach inLocation.
    if (start + candidateSpan.maximumLength() < inLocation) {
      continue;
    }

    const size_t neededLength = inLocation - start;
    Node* found = candidateSpan.nodeOfLength(neededLength);
    if (!found) {
      continue;
    }

    NodeAnchor anchor;
    anchor.node = found;
    anchor.location = start;
    anchor.spanningLength = neededLength;
    anchors.push_back(anchor);
  }

  return anchors;
}
// Collects an anchor for every node that starts before inLocation and reaches
// at least inLocation (ending there or extending past it). Returns an empty
// vector for an empty grid or a location beyond the grid width.
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation) {
  vector<NodeAnchor> anchors;

  if (!m_spans.size() || inLocation > m_spans.size()) {
    return anchors;
  }

  for (size_t start = 0; start < inLocation; ++start) {
    Span& candidateSpan = m_spans[start];
    if (start + candidateSpan.maximumLength() < inLocation) {
      continue;
    }

    // Lengths shorter than inLocation - start cannot reach the location, so
    // the scan begins at the first length that does.
    const size_t longest = candidateSpan.maximumLength();
    for (size_t length = inLocation - start; length <= longest; ++length) {
      Node* found = candidateSpan.nodeOfLength(length);
      if (!found) {
        continue;
      }

      NodeAnchor anchor;
      anchor.node = found;
      anchor.location = start;
      // NOTE(review): this records the distance to inLocation, not the node's
      // own length (`length`); kept as-is -- confirm this is intended.
      anchor.spanningLength = inLocation - start;
      anchors.push_back(anchor);
    }
  }

  return anchors;
}
// Fixes the selected candidate of the nodes crossing or ending at `location`.
// Every such node is first reset; a node that offers a candidate whose value
// equals `value` then has that candidate selected. Returns the anchor of the
// last node on which a candidate was selected, or a default-constructed
// NodeAnchor when no node offers the value.
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
                                                 const string& value) {
  vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
  NodeAnchor node;

  for (const auto& anchor : nodes) {
    // Copy the candidate list before the reset below.
    auto candidates = anchor.node->candidates();
    Node* target = const_cast<Node*>(anchor.node);

    // Reset the candidate-fixed state of every node at the location.
    target->resetCandidate();

    // Find the first candidate carrying the requested value, if any.
    const size_t total = candidates.size();
    size_t match = 0;
    while (match < total && !(candidates[match].value == value)) {
      ++match;
    }

    if (match < total) {
      target->selectCandidateAtIndex(match);
      node = anchor;
    }
  }

  return node;
}
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location,
const string& value,
float overridingScore) {
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)
->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
// Renders the grid as a Graphviz "digraph" string. Each node is labelled with
// its current value. Nodes in the first span get an edge from BOS, nodes whose
// end coincides with the grid width get an edge to EOS, and every other node
// gets an edge to each node of the span where it ends.
inline const string Grid::dumpDOT() {
  stringstream out;
  out << "digraph {" << endl;
  out << "graph [ rankdir=LR ];" << endl;
  out << "BOS;" << endl;

  const size_t spanCount = m_spans.size();
  for (size_t from = 0; from < spanCount; ++from) {
    Span& source = m_spans[from];

    for (size_t len = 0; len <= source.maximumLength(); ++len) {
      Node* current = source.nodeOfLength(len);
      if (!current) {
        continue;
      }

      if (from == 0) {
        out << "BOS -> " << current->currentKeyValue().value << ";" << endl;
      }

      out << current->currentKeyValue().value << ";" << endl;

      // Position at which this node ends; successors start there.
      const size_t to = from + len;
      if (to < spanCount) {
        Span& target = m_spans[to];
        for (size_t nextLen = 0; nextLen <= target.maximumLength();
             ++nextLen) {
          Node* successor = target.nodeOfLength(nextLen);
          if (!successor) {
            continue;
          }
          out << current->currentKeyValue().value << " -> "
              << successor->currentKeyValue().value << ";" << endl;
        }
      } else if (to == spanCount) {
        out << current->currentKeyValue().value << " -> "
            << "EOS;" << endl;
      }
    }
  }

  out << "EOS;" << endl;
  out << "}";
  return out.str();
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -32,40 +32,36 @@
#include <string> #include <string>
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std; using namespace std;
class KeyValuePair {
public:
string key;
string value;
bool operator==(const KeyValuePair& inAnother) const; class KeyValuePair {
bool operator<(const KeyValuePair& inAnother) const; public:
}; string key;
string value;
inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) bool operator==(const KeyValuePair& inAnother) const;
{ bool operator<(const KeyValuePair& inAnother) const;
inStream << "(" << inPair.key << "," << inPair.value << ")"; };
return inStream;
}
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const
{
return key == inAnother.key && value == inAnother.value;
}
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) {
{ inStream << "(" << inPair.key << "," << inPair.value << ")";
if (key < inAnother.key) { return inStream;
return true;
}
else if (key == inAnother.key) {
return value < inAnother.value;
}
return false;
}
}
} }
// Two pairs are equal when both the key and the value match.
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const {
  if (!(key == inAnother.key)) {
    return false;
  }
  return value == inAnother.value;
}
// Orders pairs by key first; pairs with equal keys are ordered by value.
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const {
  const int keyOrder = key.compare(inAnother.key);
  if (keyOrder != 0) {
    return keyOrder < 0;
  }
  return value < inAnother.value;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -29,24 +29,25 @@
#define LanguageModel_h #define LanguageModel_h
#include <vector> #include <vector>
#include "Bigram.h" #include "Bigram.h"
#include "Unigram.h" #include "Unigram.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std;
class LanguageModel {
public:
virtual ~LanguageModel() {}
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0; using namespace std;
virtual const vector<Unigram> unigramsForKey(const string &key) = 0;
virtual bool hasUnigramsForKey(const string& key) = 0;
};
}
}
// Abstract language-model interface. All three queries are pure virtual, so a
// concrete model must implement every one of them.
class LanguageModel {
 public:
  // Virtual so that a model can be destroyed through a LanguageModel pointer.
  virtual ~LanguageModel() = default;

  // Bigram lookup for a (preceding key, key) combination.
  virtual const vector<Bigram> bigramsForKeys(const string& preceedingKey,
                                              const string& key) = 0;

  // Unigram lookup for a single key.
  virtual const vector<Unigram> unigramsForKey(const string& key) = 0;

  // NOTE(review): presumably reports whether unigramsForKey(key) would return
  // any entries -- confirm against the concrete implementations.
  virtual bool hasUnigramsForKey(const string& key) = 0;
};
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -30,202 +30,191 @@
#include <limits> #include <limits>
#include <vector> #include <vector>
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std; using namespace std;
class Node { class Node {
public: public:
Node(); Node();
Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams); Node(const string& inKey, const vector<Unigram>& inUnigrams,
const vector<Bigram>& inBigrams);
void primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues);
bool isCandidateFixed() const;
const vector<KeyValuePair>& candidates() const;
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true);
void resetCandidate();
void selectFloatingCandidateAtIndex(size_t index, double score);
const string& key() const;
double score() const;
double scoreForCandidate(string &candidate) const;
const KeyValuePair currentKeyValue() const;
double highestUnigramScore() const;
protected:
const LanguageModel* m_LM;
string m_key;
double m_score;
vector<Unigram> m_unigrams;
vector<KeyValuePair> m_candidates;
map<string, size_t> m_valueUnigramIndexMap;
map<KeyValuePair, vector<Bigram> > m_preceedingGramBigramMap;
bool m_candidateFixed;
size_t m_selectedUnigramIndex;
friend ostream& operator<<(ostream& inStream, const Node& inNode);
};
inline ostream& operator<<(ostream& inStream, const Node& inNode) void primeNodeWithPreceedingKeyValues(
{ const vector<KeyValuePair>& inKeyValues);
inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false")
<< ",selected:" << inNode.m_selectedUnigramIndex
<< "," << inNode.m_unigrams << ")";
return inStream;
}
inline Node::Node() bool isCandidateFixed() const;
: m_candidateFixed(false) const vector<KeyValuePair>& candidates() const;
, m_selectedUnigramIndex(0) void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true);
, m_score(0.0) void resetCandidate();
{ void selectFloatingCandidateAtIndex(size_t index, double score);
}
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams) const string& key() const;
: m_key(inKey) double score() const;
, m_unigrams(inUnigrams) double scoreForCandidate(string& candidate) const;
, m_candidateFixed(false) const KeyValuePair currentKeyValue() const;
, m_selectedUnigramIndex(0) double highestUnigramScore() const;
, m_score(0.0)
{
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
if (m_unigrams.size()) {
m_score = m_unigrams[0].score;
}
size_t i = 0;
for (vector<Unigram>::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) {
m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
i++;
m_candidates.push_back((*ui).keyValue);
}
for (vector<Bigram>::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) {
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
}
}
inline void Node::primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues)
{
size_t newIndex = m_selectedUnigramIndex;
double max = m_score;
if (!isCandidateFixed()) { protected:
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) { const LanguageModel* m_LM;
map<KeyValuePair, vector<Bigram> >::const_iterator f = m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) {
const vector<Bigram>& bigrams = (*f).second;
for (vector<Bigram>::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) {
const Bigram& bigram = *bi;
if (bigram.score > max) {
map<string, size_t>::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) {
newIndex = (*uf).second;
max = bigram.score;
}
}
}
}
}
}
if (m_score != max) { string m_key;
m_score = max; double m_score;
}
if (newIndex != m_selectedUnigramIndex) {
m_selectedUnigramIndex = newIndex;
}
}
inline bool Node::isCandidateFixed() const
{
return m_candidateFixed;
}
inline const vector<KeyValuePair>& Node::candidates() const
{
return m_candidates;
}
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) vector<Unigram> m_unigrams;
{ vector<KeyValuePair> m_candidates;
if (inIndex >= m_unigrams.size()) { map<string, size_t> m_valueUnigramIndexMap;
m_selectedUnigramIndex = 0; map<KeyValuePair, vector<Bigram> > m_preceedingGramBigramMap;
}
else {
m_selectedUnigramIndex = inIndex;
}
m_candidateFixed = inFix;
m_score = 99;
}
inline void Node::resetCandidate() bool m_candidateFixed;
{ size_t m_selectedUnigramIndex;
m_selectedUnigramIndex = 0;
m_candidateFixed = 0;
if (m_unigrams.size()) {
m_score = m_unigrams[0].score;
}
}
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { friend ostream& operator<<(ostream& inStream, const Node& inNode);
if (index >= m_unigrams.size()) { };
m_selectedUnigramIndex = 0;
} else {
m_selectedUnigramIndex = index;
}
m_candidateFixed = false;
m_score = score;
}
inline const string& Node::key() const
{
return m_key;
}
inline double Node::score() const
{
return m_score;
}
inline double Node::scoreForCandidate(string &candidate) const inline ostream& operator<<(ostream& inStream, const Node& inNode) {
{ inStream << "(node,key:" << inNode.m_key
for (auto unigram : m_unigrams) { << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false")
if (unigram.keyValue.value == candidate) { << ",selected:" << inNode.m_selectedUnigramIndex << ","
return unigram.score; << inNode.m_unigrams << ")";
} return inStream;
}
return 0.0;
}
inline double Node::highestUnigramScore() const {
if (m_unigrams.empty()) {
return 0.0;
}
return m_unigrams[0].score;
}
inline const KeyValuePair Node::currentKeyValue() const
{
if(m_selectedUnigramIndex >= m_unigrams.size()) {
return KeyValuePair();
}
else {
return m_candidates[m_selectedUnigramIndex];
}
}
}
} }
// Builds an empty node: empty key, no unigrams or candidates, score 0.0,
// index 0 selected and not fixed.
//
// The initializers follow the member declaration order (which is the order
// the compiler actually runs them in), and m_LM is set explicitly so the
// pointer never holds an indeterminate value.
inline Node::Node()
    : m_LM(nullptr),
      m_score(0.0),
      m_candidateFixed(false),
      m_selectedUnigramIndex(0) {}
// Builds a node for `inKey` from its unigrams and bigrams.
//
// - inUnigrams is copied and stably sorted with Unigram::ScoreCompare, so
//   index 0 holds the highest-scoring entry; the node's score starts at that
//   entry's score (or 0.0 when there are no unigrams).
// - m_candidates receives one key-value pair per unigram, in sorted order,
//   and m_valueUnigramIndexMap maps each value string to its index. If a
//   value occurs more than once, the last index wins.
// - inBigrams are grouped by their preceding key-value pair.
//
// The initializers follow the member declaration order, and m_LM is set
// explicitly so the pointer never holds an indeterminate value.
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams,
                  const vector<Bigram>& inBigrams)
    : m_LM(nullptr),
      m_key(inKey),
      m_score(0.0),
      m_unigrams(inUnigrams),
      m_candidateFixed(false),
      m_selectedUnigramIndex(0) {
  stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);

  if (!m_unigrams.empty()) {
    m_score = m_unigrams[0].score;
  }

  // The final size is known up front, so allocate once.
  m_candidates.reserve(m_unigrams.size());
  for (size_t i = 0; i < m_unigrams.size(); ++i) {
    const KeyValuePair& keyValue = m_unigrams[i].keyValue;
    m_valueUnigramIndexMap[keyValue.value] = i;
    m_candidates.push_back(keyValue);
  }

  for (const Bigram& bigram : inBigrams) {
    m_preceedingGramBigramMap[bigram.preceedingKeyValue].push_back(bigram);
  }
}
inline void Node::primeNodeWithPreceedingKeyValues(
const vector<KeyValuePair>& inKeyValues) {
size_t newIndex = m_selectedUnigramIndex;
double max = m_score;
if (!isCandidateFixed()) {
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin();
kvi != inKeyValues.end(); ++kvi) {
map<KeyValuePair, vector<Bigram> >::const_iterator f =
m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) {
const vector<Bigram>& bigrams = (*f).second;
for (vector<Bigram>::const_iterator bi = bigrams.begin();
bi != bigrams.end(); ++bi) {
const Bigram& bigram = *bi;
if (bigram.score > max) {
map<string, size_t>::const_iterator uf =
m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) {
newIndex = (*uf).second;
max = bigram.score;
}
}
}
}
}
}
if (m_score != max) {
m_score = max;
}
if (newIndex != m_selectedUnigramIndex) {
m_selectedUnigramIndex = newIndex;
}
}
// Returns the m_candidateFixed flag.
inline bool Node::isCandidateFixed() const { return m_candidateFixed; }
// Returns a reference to the node's candidate key-value pairs
// (m_candidates); the reference stays owned by the node.
inline const vector<KeyValuePair>& Node::candidates() const {
return m_candidates;
}
// Selects the unigram at `inIndex` and records `inFix` as the fixed flag.
// An index past the end of the unigram list falls back to index 0. The
// node's score is overwritten with the constant 99 regardless of the chosen
// unigram's own score.
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) {
  const bool inRange = inIndex < m_unigrams.size();
  m_selectedUnigramIndex = inRange ? inIndex : 0;
  m_candidateFixed = inFix;
  m_score = 99;
}
// Restores the selection state the constructors establish: index 0 selected,
// not fixed, and the score taken from the first unigram.
//
// With no unigrams the score goes back to 0.0, matching the constructors,
// instead of keeping whatever an earlier selection wrote (for example the
// 99 set by selectCandidateAtIndex).
inline void Node::resetCandidate() {
  m_selectedUnigramIndex = 0;
  m_candidateFixed = false;
  m_score = m_unigrams.empty() ? 0.0 : m_unigrams[0].score;
}
// Selects the unigram at `index` without fixing it and stores the
// caller-supplied `score` as the node's score. An index past the end of the
// unigram list falls back to index 0.
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
  const bool inRange = index < m_unigrams.size();
  m_selectedUnigramIndex = inRange ? index : 0;
  m_candidateFixed = false;
  m_score = score;
}
// Returns a reference to the node's key (m_key).
inline const string& Node::key() const { return m_key; }
// Returns the node's current score (m_score).
inline double Node::score() const { return m_score; }
// Returns the score of the first unigram whose value equals `candidate`, or
// 0.0 when no unigram has that value.
//
// `candidate` is only read; the non-const reference is kept so the
// definition still matches the in-class declaration.
inline double Node::scoreForCandidate(string& candidate) const {
  // Iterate by const reference so each Unigram is not copied per step.
  for (const auto& unigram : m_unigrams) {
    if (unigram.keyValue.value == candidate) {
      return unigram.score;
    }
  }
  return 0.0;
}
// Returns the score of the unigram at index 0, or 0.0 when the node has no
// unigrams. The three-argument constructor sorts unigrams with
// Unigram::ScoreCompare, which puts the highest score first.
inline double Node::highestUnigramScore() const {
  return m_unigrams.empty() ? 0.0 : m_unigrams[0].score;
}
// Returns a copy of the currently selected candidate, or a
// default-constructed KeyValuePair when the selected index is not a valid
// unigram index.
inline const KeyValuePair Node::currentKeyValue() const {
  if (m_selectedUnigramIndex < m_unigrams.size()) {
    return m_candidates[m_selectedUnigramIndex];
  }
  return KeyValuePair();
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -31,49 +31,44 @@
#include "Node.h" #include "Node.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class NodeAnchor { class NodeAnchor {
public: public:
NodeAnchor(); NodeAnchor();
const Node *node; const Node* node;
size_t location; size_t location;
size_t spanningLength; size_t spanningLength;
double accumulatedScore; double accumulatedScore;
}; };
inline NodeAnchor::NodeAnchor()
: node(0)
, location(0)
, spanningLength(0)
, accumulatedScore(0.0)
{
}
inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) inline NodeAnchor::NodeAnchor()
{ : node(0), location(0), spanningLength(0), accumulatedScore(0.0) {}
inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),";
if (inAnchor.node) { inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) {
inStream << *(inAnchor.node); inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength
} << "),";
else { if (inAnchor.node) {
inStream << "null"; inStream << *(inAnchor.node);
} } else {
inStream << "}"; inStream << "null";
return inStream; }
} inStream << "}";
return inStream;
inline ostream& operator<<(ostream& inStream, const vector<NodeAnchor>& inAnchor)
{
for (vector<NodeAnchor>::const_iterator i = inAnchor.begin() ; i != inAnchor.end() ; ++i) {
inStream << *i;
if (i + 1 != inAnchor.end()) {
inStream << "<-";
}
}
return inStream;
}
}
} }
// Streams every anchor in order, placing "<-" between neighbours (none after
// the last one). An empty vector writes nothing.
inline ostream& operator<<(ostream& inStream,
                           const vector<NodeAnchor>& inAnchor) {
  const size_t count = inAnchor.size();
  for (size_t idx = 0; idx < count; ++idx) {
    inStream << inAnchor[idx];
    if (idx + 1 != count) {
      inStream << "<-";
    }
  }
  return inStream;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -31,82 +31,75 @@
#include <map> #include <map>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include "Node.h" #include "Node.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class Span { class Span {
public: public:
Span(); Span();
void clear(); void clear();
void insertNodeOfLength(const Node& inNode, size_t inLength); void insertNodeOfLength(const Node& inNode, size_t inLength);
void removeNodeOfLengthGreaterThan(size_t inLength); void removeNodeOfLengthGreaterThan(size_t inLength);
Node* nodeOfLength(size_t inLength);
size_t maximumLength() const;
protected: Node* nodeOfLength(size_t inLength);
map<size_t, Node> m_lengthNodeMap; size_t maximumLength() const;
size_t m_maximumLength;
};
inline Span::Span()
: m_maximumLength(0)
{
}
inline void Span::clear()
{
m_lengthNodeMap.clear();
m_maximumLength = 0;
}
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength)
{
m_lengthNodeMap[inLength] = inNode;
if (inLength > m_maximumLength) {
m_maximumLength = inLength;
}
}
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength)
{
if (inLength > m_maximumLength) {
return;
}
size_t max = 0;
set<size_t> removeSet;
for (map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) {
if ((*i).first > inLength) {
removeSet.insert((*i).first);
}
else {
if ((*i).first > max) {
max = (*i).first;
}
}
}
for (set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) {
m_lengthNodeMap.erase(*i);
}
m_maximumLength = max; protected:
} map<size_t, Node> m_lengthNodeMap;
size_t m_maximumLength;
inline Node* Span::nodeOfLength(size_t inLength) };
{
map<size_t, Node>::iterator f = m_lengthNodeMap.find(inLength); inline Span::Span() : m_maximumLength(0) {}
return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
} inline void Span::clear() {
m_lengthNodeMap.clear();
inline size_t Span::maximumLength() const m_maximumLength = 0;
{
return m_maximumLength;
}
}
} }
// Stores a copy of `inNode` under `inLength`, replacing any node already
// stored for that length, and raises the tracked maximum length when
// `inLength` exceeds it.
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) {
  m_lengthNodeMap[inLength] = inNode;

  const bool isNewMaximum = m_maximumLength < inLength;
  if (isNewMaximum) {
    m_maximumLength = inLength;
  }
}
// Removes every node whose length is strictly greater than `inLength` and
// sets the tracked maximum to the largest remaining length (0 when nothing
// remains). Does nothing when `inLength` already exceeds the tracked maximum.
//
// m_lengthNodeMap is ordered by length, so the nodes to drop form the tail
// starting at upper_bound(inLength); they are erased as a single range
// rather than collected into a temporary set first.
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) {
  if (inLength > m_maximumLength) {
    return;
  }

  m_lengthNodeMap.erase(m_lengthNodeMap.upper_bound(inLength),
                        m_lengthNodeMap.end());

  // The last key of an ordered map is its largest one.
  m_maximumLength =
      m_lengthNodeMap.empty() ? 0 : m_lengthNodeMap.rbegin()->first;
}
// Returns a pointer to the node stored for `inLength`, or a null pointer
// when no node has that length. The pointee is owned by the span's map.
inline Node* Span::nodeOfLength(size_t inLength) {
  const auto found = m_lengthNodeMap.find(inLength);
  if (found == m_lengthNodeMap.end()) {
    return nullptr;
  }
  return &found->second;
}
// Returns the tracked maximum node length (m_maximumLength).
inline size_t Span::maximumLength() const { return m_maximumLength; }
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -29,76 +29,69 @@
#define Unigram_h #define Unigram_h
#include <vector> #include <vector>
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class Unigram { class Unigram {
public: public:
Unigram(); Unigram();
KeyValuePair keyValue; KeyValuePair keyValue;
double score; double score;
bool operator==(const Unigram& inAnother) const;
bool operator<(const Unigram& inAnother) const;
static bool ScoreCompare(const Unigram& a, const Unigram& b);
};
inline ostream& operator<<(ostream& inStream, const Unigram& inGram) bool operator==(const Unigram& inAnother) const;
{ bool operator<(const Unigram& inAnother) const;
streamsize p = inStream.precision();
inStream.precision(6);
inStream << "(" << inGram.keyValue << "," << inGram.score << ")";
inStream.precision(p);
return inStream;
}
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams)
{
inStream << "[" << inGrams.size() << "]=>{";
size_t index = 0;
for (vector<Unigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) {
inStream << index << "=>";
inStream << *gi;
if (gi + 1 != inGrams.end()) {
inStream << ",";
}
}
inStream << "}";
return inStream;
}
inline Unigram::Unigram()
: score(0.0)
{
}
inline bool Unigram::operator==(const Unigram& inAnother) const
{
return keyValue == inAnother.keyValue && score == inAnother.score;
}
inline bool Unigram::operator<(const Unigram& inAnother) const
{
if (keyValue < inAnother.keyValue) {
return true;
}
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score;
}
return false;
}
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) static bool ScoreCompare(const Unigram& a, const Unigram& b);
{ };
return a.score > b.score;
} inline ostream& operator<<(ostream& inStream, const Unigram& inGram) {
} streamsize p = inStream.precision();
inStream.precision(6);
inStream << "(" << inGram.keyValue << "," << inGram.score << ")";
inStream.precision(p);
return inStream;
} }
// Streams a unigram list as "[N]=>{0=>u0,1=>u1,...}", where N is the element
// count and each element is written with the single-Unigram operator<<.
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams) {
  const size_t total = inGrams.size();
  inStream << "[" << total << "]=>{";

  for (size_t index = 0; index < total; ++index) {
    inStream << index << "=>";
    inStream << inGrams[index];
    // Comma between entries only, never after the last one.
    if (index + 1 != total) {
      inStream << ",";
    }
  }

  inStream << "}";
  return inStream;
}
// Default constructor: score starts at 0.0 and keyValue is
// default-constructed.
inline Unigram::Unigram() : score(0.0) {}
// Two unigrams are equal when both their key-value pairs and their scores
// compare equal. The score is only compared once the key-value pairs match.
inline bool Unigram::operator==(const Unigram& inAnother) const {
  if (!(keyValue == inAnother.keyValue)) {
    return false;
  }
  return score == inAnother.score;
}
// Orders unigrams by key-value pair first; when the pairs compare equal, the
// lower score sorts first.
inline bool Unigram::operator<(const Unigram& inAnother) const {
  if (keyValue < inAnother.keyValue) {
    return true;
  }
  // Scores only break ties between equal key-value pairs.
  return keyValue == inAnother.keyValue && score < inAnother.score;
}
// Comparator that is true when `a` has a strictly higher score than `b`, so
// sorting with it puts higher scores first.
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) {
  return b.score < a.score;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -29,63 +29,65 @@
#define Walker_h #define Walker_h
#include <algorithm> #include <algorithm>
#include "Grid.h" #include "Grid.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std; using namespace std;
class Walker { class Walker {
public: public:
Walker(Grid* inGrid); Walker(Grid* inGrid);
const vector<NodeAnchor> reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0); const vector<NodeAnchor> reverseWalk(size_t inLocation,
double inAccumulatedScore = 0.0);
protected:
Grid* m_grid;
};
inline Walker::Walker(Grid* inGrid)
: m_grid(inGrid)
{
}
inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation, double inAccumulatedScore)
{
if (!inLocation || inLocation > m_grid->width()) {
return vector<NodeAnchor>();
}
vector<vector<NodeAnchor> > paths;
vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation); protected:
Grid* m_grid;
for (vector<NodeAnchor>::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) { };
if (!(*ni).node) {
continue;
}
(*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {}
vector<NodeAnchor> path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation,
path.insert(path.begin(), *ni); double inAccumulatedScore) {
if (!inLocation || inLocation > m_grid->width()) {
paths.push_back(path); return vector<NodeAnchor>();
} }
if (!paths.size()) { vector<vector<NodeAnchor> > paths;
return vector<NodeAnchor>();
} vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation);
vector<NodeAnchor>* result = &*(paths.begin()); for (vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end();
for (vector<vector<NodeAnchor> >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) { ++ni) {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { if (!(*ni).node) {
result = &*pi; continue;
}
}
return *result;
}
} }
(*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score();
vector<NodeAnchor> path =
reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore);
path.insert(path.begin(), *ni);
paths.push_back(path);
}
if (!paths.size()) {
return vector<NodeAnchor>();
}
vector<NodeAnchor>* result = &*(paths.begin());
for (vector<vector<NodeAnchor> >::iterator pi = paths.begin();
pi != paths.end(); ++pi) {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
result = &*pi;
}
}
return *result;
} }
} // namespace Gramambular
} // namespace Formosa
#endif #endif