Reformat Gramambular with clang-format

This commit is contained in:
Lukhnos Liu 2022-02-19 09:28:04 -08:00
parent d4d8d14004
commit d3302ef70a
11 changed files with 887 additions and 939 deletions

View File

@ -46,22 +46,22 @@ namespace Formosa {
bool operator<(const Bigram& inAnother) const; bool operator<(const Bigram& inAnother) const;
}; };
// Writes inGram to inStream in the form "(keyValue|preceedingKeyValue,score)".
// The stream precision is set to 6 for the duration of the write and the
// previously configured precision is restored before returning.
// Returns inStream so that insertions can be chained.
inline ostream& operator<<(ostream& inStream, const Bigram& inGram) inline ostream& operator<<(ostream& inStream, const Bigram& inGram) {
{
streamsize p = inStream.precision(); streamsize p = inStream.precision();
inStream.precision(6); inStream.precision(6);
inStream << "(" << inGram.keyValue << "|" <<inGram.preceedingKeyValue << "," << inGram.score << ")"; inStream << "(" << inGram.keyValue << "|" << inGram.preceedingKeyValue << ","
<< inGram.score << ")";
inStream.precision(p); inStream.precision(p);
return inStream; return inStream;
} }
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams) inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams) {
{
inStream << "[" << inGrams.size() << "]=>{"; inStream << "[" << inGrams.size() << "]=>{";
size_t index = 0; size_t index = 0;
for (vector<Bigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { for (vector<Bigram>::const_iterator gi = inGrams.begin(); gi != inGrams.end();
++gi, ++index) {
inStream << index << "=>"; inStream << index << "=>";
inStream << *gi; inStream << *gi;
if (gi + 1 != inGrams.end()) { if (gi + 1 != inGrams.end()) {
@ -73,26 +73,20 @@ namespace Formosa {
return inStream; return inStream;
} }
// Bigram default constructor: initializes score to 0.0.
// Bigram::operator== (right-hand column): two bigrams compare equal only when
// preceedingKeyValue, keyValue and score all compare equal; score is compared
// with an exact ==, not with a tolerance.
inline Bigram::Bigram() inline Bigram::Bigram() : score(0.0) {}
: score(0.0)
{ inline bool Bigram::operator==(const Bigram& inAnother) const {
return preceedingKeyValue == inAnother.preceedingKeyValue &&
keyValue == inAnother.keyValue && score == inAnother.score;
} }
inline bool Bigram::operator==(const Bigram& inAnother) const inline bool Bigram::operator<(const Bigram& inAnother) const {
{
return preceedingKeyValue == inAnother.preceedingKeyValue && keyValue == inAnother.keyValue && score == inAnother.score;
}
inline bool Bigram::operator<(const Bigram& inAnother) const
{
if (preceedingKeyValue < inAnother.preceedingKeyValue) { if (preceedingKeyValue < inAnother.preceedingKeyValue) {
return true; return true;
} } else if (preceedingKeyValue == inAnother.preceedingKeyValue) {
else if (preceedingKeyValue == inAnother.preceedingKeyValue) {
if (keyValue < inAnother.keyValue) { if (keyValue < inAnother.keyValue) {
return true; return true;
} } else if (keyValue == inAnother.keyValue) {
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score; return score < inAnother.score;
} }
return false; return false;
@ -100,7 +94,7 @@ namespace Formosa {
return false; return false;
} }
} } // namespace Gramambular
} } // namespace Formosa
#endif #endif

View File

@ -29,6 +29,7 @@
#define BlockReadingBuilder_h #define BlockReadingBuilder_h
#include <vector> #include <vector>
#include "Grid.h" #include "Grid.h"
#include "LanguageModel.h" #include "LanguageModel.h"
@ -60,7 +61,9 @@ namespace Formosa {
protected: protected:
void build(); void build();
static const string Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator); static const string Join(vector<string>::const_iterator begin,
vector<string>::const_iterator end,
const string& separator);
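//最多使用六個字組成一個詞 (at most six characters are used to form one word) //最多使用六個字組成一個詞 (at most six characters are used to form one word)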
static const size_t MaximumBuildSpanLength = 6; static const size_t MaximumBuildSpanLength = 6;
@ -74,35 +77,25 @@ namespace Formosa {
}; };
// Constructs a builder that keeps the given language model pointer in m_LM
// and starts with the cursor index at 0. The pointer is stored as-is; no
// null check is performed here.
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* inLM) inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* inLM)
: m_LM(inLM) : m_LM(inLM), m_cursorIndex(0) {}
, m_cursorIndex(0)
{
}
// Resets the builder to an empty state: moves the cursor back to index 0,
// removes all stored readings and clears the grid. m_LM and the join
// separator are not touched.
inline void BlockReadingBuilder::clear() inline void BlockReadingBuilder::clear() {
{
m_cursorIndex = 0; m_cursorIndex = 0;
m_readings.clear(); m_readings.clear();
m_grid.clear(); m_grid.clear();
} }
// length(): returns the number of readings currently stored (m_readings.size()).
// cursorIndex(): returns the current cursor index.
// setCursorIndex(inNewIndex): moves the cursor to inNewIndex, clamped so that
// it never exceeds m_readings.size() (one past the last reading is allowed).
inline size_t BlockReadingBuilder::length() const inline size_t BlockReadingBuilder::length() const { return m_readings.size(); }
{
return m_readings.size(); inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; }
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) {
m_cursorIndex =
inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex;
}
inline size_t BlockReadingBuilder::cursorIndex() const inline void BlockReadingBuilder::insertReadingAtCursor(
{ const string& inReading) {
return m_cursorIndex;
}
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex)
{
m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex;
}
inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading)
{
m_readings.insert(m_readings.begin() + m_cursorIndex, inReading); m_readings.insert(m_readings.begin() + m_cursorIndex, inReading);
m_grid.expandGridByOneAtLocation(m_cursorIndex); m_grid.expandGridByOneAtLocation(m_cursorIndex);
@ -110,38 +103,36 @@ namespace Formosa {
m_cursorIndex++; m_cursorIndex++;
} }
// Returns the stored readings by value, i.e. the caller receives a copy of
// m_readings and cannot modify the builder's state through it.
inline vector<string> BlockReadingBuilder::readings() const inline vector<string> BlockReadingBuilder::readings() const {
{
return m_readings; return m_readings;
} }
// Deletes the reading immediately before the cursor.
// Returns false without changing anything when the cursor is at index 0.
// Otherwise erases the reading at index (m_cursorIndex - 1), moves the cursor
// back by one, shrinks the grid at the new cursor location, calls build()
// and returns true.
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() inline bool BlockReadingBuilder::deleteReadingBeforeCursor() {
{
if (!m_cursorIndex) { if (!m_cursorIndex) {
return false; return false;
} }
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex); m_readings.erase(m_readings.begin() + m_cursorIndex - 1,
m_readings.begin() + m_cursorIndex);
m_cursorIndex--; m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex); m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build(); build();
return true; return true;
} }
// Deletes the reading at the cursor position (the one "after" the cursor).
// Returns false without changing anything when the cursor is already at the
// end (m_cursorIndex == m_readings.size()). Otherwise erases the reading at
// m_cursorIndex, shrinks the grid at that location, calls build() and returns
// true. The cursor index itself is left unchanged.
inline bool BlockReadingBuilder::deleteReadingAfterCursor() inline bool BlockReadingBuilder::deleteReadingAfterCursor() {
{
if (m_cursorIndex == m_readings.size()) { if (m_cursorIndex == m_readings.size()) {
return false; return false;
} }
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1); m_readings.erase(m_readings.begin() + m_cursorIndex,
m_readings.begin() + m_cursorIndex + 1);
m_grid.shrinkGridByOneAtLocation(m_cursorIndex); m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build(); build();
return true; return true;
} }
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
{
if (count > length()) { if (count > length()) {
return false; return false;
} }
@ -158,23 +149,17 @@ namespace Formosa {
return true; return true;
} }
// Stores a copy of `separator` in m_joinSeparator. build() passes this value
// to Join() when it combines consecutive readings into a single key.
inline void BlockReadingBuilder::setJoinSeparator(const string& separator) inline void BlockReadingBuilder::setJoinSeparator(const string& separator) {
{
m_joinSeparator = separator; m_joinSeparator = separator;
} }
// Returns the currently configured join separator. The value is returned by
// (const) value, so the caller gets a copy of m_joinSeparator.
inline const string BlockReadingBuilder::joinSeparator() const inline const string BlockReadingBuilder::joinSeparator() const {
{
return m_joinSeparator; return m_joinSeparator;
} }
// Returns a mutable reference to the builder's internal grid (m_grid);
// changes made through the reference affect the builder directly.
inline Grid& BlockReadingBuilder::grid() inline Grid& BlockReadingBuilder::grid() { return m_grid; }
{
return m_grid;
}
inline void BlockReadingBuilder::build() inline void BlockReadingBuilder::build() {
{
if (!m_LM) { if (!m_LM) {
return; return;
} }
@ -184,8 +169,7 @@ namespace Formosa {
if (m_cursorIndex < MaximumBuildSpanLength) { if (m_cursorIndex < MaximumBuildSpanLength) {
begin = 0; begin = 0;
} } else {
else {
begin = m_cursorIndex - MaximumBuildSpanLength; begin = m_cursorIndex - MaximumBuildSpanLength;
} }
@ -195,8 +179,10 @@ namespace Formosa {
for (size_t p = begin; p < end; p++) { for (size_t p = begin; p < end; p++) {
for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) { for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); string combinedReading = Join(
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) { m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q,
combinedReading)) {
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading); vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) { if (unigrams.size() > 0) {
@ -208,8 +194,9 @@ namespace Formosa {
} }
} }
inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator) inline const string BlockReadingBuilder::Join(
{ vector<string>::const_iterator begin, vector<string>::const_iterator end,
const string& separator) {
string result; string result;
for (vector<string>::const_iterator iter = begin; iter != end;) { for (vector<string>::const_iterator iter = begin; iter != end;) {
result += *iter; result += *iter;
@ -220,7 +207,7 @@ namespace Formosa {
} }
return result; return result;
} }
} } // namespace Gramambular
} } // namespace Formosa
#endif #endif

View File

@ -21,14 +21,15 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include "gtest/gtest.h"
#include <algorithm> #include <algorithm>
#include <cstdlib>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <vector>
#include <cstdlib>
#include <sstream> #include <sstream>
#include <vector>
#include "Gramambular.h" #include "Gramambular.h"
#include "gtest/gtest.h"
const char* SampleData = R"( const char* SampleData = R"(
# #
@ -122,11 +123,9 @@ const char* SampleData = R"(
using namespace std; using namespace std;
using namespace Formosa::Gramambular; using namespace Formosa::Gramambular;
class SimpleLM : public LanguageModel class SimpleLM : public LanguageModel {
{
public: public:
SimpleLM(const char* input, bool swapKeyValue = false) SimpleLM(const char* input, bool swapKeyValue = false) {
{
stringstream sstream(input); stringstream sstream(input);
while (sstream.good()) { while (sstream.good()) {
string line; string line;
@ -149,8 +148,7 @@ class SimpleLM : public LanguageModel
if (swapKeyValue) { if (swapKeyValue) {
u.keyValue.key = col1; u.keyValue.key = col1;
u.keyValue.value = col0; u.keyValue.value = col0;
} } else {
else {
u.keyValue.key = col0; u.keyValue.key = col0;
u.keyValue.value = col1; u.keyValue.value = col1;
} }
@ -161,19 +159,17 @@ class SimpleLM : public LanguageModel
} }
} }
// LanguageModel override. This test model carries no bigram data: both
// arguments are ignored and an empty vector is always returned.
const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) override const vector<Bigram> bigramsForKeys(const string& preceedingKey,
{ const string& key) override {
return vector<Bigram>(); return vector<Bigram>();
} }
// LanguageModel override. Looks `key` up in m_db and returns a copy of the
// unigram vector stored for it, or an empty vector when the key is absent.
// Uses find() so that a miss does not insert a new entry into m_db.
const vector<Unigram> unigramsForKey(const string &key) override const vector<Unigram> unigramsForKey(const string& key) override {
{
map<string, vector<Unigram> >::const_iterator f = m_db.find(key); map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
return f == m_db.end() ? vector<Unigram>() : (*f).second; return f == m_db.end() ? vector<Unigram>() : (*f).second;
} }
// LanguageModel override. Returns true when m_db contains an entry for `key`
// and false otherwise; the stored vector itself is not inspected.
bool hasUnigramsForKey(const string& key) override bool hasUnigramsForKey(const string& key) override {
{
map<string, vector<Unigram> >::const_iterator f = m_db.find(key); map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
return f != m_db.end(); return f != m_db.end();
} }
@ -208,7 +204,8 @@ TEST(GramambularTest, InputTest) {
reverse(walked.begin(), walked.end()); reverse(walked.begin(), walked.end());
vector<string> composed; vector<string> composed;
for (vector<NodeAnchor>::iterator wi = walked.begin() ; wi != walked.end() ; ++wi) { for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end();
++wi) {
composed.push_back((*wi).node->currentKeyValue().value); composed.push_back((*wi).node->currentKeyValue().value);
} }
ASSERT_EQ(composed, (vector<string>{"高科技", "公司", "", "年中", "獎金"})); ASSERT_EQ(composed, (vector<string>{"高科技", "公司", "", "年中", "獎金"}));
@ -233,8 +230,10 @@ TEST(GramambularTest, WordSegmentationTest) {
reverse(walked.begin(), walked.end()); reverse(walked.begin(), walked.end());
vector<string> segmented; vector<string> segmented;
for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end(); ++wi) { for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end();
++wi) {
segmented.push_back((*wi).node->currentKeyValue().key); segmented.push_back((*wi).node->currentKeyValue().key);
} }
ASSERT_EQ(segmented, (vector<string>{"高科技", "公司", "", "年終", "獎金"})); ASSERT_EQ(segmented,
(vector<string>{"高科技", "公司", "", "年終", "獎金"}));
} }

View File

@ -29,6 +29,7 @@
#define Grid_h #define Grid_h
#include <map> #include <map>
#include "NodeAnchor.h" #include "NodeAnchor.h"
#include "Span.h" #include "Span.h"
@ -38,8 +39,11 @@ namespace Formosa {
class Grid { class Grid {
public: public:
void clear(); void clear();
void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength); void insertNode(const Node& inNode, size_t inLocation,
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey); size_t inSpanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation,
size_t inSpanningLength,
const string& inKey);
void expandGridByOneAtLocation(size_t inLocation); void expandGridByOneAtLocation(size_t inLocation);
void shrinkGridByOneAtLocation(size_t inLocation); void shrinkGridByOneAtLocation(size_t inLocation);
@ -48,17 +52,20 @@ namespace Formosa {
vector<NodeAnchor> nodesEndingAt(size_t inLocation); vector<NodeAnchor> nodesEndingAt(size_t inLocation);
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation); vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
// "Freeze" the node with the unigram that represents the selected candidate value. // "Freeze" the node with the unigram that represents the selected candidate
// After this, the node that contains the unigram will always be evaluated to that // value. After this, the node that contains the unigram will always be
// unigram, while all other overlapping nodes will be reset to their initial state // evaluated to that unigram, while all other overlapping nodes will be reset
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.) // to their initial state (that is, if any of those nodes were "frozen" or
// fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
// boost the unigram that represents the value with an overriding score. This // only boost the unigram that represents the value with an overriding score.
// has the same side effect as fixNodeSelectedCandidate, which is that all other // This has the same side effect as fixNodeSelectedCandidate, which is that
// overlapping nodes will be reset to their initial state. // all other overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); void overrideNodeScoreForSelectedCandidate(size_t location,
const string& value,
float overridingScore);
const string dumpDOT(); const string dumpDOT();
@ -66,13 +73,10 @@ namespace Formosa {
vector<Span> m_spans; vector<Span> m_spans;
}; };
// Removes every span from the grid, leaving it with a width of 0.
inline void Grid::clear() inline void Grid::clear() { m_spans.clear(); }
{
m_spans.clear();
}
inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength) inline void Grid::insertNode(const Node& inNode, size_t inLocation,
{ size_t inSpanningLength) {
if (inLocation >= m_spans.size()) { if (inLocation >= m_spans.size()) {
size_t diff = inLocation - m_spans.size() + 1; size_t diff = inLocation - m_spans.size() + 1;
@ -84,8 +88,8 @@ namespace Formosa {
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength); m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength);
} }
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey) inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(
{ size_t inLocation, size_t inSpanningLength, const string& inKey) {
if (inLocation > m_spans.size()) { if (inLocation > m_spans.size()) {
return false; return false;
} }
@ -98,12 +102,10 @@ namespace Formosa {
return inKey == n->key(); return inKey == n->key();
} }
inline void Grid::expandGridByOneAtLocation(size_t inLocation) inline void Grid::expandGridByOneAtLocation(size_t inLocation) {
{
if (!inLocation || inLocation == m_spans.size()) { if (!inLocation || inLocation == m_spans.size()) {
m_spans.insert(m_spans.begin() + inLocation, Span()); m_spans.insert(m_spans.begin() + inLocation, Span());
} } else {
else {
m_spans.insert(m_spans.begin() + inLocation, Span()); m_spans.insert(m_spans.begin() + inLocation, Span());
for (size_t i = 0; i < inLocation; i++) { for (size_t i = 0; i < inLocation; i++) {
// zaps overlapping spans // zaps overlapping spans
@ -112,8 +114,7 @@ namespace Formosa {
} }
} }
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) {
{
if (inLocation >= m_spans.size()) { if (inLocation >= m_spans.size()) {
return; return;
} }
@ -125,13 +126,9 @@ namespace Formosa {
} }
} }
// Returns the width of the grid, defined as the number of spans it holds.
inline size_t Grid::width() const inline size_t Grid::width() const { return m_spans.size(); }
{
return m_spans.size();
}
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation) inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation) {
{
vector<NodeAnchor> result; vector<NodeAnchor> result;
if (m_spans.size() && inLocation <= m_spans.size()) { if (m_spans.size() && inLocation <= m_spans.size()) {
@ -154,8 +151,7 @@ namespace Formosa {
return result; return result;
} }
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation) inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation) {
{
vector<NodeAnchor> result; vector<NodeAnchor> result;
if (m_spans.size() && inLocation <= m_spans.size()) { if (m_spans.size() && inLocation <= m_spans.size()) {
@ -163,9 +159,7 @@ namespace Formosa {
Span& span = m_spans[i]; Span& span = m_spans[i];
if (i + span.maximumLength() >= inLocation) { if (i + span.maximumLength() >= inLocation) {
for (size_t j = 1, m = span.maximumLength(); j <= m; j++) { for (size_t j = 1, m = span.maximumLength(); j <= m; j++) {
if (i + j < inLocation) { if (i + j < inLocation) {
continue; continue;
} }
@ -187,9 +181,10 @@ namespace Formosa {
return result; return result;
} }
// For nodes found at the location, fix their currently-selected candidate using the supplied string value. // For nodes found at the location, fix their currently-selected candidate using
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value) // the supplied string value.
{ inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
const string& value) {
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location); vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
NodeAnchor node; NodeAnchor node;
for (auto nodeAnchor : nodes) { for (auto nodeAnchor : nodes) {
@ -202,15 +197,17 @@ namespace Formosa {
if (candidates[i].value == value) { if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i); const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i);
node = nodeAnchor; node = nodeAnchor;
break;; break;
;
} }
} }
} }
return node; return node;
} }
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore) inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location,
{ const string& value,
float overridingScore) {
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location); vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) { for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates(); auto candidates = nodeAnchor.node->candidates();
@ -220,15 +217,15 @@ namespace Formosa {
for (size_t i = 0, c = candidates.size(); i < c; ++i) { for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) { if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); const_cast<Node*>(nodeAnchor.node)
->selectFloatingCandidateAtIndex(i, overridingScore);
break; break;
} }
} }
} }
} }
inline const string Grid::dumpDOT() inline const string Grid::dumpDOT() {
{
stringstream sst; stringstream sst;
sst << "digraph {" << endl; sst << "digraph {" << endl;
sst << "graph [ rankdir=LR ];" << endl; sst << "graph [ rankdir=LR ];" << endl;
@ -250,13 +247,15 @@ namespace Formosa {
for (size_t q = 0; q <= dstSpan.maximumLength(); q++) { for (size_t q = 0; q <= dstSpan.maximumLength(); q++) {
Node* dn = dstSpan.nodeOfLength(q); Node* dn = dstSpan.nodeOfLength(q);
if (dn) { if (dn) {
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl; sst << np->currentKeyValue().value << " -> "
<< dn->currentKeyValue().value << ";" << endl;
} }
} }
} }
if (p + ni == m_spans.size()) { if (p + ni == m_spans.size()) {
sst << np->currentKeyValue().value << " -> " << "EOS;" << endl; sst << np->currentKeyValue().value << " -> "
<< "EOS;" << endl;
} }
} }
} }
@ -266,7 +265,7 @@ namespace Formosa {
sst << "}"; sst << "}";
return sst.str(); return sst.str();
} }
} } // namespace Gramambular
} } // namespace Formosa
#endif #endif

View File

@ -44,28 +44,24 @@ namespace Formosa {
bool operator<(const KeyValuePair& inAnother) const; bool operator<(const KeyValuePair& inAnother) const;
}; };
// Writes inPair to inStream in the form "(key,value)".
// Returns inStream so that insertions can be chained.
inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) {
{
inStream << "(" << inPair.key << "," << inPair.value << ")"; inStream << "(" << inPair.key << "," << inPair.value << ")";
return inStream; return inStream;
} }
// Two pairs are equal when both their keys and their values compare equal.
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const {
{
return key == inAnother.key && value == inAnother.value; return key == inAnother.key && value == inAnother.value;
} }
// Lexicographic ordering: pairs are ordered by key first, and by value only
// when the keys are equal. Returns false when this pair's key is greater
// than inAnother's key.
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const {
{
if (key < inAnother.key) { if (key < inAnother.key) {
return true; return true;
} } else if (key == inAnother.key) {
else if (key == inAnother.key) {
return value < inAnother.value; return value < inAnother.value;
} }
return false; return false;
} }
} } // namespace Gramambular
} } // namespace Formosa
#endif #endif

View File

@ -29,6 +29,7 @@
#define LanguageModel_h #define LanguageModel_h
#include <vector> #include <vector>
#include "Bigram.h" #include "Bigram.h"
#include "Unigram.h" #include "Unigram.h"
@ -41,12 +42,12 @@ namespace Formosa {
public: public:
virtual ~LanguageModel() {} virtual ~LanguageModel() {}
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0; virtual const vector<Bigram> bigramsForKeys(const string& preceedingKey,
const string& key) = 0;
virtual const vector<Unigram> unigramsForKey(const string& key) = 0; virtual const vector<Unigram> unigramsForKey(const string& key) = 0;
virtual bool hasUnigramsForKey(const string& key) = 0; virtual bool hasUnigramsForKey(const string& key) = 0;
}; };
} } // namespace Gramambular
} } // namespace Formosa
#endif #endif

View File

@ -30,6 +30,7 @@
#include <limits> #include <limits>
#include <vector> #include <vector>
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Formosa { namespace Formosa {
@ -39,9 +40,11 @@ namespace Formosa {
class Node { class Node {
public: public:
Node(); Node();
Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams); Node(const string& inKey, const vector<Unigram>& inUnigrams,
const vector<Bigram>& inBigrams);
void primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues); void primeNodeWithPreceedingKeyValues(
const vector<KeyValuePair>& inKeyValues);
bool isCandidateFixed() const; bool isCandidateFixed() const;
const vector<KeyValuePair>& candidates() const; const vector<KeyValuePair>& candidates() const;
@ -72,28 +75,24 @@ namespace Formosa {
friend ostream& operator<<(ostream& inStream, const Node& inNode); friend ostream& operator<<(ostream& inStream, const Node& inNode);
}; };
// Writes a debug representation of inNode to inStream in the form
// "(node,key:<m_key>,fixed:<true|false>,selected:<m_selectedUnigramIndex>,<m_unigrams>)".
// Reads the node's m_-prefixed members directly; this operator is declared
// as a friend inside class Node. Returns inStream for chaining.
inline ostream& operator<<(ostream& inStream, const Node& inNode) inline ostream& operator<<(ostream& inStream, const Node& inNode) {
{ inStream << "(node,key:" << inNode.m_key
inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false")
<< ",selected:" << inNode.m_selectedUnigramIndex << ",selected:" << inNode.m_selectedUnigramIndex << ","
<< "," << inNode.m_unigrams << ")"; << inNode.m_unigrams << ")";
return inStream; return inStream;
} }
// Default constructor: creates a node whose candidate is not fixed, whose
// selected unigram index is 0 and whose score is 0.0. The remaining members
// are default-constructed.
inline Node::Node() inline Node::Node()
: m_candidateFixed(false) : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {}
, m_selectedUnigramIndex(0)
, m_score(0.0)
{
}
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams) inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams,
: m_key(inKey) const vector<Bigram>& inBigrams)
, m_unigrams(inUnigrams) : m_key(inKey),
, m_candidateFixed(false) m_unigrams(inUnigrams),
, m_selectedUnigramIndex(0) m_candidateFixed(false),
, m_score(0.0) m_selectedUnigramIndex(0),
{ m_score(0.0) {
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
if (m_unigrams.size()) { if (m_unigrams.size()) {
@ -101,33 +100,39 @@ namespace Formosa {
} }
size_t i = 0; size_t i = 0;
for (vector<Unigram>::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) { for (vector<Unigram>::const_iterator ui = m_unigrams.begin();
ui != m_unigrams.end(); ++ui) {
m_valueUnigramIndexMap[(*ui).keyValue.value] = i; m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
i++; i++;
m_candidates.push_back((*ui).keyValue); m_candidates.push_back((*ui).keyValue);
} }
for (vector<Bigram>::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) { for (vector<Bigram>::const_iterator bi = inBigrams.begin();
bi != inBigrams.end(); ++bi) {
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
} }
} }
inline void Node::primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues) inline void Node::primeNodeWithPreceedingKeyValues(
{ const vector<KeyValuePair>& inKeyValues) {
size_t newIndex = m_selectedUnigramIndex; size_t newIndex = m_selectedUnigramIndex;
double max = m_score; double max = m_score;
if (!isCandidateFixed()) { if (!isCandidateFixed()) {
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) { for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin();
map<KeyValuePair, vector<Bigram> >::const_iterator f = m_preceedingGramBigramMap.find(*kvi); kvi != inKeyValues.end(); ++kvi) {
map<KeyValuePair, vector<Bigram> >::const_iterator f =
m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) { if (f != m_preceedingGramBigramMap.end()) {
const vector<Bigram>& bigrams = (*f).second; const vector<Bigram>& bigrams = (*f).second;
for (vector<Bigram>::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) { for (vector<Bigram>::const_iterator bi = bigrams.begin();
bi != bigrams.end(); ++bi) {
const Bigram& bigram = *bi; const Bigram& bigram = *bi;
if (bigram.score > max) { if (bigram.score > max) {
map<string, size_t>::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value); map<string, size_t>::const_iterator uf =
m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) { if (uf != m_valueUnigramIndexMap.end()) {
newIndex = (*uf).second; newIndex = (*uf).second;
max = bigram.score; max = bigram.score;
@ -147,22 +152,16 @@ namespace Formosa {
} }
} }
// Returns the value of the m_candidateFixed flag.
inline bool Node::isCandidateFixed() const inline bool Node::isCandidateFixed() const { return m_candidateFixed; }
{
return m_candidateFixed;
}
// Returns a const reference to the node's candidate list (m_candidates).
// No copy is made, so the reference is only valid while the node is alive.
inline const vector<KeyValuePair>& Node::candidates() const inline const vector<KeyValuePair>& Node::candidates() const {
{
return m_candidates; return m_candidates;
} }
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) {
{
if (inIndex >= m_unigrams.size()) { if (inIndex >= m_unigrams.size()) {
m_selectedUnigramIndex = 0; m_selectedUnigramIndex = 0;
} } else {
else {
m_selectedUnigramIndex = inIndex; m_selectedUnigramIndex = inIndex;
} }
@ -170,8 +169,7 @@ namespace Formosa {
m_score = 99; m_score = 99;
} }
inline void Node::resetCandidate() inline void Node::resetCandidate() {
{
m_selectedUnigramIndex = 0; m_selectedUnigramIndex = 0;
m_candidateFixed = 0; m_candidateFixed = 0;
if (m_unigrams.size()) { if (m_unigrams.size()) {
@ -189,18 +187,11 @@ namespace Formosa {
m_score = score; m_score = score;
} }
// Returns a const reference to the node's key (m_key).
inline const string& Node::key() const inline const string& Node::key() const { return m_key; }
{
return m_key;
}
// Returns the node's current score (m_score).
inline double Node::score() const inline double Node::score() const { return m_score; }
{
return m_score;
}
inline double Node::scoreForCandidate(string &candidate) const inline double Node::scoreForCandidate(string& candidate) const {
{
for (auto unigram : m_unigrams) { for (auto unigram : m_unigrams) {
if (unigram.keyValue.value == candidate) { if (unigram.keyValue.value == candidate) {
return unigram.score; return unigram.score;
@ -216,16 +207,14 @@ namespace Formosa {
return m_unigrams[0].score; return m_unigrams[0].score;
} }
// Returns (by value) the key-value pair of the currently selected candidate,
// i.e. m_candidates[m_selectedUnigramIndex]. When the selected index is not a
// valid index into m_unigrams (for example when the node has no unigrams), a
// default-constructed KeyValuePair is returned instead.
inline const KeyValuePair Node::currentKeyValue() const inline const KeyValuePair Node::currentKeyValue() const {
{
if (m_selectedUnigramIndex >= m_unigrams.size()) { if (m_selectedUnigramIndex >= m_unigrams.size()) {
return KeyValuePair(); return KeyValuePair();
} } else {
else {
return m_candidates[m_selectedUnigramIndex]; return m_candidates[m_selectedUnigramIndex];
} }
} }
} } // namespace Gramambular
} } // namespace Formosa
#endif #endif

View File

@ -42,29 +42,24 @@ namespace Formosa {
}; };
// Default constructor: creates an empty anchor with a null node pointer,
// location 0, spanning length 0 and an accumulated score of 0.0.
inline NodeAnchor::NodeAnchor() inline NodeAnchor::NodeAnchor()
: node(0) : node(0), location(0), spanningLength(0), accumulatedScore(0.0) {}
, location(0)
, spanningLength(0)
, accumulatedScore(0.0)
{
}
// Writes a debug representation of inAnchor to inStream in the form
// "{@(location,spanningLength),<node>}", where <node> is the streamed Node
// when the anchor's node pointer is non-null and the literal "null"
// otherwise. Returns inStream for chaining.
inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) {
{ inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength
inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),"; << "),";
if (inAnchor.node) { if (inAnchor.node) {
inStream << *(inAnchor.node); inStream << *(inAnchor.node);
} } else {
else {
inStream << "null"; inStream << "null";
} }
inStream << "}"; inStream << "}";
return inStream; return inStream;
} }
inline ostream& operator<<(ostream& inStream, const vector<NodeAnchor>& inAnchor) inline ostream& operator<<(ostream& inStream,
{ const vector<NodeAnchor>& inAnchor) {
for (vector<NodeAnchor>::const_iterator i = inAnchor.begin() ; i != inAnchor.end() ; ++i) { for (vector<NodeAnchor>::const_iterator i = inAnchor.begin();
i != inAnchor.end(); ++i) {
inStream << *i; inStream << *i;
if (i + 1 != inAnchor.end()) { if (i + 1 != inAnchor.end()) {
inStream << "<-"; inStream << "<-";
@ -73,7 +68,7 @@ namespace Formosa {
return inStream; return inStream;
} }
} } // namespace Gramambular
} } // namespace Formosa
#endif #endif

View File

@ -31,6 +31,7 @@
#include <map> #include <map>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include "Node.h" #include "Node.h"
namespace Formosa { namespace Formosa {
@ -51,62 +52,54 @@ namespace Formosa {
size_t m_maximumLength; size_t m_maximumLength;
}; };
// Creates an empty span: the length-to-node map is default-constructed and the
// cached maximum length starts at zero.
inline Span::Span()
    : m_maximumLength(0) {
}
inline void Span::clear() inline void Span::clear() {
{
m_lengthNodeMap.clear(); m_lengthNodeMap.clear();
m_maximumLength = 0; m_maximumLength = 0;
} }
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) {
{
m_lengthNodeMap[inLength] = inNode; m_lengthNodeMap[inLength] = inNode;
if (inLength > m_maximumLength) { if (inLength > m_maximumLength) {
m_maximumLength = inLength; m_maximumLength = inLength;
} }
} }
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) {
{
if (inLength > m_maximumLength) { if (inLength > m_maximumLength) {
return; return;
} }
size_t max = 0; size_t max = 0;
set<size_t> removeSet; set<size_t> removeSet;
for (map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) { for (map<size_t, Node>::iterator i = m_lengthNodeMap.begin(),
e = m_lengthNodeMap.end();
i != e; ++i) {
if ((*i).first > inLength) { if ((*i).first > inLength) {
removeSet.insert((*i).first); removeSet.insert((*i).first);
} } else {
else {
if ((*i).first > max) { if ((*i).first > max) {
max = (*i).first; max = (*i).first;
} }
} }
} }
for (set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) { for (set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e;
++i) {
m_lengthNodeMap.erase(*i); m_lengthNodeMap.erase(*i);
} }
m_maximumLength = max; m_maximumLength = max;
} }
// Looks up the node stored for exactly inLength.
//
// Returns the address of the node held inside the span's map, or nullptr when
// no node of that length is stored. The pointer refers to the map's own
// element, not to a copy.
inline Node* Span::nodeOfLength(size_t inLength) {
  map<size_t, Node>::iterator found = m_lengthNodeMap.find(inLength);
  if (found == m_lengthNodeMap.end()) {
    return nullptr;
  }
  return &found->second;
}
inline size_t Span::maximumLength() const inline size_t Span::maximumLength() const { return m_maximumLength; }
{ } // namespace Gramambular
return m_maximumLength; } // namespace Formosa
}
}
}
#endif #endif

View File

@ -29,6 +29,7 @@
#define Unigram_h #define Unigram_h
#include <vector> #include <vector>
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Formosa { namespace Formosa {
@ -46,8 +47,7 @@ namespace Formosa {
static bool ScoreCompare(const Unigram& a, const Unigram& b); static bool ScoreCompare(const Unigram& a, const Unigram& b);
}; };
inline ostream& operator<<(ostream& inStream, const Unigram& inGram) inline ostream& operator<<(ostream& inStream, const Unigram& inGram) {
{
streamsize p = inStream.precision(); streamsize p = inStream.precision();
inStream.precision(6); inStream.precision(6);
inStream << "(" << inGram.keyValue << "," << inGram.score << ")"; inStream << "(" << inGram.keyValue << "," << inGram.score << ")";
@ -55,13 +55,13 @@ namespace Formosa {
return inStream; return inStream;
} }
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams) inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams) {
{
inStream << "[" << inGrams.size() << "]=>{"; inStream << "[" << inGrams.size() << "]=>{";
size_t index = 0; size_t index = 0;
for (vector<Unigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) { for (vector<Unigram>::const_iterator gi = inGrams.begin();
gi != inGrams.end(); ++gi, ++index) {
inStream << index << "=>"; inStream << index << "=>";
inStream << *gi; inStream << *gi;
if (gi + 1 != inGrams.end()) { if (gi + 1 != inGrams.end()) {
@ -73,32 +73,25 @@ namespace Formosa {
return inStream; return inStream;
} }
// Creates a unigram with a default-constructed key-value pair and a score
// of 0.0.
inline Unigram::Unigram()
    : score(0.0) {
}
inline bool Unigram::operator==(const Unigram& inAnother) const inline bool Unigram::operator==(const Unigram& inAnother) const {
{
return keyValue == inAnother.keyValue && score == inAnother.score; return keyValue == inAnother.keyValue && score == inAnother.score;
} }
inline bool Unigram::operator<(const Unigram& inAnother) const inline bool Unigram::operator<(const Unigram& inAnother) const {
{
if (keyValue < inAnother.keyValue) { if (keyValue < inAnother.keyValue) {
return true; return true;
} } else if (keyValue == inAnother.keyValue) {
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score; return score < inAnother.score;
} }
return false; return false;
} }
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) {
{
return a.score > b.score; return a.score > b.score;
} }
} } // namespace Gramambular
} } // namespace Formosa
#endif #endif

View File

@ -29,6 +29,7 @@
#define Walker_h #define Walker_h
#include <algorithm> #include <algorithm>
#include "Grid.h" #include "Grid.h"
namespace Formosa { namespace Formosa {
@ -38,19 +39,17 @@ namespace Formosa {
class Walker { class Walker {
public: public:
Walker(Grid* inGrid); Walker(Grid* inGrid);
const vector<NodeAnchor> reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0); const vector<NodeAnchor> reverseWalk(size_t inLocation,
double inAccumulatedScore = 0.0);
protected: protected:
Grid* m_grid; Grid* m_grid;
}; };
// Remembers the grid pointer exactly as given; no copy of the grid is made.
inline Walker::Walker(Grid* inGrid)
    : m_grid(inGrid) {
}
inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation, double inAccumulatedScore) inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation,
{ double inAccumulatedScore) {
if (!inLocation || inLocation > m_grid->width()) { if (!inLocation || inLocation > m_grid->width()) {
return vector<NodeAnchor>(); return vector<NodeAnchor>();
} }
@ -59,14 +58,16 @@ namespace Formosa {
vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation); vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation);
for (vector<NodeAnchor>::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) { for (vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end();
++ni) {
if (!(*ni).node) { if (!(*ni).node) {
continue; continue;
} }
(*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score(); (*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score();
vector<NodeAnchor> path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore); vector<NodeAnchor> path =
reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore);
path.insert(path.begin(), *ni); path.insert(path.begin(), *ni);
paths.push_back(path); paths.push_back(path);
@ -77,7 +78,8 @@ namespace Formosa {
} }
vector<NodeAnchor>* result = &*(paths.begin()); vector<NodeAnchor>* result = &*(paths.begin());
for (vector<vector<NodeAnchor> >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) { for (vector<vector<NodeAnchor> >::iterator pi = paths.begin();
pi != paths.end(); ++pi) {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
result = &*pi; result = &*pi;
} }
@ -85,7 +87,7 @@ namespace Formosa {
return *result; return *result;
} }
} } // namespace Gramambular
} } // namespace Formosa
#endif #endif