Reformat Gramambular with clang-format
This commit is contained in:
parent
d4d8d14004
commit
d3302ef70a
|
@ -33,74 +33,68 @@
|
||||||
#include "KeyValuePair.h"
|
#include "KeyValuePair.h"
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
class Bigram {
|
class Bigram {
|
||||||
public:
|
public:
|
||||||
Bigram();
|
Bigram();
|
||||||
|
|
||||||
KeyValuePair preceedingKeyValue;
|
|
||||||
KeyValuePair keyValue;
|
|
||||||
double score;
|
|
||||||
|
|
||||||
bool operator==(const Bigram& inAnother) const;
|
|
||||||
bool operator<(const Bigram& inAnother) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
inline ostream& operator<<(ostream& inStream, const Bigram& inGram)
|
KeyValuePair preceedingKeyValue;
|
||||||
{
|
KeyValuePair keyValue;
|
||||||
streamsize p = inStream.precision();
|
double score;
|
||||||
inStream.precision(6);
|
|
||||||
inStream << "(" << inGram.keyValue << "|" <<inGram.preceedingKeyValue << "," << inGram.score << ")";
|
|
||||||
inStream.precision(p);
|
|
||||||
return inStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams)
|
bool operator==(const Bigram& inAnother) const;
|
||||||
{
|
bool operator<(const Bigram& inAnother) const;
|
||||||
inStream << "[" << inGrams.size() << "]=>{";
|
};
|
||||||
|
|
||||||
size_t index = 0;
|
|
||||||
|
|
||||||
for (vector<Bigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) {
|
|
||||||
inStream << index << "=>";
|
|
||||||
inStream << *gi;
|
|
||||||
if (gi + 1 != inGrams.end()) {
|
|
||||||
inStream << ",";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inStream << "}";
|
|
||||||
return inStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Bigram::Bigram()
|
|
||||||
: score(0.0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool Bigram::operator==(const Bigram& inAnother) const
|
|
||||||
{
|
|
||||||
return preceedingKeyValue == inAnother.preceedingKeyValue && keyValue == inAnother.keyValue && score == inAnother.score;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool Bigram::operator<(const Bigram& inAnother) const
|
|
||||||
{
|
|
||||||
if (preceedingKeyValue < inAnother.preceedingKeyValue) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (preceedingKeyValue == inAnother.preceedingKeyValue) {
|
|
||||||
if (keyValue < inAnother.keyValue) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (keyValue == inAnother.keyValue) {
|
|
||||||
return score < inAnother.score;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
inline ostream& operator<<(ostream& inStream, const Bigram& inGram) {
|
||||||
}
|
streamsize p = inStream.precision();
|
||||||
}
|
inStream.precision(6);
|
||||||
|
inStream << "(" << inGram.keyValue << "|" << inGram.preceedingKeyValue << ","
|
||||||
|
<< inGram.score << ")";
|
||||||
|
inStream.precision(p);
|
||||||
|
return inStream;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes a list of bigrams as "[count]=>{0=>gram,1=>gram,...}". Each element
// is formatted by the stream operator for a single Bigram.
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams) {
  const size_t count = inGrams.size();
  inStream << "[" << count << "]=>{";

  for (size_t position = 0; position < count; ++position) {
    // Entries are comma-separated, so every entry but the first is preceded
    // by a comma.
    if (position > 0) {
      inStream << ",";
    }
    inStream << position << "=>" << inGrams[position];
  }

  inStream << "}";
  return inStream;
}
|
||||||
|
|
||||||
|
// Creates a bigram whose score starts at zero; the two key-value members are
// left to their own default construction.
inline Bigram::Bigram()
    : score(0.0) {
}
|
||||||
|
|
||||||
|
// Two bigrams are equal only when the preceding key-value, the key-value and
// the score all compare equal. The members are checked in that order.
inline bool Bigram::operator==(const Bigram& inAnother) const {
  if (!(preceedingKeyValue == inAnother.preceedingKeyValue)) {
    return false;
  }
  if (!(keyValue == inAnother.keyValue)) {
    return false;
  }
  return score == inAnother.score;
}
|
||||||
|
|
||||||
|
// Orders bigrams by preceding key-value first, then by key-value, and finally
// by score. A later member is only consulted when the earlier ones compare
// equal.
inline bool Bigram::operator<(const Bigram& inAnother) const {
  if (preceedingKeyValue < inAnother.preceedingKeyValue) {
    return true;
  }
  if (!(preceedingKeyValue == inAnother.preceedingKeyValue)) {
    return false;
  }

  // Preceding key-values are equal from here on.
  if (keyValue < inAnother.keyValue) {
    return true;
  }
  if (!(keyValue == inAnother.keyValue)) {
    return false;
  }

  // Everything else ties, so the score decides.
  return score < inAnother.score;
}
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -29,198 +29,185 @@
|
||||||
#define BlockReadingBuilder_h
|
#define BlockReadingBuilder_h
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "Grid.h"
|
#include "Grid.h"
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class BlockReadingBuilder {
|
|
||||||
public:
|
|
||||||
BlockReadingBuilder(LanguageModel *inLM);
|
|
||||||
void clear();
|
|
||||||
|
|
||||||
size_t length() const;
|
|
||||||
size_t cursorIndex() const;
|
|
||||||
void setCursorIndex(size_t inNewIndex);
|
|
||||||
void insertReadingAtCursor(const string& inReading);
|
|
||||||
bool deleteReadingBeforeCursor(); // backspace
|
|
||||||
bool deleteReadingAfterCursor(); // delete
|
|
||||||
|
|
||||||
bool removeHeadReadings(size_t count);
|
|
||||||
|
|
||||||
void setJoinSeparator(const string& separator);
|
|
||||||
const string joinSeparator() const;
|
|
||||||
|
|
||||||
vector<string> readings() const;
|
class BlockReadingBuilder {
|
||||||
|
public:
|
||||||
|
BlockReadingBuilder(LanguageModel* inLM);
|
||||||
|
void clear();
|
||||||
|
|
||||||
Grid& grid();
|
size_t length() const;
|
||||||
|
size_t cursorIndex() const;
|
||||||
protected:
|
void setCursorIndex(size_t inNewIndex);
|
||||||
void build();
|
void insertReadingAtCursor(const string& inReading);
|
||||||
|
bool deleteReadingBeforeCursor(); // backspace
|
||||||
static const string Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator);
|
bool deleteReadingAfterCursor(); // delete
|
||||||
|
|
||||||
//最多使用六個字組成一個詞
|
|
||||||
static const size_t MaximumBuildSpanLength = 6;
|
|
||||||
|
|
||||||
size_t m_cursorIndex;
|
|
||||||
vector<string> m_readings;
|
|
||||||
|
|
||||||
Grid m_grid;
|
|
||||||
LanguageModel *m_LM;
|
|
||||||
string m_joinSeparator;
|
|
||||||
};
|
|
||||||
|
|
||||||
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM)
|
|
||||||
: m_LM(inLM)
|
|
||||||
, m_cursorIndex(0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void BlockReadingBuilder::clear()
|
|
||||||
{
|
|
||||||
m_cursorIndex = 0;
|
|
||||||
m_readings.clear();
|
|
||||||
m_grid.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline size_t BlockReadingBuilder::length() const
|
|
||||||
{
|
|
||||||
return m_readings.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline size_t BlockReadingBuilder::cursorIndex() const
|
|
||||||
{
|
|
||||||
return m_cursorIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex)
|
bool removeHeadReadings(size_t count);
|
||||||
{
|
|
||||||
m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading)
|
|
||||||
{
|
|
||||||
m_readings.insert(m_readings.begin() + m_cursorIndex, inReading);
|
|
||||||
|
|
||||||
m_grid.expandGridByOneAtLocation(m_cursorIndex);
|
|
||||||
build();
|
|
||||||
m_cursorIndex++;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline vector<string> BlockReadingBuilder::readings() const
|
void setJoinSeparator(const string& separator);
|
||||||
{
|
const string joinSeparator() const;
|
||||||
return m_readings;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool BlockReadingBuilder::deleteReadingBeforeCursor()
|
|
||||||
{
|
|
||||||
if (!m_cursorIndex) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex);
|
|
||||||
m_cursorIndex--;
|
|
||||||
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
|
|
||||||
build();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool BlockReadingBuilder::deleteReadingAfterCursor()
|
|
||||||
{
|
|
||||||
if (m_cursorIndex == m_readings.size()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1);
|
|
||||||
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
|
|
||||||
build();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool BlockReadingBuilder::removeHeadReadings(size_t count)
|
|
||||||
{
|
|
||||||
if (count > length()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < count; i++) {
|
|
||||||
if (m_cursorIndex) {
|
|
||||||
m_cursorIndex--;
|
|
||||||
}
|
|
||||||
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
|
|
||||||
m_grid.shrinkGridByOneAtLocation(0);
|
|
||||||
build();
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void BlockReadingBuilder::setJoinSeparator(const string& separator)
|
|
||||||
{
|
|
||||||
m_joinSeparator = separator;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const string BlockReadingBuilder::joinSeparator() const
|
|
||||||
{
|
|
||||||
return m_joinSeparator;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Grid& BlockReadingBuilder::grid()
|
vector<string> readings() const;
|
||||||
{
|
|
||||||
return m_grid;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void BlockReadingBuilder::build()
|
Grid& grid();
|
||||||
{
|
|
||||||
if (!m_LM) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t begin = 0;
|
|
||||||
size_t end = m_cursorIndex + MaximumBuildSpanLength;
|
|
||||||
|
|
||||||
if (m_cursorIndex < MaximumBuildSpanLength) {
|
|
||||||
begin = 0;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
begin = m_cursorIndex - MaximumBuildSpanLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (end > m_readings.size()) {
|
|
||||||
end = m_readings.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t p = begin ; p < end ; p++) {
|
|
||||||
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
|
|
||||||
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
|
||||||
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
|
|
||||||
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
|
|
||||||
|
|
||||||
if (unigrams.size() > 0) {
|
protected:
|
||||||
Node n(combinedReading, unigrams, vector<Bigram>());
|
void build();
|
||||||
m_grid.insertNode(n, p, q);
|
|
||||||
}
|
static const string Join(vector<string>::const_iterator begin,
|
||||||
}
|
vector<string>::const_iterator end,
|
||||||
}
|
const string& separator);
|
||||||
}
|
|
||||||
}
|
//最多使用六個字組成一個詞
|
||||||
|
static const size_t MaximumBuildSpanLength = 6;
|
||||||
inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator)
|
|
||||||
{
|
size_t m_cursorIndex;
|
||||||
string result;
|
vector<string> m_readings;
|
||||||
for (vector<string>::const_iterator iter = begin ; iter != end ; ) {
|
|
||||||
result += *iter;
|
Grid m_grid;
|
||||||
++iter;
|
LanguageModel* m_LM;
|
||||||
if (iter != end) {
|
string m_joinSeparator;
|
||||||
result += separator;
|
};
|
||||||
}
|
|
||||||
}
|
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* inLM)
|
||||||
return result;
|
: m_LM(inLM), m_cursorIndex(0) {}
|
||||||
}
|
|
||||||
}
|
inline void BlockReadingBuilder::clear() {
|
||||||
|
m_cursorIndex = 0;
|
||||||
|
m_readings.clear();
|
||||||
|
m_grid.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Number of readings currently held by the builder.
inline size_t BlockReadingBuilder::length() const {
  return m_readings.size();
}
|
||||||
|
|
||||||
|
// Current cursor position within the reading sequence.
inline size_t BlockReadingBuilder::cursorIndex() const {
  return m_cursorIndex;
}
|
||||||
|
|
||||||
|
// Moves the cursor to inNewIndex, clamping it to the number of readings so it
// can sit at most one position past the last reading.
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex) {
  const size_t limit = m_readings.size();
  if (inNewIndex > limit) {
    m_cursorIndex = limit;
  } else {
    m_cursorIndex = inNewIndex;
  }
}
|
||||||
|
|
||||||
|
inline void BlockReadingBuilder::insertReadingAtCursor(
|
||||||
|
const string& inReading) {
|
||||||
|
m_readings.insert(m_readings.begin() + m_cursorIndex, inReading);
|
||||||
|
|
||||||
|
m_grid.expandGridByOneAtLocation(m_cursorIndex);
|
||||||
|
build();
|
||||||
|
m_cursorIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a copy of the readings currently held by the builder.
inline vector<string> BlockReadingBuilder::readings() const {
  vector<string> snapshot(m_readings);
  return snapshot;
}
|
||||||
|
|
||||||
|
// Backspace: removes the reading immediately before the cursor and moves the
// cursor back by one. Returns false (and changes nothing) when the cursor is
// already at the start.
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() {
  if (m_cursorIndex == 0) {
    return false;
  }

  // After stepping back, the cursor index is exactly the slot to remove from
  // both the reading list and the grid.
  --m_cursorIndex;
  m_readings.erase(m_readings.begin() + m_cursorIndex);
  m_grid.shrinkGridByOneAtLocation(m_cursorIndex);

  build();
  return true;
}
|
||||||
|
|
||||||
|
// Forward delete: removes the reading at the cursor position, leaving the
// cursor where it is. Returns false (and changes nothing) when the cursor is
// at the end of the readings.
inline bool BlockReadingBuilder::deleteReadingAfterCursor() {
  const bool cursorAtEnd = (m_cursorIndex == m_readings.size());
  if (cursorAtEnd) {
    return false;
  }

  m_readings.erase(m_readings.begin() + m_cursorIndex);
  m_grid.shrinkGridByOneAtLocation(m_cursorIndex);

  build();
  return true;
}
|
||||||
|
|
||||||
|
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
|
||||||
|
if (count > length()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < count; i++) {
|
||||||
|
if (m_cursorIndex) {
|
||||||
|
m_cursorIndex--;
|
||||||
|
}
|
||||||
|
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
|
||||||
|
m_grid.shrinkGridByOneAtLocation(0);
|
||||||
|
build();
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void BlockReadingBuilder::setJoinSeparator(const string& separator) {
|
||||||
|
m_joinSeparator = separator;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a copy of the separator currently used when joining readings.
inline const string BlockReadingBuilder::joinSeparator() const {
  return this->m_joinSeparator;
}
|
||||||
|
|
||||||
|
// Gives mutable access to the grid owned by this builder.
inline Grid& BlockReadingBuilder::grid() {
  return m_grid;
}
|
||||||
|
|
||||||
|
inline void BlockReadingBuilder::build() {
|
||||||
|
if (!m_LM) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t begin = 0;
|
||||||
|
size_t end = m_cursorIndex + MaximumBuildSpanLength;
|
||||||
|
|
||||||
|
if (m_cursorIndex < MaximumBuildSpanLength) {
|
||||||
|
begin = 0;
|
||||||
|
} else {
|
||||||
|
begin = m_cursorIndex - MaximumBuildSpanLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (end > m_readings.size()) {
|
||||||
|
end = m_readings.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t p = begin; p < end; p++) {
|
||||||
|
for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) {
|
||||||
|
string combinedReading = Join(
|
||||||
|
m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
||||||
|
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q,
|
||||||
|
combinedReading)) {
|
||||||
|
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
|
||||||
|
|
||||||
|
if (unigrams.size() > 0) {
|
||||||
|
Node n(combinedReading, unigrams, vector<Bigram>());
|
||||||
|
m_grid.insertNode(n, p, q);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Concatenates the strings in [begin, end), placing `separator` between
// neighbouring elements. An empty range yields an empty string.
inline const string BlockReadingBuilder::Join(
    vector<string>::const_iterator begin, vector<string>::const_iterator end,
    const string& separator) {
  string joined;
  if (begin == end) {
    return joined;
  }

  // The first element goes in bare; each later one is preceded by the
  // separator, so no separator trails the last element.
  joined += *begin;
  for (vector<string>::const_iterator piece = begin + 1; piece != end;
       ++piece) {
    joined += separator;
    joined += *piece;
  }
  return joined;
}
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -21,14 +21,15 @@
|
||||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
// OTHER DEALINGS IN THE SOFTWARE.
|
// OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
#include "gtest/gtest.h"
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cstdlib>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <vector>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "Gramambular.h"
|
#include "Gramambular.h"
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
const char* SampleData = R"(
|
const char* SampleData = R"(
|
||||||
#
|
#
|
||||||
|
@ -122,11 +123,9 @@ const char* SampleData = R"(
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Formosa::Gramambular;
|
using namespace Formosa::Gramambular;
|
||||||
|
|
||||||
class SimpleLM : public LanguageModel
|
class SimpleLM : public LanguageModel {
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
SimpleLM(const char* input, bool swapKeyValue = false)
|
SimpleLM(const char* input, bool swapKeyValue = false) {
|
||||||
{
|
|
||||||
stringstream sstream(input);
|
stringstream sstream(input);
|
||||||
while (sstream.good()) {
|
while (sstream.good()) {
|
||||||
string line;
|
string line;
|
||||||
|
@ -149,8 +148,7 @@ class SimpleLM : public LanguageModel
|
||||||
if (swapKeyValue) {
|
if (swapKeyValue) {
|
||||||
u.keyValue.key = col1;
|
u.keyValue.key = col1;
|
||||||
u.keyValue.value = col0;
|
u.keyValue.value = col0;
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
u.keyValue.key = col0;
|
u.keyValue.key = col0;
|
||||||
u.keyValue.value = col1;
|
u.keyValue.value = col1;
|
||||||
}
|
}
|
||||||
|
@ -161,19 +159,17 @@ class SimpleLM : public LanguageModel
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) override
|
const vector<Bigram> bigramsForKeys(const string& preceedingKey,
|
||||||
{
|
const string& key) override {
|
||||||
return vector<Bigram>();
|
return vector<Bigram>();
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<Unigram> unigramsForKey(const string &key) override
|
const vector<Unigram> unigramsForKey(const string& key) override {
|
||||||
{
|
|
||||||
map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
|
map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
|
||||||
return f == m_db.end() ? vector<Unigram>() : (*f).second;
|
return f == m_db.end() ? vector<Unigram>() : (*f).second;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool hasUnigramsForKey(const string& key) override
|
bool hasUnigramsForKey(const string& key) override {
|
||||||
{
|
|
||||||
map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
|
map<string, vector<Unigram> >::const_iterator f = m_db.find(key);
|
||||||
return f != m_db.end();
|
return f != m_db.end();
|
||||||
}
|
}
|
||||||
|
@ -208,7 +204,8 @@ TEST(GramambularTest, InputTest) {
|
||||||
reverse(walked.begin(), walked.end());
|
reverse(walked.begin(), walked.end());
|
||||||
|
|
||||||
vector<string> composed;
|
vector<string> composed;
|
||||||
for (vector<NodeAnchor>::iterator wi = walked.begin() ; wi != walked.end() ; ++wi) {
|
for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end();
|
||||||
|
++wi) {
|
||||||
composed.push_back((*wi).node->currentKeyValue().value);
|
composed.push_back((*wi).node->currentKeyValue().value);
|
||||||
}
|
}
|
||||||
ASSERT_EQ(composed, (vector<string>{"高科技", "公司", "的", "年中", "獎金"}));
|
ASSERT_EQ(composed, (vector<string>{"高科技", "公司", "的", "年中", "獎金"}));
|
||||||
|
@ -233,8 +230,10 @@ TEST(GramambularTest, WordSegmentationTest) {
|
||||||
reverse(walked.begin(), walked.end());
|
reverse(walked.begin(), walked.end());
|
||||||
|
|
||||||
vector<string> segmented;
|
vector<string> segmented;
|
||||||
for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end(); ++wi) {
|
for (vector<NodeAnchor>::iterator wi = walked.begin(); wi != walked.end();
|
||||||
|
++wi) {
|
||||||
segmented.push_back((*wi).node->currentKeyValue().key);
|
segmented.push_back((*wi).node->currentKeyValue().key);
|
||||||
}
|
}
|
||||||
ASSERT_EQ(segmented, (vector<string>{"高科技", "公司", "的", "年終", "獎金"}));
|
ASSERT_EQ(segmented,
|
||||||
|
(vector<string>{"高科技", "公司", "的", "年終", "獎金"}));
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,244 +29,243 @@
|
||||||
#define Grid_h
|
#define Grid_h
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
#include "NodeAnchor.h"
|
#include "NodeAnchor.h"
|
||||||
#include "Span.h"
|
#include "Span.h"
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
|
|
||||||
class Grid {
|
|
||||||
public:
|
|
||||||
void clear();
|
|
||||||
void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength);
|
|
||||||
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey);
|
|
||||||
|
|
||||||
void expandGridByOneAtLocation(size_t inLocation);
|
class Grid {
|
||||||
void shrinkGridByOneAtLocation(size_t inLocation);
|
public:
|
||||||
|
void clear();
|
||||||
|
void insertNode(const Node& inNode, size_t inLocation,
|
||||||
|
size_t inSpanningLength);
|
||||||
|
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation,
|
||||||
|
size_t inSpanningLength,
|
||||||
|
const string& inKey);
|
||||||
|
|
||||||
size_t width() const;
|
void expandGridByOneAtLocation(size_t inLocation);
|
||||||
vector<NodeAnchor> nodesEndingAt(size_t inLocation);
|
void shrinkGridByOneAtLocation(size_t inLocation);
|
||||||
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
|
|
||||||
|
|
||||||
// "Freeze" the node with the unigram that represents the selected candidate value.
|
size_t width() const;
|
||||||
// After this, the node that contains the unigram will always be evaluated to that
|
vector<NodeAnchor> nodesEndingAt(size_t inLocation);
|
||||||
// unigram, while all other overlapping nodes will be reset to their initial state
|
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation);
|
||||||
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.)
|
|
||||||
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value);
|
|
||||||
|
|
||||||
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only
|
// "Freeze" the node with the unigram that represents the selected candidate
|
||||||
// boost the unigram that represents the value with an overriding score. This
|
// value. After this, the node that contains the unigram will always be
|
||||||
// has the same side effect as fixNodeSelectedCandidate, which is that all other
|
// evaluated to that unigram, while all other overlapping nodes will be reset
|
||||||
// overlapping nodes will be reset to their initial state.
|
// to their initial state (that is, if any of those nodes were "frozen" or
|
||||||
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore);
|
// fixed, they will be unfrozen.)
|
||||||
|
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value);
|
||||||
const string dumpDOT();
|
|
||||||
|
|
||||||
protected:
|
|
||||||
vector<Span> m_spans;
|
|
||||||
};
|
|
||||||
|
|
||||||
inline void Grid::clear()
|
|
||||||
{
|
|
||||||
m_spans.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength)
|
|
||||||
{
|
|
||||||
if (inLocation >= m_spans.size()) {
|
|
||||||
size_t diff = inLocation - m_spans.size() + 1;
|
|
||||||
|
|
||||||
for (size_t i = 0 ; i < diff ; i++) {
|
|
||||||
m_spans.push_back(Span());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength);
|
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
|
||||||
}
|
// only boost the unigram that represents the value with an overriding score.
|
||||||
|
// This has the same side effect as fixNodeSelectedCandidate, which is that
|
||||||
|
// all other overlapping nodes will be reset to their initial state.
|
||||||
|
void overrideNodeScoreForSelectedCandidate(size_t location,
|
||||||
|
const string& value,
|
||||||
|
float overridingScore);
|
||||||
|
|
||||||
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey)
|
const string dumpDOT();
|
||||||
{
|
|
||||||
if (inLocation > m_spans.size()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const Node *n = m_spans[inLocation].nodeOfLength(inSpanningLength);
|
|
||||||
if (!n) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return inKey == n->key();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Grid::expandGridByOneAtLocation(size_t inLocation)
|
protected:
|
||||||
{
|
vector<Span> m_spans;
|
||||||
if (!inLocation || inLocation == m_spans.size()) {
|
};
|
||||||
m_spans.insert(m_spans.begin() + inLocation, Span());
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
m_spans.insert(m_spans.begin() + inLocation, Span());
|
|
||||||
for (size_t i = 0 ; i < inLocation ; i++) {
|
|
||||||
// zaps overlapping spans
|
|
||||||
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation)
|
|
||||||
{
|
|
||||||
if (inLocation >= m_spans.size()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_spans.erase(m_spans.begin() + inLocation);
|
|
||||||
for (size_t i = 0 ; i < inLocation ; i++) {
|
|
||||||
// zaps overlapping spans
|
|
||||||
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline size_t Grid::width() const
|
inline void Grid::clear() { m_spans.clear(); }
|
||||||
{
|
|
||||||
return m_spans.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation)
|
|
||||||
{
|
|
||||||
vector<NodeAnchor> result;
|
|
||||||
|
|
||||||
if (m_spans.size() && inLocation <= m_spans.size()) {
|
|
||||||
for (size_t i = 0 ; i < inLocation ; i++) {
|
|
||||||
Span& span = m_spans[i];
|
|
||||||
if (i + span.maximumLength() >= inLocation) {
|
|
||||||
Node *np = span.nodeOfLength(inLocation - i);
|
|
||||||
if (np) {
|
|
||||||
NodeAnchor na;
|
|
||||||
na.node = np;
|
|
||||||
na.location = i;
|
|
||||||
na.spanningLength = inLocation - i;
|
|
||||||
|
|
||||||
result.push_back(na);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation)
|
inline void Grid::insertNode(const Node& inNode, size_t inLocation,
|
||||||
{
|
size_t inSpanningLength) {
|
||||||
vector<NodeAnchor> result;
|
if (inLocation >= m_spans.size()) {
|
||||||
|
size_t diff = inLocation - m_spans.size() + 1;
|
||||||
if (m_spans.size() && inLocation <= m_spans.size()) {
|
|
||||||
for (size_t i = 0 ; i < inLocation ; i++) {
|
|
||||||
Span& span = m_spans[i];
|
|
||||||
|
|
||||||
if (i + span.maximumLength() >= inLocation) {
|
|
||||||
|
|
||||||
for (size_t j = 1, m = span.maximumLength(); j <= m ; j++) {
|
for (size_t i = 0; i < diff; i++) {
|
||||||
|
m_spans.push_back(Span());
|
||||||
if (i + j < inLocation) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
Node *np = span.nodeOfLength(j);
|
|
||||||
if (np) {
|
|
||||||
NodeAnchor na;
|
|
||||||
na.node = np;
|
|
||||||
na.location = i;
|
|
||||||
na.spanningLength = inLocation - i;
|
|
||||||
|
|
||||||
result.push_back(na);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// For nodes found at the location, fix their currently-selected candidate using the supplied string value.
|
|
||||||
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value)
|
|
||||||
{
|
|
||||||
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
|
||||||
NodeAnchor node;
|
|
||||||
for (auto nodeAnchor : nodes) {
|
|
||||||
auto candidates = nodeAnchor.node->candidates();
|
|
||||||
|
|
||||||
// Reset the candidate-fixed state of every node at the location.
|
|
||||||
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
|
|
||||||
|
|
||||||
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
|
|
||||||
if (candidates[i].value == value) {
|
|
||||||
const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i);
|
|
||||||
node = nodeAnchor;
|
|
||||||
break;;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return node;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore)
|
|
||||||
{
|
|
||||||
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
|
||||||
for (auto nodeAnchor : nodes) {
|
|
||||||
auto candidates = nodeAnchor.node->candidates();
|
|
||||||
|
|
||||||
// Reset the candidate-fixed state of every node at the location.
|
|
||||||
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
|
|
||||||
|
|
||||||
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
|
|
||||||
if (candidates[i].value == value) {
|
|
||||||
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const string Grid::dumpDOT()
|
|
||||||
{
|
|
||||||
stringstream sst;
|
|
||||||
sst << "digraph {" << endl;
|
|
||||||
sst << "graph [ rankdir=LR ];" << endl;
|
|
||||||
sst << "BOS;" << endl;
|
|
||||||
|
|
||||||
for (size_t p = 0 ; p < m_spans.size() ; p++) {
|
|
||||||
Span& span = m_spans[p];
|
|
||||||
for (size_t ni = 0 ; ni <= span.maximumLength() ; ni++) {
|
|
||||||
Node* np = span.nodeOfLength(ni);
|
|
||||||
if (np) {
|
|
||||||
if (!p) {
|
|
||||||
sst << "BOS -> " << np->currentKeyValue().value << ";" << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
sst << np->currentKeyValue().value << ";" << endl;
|
|
||||||
|
|
||||||
if (p + ni < m_spans.size()) {
|
|
||||||
Span& dstSpan = m_spans[p+ni];
|
|
||||||
for (size_t q = 0 ; q <= dstSpan.maximumLength() ; q++) {
|
|
||||||
Node *dn = dstSpan.nodeOfLength(q);
|
|
||||||
if (dn) {
|
|
||||||
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (p + ni == m_spans.size()) {
|
|
||||||
sst << np->currentKeyValue().value << " -> " << "EOS;" << endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sst << "EOS;" << endl;
|
|
||||||
sst << "}";
|
|
||||||
return sst.str();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(
|
||||||
|
size_t inLocation, size_t inSpanningLength, const string& inKey) {
|
||||||
|
if (inLocation > m_spans.size()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Node* n = m_spans[inLocation].nodeOfLength(inSpanningLength);
|
||||||
|
if (!n) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return inKey == n->key();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inserts one empty span at inLocation. When the insertion is strictly inside
// the grid (neither at position 0 nor at the end), each earlier span is asked
// to remove its nodes longer than the distance to inLocation.
inline void Grid::expandGridByOneAtLocation(size_t inLocation) {
  // Decide this before inserting: the insert changes m_spans.size().
  const bool atEdge = (inLocation == 0) || (inLocation == m_spans.size());

  m_spans.insert(m_spans.begin() + inLocation, Span());
  if (atEdge) {
    return;
  }

  for (size_t before = 0; before < inLocation; ++before) {
    // zaps overlapping spans
    const size_t limit = inLocation - before;
    m_spans[before].removeNodeOfLengthGreaterThan(limit);
  }
}
|
||||||
|
|
||||||
|
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation) {
|
||||||
|
if (inLocation >= m_spans.size()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_spans.erase(m_spans.begin() + inLocation);
|
||||||
|
for (size_t i = 0; i < inLocation; i++) {
|
||||||
|
// zaps overlapping spans
|
||||||
|
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline size_t Grid::width() const { return m_spans.size(); }
|
||||||
|
|
||||||
|
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation) {
|
||||||
|
vector<NodeAnchor> result;
|
||||||
|
|
||||||
|
if (m_spans.size() && inLocation <= m_spans.size()) {
|
||||||
|
for (size_t i = 0; i < inLocation; i++) {
|
||||||
|
Span& span = m_spans[i];
|
||||||
|
if (i + span.maximumLength() >= inLocation) {
|
||||||
|
Node* np = span.nodeOfLength(inLocation - i);
|
||||||
|
if (np) {
|
||||||
|
NodeAnchor na;
|
||||||
|
na.node = np;
|
||||||
|
na.location = i;
|
||||||
|
na.spanningLength = inLocation - i;
|
||||||
|
|
||||||
|
result.push_back(na);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation) {
|
||||||
|
vector<NodeAnchor> result;
|
||||||
|
|
||||||
|
if (m_spans.size() && inLocation <= m_spans.size()) {
|
||||||
|
for (size_t i = 0; i < inLocation; i++) {
|
||||||
|
Span& span = m_spans[i];
|
||||||
|
|
||||||
|
if (i + span.maximumLength() >= inLocation) {
|
||||||
|
for (size_t j = 1, m = span.maximumLength(); j <= m; j++) {
|
||||||
|
if (i + j < inLocation) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node* np = span.nodeOfLength(j);
|
||||||
|
if (np) {
|
||||||
|
NodeAnchor na;
|
||||||
|
na.node = np;
|
||||||
|
na.location = i;
|
||||||
|
na.spanningLength = inLocation - i;
|
||||||
|
|
||||||
|
result.push_back(na);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For nodes found at the location, fix their currently-selected candidate using
|
||||||
|
// the supplied string value.
|
||||||
|
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
|
||||||
|
const string& value) {
|
||||||
|
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
||||||
|
NodeAnchor node;
|
||||||
|
for (auto nodeAnchor : nodes) {
|
||||||
|
auto candidates = nodeAnchor.node->candidates();
|
||||||
|
|
||||||
|
// Reset the candidate-fixed state of every node at the location.
|
||||||
|
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
|
||||||
|
|
||||||
|
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
|
||||||
|
if (candidates[i].value == value) {
|
||||||
|
const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i);
|
||||||
|
node = nodeAnchor;
|
||||||
|
break;
|
||||||
|
;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location,
|
||||||
|
const string& value,
|
||||||
|
float overridingScore) {
|
||||||
|
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
||||||
|
for (auto nodeAnchor : nodes) {
|
||||||
|
auto candidates = nodeAnchor.node->candidates();
|
||||||
|
|
||||||
|
// Reset the candidate-fixed state of every node at the location.
|
||||||
|
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
|
||||||
|
|
||||||
|
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
|
||||||
|
if (candidates[i].value == value) {
|
||||||
|
const_cast<Node*>(nodeAnchor.node)
|
||||||
|
->selectFloatingCandidateAtIndex(i, overridingScore);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline const string Grid::dumpDOT() {
|
||||||
|
stringstream sst;
|
||||||
|
sst << "digraph {" << endl;
|
||||||
|
sst << "graph [ rankdir=LR ];" << endl;
|
||||||
|
sst << "BOS;" << endl;
|
||||||
|
|
||||||
|
for (size_t p = 0; p < m_spans.size(); p++) {
|
||||||
|
Span& span = m_spans[p];
|
||||||
|
for (size_t ni = 0; ni <= span.maximumLength(); ni++) {
|
||||||
|
Node* np = span.nodeOfLength(ni);
|
||||||
|
if (np) {
|
||||||
|
if (!p) {
|
||||||
|
sst << "BOS -> " << np->currentKeyValue().value << ";" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
sst << np->currentKeyValue().value << ";" << endl;
|
||||||
|
|
||||||
|
if (p + ni < m_spans.size()) {
|
||||||
|
Span& dstSpan = m_spans[p + ni];
|
||||||
|
for (size_t q = 0; q <= dstSpan.maximumLength(); q++) {
|
||||||
|
Node* dn = dstSpan.nodeOfLength(q);
|
||||||
|
if (dn) {
|
||||||
|
sst << np->currentKeyValue().value << " -> "
|
||||||
|
<< dn->currentKeyValue().value << ";" << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p + ni == m_spans.size()) {
|
||||||
|
sst << np->currentKeyValue().value << " -> "
|
||||||
|
<< "EOS;" << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sst << "EOS;" << endl;
|
||||||
|
sst << "}";
|
||||||
|
return sst.str();
|
||||||
|
}
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -32,40 +32,36 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class KeyValuePair {
|
|
||||||
public:
|
|
||||||
string key;
|
|
||||||
string value;
|
|
||||||
|
|
||||||
bool operator==(const KeyValuePair& inAnother) const;
|
class KeyValuePair {
|
||||||
bool operator<(const KeyValuePair& inAnother) const;
|
public:
|
||||||
};
|
string key;
|
||||||
|
string value;
|
||||||
|
|
||||||
inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair)
|
bool operator==(const KeyValuePair& inAnother) const;
|
||||||
{
|
bool operator<(const KeyValuePair& inAnother) const;
|
||||||
inStream << "(" << inPair.key << "," << inPair.value << ")";
|
};
|
||||||
return inStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const
|
|
||||||
{
|
|
||||||
return key == inAnother.key && value == inAnother.value;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const
|
inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) {
|
||||||
{
|
inStream << "(" << inPair.key << "," << inPair.value << ")";
|
||||||
if (key < inAnother.key) {
|
return inStream;
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (key == inAnother.key) {
|
|
||||||
return value < inAnother.value;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const {
|
||||||
|
return key == inAnother.key && value == inAnother.value;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const {
|
||||||
|
if (key < inAnother.key) {
|
||||||
|
return true;
|
||||||
|
} else if (key == inAnother.key) {
|
||||||
|
return value < inAnother.value;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -29,24 +29,25 @@
|
||||||
#define LanguageModel_h
|
#define LanguageModel_h
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "Bigram.h"
|
#include "Bigram.h"
|
||||||
#include "Unigram.h"
|
#include "Unigram.h"
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
class LanguageModel {
|
|
||||||
public:
|
|
||||||
virtual ~LanguageModel() {}
|
|
||||||
|
|
||||||
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0;
|
using namespace std;
|
||||||
virtual const vector<Unigram> unigramsForKey(const string &key) = 0;
|
|
||||||
virtual bool hasUnigramsForKey(const string& key) = 0;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
class LanguageModel {
|
||||||
|
public:
|
||||||
|
virtual ~LanguageModel() {}
|
||||||
|
|
||||||
|
virtual const vector<Bigram> bigramsForKeys(const string& preceedingKey,
|
||||||
|
const string& key) = 0;
|
||||||
|
virtual const vector<Unigram> unigramsForKey(const string& key) = 0;
|
||||||
|
virtual bool hasUnigramsForKey(const string& key) = 0;
|
||||||
|
};
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -30,202 +30,191 @@
|
||||||
|
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class Node {
|
class Node {
|
||||||
public:
|
public:
|
||||||
Node();
|
Node();
|
||||||
Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams);
|
Node(const string& inKey, const vector<Unigram>& inUnigrams,
|
||||||
|
const vector<Bigram>& inBigrams);
|
||||||
void primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues);
|
|
||||||
|
|
||||||
bool isCandidateFixed() const;
|
|
||||||
const vector<KeyValuePair>& candidates() const;
|
|
||||||
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true);
|
|
||||||
void resetCandidate();
|
|
||||||
void selectFloatingCandidateAtIndex(size_t index, double score);
|
|
||||||
|
|
||||||
const string& key() const;
|
|
||||||
double score() const;
|
|
||||||
double scoreForCandidate(string &candidate) const;
|
|
||||||
const KeyValuePair currentKeyValue() const;
|
|
||||||
double highestUnigramScore() const;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
const LanguageModel* m_LM;
|
|
||||||
|
|
||||||
string m_key;
|
|
||||||
double m_score;
|
|
||||||
|
|
||||||
vector<Unigram> m_unigrams;
|
|
||||||
vector<KeyValuePair> m_candidates;
|
|
||||||
map<string, size_t> m_valueUnigramIndexMap;
|
|
||||||
map<KeyValuePair, vector<Bigram> > m_preceedingGramBigramMap;
|
|
||||||
|
|
||||||
bool m_candidateFixed;
|
|
||||||
size_t m_selectedUnigramIndex;
|
|
||||||
|
|
||||||
friend ostream& operator<<(ostream& inStream, const Node& inNode);
|
|
||||||
};
|
|
||||||
|
|
||||||
inline ostream& operator<<(ostream& inStream, const Node& inNode)
|
void primeNodeWithPreceedingKeyValues(
|
||||||
{
|
const vector<KeyValuePair>& inKeyValues);
|
||||||
inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false")
|
|
||||||
<< ",selected:" << inNode.m_selectedUnigramIndex
|
|
||||||
<< "," << inNode.m_unigrams << ")";
|
|
||||||
return inStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Node::Node()
|
bool isCandidateFixed() const;
|
||||||
: m_candidateFixed(false)
|
const vector<KeyValuePair>& candidates() const;
|
||||||
, m_selectedUnigramIndex(0)
|
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true);
|
||||||
, m_score(0.0)
|
void resetCandidate();
|
||||||
{
|
void selectFloatingCandidateAtIndex(size_t index, double score);
|
||||||
}
|
|
||||||
|
|
||||||
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams)
|
const string& key() const;
|
||||||
: m_key(inKey)
|
double score() const;
|
||||||
, m_unigrams(inUnigrams)
|
double scoreForCandidate(string& candidate) const;
|
||||||
, m_candidateFixed(false)
|
const KeyValuePair currentKeyValue() const;
|
||||||
, m_selectedUnigramIndex(0)
|
double highestUnigramScore() const;
|
||||||
, m_score(0.0)
|
|
||||||
{
|
|
||||||
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
|
|
||||||
|
|
||||||
if (m_unigrams.size()) {
|
|
||||||
m_score = m_unigrams[0].score;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t i = 0;
|
|
||||||
for (vector<Unigram>::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) {
|
|
||||||
m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
|
|
||||||
i++;
|
|
||||||
|
|
||||||
m_candidates.push_back((*ui).keyValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (vector<Bigram>::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) {
|
|
||||||
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Node::primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues)
|
|
||||||
{
|
|
||||||
size_t newIndex = m_selectedUnigramIndex;
|
|
||||||
double max = m_score;
|
|
||||||
|
|
||||||
if (!isCandidateFixed()) {
|
protected:
|
||||||
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) {
|
const LanguageModel* m_LM;
|
||||||
map<KeyValuePair, vector<Bigram> >::const_iterator f = m_preceedingGramBigramMap.find(*kvi);
|
|
||||||
if (f != m_preceedingGramBigramMap.end()) {
|
|
||||||
const vector<Bigram>& bigrams = (*f).second;
|
|
||||||
|
|
||||||
for (vector<Bigram>::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) {
|
|
||||||
const Bigram& bigram = *bi;
|
|
||||||
if (bigram.score > max) {
|
|
||||||
map<string, size_t>::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value);
|
|
||||||
if (uf != m_valueUnigramIndexMap.end()) {
|
|
||||||
newIndex = (*uf).second;
|
|
||||||
max = bigram.score;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m_score != max) {
|
string m_key;
|
||||||
m_score = max;
|
double m_score;
|
||||||
}
|
|
||||||
|
|
||||||
if (newIndex != m_selectedUnigramIndex) {
|
|
||||||
m_selectedUnigramIndex = newIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool Node::isCandidateFixed() const
|
|
||||||
{
|
|
||||||
return m_candidateFixed;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const vector<KeyValuePair>& Node::candidates() const
|
|
||||||
{
|
|
||||||
return m_candidates;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix)
|
vector<Unigram> m_unigrams;
|
||||||
{
|
vector<KeyValuePair> m_candidates;
|
||||||
if (inIndex >= m_unigrams.size()) {
|
map<string, size_t> m_valueUnigramIndexMap;
|
||||||
m_selectedUnigramIndex = 0;
|
map<KeyValuePair, vector<Bigram> > m_preceedingGramBigramMap;
|
||||||
}
|
|
||||||
else {
|
|
||||||
m_selectedUnigramIndex = inIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_candidateFixed = inFix;
|
|
||||||
m_score = 99;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Node::resetCandidate()
|
bool m_candidateFixed;
|
||||||
{
|
size_t m_selectedUnigramIndex;
|
||||||
m_selectedUnigramIndex = 0;
|
|
||||||
m_candidateFixed = 0;
|
|
||||||
if (m_unigrams.size()) {
|
|
||||||
m_score = m_unigrams[0].score;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
|
friend ostream& operator<<(ostream& inStream, const Node& inNode);
|
||||||
if (index >= m_unigrams.size()) {
|
};
|
||||||
m_selectedUnigramIndex = 0;
|
|
||||||
} else {
|
|
||||||
m_selectedUnigramIndex = index;
|
|
||||||
}
|
|
||||||
m_candidateFixed = false;
|
|
||||||
m_score = score;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const string& Node::key() const
|
|
||||||
{
|
|
||||||
return m_key;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline double Node::score() const
|
|
||||||
{
|
|
||||||
return m_score;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline double Node::scoreForCandidate(string &candidate) const
|
inline ostream& operator<<(ostream& inStream, const Node& inNode) {
|
||||||
{
|
inStream << "(node,key:" << inNode.m_key
|
||||||
for (auto unigram : m_unigrams) {
|
<< ",fixed:" << (inNode.m_candidateFixed ? "true" : "false")
|
||||||
if (unigram.keyValue.value == candidate) {
|
<< ",selected:" << inNode.m_selectedUnigramIndex << ","
|
||||||
return unigram.score;
|
<< inNode.m_unigrams << ")";
|
||||||
}
|
return inStream;
|
||||||
}
|
|
||||||
return 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline double Node::highestUnigramScore() const {
|
|
||||||
if (m_unigrams.empty()) {
|
|
||||||
return 0.0;
|
|
||||||
}
|
|
||||||
return m_unigrams[0].score;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const KeyValuePair Node::currentKeyValue() const
|
|
||||||
{
|
|
||||||
if(m_selectedUnigramIndex >= m_unigrams.size()) {
|
|
||||||
return KeyValuePair();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return m_candidates[m_selectedUnigramIndex];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline Node::Node()
|
||||||
|
: m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {}
|
||||||
|
|
||||||
|
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams,
|
||||||
|
const vector<Bigram>& inBigrams)
|
||||||
|
: m_key(inKey),
|
||||||
|
m_unigrams(inUnigrams),
|
||||||
|
m_candidateFixed(false),
|
||||||
|
m_selectedUnigramIndex(0),
|
||||||
|
m_score(0.0) {
|
||||||
|
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
|
||||||
|
|
||||||
|
if (m_unigrams.size()) {
|
||||||
|
m_score = m_unigrams[0].score;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t i = 0;
|
||||||
|
for (vector<Unigram>::const_iterator ui = m_unigrams.begin();
|
||||||
|
ui != m_unigrams.end(); ++ui) {
|
||||||
|
m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
|
||||||
|
i++;
|
||||||
|
|
||||||
|
m_candidates.push_back((*ui).keyValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (vector<Bigram>::const_iterator bi = inBigrams.begin();
|
||||||
|
bi != inBigrams.end(); ++bi) {
|
||||||
|
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void Node::primeNodeWithPreceedingKeyValues(
|
||||||
|
const vector<KeyValuePair>& inKeyValues) {
|
||||||
|
size_t newIndex = m_selectedUnigramIndex;
|
||||||
|
double max = m_score;
|
||||||
|
|
||||||
|
if (!isCandidateFixed()) {
|
||||||
|
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin();
|
||||||
|
kvi != inKeyValues.end(); ++kvi) {
|
||||||
|
map<KeyValuePair, vector<Bigram> >::const_iterator f =
|
||||||
|
m_preceedingGramBigramMap.find(*kvi);
|
||||||
|
if (f != m_preceedingGramBigramMap.end()) {
|
||||||
|
const vector<Bigram>& bigrams = (*f).second;
|
||||||
|
|
||||||
|
for (vector<Bigram>::const_iterator bi = bigrams.begin();
|
||||||
|
bi != bigrams.end(); ++bi) {
|
||||||
|
const Bigram& bigram = *bi;
|
||||||
|
if (bigram.score > max) {
|
||||||
|
map<string, size_t>::const_iterator uf =
|
||||||
|
m_valueUnigramIndexMap.find((*bi).keyValue.value);
|
||||||
|
if (uf != m_valueUnigramIndexMap.end()) {
|
||||||
|
newIndex = (*uf).second;
|
||||||
|
max = bigram.score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_score != max) {
|
||||||
|
m_score = max;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (newIndex != m_selectedUnigramIndex) {
|
||||||
|
m_selectedUnigramIndex = newIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool Node::isCandidateFixed() const { return m_candidateFixed; }
|
||||||
|
|
||||||
|
inline const vector<KeyValuePair>& Node::candidates() const {
|
||||||
|
return m_candidates;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix) {
|
||||||
|
if (inIndex >= m_unigrams.size()) {
|
||||||
|
m_selectedUnigramIndex = 0;
|
||||||
|
} else {
|
||||||
|
m_selectedUnigramIndex = inIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_candidateFixed = inFix;
|
||||||
|
m_score = 99;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void Node::resetCandidate() {
|
||||||
|
m_selectedUnigramIndex = 0;
|
||||||
|
m_candidateFixed = 0;
|
||||||
|
if (m_unigrams.size()) {
|
||||||
|
m_score = m_unigrams[0].score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
|
||||||
|
if (index >= m_unigrams.size()) {
|
||||||
|
m_selectedUnigramIndex = 0;
|
||||||
|
} else {
|
||||||
|
m_selectedUnigramIndex = index;
|
||||||
|
}
|
||||||
|
m_candidateFixed = false;
|
||||||
|
m_score = score;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline const string& Node::key() const { return m_key; }
|
||||||
|
|
||||||
|
inline double Node::score() const { return m_score; }
|
||||||
|
|
||||||
|
inline double Node::scoreForCandidate(string& candidate) const {
|
||||||
|
for (auto unigram : m_unigrams) {
|
||||||
|
if (unigram.keyValue.value == candidate) {
|
||||||
|
return unigram.score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline double Node::highestUnigramScore() const {
|
||||||
|
if (m_unigrams.empty()) {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
return m_unigrams[0].score;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline const KeyValuePair Node::currentKeyValue() const {
|
||||||
|
if (m_selectedUnigramIndex >= m_unigrams.size()) {
|
||||||
|
return KeyValuePair();
|
||||||
|
} else {
|
||||||
|
return m_candidates[m_selectedUnigramIndex];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -31,49 +31,44 @@
|
||||||
#include "Node.h"
|
#include "Node.h"
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
class NodeAnchor {
|
class NodeAnchor {
|
||||||
public:
|
public:
|
||||||
NodeAnchor();
|
NodeAnchor();
|
||||||
const Node *node;
|
const Node* node;
|
||||||
size_t location;
|
size_t location;
|
||||||
size_t spanningLength;
|
size_t spanningLength;
|
||||||
double accumulatedScore;
|
double accumulatedScore;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline NodeAnchor::NodeAnchor()
|
|
||||||
: node(0)
|
|
||||||
, location(0)
|
|
||||||
, spanningLength(0)
|
|
||||||
, accumulatedScore(0.0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor)
|
inline NodeAnchor::NodeAnchor()
|
||||||
{
|
: node(0), location(0), spanningLength(0), accumulatedScore(0.0) {}
|
||||||
inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),";
|
|
||||||
if (inAnchor.node) {
|
inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) {
|
||||||
inStream << *(inAnchor.node);
|
inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength
|
||||||
}
|
<< "),";
|
||||||
else {
|
if (inAnchor.node) {
|
||||||
inStream << "null";
|
inStream << *(inAnchor.node);
|
||||||
}
|
} else {
|
||||||
inStream << "}";
|
inStream << "null";
|
||||||
return inStream;
|
}
|
||||||
}
|
inStream << "}";
|
||||||
|
return inStream;
|
||||||
inline ostream& operator<<(ostream& inStream, const vector<NodeAnchor>& inAnchor)
|
|
||||||
{
|
|
||||||
for (vector<NodeAnchor>::const_iterator i = inAnchor.begin() ; i != inAnchor.end() ; ++i) {
|
|
||||||
inStream << *i;
|
|
||||||
if (i + 1 != inAnchor.end()) {
|
|
||||||
inStream << "<-";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return inStream;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline ostream& operator<<(ostream& inStream,
|
||||||
|
const vector<NodeAnchor>& inAnchor) {
|
||||||
|
for (vector<NodeAnchor>::const_iterator i = inAnchor.begin();
|
||||||
|
i != inAnchor.end(); ++i) {
|
||||||
|
inStream << *i;
|
||||||
|
if (i + 1 != inAnchor.end()) {
|
||||||
|
inStream << "<-";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return inStream;
|
||||||
|
}
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -31,82 +31,75 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
#include "Node.h"
|
#include "Node.h"
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
class Span {
|
class Span {
|
||||||
public:
|
public:
|
||||||
Span();
|
Span();
|
||||||
|
|
||||||
void clear();
|
void clear();
|
||||||
void insertNodeOfLength(const Node& inNode, size_t inLength);
|
void insertNodeOfLength(const Node& inNode, size_t inLength);
|
||||||
void removeNodeOfLengthGreaterThan(size_t inLength);
|
void removeNodeOfLengthGreaterThan(size_t inLength);
|
||||||
|
|
||||||
Node* nodeOfLength(size_t inLength);
|
|
||||||
size_t maximumLength() const;
|
|
||||||
|
|
||||||
protected:
|
Node* nodeOfLength(size_t inLength);
|
||||||
map<size_t, Node> m_lengthNodeMap;
|
size_t maximumLength() const;
|
||||||
size_t m_maximumLength;
|
|
||||||
};
|
|
||||||
|
|
||||||
inline Span::Span()
|
|
||||||
: m_maximumLength(0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Span::clear()
|
|
||||||
{
|
|
||||||
m_lengthNodeMap.clear();
|
|
||||||
m_maximumLength = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength)
|
|
||||||
{
|
|
||||||
m_lengthNodeMap[inLength] = inNode;
|
|
||||||
if (inLength > m_maximumLength) {
|
|
||||||
m_maximumLength = inLength;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength)
|
|
||||||
{
|
|
||||||
if (inLength > m_maximumLength) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t max = 0;
|
|
||||||
set<size_t> removeSet;
|
|
||||||
for (map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) {
|
|
||||||
if ((*i).first > inLength) {
|
|
||||||
removeSet.insert((*i).first);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if ((*i).first > max) {
|
|
||||||
max = (*i).first;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) {
|
|
||||||
m_lengthNodeMap.erase(*i);
|
|
||||||
}
|
|
||||||
|
|
||||||
m_maximumLength = max;
|
protected:
|
||||||
}
|
map<size_t, Node> m_lengthNodeMap;
|
||||||
|
size_t m_maximumLength;
|
||||||
inline Node* Span::nodeOfLength(size_t inLength)
|
};
|
||||||
{
|
|
||||||
map<size_t, Node>::iterator f = m_lengthNodeMap.find(inLength);
|
inline Span::Span() : m_maximumLength(0) {}
|
||||||
return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
|
|
||||||
}
|
inline void Span::clear() {
|
||||||
|
m_lengthNodeMap.clear();
|
||||||
inline size_t Span::maximumLength() const
|
m_maximumLength = 0;
|
||||||
{
|
|
||||||
return m_maximumLength;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength) {
|
||||||
|
m_lengthNodeMap[inLength] = inNode;
|
||||||
|
if (inLength > m_maximumLength) {
|
||||||
|
m_maximumLength = inLength;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength) {
|
||||||
|
if (inLength > m_maximumLength) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t max = 0;
|
||||||
|
set<size_t> removeSet;
|
||||||
|
for (map<size_t, Node>::iterator i = m_lengthNodeMap.begin(),
|
||||||
|
e = m_lengthNodeMap.end();
|
||||||
|
i != e; ++i) {
|
||||||
|
if ((*i).first > inLength) {
|
||||||
|
removeSet.insert((*i).first);
|
||||||
|
} else {
|
||||||
|
if ((*i).first > max) {
|
||||||
|
max = (*i).first;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e;
|
||||||
|
++i) {
|
||||||
|
m_lengthNodeMap.erase(*i);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_maximumLength = max;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Node* Span::nodeOfLength(size_t inLength) {
|
||||||
|
map<size_t, Node>::iterator f = m_lengthNodeMap.find(inLength);
|
||||||
|
return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline size_t Span::maximumLength() const { return m_maximumLength; }
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -29,76 +29,69 @@
|
||||||
#define Unigram_h
|
#define Unigram_h
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "KeyValuePair.h"
|
#include "KeyValuePair.h"
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
class Unigram {
|
class Unigram {
|
||||||
public:
|
public:
|
||||||
Unigram();
|
Unigram();
|
||||||
|
|
||||||
KeyValuePair keyValue;
|
KeyValuePair keyValue;
|
||||||
double score;
|
double score;
|
||||||
|
|
||||||
bool operator==(const Unigram& inAnother) const;
|
|
||||||
bool operator<(const Unigram& inAnother) const;
|
|
||||||
|
|
||||||
static bool ScoreCompare(const Unigram& a, const Unigram& b);
|
|
||||||
};
|
|
||||||
|
|
||||||
inline ostream& operator<<(ostream& inStream, const Unigram& inGram)
|
bool operator==(const Unigram& inAnother) const;
|
||||||
{
|
bool operator<(const Unigram& inAnother) const;
|
||||||
streamsize p = inStream.precision();
|
|
||||||
inStream.precision(6);
|
|
||||||
inStream << "(" << inGram.keyValue << "," << inGram.score << ")";
|
|
||||||
inStream.precision(p);
|
|
||||||
return inStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams)
|
|
||||||
{
|
|
||||||
inStream << "[" << inGrams.size() << "]=>{";
|
|
||||||
|
|
||||||
size_t index = 0;
|
|
||||||
|
|
||||||
for (vector<Unigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) {
|
|
||||||
inStream << index << "=>";
|
|
||||||
inStream << *gi;
|
|
||||||
if (gi + 1 != inGrams.end()) {
|
|
||||||
inStream << ",";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inStream << "}";
|
|
||||||
return inStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Unigram::Unigram()
|
|
||||||
: score(0.0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool Unigram::operator==(const Unigram& inAnother) const
|
|
||||||
{
|
|
||||||
return keyValue == inAnother.keyValue && score == inAnother.score;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool Unigram::operator<(const Unigram& inAnother) const
|
|
||||||
{
|
|
||||||
if (keyValue < inAnother.keyValue) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (keyValue == inAnother.keyValue) {
|
|
||||||
return score < inAnother.score;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b)
|
static bool ScoreCompare(const Unigram& a, const Unigram& b);
|
||||||
{
|
};
|
||||||
return a.score > b.score;
|
|
||||||
}
|
inline ostream& operator<<(ostream& inStream, const Unigram& inGram) {
|
||||||
}
|
streamsize p = inStream.precision();
|
||||||
|
inStream.precision(6);
|
||||||
|
inStream << "(" << inGram.keyValue << "," << inGram.score << ")";
|
||||||
|
inStream.precision(p);
|
||||||
|
return inStream;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams) {
|
||||||
|
inStream << "[" << inGrams.size() << "]=>{";
|
||||||
|
|
||||||
|
size_t index = 0;
|
||||||
|
|
||||||
|
for (vector<Unigram>::const_iterator gi = inGrams.begin();
|
||||||
|
gi != inGrams.end(); ++gi, ++index) {
|
||||||
|
inStream << index << "=>";
|
||||||
|
inStream << *gi;
|
||||||
|
if (gi + 1 != inGrams.end()) {
|
||||||
|
inStream << ",";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inStream << "}";
|
||||||
|
return inStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Unigram::Unigram() : score(0.0) {}
|
||||||
|
|
||||||
|
inline bool Unigram::operator==(const Unigram& inAnother) const {
|
||||||
|
return keyValue == inAnother.keyValue && score == inAnother.score;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool Unigram::operator<(const Unigram& inAnother) const {
|
||||||
|
if (keyValue < inAnother.keyValue) {
|
||||||
|
return true;
|
||||||
|
} else if (keyValue == inAnother.keyValue) {
|
||||||
|
return score < inAnother.score;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) {
|
||||||
|
return a.score > b.score;
|
||||||
|
}
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -29,63 +29,65 @@
|
||||||
#define Walker_h
|
#define Walker_h
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
#include "Grid.h"
|
#include "Grid.h"
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Gramambular {
|
namespace Gramambular {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class Walker {
|
class Walker {
|
||||||
public:
|
public:
|
||||||
Walker(Grid* inGrid);
|
Walker(Grid* inGrid);
|
||||||
const vector<NodeAnchor> reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0);
|
const vector<NodeAnchor> reverseWalk(size_t inLocation,
|
||||||
|
double inAccumulatedScore = 0.0);
|
||||||
protected:
|
|
||||||
Grid* m_grid;
|
|
||||||
};
|
|
||||||
|
|
||||||
inline Walker::Walker(Grid* inGrid)
|
|
||||||
: m_grid(inGrid)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation, double inAccumulatedScore)
|
|
||||||
{
|
|
||||||
if (!inLocation || inLocation > m_grid->width()) {
|
|
||||||
return vector<NodeAnchor>();
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<vector<NodeAnchor> > paths;
|
|
||||||
|
|
||||||
vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation);
|
protected:
|
||||||
|
Grid* m_grid;
|
||||||
for (vector<NodeAnchor>::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) {
|
};
|
||||||
if (!(*ni).node) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
(*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score();
|
inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {}
|
||||||
|
|
||||||
vector<NodeAnchor> path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore);
|
inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation,
|
||||||
path.insert(path.begin(), *ni);
|
double inAccumulatedScore) {
|
||||||
|
if (!inLocation || inLocation > m_grid->width()) {
|
||||||
paths.push_back(path);
|
return vector<NodeAnchor>();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!paths.size()) {
|
vector<vector<NodeAnchor> > paths;
|
||||||
return vector<NodeAnchor>();
|
|
||||||
}
|
vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation);
|
||||||
|
|
||||||
vector<NodeAnchor>* result = &*(paths.begin());
|
for (vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end();
|
||||||
for (vector<vector<NodeAnchor> >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) {
|
++ni) {
|
||||||
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
|
if (!(*ni).node) {
|
||||||
result = &*pi;
|
continue;
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return *result;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score();
|
||||||
|
|
||||||
|
vector<NodeAnchor> path =
|
||||||
|
reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore);
|
||||||
|
path.insert(path.begin(), *ni);
|
||||||
|
|
||||||
|
paths.push_back(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!paths.size()) {
|
||||||
|
return vector<NodeAnchor>();
|
||||||
|
}
|
||||||
|
|
||||||
|
vector<NodeAnchor>* result = &*(paths.begin());
|
||||||
|
for (vector<vector<NodeAnchor> >::iterator pi = paths.begin();
|
||||||
|
pi != paths.end(); ++pi) {
|
||||||
|
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
|
||||||
|
result = &*pi;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return *result;
|
||||||
}
|
}
|
||||||
|
} // namespace Gramambular
|
||||||
|
} // namespace Formosa
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue