Merge pull request #298 from lukhnos/gramambular-modernization

Gramambular modernization
This commit is contained in:
Lukhnos Liu 2022-02-20 08:32:57 -08:00 committed by GitHub
commit ccd035a666
17 changed files with 1220 additions and 945 deletions

View File

@ -24,6 +24,12 @@ jobs:
- name: Run MandarinTest - name: Run MandarinTest
run: make runTest run: make runTest
working-directory: Source/Engine/Mandarin/build working-directory: Source/Engine/Mandarin/build
- name: Build GramambularTest
run: cmake -S . -B build
working-directory: Source/Engine/Gramambular
- name: Run GramambularTest
run: make runTest
working-directory: Source/Engine/Gramambular/build
- name: Test McBopomofo App Bundle - name: Test McBopomofo App Bundle
run: xcodebuild -scheme McBopomofo -configuration Debug test run: xcodebuild -scheme McBopomofo -configuration Debug test
- name: Test CandidateUI - name: Test CandidateUI

View File

@ -24,6 +24,12 @@ jobs:
- name: Run MandarinTest - name: Run MandarinTest
run: make runTest run: make runTest
working-directory: Source/Engine/Mandarin/build working-directory: Source/Engine/Mandarin/build
- name: Build GramambularTest
run: cmake -S . -B build
working-directory: Source/Engine/Gramambular
- name: Run GramambularTest
run: make runTest
working-directory: Source/Engine/Gramambular/build
- name: Test McBopomofo App Bundle - name: Test McBopomofo App Bundle
run: xcodebuild -scheme McBopomofo -configuration Debug test run: xcodebuild -scheme McBopomofo -configuration Debug test
- name: Test CandidateUI - name: Test CandidateUI

View File

@ -20,6 +20,7 @@
6A6ED16C2797650A0012872E /* template-data.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6A6ED1652797650A0012872E /* template-data.txt */; }; 6A6ED16C2797650A0012872E /* template-data.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6A6ED1652797650A0012872E /* template-data.txt */; };
6A6ED16D2797650A0012872E /* template-exclude-phrases-plain-bpmf.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6A6ED1672797650A0012872E /* template-exclude-phrases-plain-bpmf.txt */; }; 6A6ED16D2797650A0012872E /* template-exclude-phrases-plain-bpmf.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6A6ED1672797650A0012872E /* template-exclude-phrases-plain-bpmf.txt */; };
6A6ED16E2797650A0012872E /* template-exclude-phrases.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6A6ED1692797650A0012872E /* template-exclude-phrases.txt */; }; 6A6ED16E2797650A0012872E /* template-exclude-phrases.txt in Resources */ = {isa = PBXBuildFile; fileRef = 6A6ED1692797650A0012872E /* template-exclude-phrases.txt */; };
6A74B14927C16845001988F4 /* Grid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6A74B14827C16845001988F4 /* Grid.cpp */; };
6ACA41FA15FC1D9000935EF6 /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EA15FC1D9000935EF6 /* InfoPlist.strings */; }; 6ACA41FA15FC1D9000935EF6 /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EA15FC1D9000935EF6 /* InfoPlist.strings */; };
6ACA41FB15FC1D9000935EF6 /* License.rtf in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EC15FC1D9000935EF6 /* License.rtf */; }; 6ACA41FB15FC1D9000935EF6 /* License.rtf in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EC15FC1D9000935EF6 /* License.rtf */; };
6ACA41FC15FC1D9000935EF6 /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EE15FC1D9000935EF6 /* Localizable.strings */; }; 6ACA41FC15FC1D9000935EF6 /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EE15FC1D9000935EF6 /* Localizable.strings */; };
@ -59,8 +60,8 @@
D485D3B92796A8A000657FF3 /* PreferencesTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D485D3B82796A8A000657FF3 /* PreferencesTests.swift */; }; D485D3B92796A8A000657FF3 /* PreferencesTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D485D3B82796A8A000657FF3 /* PreferencesTests.swift */; };
D485D3C02796CE3200657FF3 /* VersionUpdateTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D485D3BF2796CE3200657FF3 /* VersionUpdateTests.swift */; }; D485D3C02796CE3200657FF3 /* VersionUpdateTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D485D3BF2796CE3200657FF3 /* VersionUpdateTests.swift */; };
D4A13D5A27A59F0B003BE359 /* InputMethodController.swift in Sources */ = {isa = PBXBuildFile; fileRef = D4A13D5927A59D5C003BE359 /* InputMethodController.swift */; }; D4A13D5A27A59F0B003BE359 /* InputMethodController.swift in Sources */ = {isa = PBXBuildFile; fileRef = D4A13D5927A59D5C003BE359 /* InputMethodController.swift */; };
D4C9CAB127AAC9690058DFEA /* NSStringUtils in Frameworks */ = {isa = PBXBuildFile; productRef = D4C9CAB027AAC9690058DFEA /* NSStringUtils */; };
D4A8E43627A9E982002F7A07 /* KeyHandlerPlainBopomofoTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D4A8E43527A9E982002F7A07 /* KeyHandlerPlainBopomofoTests.swift */; }; D4A8E43627A9E982002F7A07 /* KeyHandlerPlainBopomofoTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = D4A8E43527A9E982002F7A07 /* KeyHandlerPlainBopomofoTests.swift */; };
D4C9CAB127AAC9690058DFEA /* NSStringUtils in Frameworks */ = {isa = PBXBuildFile; productRef = D4C9CAB027AAC9690058DFEA /* NSStringUtils */; };
D4E33D8A27A838CF006DB1CF /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = D4E33D8827A838CF006DB1CF /* Localizable.strings */; }; D4E33D8A27A838CF006DB1CF /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = D4E33D8827A838CF006DB1CF /* Localizable.strings */; };
D4E33D8F27A838F0006DB1CF /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = D4E33D8D27A838F0006DB1CF /* InfoPlist.strings */; }; D4E33D8F27A838F0006DB1CF /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = D4E33D8D27A838F0006DB1CF /* InfoPlist.strings */; };
D4E569DC27A34D0E00AC2CEF /* KeyHandler.mm in Sources */ = {isa = PBXBuildFile; fileRef = D4E569DB27A34CC100AC2CEF /* KeyHandler.mm */; }; D4E569DC27A34D0E00AC2CEF /* KeyHandler.mm in Sources */ = {isa = PBXBuildFile; fileRef = D4E569DB27A34CC100AC2CEF /* KeyHandler.mm */; };
@ -131,6 +132,7 @@
6A6ED170279765140012872E /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/template-exclude-phrases-plain-bpmf.txt"; sourceTree = "<group>"; }; 6A6ED170279765140012872E /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/template-exclude-phrases-plain-bpmf.txt"; sourceTree = "<group>"; };
6A6ED171279765170012872E /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/template-exclude-phrases.txt"; sourceTree = "<group>"; }; 6A6ED171279765170012872E /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/template-exclude-phrases.txt"; sourceTree = "<group>"; };
6A6ED1722797651A0012872E /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/template-phrases-replacement.txt"; sourceTree = "<group>"; }; 6A6ED1722797651A0012872E /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text; name = "zh-Hant"; path = "zh-Hant.lproj/template-phrases-replacement.txt"; sourceTree = "<group>"; };
6A74B14827C16845001988F4 /* Grid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Grid.cpp; path = Grid.cpp; sourceTree = "<group>"; };
6A93050C279877FF00D370DA /* McBopomofoInstaller-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofoInstaller-Bridging-Header.h"; sourceTree = "<group>"; }; 6A93050C279877FF00D370DA /* McBopomofoInstaller-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofoInstaller-Bridging-Header.h"; sourceTree = "<group>"; };
6ACA41CB15FC1D7500935EF6 /* McBopomofoInstaller.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = McBopomofoInstaller.app; sourceTree = BUILT_PRODUCTS_DIR; }; 6ACA41CB15FC1D7500935EF6 /* McBopomofoInstaller.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = McBopomofoInstaller.app; sourceTree = BUILT_PRODUCTS_DIR; };
6ACA41EB15FC1D9000935EF6 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = "<group>"; }; 6ACA41EB15FC1D9000935EF6 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = "<group>"; };
@ -186,8 +188,8 @@
D485D3BF2796CE3200657FF3 /* VersionUpdateTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VersionUpdateTests.swift; sourceTree = "<group>"; }; D485D3BF2796CE3200657FF3 /* VersionUpdateTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VersionUpdateTests.swift; sourceTree = "<group>"; };
D495583A27A5C6C4006ADE1C /* LanguageModelManager+Privates.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "LanguageModelManager+Privates.h"; sourceTree = "<group>"; }; D495583A27A5C6C4006ADE1C /* LanguageModelManager+Privates.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "LanguageModelManager+Privates.h"; sourceTree = "<group>"; };
D4A13D5927A59D5C003BE359 /* InputMethodController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputMethodController.swift; sourceTree = "<group>"; }; D4A13D5927A59D5C003BE359 /* InputMethodController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputMethodController.swift; sourceTree = "<group>"; };
D4C9CAAF27AAC8EC0058DFEA /* NSStringUtils */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = NSStringUtils; path = Packages/NSStringUtils; sourceTree = "<group>"; };
D4A8E43527A9E982002F7A07 /* KeyHandlerPlainBopomofoTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeyHandlerPlainBopomofoTests.swift; sourceTree = "<group>"; }; D4A8E43527A9E982002F7A07 /* KeyHandlerPlainBopomofoTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeyHandlerPlainBopomofoTests.swift; sourceTree = "<group>"; };
D4C9CAAF27AAC8EC0058DFEA /* NSStringUtils */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = NSStringUtils; path = Packages/NSStringUtils; sourceTree = "<group>"; };
D4E33D8927A838CF006DB1CF /* Base */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = Base; path = Base.lproj/Localizable.strings; sourceTree = "<group>"; }; D4E33D8927A838CF006DB1CF /* Base */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = Base; path = Base.lproj/Localizable.strings; sourceTree = "<group>"; };
D4E33D8B27A838D5006DB1CF /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = "<group>"; }; D4E33D8B27A838D5006DB1CF /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = "<group>"; };
D4E33D8C27A838D8006DB1CF /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/Localizable.strings"; sourceTree = "<group>"; }; D4E33D8C27A838D8006DB1CF /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/Localizable.strings"; sourceTree = "<group>"; };
@ -329,6 +331,7 @@
6A0D4F1415FC0EB100ABF4B3 /* Bigram.h */, 6A0D4F1415FC0EB100ABF4B3 /* Bigram.h */,
6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */, 6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */,
6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */, 6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */,
6A74B14827C16845001988F4 /* Grid.cpp */,
6A0D4F1715FC0EB100ABF4B3 /* Grid.h */, 6A0D4F1715FC0EB100ABF4B3 /* Grid.h */,
6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */, 6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */,
6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */, 6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */,
@ -669,6 +672,7 @@
D41355DE278EA3ED005E5CBD /* UserPhrasesLM.cpp in Sources */, D41355DE278EA3ED005E5CBD /* UserPhrasesLM.cpp in Sources */,
6ACC3D3F27914F2400F1B140 /* KeyValueBlobReader.cpp in Sources */, 6ACC3D3F27914F2400F1B140 /* KeyValueBlobReader.cpp in Sources */,
D41355D8278D74B5005E5CBD /* LanguageModelManager.mm in Sources */, D41355D8278D74B5005E5CBD /* LanguageModelManager.mm in Sources */,
6A74B14927C16845001988F4 /* Grid.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };

View File

@ -25,82 +25,77 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef Bigram_h #ifndef BIGRAM_H_
#define Bigram_h #define BIGRAM_H_
#include <vector> #include <vector>
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class Bigram { class Bigram {
public: public:
Bigram(); Bigram();
KeyValuePair preceedingKeyValue; KeyValuePair preceedingKeyValue;
KeyValuePair keyValue; KeyValuePair keyValue;
double score; double score;
bool operator==(const Bigram& inAnother) const; bool operator==(const Bigram& another) const;
bool operator<(const Bigram& inAnother) const; bool operator<(const Bigram& another) const;
}; };
inline ostream& operator<<(ostream& inStream, const Bigram& inGram) inline std::ostream& operator<<(std::ostream& stream, const Bigram& gram) {
{ std::streamsize p = stream.precision();
streamsize p = inStream.precision(); stream.precision(6);
inStream.precision(6); stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << ","
inStream << "(" << inGram.keyValue << "|" <<inGram.preceedingKeyValue << "," << inGram.score << ")"; << gram.score << ")";
inStream.precision(p); stream.precision(p);
return inStream; return stream;
}
inline ostream& operator<<(ostream& inStream, const vector<Bigram>& inGrams)
{
inStream << "[" << inGrams.size() << "]=>{";
size_t index = 0;
for (vector<Bigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) {
inStream << index << "=>";
inStream << *gi;
if (gi + 1 != inGrams.end()) {
inStream << ",";
}
}
inStream << "}";
return inStream;
}
inline Bigram::Bigram()
: score(0.0)
{
}
inline bool Bigram::operator==(const Bigram& inAnother) const
{
return preceedingKeyValue == inAnother.preceedingKeyValue && keyValue == inAnother.keyValue && score == inAnother.score;
}
inline bool Bigram::operator<(const Bigram& inAnother) const
{
if (preceedingKeyValue < inAnother.preceedingKeyValue) {
return true;
}
else if (preceedingKeyValue == inAnother.preceedingKeyValue) {
if (keyValue < inAnother.keyValue) {
return true;
}
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score;
}
return false;
}
return false;
}
}
} }
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<Bigram>& grams) {
stream << "[" << grams.size() << "]=>{";
size_t index = 0;
for (std::vector<Bigram>::const_iterator gi = grams.begin();
gi != grams.end(); ++gi, ++index) {
stream << index << "=>";
stream << *gi;
if (gi + 1 != grams.end()) {
stream << ",";
}
}
stream << "}";
return stream;
}
// Default constructor: the score starts at zero; the two key-value members are
// default-initialized.
inline Bigram::Bigram()
    : score(0.0) {
}
// Two bigrams are equal when the preceding key-value, the key-value and the
// score all compare equal (checked in that order).
inline bool Bigram::operator==(const Bigram& another) const {
  if (!(preceedingKeyValue == another.preceedingKeyValue)) {
    return false;
  }
  if (!(keyValue == another.keyValue)) {
    return false;
  }
  return score == another.score;
}
// Lexicographic ordering: first by the preceding key-value, then by the
// key-value, and finally by score.
inline bool Bigram::operator<(const Bigram& another) const {
  if (preceedingKeyValue < another.preceedingKeyValue) {
    return true;
  }
  // Only fall through to the next field when this one ties.
  if (!(preceedingKeyValue == another.preceedingKeyValue)) {
    return false;
  }

  if (keyValue < another.keyValue) {
    return true;
  }
  if (!(keyValue == another.keyValue)) {
    return false;
  }

  return score < another.score;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -25,202 +25,190 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef BlockReadingBuilder_h #ifndef BLOCKREADINGBUILDER_H_
#define BlockReadingBuilder_h #define BLOCKREADINGBUILDER_H_
#include <string>
#include <vector> #include <vector>
#include "Grid.h" #include "Grid.h"
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std;
class BlockReadingBuilder { class BlockReadingBuilder {
public: public:
BlockReadingBuilder(LanguageModel *inLM); explicit BlockReadingBuilder(LanguageModel* lm);
void clear(); void clear();
size_t length() const; size_t length() const;
size_t cursorIndex() const; size_t cursorIndex() const;
void setCursorIndex(size_t inNewIndex); void setCursorIndex(size_t newIndex);
void insertReadingAtCursor(const string& inReading); void insertReadingAtCursor(const std::string& reading);
bool deleteReadingBeforeCursor(); // backspace bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete bool deleteReadingAfterCursor(); // delete
bool removeHeadReadings(size_t count); bool removeHeadReadings(size_t count);
void setJoinSeparator(const string& separator); void setJoinSeparator(const std::string& separator);
const string joinSeparator() const; const std::string joinSeparator() const;
vector<string> readings() const; std::vector<std::string> readings() const;
Grid& grid(); Grid& grid();
protected: protected:
void build(); void build();
static const string Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator); static const std::string Join(std::vector<std::string>::const_iterator begin,
std::vector<std::string>::const_iterator end,
const std::string& separator);
//最多使用六個字組成一個詞 // 最多使用六個字組成一個詞
static const size_t MaximumBuildSpanLength = 6; static const size_t MaximumBuildSpanLength = 6;
size_t m_cursorIndex; size_t m_cursorIndex;
vector<string> m_readings; std::vector<std::string> m_readings;
Grid m_grid; Grid m_grid;
LanguageModel *m_LM; LanguageModel* m_LM;
string m_joinSeparator; std::string m_joinSeparator;
}; };
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM) inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* lm)
: m_LM(inLM) : m_LM(lm), m_cursorIndex(0) {}
, m_cursorIndex(0)
{
}
inline void BlockReadingBuilder::clear() inline void BlockReadingBuilder::clear() {
{ m_cursorIndex = 0;
m_cursorIndex = 0; m_readings.clear();
m_readings.clear(); m_grid.clear();
m_grid.clear();
}
inline size_t BlockReadingBuilder::length() const
{
return m_readings.size();
}
inline size_t BlockReadingBuilder::cursorIndex() const
{
return m_cursorIndex;
}
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex)
{
m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex;
}
inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading)
{
m_readings.insert(m_readings.begin() + m_cursorIndex, inReading);
m_grid.expandGridByOneAtLocation(m_cursorIndex);
build();
m_cursorIndex++;
}
inline vector<string> BlockReadingBuilder::readings() const
{
return m_readings;
}
inline bool BlockReadingBuilder::deleteReadingBeforeCursor()
{
if (!m_cursorIndex) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex);
m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::deleteReadingAfterCursor()
{
if (m_cursorIndex == m_readings.size()) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1);
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::removeHeadReadings(size_t count)
{
if (count > length()) {
return false;
}
for (size_t i = 0; i < count; i++) {
if (m_cursorIndex) {
m_cursorIndex--;
}
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
m_grid.shrinkGridByOneAtLocation(0);
build();
}
return true;
}
inline void BlockReadingBuilder::setJoinSeparator(const string& separator)
{
m_joinSeparator = separator;
}
inline const string BlockReadingBuilder::joinSeparator() const
{
return m_joinSeparator;
}
inline Grid& BlockReadingBuilder::grid()
{
return m_grid;
}
inline void BlockReadingBuilder::build()
{
if (!m_LM) {
return;
}
size_t begin = 0;
size_t end = m_cursorIndex + MaximumBuildSpanLength;
if (m_cursorIndex < MaximumBuildSpanLength) {
begin = 0;
}
else {
begin = m_cursorIndex - MaximumBuildSpanLength;
}
if (end > m_readings.size()) {
end = m_readings.size();
}
for (size_t p = begin ; p < end ; p++) {
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) {
Node n(combinedReading, unigrams, vector<Bigram>());
m_grid.insertNode(n, p, q);
}
}
}
}
}
inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator)
{
string result;
for (vector<string>::const_iterator iter = begin ; iter != end ; ) {
result += *iter;
++iter;
if (iter != end) {
result += separator;
}
}
return result;
}
}
} }
// Number of readings currently held by the builder.
inline size_t BlockReadingBuilder::length() const {
  return m_readings.size();
}
// Current cursor position, expressed as an index into the readings.
inline size_t BlockReadingBuilder::cursorIndex() const {
  return m_cursorIndex;
}
// Moves the cursor to newIndex, clamping it to the number of readings so the
// cursor can sit at most one past the last reading.
inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) {
  const size_t limit = m_readings.size();
  if (newIndex > limit) {
    m_cursorIndex = limit;
  } else {
    m_cursorIndex = newIndex;
  }
}
// Inserts a reading at the cursor, grows the grid at the same location,
// rebuilds the affected nodes, and then advances the cursor past the new
// reading.
inline void BlockReadingBuilder::insertReadingAtCursor(
    const std::string& reading) {
  const size_t at = m_cursorIndex;
  m_readings.insert(m_readings.begin() + at, reading);
  m_grid.expandGridByOneAtLocation(at);
  // build() runs while the cursor still points at the insertion position.
  build();
  ++m_cursorIndex;
}
// Returns a copy of the readings currently held by the builder.
inline std::vector<std::string> BlockReadingBuilder::readings() const {
  std::vector<std::string> copy(m_readings);
  return copy;
}
// Backspace: removes the reading immediately before the cursor, shrinks the
// grid at that position and rebuilds. Returns false (and does nothing) when
// the cursor is already at the start.
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() {
  if (m_cursorIndex == 0) {
    return false;
  }

  // Step the cursor back first; it then indexes the reading to remove.
  --m_cursorIndex;
  m_readings.erase(m_readings.begin() + m_cursorIndex);
  m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
  build();
  return true;
}
// Forward delete: removes the reading at the cursor, shrinks the grid at that
// position and rebuilds. Returns false (and does nothing) when the cursor is
// already at the end of the readings.
inline bool BlockReadingBuilder::deleteReadingAfterCursor() {
  const bool atEnd = (m_cursorIndex == m_readings.size());
  if (atEnd) {
    return false;
  }

  m_readings.erase(m_readings.begin() + m_cursorIndex);
  m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
  build();
  return true;
}
// Removes `count` readings from the front, one at a time. After each removal
// the grid is shrunk at location 0 and rebuilt, and the cursor is pulled back
// by one unless it is already at 0. Returns false without changing anything
// when `count` exceeds the number of readings.
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
  if (length() < count) {
    return false;
  }

  for (size_t remaining = count; remaining > 0; --remaining) {
    if (m_cursorIndex > 0) {
      --m_cursorIndex;
    }
    m_readings.erase(m_readings.begin());
    m_grid.shrinkGridByOneAtLocation(0);
    build();
  }
  return true;
}
// Sets the separator that build() passes to Join() when combining readings
// into a single key.
inline void BlockReadingBuilder::setJoinSeparator(
    const std::string& separator) {
  m_joinSeparator.assign(separator);
}
// Returns a copy of the separator currently used to join readings.
inline const std::string BlockReadingBuilder::joinSeparator() const {
  const std::string separator(m_joinSeparator);
  return separator;
}
// Gives mutable access to the builder's grid.
inline Grid& BlockReadingBuilder::grid() {
  return m_grid;
}
// Populates the grid with nodes for reading spans near the cursor.
//
// The window covers up to MaximumBuildSpanLength readings on each side of the
// cursor (clamped to the available readings). Every span of 1 to
// MaximumBuildSpanLength readings inside the window is joined into a key; if
// the grid does not already report a matching node there and the language
// model returns at least one unigram for the key, a node is inserted.
// Does nothing when no language model is set.
inline void BlockReadingBuilder::build() {
  if (!m_LM) {
    return;
  }

  const size_t begin = m_cursorIndex < MaximumBuildSpanLength
                           ? 0
                           : m_cursorIndex - MaximumBuildSpanLength;

  const size_t total = m_readings.size();
  size_t end = m_cursorIndex + MaximumBuildSpanLength;
  if (end > total) {
    end = total;
  }

  for (size_t start = begin; start < end; ++start) {
    for (size_t len = 1; len <= MaximumBuildSpanLength && start + len <= end;
         ++len) {
      const std::vector<std::string>::const_iterator first =
          m_readings.begin() + start;
      std::string combinedReading = Join(first, first + len, m_joinSeparator);

      if (m_grid.hasNodeAtLocationSpanningLengthMatchingKey(start, len,
                                                            combinedReading)) {
        continue;
      }

      std::vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
      if (unigrams.size() == 0) {
        continue;
      }

      Node n(combinedReading, unigrams, std::vector<Bigram>());
      m_grid.insertNode(n, start, len);
    }
  }
}
// Concatenates the strings in [begin, end), placing `separator` between
// consecutive elements. An empty range yields an empty string.
inline const std::string BlockReadingBuilder::Join(
    std::vector<std::string>::const_iterator begin,
    std::vector<std::string>::const_iterator end,
    const std::string& separator) {
  std::string joined;
  std::vector<std::string>::const_iterator it = begin;
  if (it == end) {
    return joined;
  }

  // First element goes in bare; every later one is prefixed by the separator.
  joined += *it;
  for (++it; it != end; ++it) {
    joined += separator;
    joined += *it;
  }
  return joined;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -0,0 +1,31 @@
cmake_minimum_required(VERSION 3.17)
project(Gramambular)

# Require C++17 outright instead of letting CMake silently fall back to an
# older standard when the compiler lacks support.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_library(GramambularLib Bigram.h BlockReadingBuilder.h Gramambular.h Grid.h Grid.cpp KeyValuePair.h LanguageModel.h Node.h NodeAnchor.h Span.h Unigram.h Walker.h)

# Let CMake fetch Google Test for us.
# https://github.com/google/googletest/tree/main/googletest#incorporating-into-an-existing-cmake-project
include(FetchContent)
FetchContent_Declare(
  googletest
  # Specify the commit you depend on and update it regularly.
  URL https://github.com/google/googletest/archive/609281088cfefc76f9d0ce82e1ff6c30cc3591e5.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)

# Without enable_testing(), the tests registered by gtest_discover_tests() are
# never written to a CTest file and `ctest` reports that no tests were found.
enable_testing()

# Test target declarations.
add_executable(GramambularTest GramambularTest.cpp)
target_link_libraries(GramambularTest gtest_main GramambularLib)
include(GoogleTest)
gtest_discover_tests(GramambularTest)

# Convenience target used by CI: `make runTest` builds and runs the test binary.
add_custom_target(
  runTest
  COMMAND ${CMAKE_CURRENT_BINARY_DIR}/GramambularTest
)
add_dependencies(runTest GramambularTest)

View File

@ -25,8 +25,8 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef Gramambular_h #ifndef GRAMAMBULAR_H_
#define Gramambular_h #define GRAMAMBULAR_H_
#include "Bigram.h" #include "Bigram.h"
#include "BlockReadingBuilder.h" #include "BlockReadingBuilder.h"

View File

@ -0,0 +1,247 @@
// Copyright (c) 2022 and onwards Lukhnos Liu
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>
#include "Gramambular.h"
#include "gtest/gtest.h"
namespace Formosa {
namespace Gramambular {
const char* SampleData = R"(
#
# The sample is from libtabe (http://sourceforge.net/projects/libtabe/)
# last updated in 2002. The project was originally initiated by
# Pai-Hsiang Hsiao in 1999.
#
# Libtabe is a frequency table of Taiwanese Mandarin words. The database
# itself is, according to the tar file, released under the BSD License.
#
-9.495858
-9.006414
-99.000000
-8.091803
-99.000000
-13.513987
-12.259095
-7.171551
-10.574273
-11.504072
-10.450457
-7.171052
-99.000000
-11.928720
-13.624335
-12.390804
˙ -3.516024
ˊ -3.516024
ˋ -3.516024
-5.809297
˙ -7.427179
-8.381971
-8.501463
ˋ -99.000000
-8.034095
-8.858181
ˋ -7.608341
ˋ -99.000000
-7.290109
ˋ -10.939895
-99.000000
ˋ -99.000000
ˋ -99.000000
-99.000000
ˋ -9.715317
ˋ -7.926683
ˋ -8.373022
-9.877580
-10.711079
-7.877973
-7.822167
-99.000000
-99.000000
-99.000000
-9.685671
ˋ -10.425662
-99.000000
-99.000000
ˋ -8.888722
ˋ -10.204425
-11.378321
-99.000000
ˋ -99.000000
ˋ -8.450826
-11.074890
-99.000000
ˋ -12.045357
-99.000000
ˋ -99.000000
ˋ -9.517568
ˋ -12.021587
-99.000000
-12.784206
ˊ -6.086515
ˇ -9.164384
ˇ -8.690941
ˇ -10.127828
ˊ -11.336864
ˊ -11.285740
ˇ -12.492933
-6.299461
ˋ -6.736613
ˋ -13.336653
ˇ -10.344678
ˊ -11.668947
ˊ -11.373044
ˋ -9.842421
)";
class SimpleLM : public LanguageModel {
public:
SimpleLM(const char* input, bool swapKeyValue = false) {
std::stringstream sstream(input);
while (sstream.good()) {
std::string line;
getline(sstream, line);
if (!line.size() || (line.size() && line[0] == '#')) {
continue;
}
std::stringstream linestream(line);
std::string col0;
std::string col1;
std::string col2;
linestream >> col0;
linestream >> col1;
linestream >> col2;
Unigram u;
if (swapKeyValue) {
u.keyValue.key = col1;
u.keyValue.value = col0;
} else {
u.keyValue.key = col0;
u.keyValue.value = col1;
}
u.score = atof(col2.c_str());
m_db[u.keyValue.key].push_back(u);
}
}
const std::vector<Bigram> bigramsForKeys(const std::string& preceedingKey,
const std::string& key) override {
return std::vector<Bigram>();
}
const std::vector<Unigram> unigramsForKey(const std::string& key) override {
std::map<std::string, std::vector<Unigram> >::const_iterator f =
m_db.find(key);
return f == m_db.end() ? std::vector<Unigram>() : (*f).second;
}
bool hasUnigramsForKey(const std::string& key) override {
std::map<std::string, std::vector<Unigram> >::const_iterator f =
m_db.find(key);
return f != m_db.end();
}
protected:
std::map<std::string, std::vector<Unigram> > m_db;
};
// Feeds readings into the builder with some cursor movement and a deletion in
// between, walks the resulting grid, and checks the composed values.
TEST(GramambularTest, InputTest) {
  SimpleLM lm(SampleData);

  BlockReadingBuilder builder(&lm);
  builder.insertReadingAtCursor("ㄍㄠ");
  builder.insertReadingAtCursor("ㄐㄧˋ");
  builder.setCursorIndex(1);
  builder.insertReadingAtCursor("ㄎㄜ");
  builder.setCursorIndex(0);
  builder.deleteReadingAfterCursor();
  builder.insertReadingAtCursor("ㄍㄠ");
  builder.setCursorIndex(builder.length());
  builder.insertReadingAtCursor("ㄍㄨㄥ");
  builder.insertReadingAtCursor("");
  builder.insertReadingAtCursor("ㄉㄜ˙");
  builder.insertReadingAtCursor("ㄋㄧㄢˊ");
  builder.insertReadingAtCursor("ㄓㄨㄥ");
  builder.insertReadingAtCursor("ㄐㄧㄤˇ");
  builder.insertReadingAtCursor("ㄐㄧㄣ");

  Walker walker(&builder.grid());
  std::vector<NodeAnchor> walked =
      walker.reverseWalk(builder.grid().width(), 0.0);
  // reverseWalk yields anchors back-to-front; put them in reading order.
  // Qualified with std:: so the call does not depend on ADL finding it
  // through the iterator type.
  std::reverse(walked.begin(), walked.end());

  std::vector<std::string> composed;
  for (const auto& anchor : walked) {
    composed.push_back(anchor.node->currentKeyValue().value);
  }
  ASSERT_EQ(composed,
            (std::vector<std::string>{"高科技", "公司", "", "年中", "獎金"}));
}
// Builds the model with key and value swapped, feeds the input one reading at
// a time, walks the grid, and checks the keys of the walked nodes.
TEST(GramambularTest, WordSegmentationTest) {
  SimpleLM lm2(SampleData, true);

  BlockReadingBuilder builder2(&lm2);
  builder2.insertReadingAtCursor("");
  builder2.insertReadingAtCursor("");
  builder2.insertReadingAtCursor("");
  builder2.insertReadingAtCursor("");
  builder2.insertReadingAtCursor("");
  builder2.insertReadingAtCursor("");
  builder2.insertReadingAtCursor("");
  builder2.insertReadingAtCursor("");
  builder2.insertReadingAtCursor("");
  builder2.insertReadingAtCursor("");

  Walker walker2(&builder2.grid());
  std::vector<NodeAnchor> walked =
      walker2.reverseWalk(builder2.grid().width(), 0.0);
  // reverseWalk yields anchors back-to-front; put them in reading order.
  // Qualified with std:: so the call does not depend on ADL finding it
  // through the iterator type.
  std::reverse(walked.begin(), walked.end());

  std::vector<std::string> segmented;
  for (const auto& anchor : walked) {
    segmented.push_back(anchor.node->currentKeyValue().key);
  }
  ASSERT_EQ(segmented,
            (std::vector<std::string>{"高科技", "公司", "", "年終", "獎金"}));
}
} // namespace Gramambular
} // namespace Formosa

View File

@ -0,0 +1,74 @@
// Copyright (c) 2007 and onwards Lukhnos Liu
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "Grid.h"
#include <iostream>
#include <string>
namespace Formosa {
namespace Gramambular {
std::string Grid::dumpDOT() {
std::stringstream sst;
sst << "digraph {" << std::endl;
sst << "graph [ rankdir=LR ];" << std::endl;
sst << "BOS;" << std::endl;
for (size_t p = 0; p < m_spans.size(); p++) {
Span& span = m_spans[p];
for (size_t ni = 0; ni <= span.maximumLength(); ni++) {
Node* np = span.nodeOfLength(ni);
if (np) {
if (!p) {
sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl;
}
sst << np->currentKeyValue().value << ";" << std::endl;
if (p + ni < m_spans.size()) {
Span& dstSpan = m_spans[p + ni];
for (size_t q = 0; q <= dstSpan.maximumLength(); q++) {
Node* dn = dstSpan.nodeOfLength(q);
if (dn) {
sst << np->currentKeyValue().value << " -> "
<< dn->currentKeyValue().value << ";" << std::endl;
}
}
}
if (p + ni == m_spans.size()) {
sst << np->currentKeyValue().value << " -> "
<< "EOS;" << std::endl;
}
}
}
}
sst << "EOS;" << std::endl;
sst << "}";
return sst.str();
}
} // namespace Gramambular
} // namespace Formosa

View File

@ -25,248 +25,207 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef Grid_h #ifndef GRID_H_
#define Grid_h #define GRID_H_
#include <map> #include <map>
#include <string>
#include <vector>
#include "NodeAnchor.h" #include "NodeAnchor.h"
#include "Span.h" #include "Span.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class Grid { class Grid {
public: public:
void clear(); void clear();
void insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength); void insertNode(const Node& node, size_t location, size_t spanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey); bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location,
size_t spanningLength,
const std::string& key);
void expandGridByOneAtLocation(size_t inLocation); void expandGridByOneAtLocation(size_t location);
void shrinkGridByOneAtLocation(size_t inLocation); void shrinkGridByOneAtLocation(size_t location);
size_t width() const; size_t width() const;
vector<NodeAnchor> nodesEndingAt(size_t inLocation); std::vector<NodeAnchor> nodesEndingAt(size_t location);
vector<NodeAnchor> nodesCrossingOrEndingAt(size_t inLocation); std::vector<NodeAnchor> nodesCrossingOrEndingAt(size_t location);
// "Freeze" the node with the unigram that represents the selected candidate value. // "Freeze" the node with the unigram that represents the selected candidate
// After this, the node that contains the unigram will always be evaluated to that // value. After this, the node that contains the unigram will always be
// unigram, while all other overlapping nodes will be reset to their initial state // evaluated to that unigram, while all other overlapping nodes will be reset
// (that is, if any of those nodes were "frozen" or fixed, they will be unfrozen.) // to their initial state (that is, if any of those nodes were "frozen" or
NodeAnchor fixNodeSelectedCandidate(size_t location, const string& value); // fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location,
const std::string& value);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, only // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
// boost the unigram that represents the value with an overriding score. This // only boost the unigram that represents the value with an overriding score.
// has the same side effect as fixNodeSelectedCandidate, which is that all other // This has the same side effect as fixNodeSelectedCandidate, which is that
// overlapping nodes will be reset to their initial state. // all other overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore); void overrideNodeScoreForSelectedCandidate(size_t location,
const std::string& value,
float overridingScore);
const string dumpDOT(); std::string dumpDOT();
protected: protected:
vector<Span> m_spans; std::vector<Span> m_spans;
}; };
inline void Grid::clear() inline void Grid::clear() { m_spans.clear(); }
{
m_spans.clear();
}
inline void Grid::insertNode(const Node& inNode, size_t inLocation, size_t inSpanningLength) inline void Grid::insertNode(const Node& node, size_t location,
{ size_t spanningLength) {
if (inLocation >= m_spans.size()) { if (location >= m_spans.size()) {
size_t diff = inLocation - m_spans.size() + 1; size_t diff = location - m_spans.size() + 1;
for (size_t i = 0 ; i < diff ; i++) { for (size_t i = 0; i < diff; i++) {
m_spans.push_back(Span()); m_spans.push_back(Span());
}
}
m_spans[inLocation].insertNodeOfLength(inNode, inSpanningLength);
}
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t inLocation, size_t inSpanningLength, const string& inKey)
{
if (inLocation > m_spans.size()) {
return false;
}
const Node *n = m_spans[inLocation].nodeOfLength(inSpanningLength);
if (!n) {
return false;
}
return inKey == n->key();
}
inline void Grid::expandGridByOneAtLocation(size_t inLocation)
{
if (!inLocation || inLocation == m_spans.size()) {
m_spans.insert(m_spans.begin() + inLocation, Span());
}
else {
m_spans.insert(m_spans.begin() + inLocation, Span());
for (size_t i = 0 ; i < inLocation ; i++) {
// zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i);
}
}
}
inline void Grid::shrinkGridByOneAtLocation(size_t inLocation)
{
if (inLocation >= m_spans.size()) {
return;
}
m_spans.erase(m_spans.begin() + inLocation);
for (size_t i = 0 ; i < inLocation ; i++) {
// zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(inLocation - i);
}
}
inline size_t Grid::width() const
{
return m_spans.size();
}
inline vector<NodeAnchor> Grid::nodesEndingAt(size_t inLocation)
{
vector<NodeAnchor> result;
if (m_spans.size() && inLocation <= m_spans.size()) {
for (size_t i = 0 ; i < inLocation ; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= inLocation) {
Node *np = span.nodeOfLength(inLocation - i);
if (np) {
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = inLocation - i;
result.push_back(na);
}
}
}
}
return result;
}
inline vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t inLocation)
{
vector<NodeAnchor> result;
if (m_spans.size() && inLocation <= m_spans.size()) {
for (size_t i = 0 ; i < inLocation ; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= inLocation) {
for (size_t j = 1, m = span.maximumLength(); j <= m ; j++) {
if (i + j < inLocation) {
continue;
}
Node *np = span.nodeOfLength(j);
if (np) {
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = inLocation - i;
result.push_back(na);
}
}
}
}
}
return result;
}
// For nodes found at the location, fix their currently-selected candidate using the supplied string value.
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const string& value)
{
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
NodeAnchor node;
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i);
node = nodeAnchor;
break;;
}
}
}
return node;
}
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const string& value, float overridingScore)
{
vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
inline const string Grid::dumpDOT()
{
stringstream sst;
sst << "digraph {" << endl;
sst << "graph [ rankdir=LR ];" << endl;
sst << "BOS;" << endl;
for (size_t p = 0 ; p < m_spans.size() ; p++) {
Span& span = m_spans[p];
for (size_t ni = 0 ; ni <= span.maximumLength() ; ni++) {
Node* np = span.nodeOfLength(ni);
if (np) {
if (!p) {
sst << "BOS -> " << np->currentKeyValue().value << ";" << endl;
}
sst << np->currentKeyValue().value << ";" << endl;
if (p + ni < m_spans.size()) {
Span& dstSpan = m_spans[p+ni];
for (size_t q = 0 ; q <= dstSpan.maximumLength() ; q++) {
Node *dn = dstSpan.nodeOfLength(q);
if (dn) {
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" << endl;
}
}
}
if (p + ni == m_spans.size()) {
sst << np->currentKeyValue().value << " -> " << "EOS;" << endl;
}
}
}
}
sst << "EOS;" << endl;
sst << "}";
return sst.str();
}
} }
}
m_spans[location].insertNodeOfLength(node, spanningLength);
} }
// Returns true when the span at `location` holds a node of exactly
// `spanningLength` whose key equals `key`.
//
// A location at or beyond the grid's width has no span and yields false. The
// check must be `>=`: `location == m_spans.size()` is already one past the
// last valid index.
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(
    size_t location, size_t spanningLength, const std::string& key) {
  if (location >= m_spans.size()) {
    return false;
  }

  const Node* n = m_spans[location].nodeOfLength(spanningLength);
  return n != nullptr && key == n->key();
}
// Inserts an empty span at `location`. When the insertion point is strictly
// inside the grid, nodes in earlier spans that would reach past `location`
// are removed.
inline void Grid::expandGridByOneAtLocation(size_t location) {
  // Evaluate the boundary test before inserting: it must see the old width.
  const bool atBoundary = (location == 0) || (location == m_spans.size());

  m_spans.insert(m_spans.begin() + location, Span());
  if (atBoundary) {
    return;
  }

  for (size_t before = 0; before < location; ++before) {
    // zaps overlapping spans
    m_spans[before].removeNodeOfLengthGreaterThan(location - before);
  }
}
// Removes the span at `location`, then removes from every earlier span the
// nodes that would reach past `location`. Does nothing when `location` is not
// a valid span index.
inline void Grid::shrinkGridByOneAtLocation(size_t location) {
  if (location < m_spans.size()) {
    m_spans.erase(m_spans.begin() + location);

    for (size_t before = 0; before != location; ++before) {
      // zaps overlapping spans
      m_spans[before].removeNodeOfLengthGreaterThan(location - before);
    }
  }
}
// Returns the number of spans currently held by the grid.
inline size_t Grid::width() const {
  return m_spans.size();
}
// Collects anchors for every node that ends exactly at `location`, i.e. a
// node that starts at span i and has length `location - i`.
// Returns an empty vector when the grid is empty or `location` exceeds the
// grid's width.
inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location) {
  std::vector<NodeAnchor> result;
  if (m_spans.empty() || location > m_spans.size()) {
    return result;
  }

  for (size_t start = 0; start < location; ++start) {
    Span& span = m_spans[start];
    const size_t needed = location - start;
    if (start + span.maximumLength() < location) {
      // Nothing in this span is long enough to reach `location`.
      continue;
    }

    Node* np = span.nodeOfLength(needed);
    if (np == nullptr) {
      continue;
    }

    NodeAnchor na;
    na.node = np;
    na.location = start;
    na.spanningLength = needed;
    result.push_back(na);
  }
  return result;
}
// Collects anchors for every node that starts before `location` and reaches
// at least as far as `location` (it either ends there or crosses it).
// Returns an empty vector when the grid is empty or `location` exceeds the
// grid's width.
inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location) {
  std::vector<NodeAnchor> result;
  if (m_spans.empty() || location > m_spans.size()) {
    return result;
  }

  for (size_t start = 0; start < location; ++start) {
    Span& span = m_spans[start];
    const size_t longest = span.maximumLength();

    // The shortest length that still reaches `location` is `location - start`
    // (always >= 1 here); shorter nodes end before it and are not visited.
    for (size_t len = location - start; len <= longest; ++len) {
      Node* np = span.nodeOfLength(len);
      if (np == nullptr) {
        continue;
      }

      NodeAnchor na;
      na.node = np;
      na.location = start;
      // NOTE(review): this records the distance to `location`, not the node's
      // own length `len`; for crossing nodes the two differ. Kept as-is —
      // confirm whether callers rely on it.
      na.spanningLength = location - start;
      result.push_back(na);
    }
  }
  return result;
}
// For nodes found at the location, fix their currently-selected candidate using
// the supplied string value.
//
// Every node crossing or ending at `location` is first reset. A node that has
// a candidate whose value equals `value` then has that candidate selected via
// selectCandidateAtIndex (default `fix` argument). Returns the anchor of the
// last node that matched, or a default-constructed NodeAnchor when none did.
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
                                                 const std::string& value) {
  const std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
  NodeAnchor fixedAnchor;

  for (const NodeAnchor& nodeAnchor : nodes) {
    // The anchor exposes the node as const; the grid owns it and may mutate.
    Node* node = const_cast<Node*>(nodeAnchor.node);

    // Reset the candidate-fixed state of every node at the location.
    node->resetCandidate();

    // Bind by reference: candidates() returns a const reference, and neither
    // resetCandidate() nor selectCandidateAtIndex() modifies the list, so
    // copying the vector per anchor is unnecessary.
    const std::vector<KeyValuePair>& candidates = node->candidates();
    for (size_t i = 0, c = candidates.size(); i < c; ++i) {
      if (candidates[i].value == value) {
        node->selectCandidateAtIndex(i);
        fixedAnchor = nodeAnchor;
        break;
      }
    }
  }
  return fixedAnchor;
}
inline void Grid::overrideNodeScoreForSelectedCandidate(
size_t location, const std::string& value, float overridingScore) {
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)
->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -25,47 +25,43 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef KeyValuePair_h #ifndef KEYVALUEPAIR_H_
#define KeyValuePair_h #define KEYVALUEPAIR_H_
#include <ostream> #include <ostream>
#include <string> #include <string>
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std;
class KeyValuePair { class KeyValuePair {
public: public:
string key; std::string key;
string value; std::string value;
bool operator==(const KeyValuePair& inAnother) const; bool operator==(const KeyValuePair& another) const;
bool operator<(const KeyValuePair& inAnother) const; bool operator<(const KeyValuePair& another) const;
}; };
inline ostream& operator<<(ostream& inStream, const KeyValuePair& inPair) inline std::ostream& operator<<(std::ostream& stream,
{ const KeyValuePair& pair) {
inStream << "(" << inPair.key << "," << inPair.value << ")"; stream << "(" << pair.key << "," << pair.value << ")";
return inStream; return stream;
}
inline bool KeyValuePair::operator==(const KeyValuePair& inAnother) const
{
return key == inAnother.key && value == inAnother.value;
}
inline bool KeyValuePair::operator<(const KeyValuePair& inAnother) const
{
if (key < inAnother.key) {
return true;
}
else if (key == inAnother.key) {
return value < inAnother.value;
}
return false;
}
}
} }
// Two pairs are equal when both their keys and their values are equal.
inline bool KeyValuePair::operator==(const KeyValuePair& another) const {
  if (key != another.key) {
    return false;
  }
  return value == another.value;
}
// Orders pairs by key first, then by value when the keys are equal.
inline bool KeyValuePair::operator<(const KeyValuePair& another) const {
  if (key != another.key) {
    return key < another.key;
  }
  return value < another.value;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -25,28 +25,28 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef LanguageModel_h #ifndef LANGUAGEMODEL_H_
#define LanguageModel_h #define LANGUAGEMODEL_H_
#include <string>
#include <vector> #include <vector>
#include "Bigram.h" #include "Bigram.h"
#include "Unigram.h" #include "Unigram.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std; class LanguageModel {
public:
class LanguageModel { virtual ~LanguageModel() {}
public:
virtual ~LanguageModel() {}
virtual const vector<Bigram> bigramsForKeys(const string &preceedingKey, const string& key) = 0;
virtual const vector<Unigram> unigramsForKey(const string &key) = 0;
virtual bool hasUnigramsForKey(const string& key) = 0;
};
}
}
virtual const std::vector<Bigram> bigramsForKeys(
const std::string& preceedingKey, const std::string& key) = 0;
virtual const std::vector<Unigram> unigramsForKey(const std::string& key) = 0;
virtual bool hasUnigramsForKey(const std::string& key) = 0;
};
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -25,207 +25,197 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef Node_h #ifndef NODE_H_
#define Node_h #define NODE_H_
#include <limits> #include <limits>
#include <map>
#include <string>
#include <vector> #include <vector>
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std;
class Node { class Node {
public: public:
Node(); Node();
Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams); Node(const std::string& key, const std::vector<Unigram>& unigrams,
const std::vector<Bigram>& bigrams);
void primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues); void primeNodeWithPreceedingKeyValues(
const std::vector<KeyValuePair>& keyValues);
bool isCandidateFixed() const; bool isCandidateFixed() const;
const vector<KeyValuePair>& candidates() const; const std::vector<KeyValuePair>& candidates() const;
void selectCandidateAtIndex(size_t inIndex = 0, bool inFix = true); void selectCandidateAtIndex(size_t index = 0, bool fix = true);
void resetCandidate(); void resetCandidate();
void selectFloatingCandidateAtIndex(size_t index, double score); void selectFloatingCandidateAtIndex(size_t index, double score);
const string& key() const; const std::string& key() const;
double score() const; double score() const;
double scoreForCandidate(string &candidate) const; double scoreForCandidate(const std::string& candidate) const;
const KeyValuePair currentKeyValue() const; const KeyValuePair currentKeyValue() const;
double highestUnigramScore() const; double highestUnigramScore() const;
protected: protected:
const LanguageModel* m_LM; const LanguageModel* m_LM;
string m_key; std::string m_key;
double m_score; double m_score;
vector<Unigram> m_unigrams; std::vector<Unigram> m_unigrams;
vector<KeyValuePair> m_candidates; std::vector<KeyValuePair> m_candidates;
map<string, size_t> m_valueUnigramIndexMap; std::map<std::string, size_t> m_valueUnigramIndexMap;
map<KeyValuePair, vector<Bigram> > m_preceedingGramBigramMap; std::map<KeyValuePair, std::vector<Bigram> > m_preceedingGramBigramMap;
bool m_candidateFixed; bool m_candidateFixed;
size_t m_selectedUnigramIndex; size_t m_selectedUnigramIndex;
friend ostream& operator<<(ostream& inStream, const Node& inNode); friend std::ostream& operator<<(std::ostream& stream, const Node& node);
}; };
inline ostream& operator<<(ostream& inStream, const Node& inNode) inline std::ostream& operator<<(std::ostream& stream, const Node& node) {
{ stream << "(node,key:" << node.m_key
inStream << "(node,key:" << inNode.m_key << ",fixed:" << (inNode.m_candidateFixed ? "true" : "false") << ",fixed:" << (node.m_candidateFixed ? "true" : "false")
<< ",selected:" << inNode.m_selectedUnigramIndex << ",selected:" << node.m_selectedUnigramIndex << ","
<< "," << inNode.m_unigrams << ")"; << node.m_unigrams << ")";
return inStream; return stream;
}
inline Node::Node()
: m_candidateFixed(false)
, m_selectedUnigramIndex(0)
, m_score(0.0)
{
}
inline Node::Node(const string& inKey, const vector<Unigram>& inUnigrams, const vector<Bigram>& inBigrams)
: m_key(inKey)
, m_unigrams(inUnigrams)
, m_candidateFixed(false)
, m_selectedUnigramIndex(0)
, m_score(0.0)
{
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
if (m_unigrams.size()) {
m_score = m_unigrams[0].score;
}
size_t i = 0;
for (vector<Unigram>::const_iterator ui = m_unigrams.begin() ; ui != m_unigrams.end() ; ++ui) {
m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
i++;
m_candidates.push_back((*ui).keyValue);
}
for (vector<Bigram>::const_iterator bi = inBigrams.begin() ; bi != inBigrams.end() ; ++bi) {
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
}
}
inline void Node::primeNodeWithPreceedingKeyValues(const vector<KeyValuePair>& inKeyValues)
{
size_t newIndex = m_selectedUnigramIndex;
double max = m_score;
if (!isCandidateFixed()) {
for (vector<KeyValuePair>::const_iterator kvi = inKeyValues.begin() ; kvi != inKeyValues.end() ; ++kvi) {
map<KeyValuePair, vector<Bigram> >::const_iterator f = m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) {
const vector<Bigram>& bigrams = (*f).second;
for (vector<Bigram>::const_iterator bi = bigrams.begin() ; bi != bigrams.end() ; ++bi) {
const Bigram& bigram = *bi;
if (bigram.score > max) {
map<string, size_t>::const_iterator uf = m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) {
newIndex = (*uf).second;
max = bigram.score;
}
}
}
}
}
}
if (m_score != max) {
m_score = max;
}
if (newIndex != m_selectedUnigramIndex) {
m_selectedUnigramIndex = newIndex;
}
}
inline bool Node::isCandidateFixed() const
{
return m_candidateFixed;
}
inline const vector<KeyValuePair>& Node::candidates() const
{
return m_candidates;
}
inline void Node::selectCandidateAtIndex(size_t inIndex, bool inFix)
{
if (inIndex >= m_unigrams.size()) {
m_selectedUnigramIndex = 0;
}
else {
m_selectedUnigramIndex = inIndex;
}
m_candidateFixed = inFix;
m_score = 99;
}
inline void Node::resetCandidate()
{
m_selectedUnigramIndex = 0;
m_candidateFixed = 0;
if (m_unigrams.size()) {
m_score = m_unigrams[0].score;
}
}
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
if (index >= m_unigrams.size()) {
m_selectedUnigramIndex = 0;
} else {
m_selectedUnigramIndex = index;
}
m_candidateFixed = false;
m_score = score;
}
inline const string& Node::key() const
{
return m_key;
}
inline double Node::score() const
{
return m_score;
}
inline double Node::scoreForCandidate(string &candidate) const
{
for (auto unigram : m_unigrams) {
if (unigram.keyValue.value == candidate) {
return unigram.score;
}
}
return 0.0;
}
inline double Node::highestUnigramScore() const {
if (m_unigrams.empty()) {
return 0.0;
}
return m_unigrams[0].score;
}
inline const KeyValuePair Node::currentKeyValue() const
{
if(m_selectedUnigramIndex >= m_unigrams.size()) {
return KeyValuePair();
}
else {
return m_candidates[m_selectedUnigramIndex];
}
}
}
} }
// Creates an empty node: no key, no unigrams, score 0, nothing selected or
// fixed.
//
// m_LM is explicitly nulled so the pointer never holds an indeterminate
// value, and the initializers are listed in member declaration order (the
// order in which they actually run).
inline Node::Node()
    : m_LM(nullptr),
      m_score(0.0),
      m_candidateFixed(false),
      m_selectedUnigramIndex(0) {}
// Creates a node for `key` from its unigrams and bigrams.
//
// The unigrams are copied and stably sorted with Unigram::ScoreCompare; the
// node's initial score is the score of the first sorted unigram (0 if there
// are none). The candidate list mirrors the sorted unigrams, and a
// value -> index map is built alongside it (for duplicate values the last
// index wins). Bigrams are grouped by their preceding key-value pair.
//
// m_LM is explicitly nulled so the pointer never holds an indeterminate
// value, and the initializers are listed in member declaration order.
inline Node::Node(const std::string& key, const std::vector<Unigram>& unigrams,
                  const std::vector<Bigram>& bigrams)
    : m_LM(nullptr),
      m_key(key),
      m_score(0.0),
      m_unigrams(unigrams),
      m_candidateFixed(false),
      m_selectedUnigramIndex(0) {
  std::stable_sort(m_unigrams.begin(), m_unigrams.end(),
                   Unigram::ScoreCompare);

  if (!m_unigrams.empty()) {
    m_score = m_unigrams[0].score;
  }

  m_candidates.reserve(m_unigrams.size());
  for (size_t i = 0; i < m_unigrams.size(); ++i) {
    const KeyValuePair& keyValue = m_unigrams[i].keyValue;
    m_valueUnigramIndexMap[keyValue.value] = i;
    m_candidates.push_back(keyValue);
  }

  for (const Bigram& bigram : bigrams) {
    m_preceedingGramBigramMap[bigram.preceedingKeyValue].push_back(bigram);
  }
}
inline void Node::primeNodeWithPreceedingKeyValues(
const std::vector<KeyValuePair>& keyValues) {
size_t newIndex = m_selectedUnigramIndex;
double max = m_score;
if (!isCandidateFixed()) {
for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin();
kvi != keyValues.end(); ++kvi) {
std::map<KeyValuePair, std::vector<Bigram> >::const_iterator f =
m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) {
const std::vector<Bigram>& bigrams = (*f).second;
for (std::vector<Bigram>::const_iterator bi = bigrams.begin();
bi != bigrams.end(); ++bi) {
const Bigram& bigram = *bi;
if (bigram.score > max) {
std::map<std::string, size_t>::const_iterator uf =
m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) {
newIndex = (*uf).second;
max = bigram.score;
}
}
}
}
}
}
if (m_score != max) {
m_score = max;
}
if (newIndex != m_selectedUnigramIndex) {
m_selectedUnigramIndex = newIndex;
}
}
// Returns whether the node's selected candidate is currently fixed.
inline bool Node::isCandidateFixed() const {
  return m_candidateFixed;
}
// Returns the node's candidate key-value pairs by const reference.
inline const std::vector<KeyValuePair>& Node::candidates() const
{
  return m_candidates;
}
// Selects the unigram at `index` (falling back to index 0 when `index` is out
// of range), records `fix` as the fixed state, and sets the score to 99.
// NOTE(review): 99 looks like a sentinel meant to outrank ordinary scores —
// confirm.
inline void Node::selectCandidateAtIndex(size_t index, bool fix) {
  m_selectedUnigramIndex = (index < m_unigrams.size()) ? index : 0;
  m_candidateFixed = fix;
  m_score = 99;
}
// Returns the node to its unselected state: index 0, not fixed, and the score
// of the first unigram. With no unigrams the score is left untouched.
inline void Node::resetCandidate() {
  m_selectedUnigramIndex = 0;
  m_candidateFixed = false;
  if (!m_unigrams.empty()) {
    m_score = m_unigrams[0].score;
  }
}
// Selects the unigram at `index` (falling back to index 0 when `index` is out
// of range) without fixing it, and sets the node's score to `score`.
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
  m_selectedUnigramIndex = (index < m_unigrams.size()) ? index : 0;
  m_candidateFixed = false;
  m_score = score;
}
// Returns the node's key by const reference.
inline const std::string& Node::key() const {
  return m_key;
}
// Returns the node's current score.
inline double Node::score() const {
  return m_score;
}
// Returns the score of the first unigram whose value equals `candidate`, or
// 0.0 when no unigram has that value.
inline double Node::scoreForCandidate(const std::string& candidate) const {
  // Iterate by const reference: copying a Unigram copies its two strings.
  for (const auto& unigram : m_unigrams) {
    if (unigram.keyValue.value == candidate) {
      return unigram.score;
    }
  }
  return 0.0;
}
// Returns the score of the first stored unigram, or 0.0 when there are none.
inline double Node::highestUnigramScore() const {
  return m_unigrams.empty() ? 0.0 : m_unigrams[0].score;
}
// Returns a copy of the currently selected candidate, or a default
// KeyValuePair when the selected index is not a valid unigram index.
inline const KeyValuePair Node::currentKeyValue() const {
  const bool hasSelection = m_selectedUnigramIndex < m_unigrams.size();
  if (!hasSelection) {
    return KeyValuePair();
  }
  return m_candidates[m_selectedUnigramIndex];
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -25,55 +25,48 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef NodeAnchor_h #ifndef NODEANCHOR_H_
#define NodeAnchor_h #define NODEANCHOR_H_
#include <vector>
#include "Node.h" #include "Node.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class NodeAnchor {
public:
NodeAnchor();
const Node *node;
size_t location;
size_t spanningLength;
double accumulatedScore;
};
inline NodeAnchor::NodeAnchor() struct NodeAnchor {
: node(0) const Node* node = nullptr;
, location(0) size_t location = 0;
, spanningLength(0) size_t spanningLength = 0;
, accumulatedScore(0.0) double accumulatedScore = 0.0;
{ };
}
inline ostream& operator<<(ostream& inStream, const NodeAnchor& inAnchor) inline std::ostream& operator<<(std::ostream& stream,
{ const NodeAnchor& anchor) {
inStream << "{@(" << inAnchor.location << "," << inAnchor.spanningLength << "),"; stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),";
if (inAnchor.node) { if (anchor.node) {
inStream << *(inAnchor.node); stream << *(anchor.node);
} } else {
else { stream << "null";
inStream << "null"; }
} stream << "}";
inStream << "}"; return stream;
return inStream;
}
inline ostream& operator<<(ostream& inStream, const vector<NodeAnchor>& inAnchor)
{
for (vector<NodeAnchor>::const_iterator i = inAnchor.begin() ; i != inAnchor.end() ; ++i) {
inStream << *i;
if (i + 1 != inAnchor.end()) {
inStream << "<-";
}
}
return inStream;
}
}
} }
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<NodeAnchor>& anchor) {
for (std::vector<NodeAnchor>::const_iterator i = anchor.begin();
i != anchor.end(); ++i) {
stream << *i;
if (i + 1 != anchor.end()) {
stream << "<-";
}
}
return stream;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -25,88 +25,77 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef Span_h #ifndef SPAN_H_
#define Span_h #define SPAN_H_
#include <map> #include <map>
#include <set> #include <set>
#include <sstream> #include <sstream>
#include "Node.h" #include "Node.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class Span { class Span {
public: public:
Span(); void clear();
void insertNodeOfLength(const Node& node, size_t length);
void removeNodeOfLengthGreaterThan(size_t length);
void clear(); Node* nodeOfLength(size_t length);
void insertNodeOfLength(const Node& inNode, size_t inLength); size_t maximumLength() const;
void removeNodeOfLengthGreaterThan(size_t inLength);
Node* nodeOfLength(size_t inLength); protected:
size_t maximumLength() const; std::map<size_t, Node> m_lengthNodeMap;
size_t m_maximumLength = 0;
};
protected: inline void Span::clear() {
map<size_t, Node> m_lengthNodeMap; m_lengthNodeMap.clear();
size_t m_maximumLength; m_maximumLength = 0;
};
inline Span::Span()
: m_maximumLength(0)
{
}
inline void Span::clear()
{
m_lengthNodeMap.clear();
m_maximumLength = 0;
}
inline void Span::insertNodeOfLength(const Node& inNode, size_t inLength)
{
m_lengthNodeMap[inLength] = inNode;
if (inLength > m_maximumLength) {
m_maximumLength = inLength;
}
}
inline void Span::removeNodeOfLengthGreaterThan(size_t inLength)
{
if (inLength > m_maximumLength) {
return;
}
size_t max = 0;
set<size_t> removeSet;
for (map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end() ; i != e ; ++i) {
if ((*i).first > inLength) {
removeSet.insert((*i).first);
}
else {
if ((*i).first > max) {
max = (*i).first;
}
}
}
for (set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) {
m_lengthNodeMap.erase(*i);
}
m_maximumLength = max;
}
inline Node* Span::nodeOfLength(size_t inLength)
{
map<size_t, Node>::iterator f = m_lengthNodeMap.find(inLength);
return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
}
inline size_t Span::maximumLength() const
{
return m_maximumLength;
}
}
} }
// Stores a copy of `node` under `length`, replacing any node already stored
// for that length, and raises the recorded maximum length if needed.
inline void Span::insertNodeOfLength(const Node& node, size_t length) {
  m_lengthNodeMap[length] = node;
  if (m_maximumLength < length) {
    m_maximumLength = length;
  }
}
// Removes every node whose length is strictly greater than `length` and
// updates the recorded maximum length to the largest remaining length (0 when
// nothing remains). Does nothing when `length` already exceeds the maximum.
inline void Span::removeNodeOfLengthGreaterThan(size_t length) {
  if (length > m_maximumLength) {
    return;
  }

  // std::map keeps keys sorted, so everything longer than `length` is the
  // contiguous tail starting at upper_bound(length).
  m_lengthNodeMap.erase(m_lengthNodeMap.upper_bound(length),
                        m_lengthNodeMap.end());

  m_maximumLength =
      m_lengthNodeMap.empty() ? 0 : m_lengthNodeMap.rbegin()->first;
}
// Returns a pointer to the node stored for `length`, or a null pointer when
// the span has no node of that length.
inline Node* Span::nodeOfLength(size_t length) {
  auto found = m_lengthNodeMap.find(length);
  if (found == m_lengthNodeMap.end()) {
    return nullptr;
  }
  return &found->second;
}
// Returns the recorded maximum node length of this span.
inline size_t Span::maximumLength() const {
  return m_maximumLength;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -25,80 +25,75 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef Unigram_h #ifndef UNIGRAM_H_
#define Unigram_h #define UNIGRAM_H_
#include <vector> #include <vector>
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
class Unigram {
public:
Unigram();
KeyValuePair keyValue; class Unigram {
double score; public:
Unigram();
bool operator==(const Unigram& inAnother) const; KeyValuePair keyValue;
bool operator<(const Unigram& inAnother) const; double score;
static bool ScoreCompare(const Unigram& a, const Unigram& b); bool operator==(const Unigram& another) const;
}; bool operator<(const Unigram& another) const;
inline ostream& operator<<(ostream& inStream, const Unigram& inGram) static bool ScoreCompare(const Unigram& a, const Unigram& b);
{ };
streamsize p = inStream.precision();
inStream.precision(6);
inStream << "(" << inGram.keyValue << "," << inGram.score << ")";
inStream.precision(p);
return inStream;
}
inline ostream& operator<<(ostream& inStream, const vector<Unigram>& inGrams) inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) {
{ std::streamsize p = stream.precision();
inStream << "[" << inGrams.size() << "]=>{"; stream.precision(6);
stream << "(" << gram.keyValue << "," << gram.score << ")";
size_t index = 0; stream.precision(p);
return stream;
for (vector<Unigram>::const_iterator gi = inGrams.begin() ; gi != inGrams.end() ; ++gi, ++index) {
inStream << index << "=>";
inStream << *gi;
if (gi + 1 != inGrams.end()) {
inStream << ",";
}
}
inStream << "}";
return inStream;
}
inline Unigram::Unigram()
: score(0.0)
{
}
inline bool Unigram::operator==(const Unigram& inAnother) const
{
return keyValue == inAnother.keyValue && score == inAnother.score;
}
inline bool Unigram::operator<(const Unigram& inAnother) const
{
if (keyValue < inAnother.keyValue) {
return true;
}
else if (keyValue == inAnother.keyValue) {
return score < inAnother.score;
}
return false;
}
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b)
{
return a.score > b.score;
}
}
} }
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<Unigram>& grams) {
stream << "[" << grams.size() << "]=>{";
size_t index = 0;
for (std::vector<Unigram>::const_iterator gi = grams.begin();
gi != grams.end(); ++gi, ++index) {
stream << index << "=>";
stream << *gi;
if (gi + 1 != grams.end()) {
stream << ",";
}
}
stream << "}";
return stream;
}
// Creates a unigram whose score starts at 0.0; keyValue is left to its own
// default construction.
inline Unigram::Unigram()
    : score(0.0) {
}
// Two unigrams are equal when both their keyValue and their score compare
// equal. The score is only examined once the keyValues match.
inline bool Unigram::operator==(const Unigram& another) const {
  if (!(keyValue == another.keyValue)) {
    return false;
  }
  return score == another.score;
}
// Orders unigrams by keyValue first; when the keyValues are equal, the one
// with the lower score sorts first.
inline bool Unigram::operator<(const Unigram& another) const {
  if (keyValue < another.keyValue) {
    return true;
  }
  // Not strictly smaller by key: only a tie on the key can still qualify,
  // and then the score decides.
  return keyValue == another.keyValue && score < another.score;
}
// Comparator that ranks by descending score: returns true when a's score is
// strictly higher than b's. The keyValue is not consulted.
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) {
  return b.score < a.score;
}
} // namespace Gramambular
} // namespace Formosa
#endif #endif

View File

@ -25,67 +25,69 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
// //
#ifndef Walker_h #ifndef WALKER_H_
#define Walker_h #define WALKER_H_
#include <algorithm> #include <algorithm>
#include <vector>
#include "Grid.h" #include "Grid.h"
namespace Formosa { namespace Formosa {
namespace Gramambular { namespace Gramambular {
using namespace std;
class Walker { class Walker {
public: public:
Walker(Grid* inGrid); explicit Walker(Grid* inGrid);
const vector<NodeAnchor> reverseWalk(size_t inLocation, double inAccumulatedScore = 0.0); const std::vector<NodeAnchor> reverseWalk(size_t location,
double accumulatedScore = 0.0);
protected: protected:
Grid* m_grid; Grid* m_grid;
}; };
inline Walker::Walker(Grid* inGrid) inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {}
: m_grid(inGrid)
{
}
inline const vector<NodeAnchor> Walker::reverseWalk(size_t inLocation, double inAccumulatedScore) inline const std::vector<NodeAnchor> Walker::reverseWalk(
{ size_t location, double accumulatedScore) {
if (!inLocation || inLocation > m_grid->width()) { if (!location || location > m_grid->width()) {
return vector<NodeAnchor>(); return std::vector<NodeAnchor>();
} }
vector<vector<NodeAnchor> > paths; std::vector<std::vector<NodeAnchor> > paths;
vector<NodeAnchor> nodes = m_grid->nodesEndingAt(inLocation); std::vector<NodeAnchor> nodes = m_grid->nodesEndingAt(location);
for (vector<NodeAnchor>::iterator ni = nodes.begin() ; ni != nodes.end() ; ++ni) { for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end();
if (!(*ni).node) { ++ni) {
continue; if (!(*ni).node) {
} continue;
(*ni).accumulatedScore = inAccumulatedScore + (*ni).node->score();
vector<NodeAnchor> path = reverseWalk(inLocation - (*ni).spanningLength, (*ni).accumulatedScore);
path.insert(path.begin(), *ni);
paths.push_back(path);
}
if (!paths.size()) {
return vector<NodeAnchor>();
}
vector<NodeAnchor>* result = &*(paths.begin());
for (vector<vector<NodeAnchor> >::iterator pi = paths.begin() ; pi != paths.end() ; ++pi) {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
result = &*pi;
}
}
return *result;
}
} }
(*ni).accumulatedScore = accumulatedScore + (*ni).node->score();
std::vector<NodeAnchor> path =
reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
path.insert(path.begin(), *ni);
paths.push_back(path);
}
if (!paths.size()) {
return std::vector<NodeAnchor>();
}
std::vector<NodeAnchor>* result = &*(paths.begin());
for (std::vector<std::vector<NodeAnchor> >::iterator pi = paths.begin();
pi != paths.end(); ++pi) {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
result = &*pi;
}
}
return *result;
} }
} // namespace Gramambular
} // namespace Formosa
#endif #endif