vChewing-macOS/Source/Engine/Gramambular/BlockReadingBuilder.h

227 lines
7.3 KiB
C++

//
// BlockReadingBuilder.h
//
// Copyright (c) 2007-2010 Lukhnos D. Liu (http://lukhnos.org)
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#ifndef BlockReadingBuilder_h
#define BlockReadingBuilder_h
#include <vector>
#include "Grid.h"
#include "LanguageModel.h"
namespace Formosa {
namespace Gramambular {
using namespace std;
class BlockReadingBuilder {
public:
BlockReadingBuilder(LanguageModel *inLM);
void clear();
size_t length() const;
size_t cursorIndex() const;
void setCursorIndex(size_t inNewIndex);
void insertReadingAtCursor(const string& inReading);
bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete
bool removeHeadReadings(size_t count);
void setJoinSeparator(const string& separator);
const string joinSeparator() const;
vector<string> readings() const;
Grid& grid();
protected:
void build();
static const string Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator);
//最多使用六個字組成一個詞
static const size_t MaximumBuildSpanLength = 6;
size_t m_cursorIndex;
vector<string> m_readings;
Grid m_grid;
LanguageModel *m_LM;
string m_joinSeparator;
};
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *inLM)
: m_LM(inLM)
, m_cursorIndex(0)
{
}
inline void BlockReadingBuilder::clear()
{
m_cursorIndex = 0;
m_readings.clear();
m_grid.clear();
}
inline size_t BlockReadingBuilder::length() const
{
return m_readings.size();
}
inline size_t BlockReadingBuilder::cursorIndex() const
{
return m_cursorIndex;
}
inline void BlockReadingBuilder::setCursorIndex(size_t inNewIndex)
{
m_cursorIndex = inNewIndex > m_readings.size() ? m_readings.size() : inNewIndex;
}
inline void BlockReadingBuilder::insertReadingAtCursor(const string& inReading)
{
m_readings.insert(m_readings.begin() + m_cursorIndex, inReading);
m_grid.expandGridByOneAtLocation(m_cursorIndex);
build();
m_cursorIndex++;
}
inline vector<string> BlockReadingBuilder::readings() const
{
return m_readings;
}
inline bool BlockReadingBuilder::deleteReadingBeforeCursor()
{
if (!m_cursorIndex) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex);
m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::deleteReadingAfterCursor()
{
if (m_cursorIndex == m_readings.size()) {
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1);
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::removeHeadReadings(size_t count)
{
if (count > length()) {
return false;
}
for (size_t i = 0; i < count; i++) {
if (m_cursorIndex) {
m_cursorIndex--;
}
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
m_grid.shrinkGridByOneAtLocation(0);
build();
}
return true;
}
inline void BlockReadingBuilder::setJoinSeparator(const string& separator)
{
m_joinSeparator = separator;
}
inline const string BlockReadingBuilder::joinSeparator() const
{
return m_joinSeparator;
}
inline Grid& BlockReadingBuilder::grid()
{
return m_grid;
}
inline void BlockReadingBuilder::build()
{
if (!m_LM) {
return;
}
size_t begin = 0;
size_t end = m_cursorIndex + MaximumBuildSpanLength;
if (m_cursorIndex < MaximumBuildSpanLength) {
begin = 0;
}
else {
begin = m_cursorIndex - MaximumBuildSpanLength;
}
if (end > m_readings.size()) {
end = m_readings.size();
}
for (size_t p = begin ; p < end ; p++) {
for (size_t q = 1 ; q <= MaximumBuildSpanLength && p+q <= end ; q++) {
string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) {
vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) {
Node n(combinedReading, unigrams, vector<Bigram>());
m_grid.insertNode(n, p, q);
}
}
}
}
}
inline const string BlockReadingBuilder::Join(vector<string>::const_iterator begin, vector<string>::const_iterator end, const string& separator)
{
string result;
for (vector<string>::const_iterator iter = begin ; iter != end ; ) {
result += *iter;
++iter;
if (iter != end) {
result += separator;
}
}
return result;
}
}
}
#endif