Remove the "using namespace" usage in McBopomofo

This commit is contained in:
Lukhnos Liu 2022-02-19 08:41:14 -08:00
parent 2091c1dc7d
commit dc6ef93d72
6 changed files with 185 additions and 183 deletions

View File

@ -25,7 +25,7 @@
#include <algorithm>
#include <iterator>
using namespace McBopomofo;
namespace McBopomofo {
McBopomofoLM::McBopomofoLM()
{
@ -87,16 +87,16 @@ void McBopomofoLM::loadPhraseReplacementMap(const char* phraseReplacementPath)
}
}
const vector<Bigram> McBopomofoLM::bigramsForKeys(const string& preceedingKey, const string& key)
const std::vector<Formosa::Gramambular::Bigram> McBopomofoLM::bigramsForKeys(const std::string& preceedingKey, const std::string& key)
{
return vector<Bigram>();
return std::vector<Formosa::Gramambular::Bigram>();
}
const vector<Unigram> McBopomofoLM::unigramsForKey(const string& key)
const std::vector<Formosa::Gramambular::Unigram> McBopomofoLM::unigramsForKey(const std::string& key)
{
if (key == " ") {
vector<Unigram> spaceUnigrams;
Unigram g;
std::vector<Formosa::Gramambular::Unigram> spaceUnigrams;
Formosa::Gramambular::Unigram g;
g.keyValue.key = " ";
g.keyValue.value = " ";
g.score = 0;
@ -104,26 +104,26 @@ const vector<Unigram> McBopomofoLM::unigramsForKey(const string& key)
return spaceUnigrams;
}
vector<Unigram> allUnigrams;
vector<Unigram> userUnigrams;
std::vector<Formosa::Gramambular::Unigram> allUnigrams;
std::vector<Formosa::Gramambular::Unigram> userUnigrams;
unordered_set<string> excludedValues;
unordered_set<string> insertedValues;
std::unordered_set<std::string> excludedValues;
std::unordered_set<std::string> insertedValues;
if (m_excludedPhrases.hasUnigramsForKey(key)) {
vector<Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
std::vector<Formosa::Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
transform(excludedUnigrams.begin(), excludedUnigrams.end(),
inserter(excludedValues, excludedValues.end()),
[](const Unigram& u) { return u.keyValue.value; });
[](const Formosa::Gramambular::Unigram& u) { return u.keyValue.value; });
}
if (m_userPhrases.hasUnigramsForKey(key)) {
vector<Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
std::vector<Formosa::Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
}
if (m_languageModel.hasUnigramsForKey(key)) {
vector<Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
std::vector<Formosa::Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
}
@ -131,7 +131,7 @@ const vector<Unigram> McBopomofoLM::unigramsForKey(const string& key)
return allUnigrams;
}
bool McBopomofoLM::hasUnigramsForKey(const string& key)
bool McBopomofoLM::hasUnigramsForKey(const std::string& key)
{
if (key == " ") {
return true;
@ -164,36 +164,36 @@ bool McBopomofoLM::externalConverterEnabled()
return m_externalConverterEnabled;
}
void McBopomofoLM::setExternalConverter(std::function<string(string)> externalConverter)
void McBopomofoLM::setExternalConverter(std::function<std::string(std::string)> externalConverter)
{
m_externalConverter = externalConverter;
}
const vector<Unigram> McBopomofoLM::filterAndTransformUnigrams(const vector<Unigram> unigrams, const unordered_set<string>& excludedValues, unordered_set<string>& insertedValues)
const std::vector<Formosa::Gramambular::Unigram> McBopomofoLM::filterAndTransformUnigrams(const std::vector<Formosa::Gramambular::Unigram> unigrams, const std::unordered_set<std::string>& excludedValues, std::unordered_set<std::string>& insertedValues)
{
vector<Unigram> results;
std::vector<Formosa::Gramambular::Unigram> results;
for (auto&& unigram : unigrams) {
// excludedValues filters out the unigrams with the original value.
// insertedValues filters out the ones with the converted value
string originalValue = unigram.keyValue.value;
std::string originalValue = unigram.keyValue.value;
if (excludedValues.find(originalValue) != excludedValues.end()) {
continue;
}
string value = originalValue;
std::string value = originalValue;
if (m_phraseReplacementEnabled) {
string replacement = m_phraseReplacement.valueForKey(value);
std::string replacement = m_phraseReplacement.valueForKey(value);
if (replacement != "") {
value = replacement;
}
}
if (m_externalConverterEnabled && m_externalConverter) {
string replacement = m_externalConverter(value);
std::string replacement = m_externalConverter(value);
value = replacement;
}
if (insertedValues.find(value) == insertedValues.end()) {
Unigram g;
Formosa::Gramambular::Unigram g;
g.keyValue.value = value;
g.keyValue.key = unigram.keyValue.key;
g.score = unigram.score;
@ -204,12 +204,14 @@ const vector<Unigram> McBopomofoLM::filterAndTransformUnigrams(const vector<Unig
return results;
}
const vector<std::string> McBopomofoLM::associatedPhrasesForKey(const string& key)
const std::vector<std::string> McBopomofoLM::associatedPhrasesForKey(const std::string& key)
{
return m_associatedPhrases.valuesForKey(key);
}
bool McBopomofoLM::hasAssociatedPhrasesForKey(const string& key)
bool McBopomofoLM::hasAssociatedPhrasesForKey(const std::string& key)
{
return m_associatedPhrases.hasValuesForKey(key);
}
} // namespace McBopomofo

View File

@ -24,17 +24,15 @@
#ifndef MCBOPOMOFOLM_H
#define MCBOPOMOFOLM_H
#include <stdio.h>
#include "UserPhrasesLM.h"
#include "AssociatedPhrases.h"
#include "ParselessLM.h"
#include "PhraseReplacementMap.h"
#include "AssociatedPhrases.h"
#include "UserPhrasesLM.h"
#include <stdio.h>
#include <unordered_set>
namespace McBopomofo {
using namespace Formosa::Gramambular;
/// McBopomofoLM is a facade for managing a set of models including
/// the input method language model, user phrases and excluded phrases.
///
@ -57,7 +55,7 @@ using namespace Formosa::Gramambular;
/// model while launching and to load the user phrases anytime if the custom
/// files are modified. It does not keep references to the data paths, so
/// you have to pass the paths each time you ask it to load.
class McBopomofoLM : public LanguageModel {
class McBopomofoLM : public Formosa::Gramambular::LanguageModel {
public:
McBopomofoLM();
~McBopomofoLM();
@ -83,14 +81,14 @@ public:
void loadPhraseReplacementMap(const char* phraseReplacementPath);
/// Not implemented since we do not have the data to provide the bigram function.
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
const std::vector<Formosa::Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key);
/// Returns a list of available unigrams for the given key.
/// @param key A string that represents the BPMF reading or a symbol key. For
/// example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on.
const vector<Unigram> unigramsForKey(const string& key);
const std::vector<Formosa::Gramambular::Unigram> unigramsForKey(const std::string& key);
/// If the model has unigrams for the given key.
/// @param key The key.
bool hasUnigramsForKey(const string& key);
bool hasUnigramsForKey(const std::string& key);
/// Enables or disables phrase replacement.
void setPhraseReplacementEnabled(bool enabled);
@ -102,11 +100,10 @@ public:
/// Whether the external converter is enabled or not.
bool externalConverterEnabled();
/// Sets a lambda that is used to convert the values of unigrams.
void setExternalConverter(std::function<string(string)> externalConverter);
const vector<std::string> associatedPhrasesForKey(const string& key);
bool hasAssociatedPhrasesForKey(const string& key);
void setExternalConverter(std::function<std::string(std::string)> externalConverter);
const std::vector<std::string> associatedPhrasesForKey(const std::string& key);
bool hasAssociatedPhrasesForKey(const std::string& key);
protected:
/// Filters and converts the input unigrams and returns a new list of unigrams.
@ -116,9 +113,9 @@ protected:
/// @param insertedValues The values for unigrams already in the results.
/// It helps to prevent duplicated unigrams. Please note that the method
/// has a side effect that it inserts values to `insertedValues`.
const vector<Unigram> filterAndTransformUnigrams(const vector<Unigram> unigrams,
const std::unordered_set<string>& excludedValues,
std::unordered_set<string>& insertedValues);
const std::vector<Formosa::Gramambular::Unigram> filterAndTransformUnigrams(const std::vector<Formosa::Gramambular::Unigram> unigrams,
const std::unordered_set<std::string>& excludedValues,
std::unordered_set<std::string>& insertedValues);
ParselessLM m_languageModel;
UserPhrasesLM m_userPhrases;
@ -127,7 +124,7 @@ protected:
AssociatedPhrases m_associatedPhrases;
bool m_phraseReplacementEnabled;
bool m_externalConverterEnabled;
std::function<string(string)> m_externalConverter;
std::function<std::string(std::string)> m_externalConverter;
};
};

View File

@ -31,7 +31,7 @@
#include <cmath>
#include <sstream>
using namespace McBopomofo;
namespace McBopomofo {
// About 20 generations.
static const double DecayThreshould = 1.0 / 1048576.0;
@ -41,21 +41,23 @@ static double Score(size_t eventCount,
double eventTimestamp,
double timestamp,
double lambda);
static bool IsEndingPunctuation(const string& value);
static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
static bool IsEndingPunctuation(const std::string& value);
static std::string WalkedNodesToKey(const std::vector<Formosa::Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex);
UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant)
: m_capacity(capacity) {
: m_capacity(capacity)
{
assert(m_capacity > 0);
m_decayExponent = log(0.5) / decayConstant;
}
void UserOverrideModel::observe(const std::vector<NodeAnchor>& walkedNodes,
void UserOverrideModel::observe(const std::vector<Formosa::Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex,
const string& candidate,
double timestamp) {
string key = WalkedNodesToKey(walkedNodes, cursorIndex);
const std::string& candidate,
double timestamp)
{
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
auto mapIter = m_lruMap.find(key);
if (mapIter == m_lruMap.end()) {
auto keyValuePair = KeyObservationPair(key, Observation());
@ -84,20 +86,21 @@ void UserOverrideModel::observe(const std::vector<NodeAnchor>& walkedNodes,
}
}
string UserOverrideModel::suggest(const std::vector<NodeAnchor>& walkedNodes,
std::string UserOverrideModel::suggest(const std::vector<Formosa::Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex,
double timestamp) {
string key = WalkedNodesToKey(walkedNodes, cursorIndex);
double timestamp)
{
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
auto mapIter = m_lruMap.find(key);
if (mapIter == m_lruMap.end()) {
return string();
return std::string();
}
auto listIter = mapIter->second;
auto& keyValuePair = *listIter;
const Observation& observation = keyValuePair.second;
string candidate;
std::string candidate;
double score = 0.0;
for (auto i = observation.overrides.begin();
i != observation.overrides.end();
@ -120,8 +123,9 @@ string UserOverrideModel::suggest(const std::vector<NodeAnchor>& walkedNodes,
return candidate;
}
void UserOverrideModel::Observation::update(const string& candidate,
double timestamp) {
void UserOverrideModel::Observation::update(const std::string& candidate,
double timestamp)
{
count++;
auto& o = overrides[candidate];
o.timestamp = timestamp;
@ -132,7 +136,8 @@ static double Score(size_t eventCount,
size_t totalCount,
double eventTimestamp,
double timestamp,
double lambda) {
double lambda)
{
double decay = exp((timestamp - eventTimestamp) * lambda);
if (decay < DecayThreshould) {
return 0.0;
@ -142,16 +147,17 @@ static double Score(size_t eventCount,
return prob * decay;
}
static bool IsEndingPunctuation(const string& value) {
return value == "" || value == "" || value== "" || value == "" ||
value == "" || value == "" || value== "" || value == "";
static bool IsEndingPunctuation(const std::string& value)
{
return value == "" || value == "" || value == "" || value == "" || value == "" || value == "" || value == "" || value == "";
}
static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
size_t cursorIndex) {
static std::string WalkedNodesToKey(const std::vector<Formosa::Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex)
{
std::stringstream s;
std::vector<NodeAnchor> n;
std::vector<Formosa::Gramambular::NodeAnchor> n;
size_t ll = 0;
for (std::vector<NodeAnchor>::const_iterator i = walkedNodes.begin();
for (std::vector<Formosa::Gramambular::NodeAnchor>::const_iterator i = walkedNodes.begin();
i != walkedNodes.end();
++i) {
const auto& nn = *i;
@ -162,19 +168,19 @@ static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
}
}
std::vector<NodeAnchor>::const_reverse_iterator r = n.rbegin();
std::vector<Formosa::Gramambular::NodeAnchor>::const_reverse_iterator r = n.rbegin();
if (r == n.rend()) {
return "";
}
string current = (*r).node->currentKeyValue().key;
std::string current = (*r).node->currentKeyValue().key;
++r;
s.clear();
s.str(std::string());
if (r != n.rend()) {
string value = (*r).node->currentKeyValue().value;
std::string value = (*r).node->currentKeyValue().value;
if (IsEndingPunctuation(value)) {
s << "()";
r = n.rend();
@ -189,12 +195,12 @@ static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
} else {
s << "()";
}
string prev = s.str();
std::string prev = s.str();
s.clear();
s.str(std::string());
if (r != n.rend()) {
string value = (*r).node->currentKeyValue().value;
std::string value = (*r).node->currentKeyValue().value;
if (IsEndingPunctuation(value)) {
s << "()";
r = n.rend();
@ -209,7 +215,7 @@ static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
} else {
s << "()";
}
string anterior = s.str();
std::string anterior = s.str();
s.clear();
s.str(std::string());
@ -217,3 +223,5 @@ static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
return s.str();
}
} // namespace McBopomofo

View File

@ -32,18 +32,16 @@
namespace McBopomofo {
using namespace Formosa::Gramambular;
class UserOverrideModel {
public:
UserOverrideModel(size_t capacity, double decayConstant);
void observe(const std::vector<NodeAnchor>& walkedNodes,
void observe(const std::vector<Formosa::Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex,
const string& candidate,
const std::string& candidate,
double timestamp);
string suggest(const std::vector<NodeAnchor>& walkedNodes,
std::string suggest(const std::vector<Formosa::Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex,
double timestamp);
@ -52,15 +50,22 @@ private:
size_t count;
double timestamp;
Override() : count(0), timestamp(0.0) {}
Override()
: count(0)
, timestamp(0.0)
{
}
};
struct Observation {
size_t count;
std::map<std::string, Override> overrides;
Observation() : count(0) {}
void update(const string& candidate, double timestamp);
Observation()
: count(0)
{
}
void update(const std::string& candidate, double timestamp);
};
typedef std::pair<std::string, Observation> KeyObservationPair;
@ -74,4 +79,3 @@ private:
}; // namespace McBopomofo
#endif

View File

@ -33,18 +33,12 @@
@import CandidateUI;
@import NSStringUtils;
// C++ namespace usages
using namespace std;
using namespace Formosa::Mandarin;
using namespace Formosa::Gramambular;
using namespace McBopomofo;
InputMode InputModeBopomofo = @"org.openvanilla.inputmethod.McBopomofo.Bopomofo";
InputMode InputModePlainBopomofo = @"org.openvanilla.inputmethod.McBopomofo.PlainBopomofo";
static const double kEpsilon = 0.000001;
static double FindHighestScore(const vector<NodeAnchor> &nodes, double epsilon)
static double FindHighestScore(const std::vector<Formosa::Gramambular::NodeAnchor> &nodes, double epsilon)
{
double highestScore = 0.0;
for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) {
@ -59,7 +53,7 @@ static double FindHighestScore(const vector<NodeAnchor> &nodes, double epsilon)
// sort helper
class NodeAnchorDescendingSorter {
public:
bool operator()(const NodeAnchor &a, const NodeAnchor &b) const
bool operator()(const Formosa::Gramambular::NodeAnchor &a, const Formosa::Gramambular::NodeAnchor &b) const
{
return a.node->key().length() > b.node->key().length();
}
@ -101,7 +95,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
- (void)setInputMode:(NSString *)value
{
NSString *newInputMode;
McBopomofoLM *newLanguageModel;
McBopomofo::McBopomofoLM *newLanguageModel;
if ([value isKindOfClass:[NSString class]] && [value isEqual:InputModePlainBopomofo]) {
newInputMode = InputModePlainBopomofo;
@ -121,7 +115,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
if (_builder) {
delete _builder;
_builder = new BlockReadingBuilder(_languageModel);
_builder = new Formosa::Gramambular::BlockReadingBuilder(_languageModel);
_builder->setJoinSeparator("-");
}
@ -147,14 +141,14 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
{
self = [super init];
if (self) {
_bpmfReadingBuffer = new BopomofoReadingBuffer(BopomofoKeyboardLayout::StandardLayout());
_bpmfReadingBuffer = new Formosa::Mandarin::BopomofoReadingBuffer(Formosa::Mandarin::BopomofoKeyboardLayout::StandardLayout());
// create the lattice builder
_languageModel = [LanguageModelManager languageModelMcBopomofo];
_languageModel->setPhraseReplacementEnabled(Preferences.phraseReplacementEnabled);
_userOverrideModel = [LanguageModelManager userOverrideModel];
_builder = new BlockReadingBuilder(_languageModel);
_builder = new Formosa::Gramambular::BlockReadingBuilder(_languageModel);
// each Mandarin syllable is separated by a hyphen
_builder->setJoinSeparator("-");
@ -168,25 +162,25 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
NSInteger layout = Preferences.keyboardLayout;
switch (layout) {
case KeyboardLayoutStandard:
_bpmfReadingBuffer->setKeyboardLayout(BopomofoKeyboardLayout::StandardLayout());
_bpmfReadingBuffer->setKeyboardLayout(Formosa::Mandarin::BopomofoKeyboardLayout::StandardLayout());
break;
case KeyboardLayoutEten:
_bpmfReadingBuffer->setKeyboardLayout(BopomofoKeyboardLayout::ETenLayout());
_bpmfReadingBuffer->setKeyboardLayout(Formosa::Mandarin::BopomofoKeyboardLayout::ETenLayout());
break;
case KeyboardLayoutHsu:
_bpmfReadingBuffer->setKeyboardLayout(BopomofoKeyboardLayout::HsuLayout());
_bpmfReadingBuffer->setKeyboardLayout(Formosa::Mandarin::BopomofoKeyboardLayout::HsuLayout());
break;
case KeyboardLayoutEten26:
_bpmfReadingBuffer->setKeyboardLayout(BopomofoKeyboardLayout::ETen26Layout());
_bpmfReadingBuffer->setKeyboardLayout(Formosa::Mandarin::BopomofoKeyboardLayout::ETen26Layout());
break;
case KeyboardLayoutHanyuPinyin:
_bpmfReadingBuffer->setKeyboardLayout(BopomofoKeyboardLayout::HanyuPinyinLayout());
_bpmfReadingBuffer->setKeyboardLayout(Formosa::Mandarin::BopomofoKeyboardLayout::HanyuPinyinLayout());
break;
case KeyboardLayoutIBM:
_bpmfReadingBuffer->setKeyboardLayout(BopomofoKeyboardLayout::IBMLayout());
_bpmfReadingBuffer->setKeyboardLayout(Formosa::Mandarin::BopomofoKeyboardLayout::IBMLayout());
break;
default:
_bpmfReadingBuffer->setKeyboardLayout(BopomofoKeyboardLayout::StandardLayout());
_bpmfReadingBuffer->setKeyboardLayout(Formosa::Mandarin::BopomofoKeyboardLayout::StandardLayout());
Preferences.keyboardLayout = KeyboardLayoutStandard;
}
_languageModel->setExternalConverterEnabled(Preferences.chineseConversionStyle == 1);
@ -195,8 +189,8 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
- (void)fixNodeWithValue:(NSString *)value
{
size_t cursorIndex = [self _actualCandidateCursorIndex];
string stringValue = [value UTF8String];
NodeAnchor selectedNode = _builder->grid().fixNodeSelectedCandidate(cursorIndex, stringValue);
std::string stringValue(value.UTF8String);
Formosa::Gramambular::NodeAnchor selectedNode = _builder->grid().fixNodeSelectedCandidate(cursorIndex, stringValue);
if (_inputMode != InputModePlainBopomofo) {
// If the length of the readings and the characters do not match,
// it often means it is a special symbol and it should not be stored
@ -238,10 +232,10 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
_walkedNodes.clear();
}
- (string)_currentLayout
- (std::string)_currentLayout
{
NSString *keyboardLayoutName = Preferences.keyboardLayoutName;
string layout = string(keyboardLayoutName.UTF8String) + string("_");
std::string layout = std::string(keyboardLayoutName.UTF8String) + "_";
return layout;
}
@ -350,7 +344,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
composeReading |= (!_bpmfReadingBuffer->isEmpty() && (charCode == 32 || charCode == 13));
if (composeReading) {
// combine the reading
string reading = _bpmfReadingBuffer->syllable().composedString();
std::string reading = _bpmfReadingBuffer->syllable().composedString();
// see if we have a unigram for this
if (!_languageModel->hasUnigramsForKey(reading)) {
@ -367,11 +361,11 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
NSString *poppedText = [self _popOverflowComposingTextAndWalk];
// get user override model suggestion
string overrideValue = (_inputMode == InputModePlainBopomofo) ? "" : _userOverrideModel->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]);
std::string overrideValue = (_inputMode == InputModePlainBopomofo) ? "" : _userOverrideModel->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]);
if (!overrideValue.empty()) {
size_t cursorIndex = [self _actualCandidateCursorIndex];
vector<NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
std::vector<Formosa::Gramambular::NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
double highestScore = FindHighestScore(nodes, kEpsilon);
_builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, static_cast<float>(highestScore));
}
@ -495,9 +489,9 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
// MARK: Punctuation list
if ((char)charCode == '`') {
if (_languageModel->hasUnigramsForKey(string("_punctuation_list"))) {
if (_languageModel->hasUnigramsForKey("_punctuation_list")) {
if (_bpmfReadingBuffer->isEmpty()) {
_builder->insertReadingAtCursor(string("_punctuation_list"));
_builder->insertReadingAtCursor("_punctuation_list");
NSString *poppedText = [self _popOverflowComposingTextAndWalk];
InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState];
inputting.poppedText = poppedText;
@ -514,28 +508,28 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
// MARK: Punctuation
// if nothing is matched, see if it's a punctuation key for current layout.
string punctuationNamePrefix;
std::string punctuationNamePrefix;
if ([input isControlHold]) {
punctuationNamePrefix = string("_ctrl_punctuation_");
punctuationNamePrefix = "_ctrl_punctuation_";
} else if (Preferences.halfWidthPunctuationEnabled) {
punctuationNamePrefix = string("_half_punctuation_");
punctuationNamePrefix = "_half_punctuation_";
} else {
punctuationNamePrefix = string("_punctuation_");
punctuationNamePrefix = "_punctuation_";
}
string layout = [self _currentLayout];
string customPunctuation = punctuationNamePrefix + layout + string(1, (char)charCode);
std::string layout = [self _currentLayout];
std::string customPunctuation = punctuationNamePrefix + layout + std::string(1, (char)charCode);
if ([self _handlePunctuation:customPunctuation state:state usingVerticalMode:input.useVerticalMode stateCallback:stateCallback errorCallback:errorCallback]) {
return YES;
}
// if nothing is matched, see if it's a punctuation key.
string punctuation = punctuationNamePrefix + string(1, (char)charCode);
std::string punctuation = punctuationNamePrefix + std::string(1, (char)charCode);
if ([self _handlePunctuation:punctuation state:state usingVerticalMode:input.useVerticalMode stateCallback:stateCallback errorCallback:errorCallback]) {
return YES;
}
if ([state isKindOfClass:[InputStateNotEmpty class]] && (char)charCode >= 'A' && (char)charCode <= 'Z') {
string letter = string("_letter_") + string(1, (char)charCode);
std::string letter = std::string("_letter_") + std::string(1, (char)charCode);
if ([self _handlePunctuation:letter state:state usingVerticalMode:input.useVerticalMode stateCallback:stateCallback errorCallback:errorCallback]) {
return YES;
}
@ -819,7 +813,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
return YES;
}
- (BOOL)_handlePunctuation:(string)customPunctuation state:(InputState *)state usingVerticalMode:(BOOL)useVerticalMode stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback
- (BOOL)_handlePunctuation:(std::string)customPunctuation state:(InputState *)state usingVerticalMode:(BOOL)useVerticalMode stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback
{
if (!_languageModel->hasUnigramsForKey(customPunctuation)) {
return NO;
@ -1120,22 +1114,22 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
}
if (_inputMode == InputModePlainBopomofo) {
string layout = [self _currentLayout];
string punctuationNamePrefix;
std::string layout = [self _currentLayout];
std::string punctuationNamePrefix;
if ([input isControlHold]) {
punctuationNamePrefix = string("_ctrl_punctuation_");
punctuationNamePrefix = "_ctrl_punctuation_";
} else if (Preferences.halfWidthPunctuationEnabled) {
punctuationNamePrefix = string("_half_punctuation_");
punctuationNamePrefix = "_half_punctuation_";
} else {
punctuationNamePrefix = string("_punctuation_");
punctuationNamePrefix = "_punctuation_";
}
string customPunctuation = punctuationNamePrefix + layout + string(1, (char)charCode);
string punctuation = punctuationNamePrefix + string(1, (char)charCode);
std::string customPunctuation = punctuationNamePrefix + layout + std::string(1, (char)charCode);
std::string punctuation = punctuationNamePrefix + std::string(1, (char)charCode);
BOOL shouldAutoSelectCandidate = _bpmfReadingBuffer->isValidKey((char)charCode) || _languageModel->hasUnigramsForKey(customPunctuation) || _languageModel->hasUnigramsForKey(punctuation);
if (!shouldAutoSelectCandidate && (char)charCode >= 'A' && (char)charCode <= 'Z') {
string letter = string("_letter_") + string(1, (char)charCode);
std::string letter = std::string("_letter_") + std::string(1, (char)charCode);
if (_languageModel->hasUnigramsForKey(letter)) {
shouldAutoSelectCandidate = YES;
}
@ -1175,9 +1169,9 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
// we must do some Unicode codepoint counting to find the actual cursor location for the client
// i.e. we need to take UTF-16 into consideration, for which a surrogate pair takes 2 UniChars
// locations
for (vector<NodeAnchor>::iterator wi = _walkedNodes.begin(), we = _walkedNodes.end(); wi != we; ++wi) {
for (std::vector<Formosa::Gramambular::NodeAnchor>::iterator wi = _walkedNodes.begin(), we = _walkedNodes.end(); wi != we; ++wi) {
if ((*wi).node) {
string nodeStr = (*wi).node->currentKeyValue().value;
std::string nodeStr = (*wi).node->currentKeyValue().value;
NSString *valueString = [NSString stringWithUTF8String:nodeStr.c_str()];
[composingBuffer appendString:valueString];
@ -1243,7 +1237,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
// retrieve the most likely trellis, i.e. a Maximum Likelihood Estimation
// of the best possible Mandarin characters given the input syllables,
// using the Viterbi algorithm implemented in the Gramambular library
Walker walker(&_builder->grid());
Formosa::Gramambular::Walker walker(&_builder->grid());
// the reverse walk traces the trellis from the end
_walkedNodes = walker.reverseWalk(_builder->grid().width());
@ -1253,7 +1247,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
// if DEBUG is defined, a GraphViz file is written to kGraphVizOutputfile
#if DEBUG
string dotDump = _builder->grid().dumpDOT();
std::string dotDump = _builder->grid().dumpDOT();
NSString *dotStr = [NSString stringWithUTF8String:dotDump.c_str()];
NSError *error = nil;
@ -1276,7 +1270,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
if (_builder->grid().width() > (size_t)composingBufferSize) {
if (_walkedNodes.size() > 0) {
NodeAnchor &anchor = _walkedNodes[0];
Formosa::Gramambular::NodeAnchor &anchor = _walkedNodes[0];
poppedText = [NSString stringWithUTF8String:anchor.node->currentKeyValue().value.c_str()];
_builder->removeHeadReadings(anchor.spanningLength);
}
@ -1291,15 +1285,15 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
NSMutableArray *candidatesArray = [[NSMutableArray alloc] init];
size_t cursorIndex = [self _actualCandidateCursorIndex];
vector<NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
std::vector<Formosa::Gramambular::NodeAnchor> nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex);
// sort the nodes, so that longer nodes (representing longer phrases) are placed at the top of the candidate list
stable_sort(nodes.begin(), nodes.end(), NodeAnchorDescendingSorter());
// then use the C++ trick to retrieve the candidates for each node at/crossing the cursor
for (vector<NodeAnchor>::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) {
const vector<KeyValuePair> &candidates = (*ni).node->candidates();
for (vector<KeyValuePair>::const_iterator ci = candidates.begin(), ce = candidates.end(); ci != ce; ++ci) {
for (std::vector<Formosa::Gramambular::NodeAnchor>::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) {
const std::vector<Formosa::Gramambular::KeyValuePair> &candidates = (*ni).node->candidates();
for (std::vector<Formosa::Gramambular::KeyValuePair>::const_iterator ci = candidates.begin(), ce = candidates.end(); ci != ce; ++ci) {
[candidatesArray addObject:[NSString stringWithUTF8String:(*ci).value.c_str()]];
}
}
@ -1328,8 +1322,8 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
- (NSArray *)_currentReadings
{
NSMutableArray *readingsArray = [[NSMutableArray alloc] init];
vector<std::string> v = _builder->readings();
for (vector<std::string>::iterator it_i = v.begin(); it_i != v.end(); ++it_i) {
std::vector<std::string> v = _builder->readings();
for (std::vector<std::string>::iterator it_i = v.begin(); it_i != v.end(); ++it_i) {
[readingsArray addObject:[NSString stringWithUTF8String:it_i->c_str()]];
}
return readingsArray;
@ -1337,9 +1331,9 @@ static NSString *const kGraphVizOutputfile = @"/tmp/McBopomofo-visualization.dot
- (nullable InputState *)buildAssociatePhraseStateWithKey:(NSString *)key useVerticalMode:(BOOL)useVerticalMode
{
string cppKey = string(key.UTF8String);
std::string cppKey(key.UTF8String);
if (_languageModel->hasAssociatedPhrasesForKey(cppKey)) {
vector<string> phrases = _languageModel->associatedPhrasesForKey(cppKey);
std::vector<std::string> phrases = _languageModel->associatedPhrasesForKey(cppKey);
NSMutableArray<NSString *> *array = [NSMutableArray array];
for (auto phrase : phrases) {
NSString *item = [[NSString alloc] initWithUTF8String:phrase.c_str()];

View File

@ -28,15 +28,12 @@
@import VXHanConvert;
@import OpenCCBridge;
using namespace std;
using namespace McBopomofo;
static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; // 1.5 hr.
static McBopomofoLM gLanguageModelMcBopomofo;
static McBopomofoLM gLanguageModelPlainBopomofo;
static UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
static McBopomofo::McBopomofoLM gLanguageModelMcBopomofo;
static McBopomofo::McBopomofoLM gLanguageModelPlainBopomofo;
static McBopomofo::UserOverrideModel gUserOverrideModel(kUserOverrideModelCapacity, kObservedOverrideHalflife);
static NSString *const kUserDataTemplateName = @"template-data";
static NSString *const kExcludedPhrasesMcBopomofoTemplateName = @"template-exclude-phrases";
@ -46,14 +43,14 @@ static NSString *const kTemplateExtension = @".txt";
@implementation LanguageModelManager
static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, McBopomofoLM &lm)
static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, McBopomofo::McBopomofoLM &lm)
{
Class cls = NSClassFromString(@"McBopomofoInputMethodController");
NSString *dataPath = [[NSBundle bundleForClass:cls] pathForResource:filenameWithoutExtension ofType:@"txt"];
lm.loadLanguageModel([dataPath UTF8String]);
}
static void LTLoadAssociatedPhrases(McBopomofoLM &lm)
static void LTLoadAssociatedPhrases(McBopomofo::McBopomofoLM &lm)
{
Class cls = NSClassFromString(@"McBopomofoInputMethodController");
NSString *dataPath = [[NSBundle bundleForClass:cls] pathForResource:@"associated-phrases" ofType:@"txt"];
@ -104,7 +101,7 @@ static void LTLoadAssociatedPhrases(McBopomofoLM &lm)
+ (void)setupDataModelValueConverter
{
auto converter = [](string input) {
auto converter = [](std::string input) {
if (!Preferences.chineseConversionEnabled) {
return input;
}
@ -119,7 +116,7 @@ static void LTLoadAssociatedPhrases(McBopomofoLM &lm)
} else {
text = [OpenCCBridge convertToSimplified:text];
}
return string(text.UTF8String);
return std::string(text.UTF8String);
};
gLanguageModelMcBopomofo.setExternalConverter(converter);
@ -194,9 +191,9 @@ static void LTLoadAssociatedPhrases(McBopomofoLM &lm)
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase key:(NSString *)key NS_SWIFT_NAME(checkIfExist(userPhrase:key:))
{
string unigramKey = string(key.UTF8String);
vector<Unigram> unigrams = gLanguageModelMcBopomofo.unigramsForKey(unigramKey);
string userPhraseString = string(userPhrase.UTF8String);
std::string unigramKey(key.UTF8String);
std::vector<Formosa::Gramambular::Unigram> unigrams = gLanguageModelMcBopomofo.unigramsForKey(unigramKey);
std::string userPhraseString(userPhrase.UTF8String);
for (auto unigram : unigrams) {
if (unigram.keyValue.value == userPhraseString) {
return YES;
@ -282,12 +279,12 @@ static void LTLoadAssociatedPhrases(McBopomofoLM &lm)
return [[self dataFolderPath] stringByAppendingPathComponent:@"phrases-replacement.txt"];
}
+ (McBopomofoLM *)languageModelMcBopomofo
+ (McBopomofo::McBopomofoLM *)languageModelMcBopomofo
{
return &gLanguageModelMcBopomofo;
}
+ (McBopomofoLM *)languageModelPlainBopomofo
+ (McBopomofo::McBopomofoLM *)languageModelPlainBopomofo
{
return &gLanguageModelPlainBopomofo;
}