LMInstantiator // Dealing with Namespace Pollution.
This commit is contained in:
parent
5b07811b02
commit
3903ac79a7
|
@ -20,12 +20,12 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#ifndef LMInstantiator_H
|
#ifndef LMInstantiator_H
|
||||||
#define LMInstantiator_H
|
#define LMInstantiator_H
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include "UserPhrasesLM.h"
|
|
||||||
#include "ParselessLM.h"
|
|
||||||
#include "CNSLM.h"
|
|
||||||
#include "PhraseReplacementMap.h"
|
|
||||||
#include "AssociatedPhrases.h"
|
#include "AssociatedPhrases.h"
|
||||||
|
#include "CNSLM.h"
|
||||||
|
#include "ParselessLM.h"
|
||||||
|
#include "PhraseReplacementMap.h"
|
||||||
|
#include "UserPhrasesLM.h"
|
||||||
|
#include <stdio.h>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing {
|
||||||
|
@ -54,7 +54,7 @@ using namespace Taiyan::Gramambular;
|
||||||
/// model while launching and to load the user phrases anytime if the custom
|
/// model while launching and to load the user phrases anytime if the custom
|
||||||
/// files are modified. It does not keep the reference of the data paths but
|
/// files are modified. It does not keep the reference of the data paths but
|
||||||
/// you have to pass the paths when you ask it to do loading.
|
/// you have to pass the paths when you ask it to do loading.
|
||||||
class LMInstantiator : public LanguageModel {
|
class LMInstantiator : public Taiyan::Gramambular::LanguageModel {
|
||||||
public:
|
public:
|
||||||
LMInstantiator();
|
LMInstantiator();
|
||||||
~LMInstantiator();
|
~LMInstantiator();
|
||||||
|
@ -83,14 +83,14 @@ public:
|
||||||
void loadPhraseReplacementMap(const char* phraseReplacementPath);
|
void loadPhraseReplacementMap(const char* phraseReplacementPath);
|
||||||
|
|
||||||
/// Not implemented since we do not have data to provide bigram function.
|
/// Not implemented since we do not have data to provide bigram function.
|
||||||
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
|
const std::vector<Taiyan::Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key);
|
||||||
/// Returns a list of available unigrams for the given key.
|
/// Returns a list of available unigrams for the given key.
|
||||||
/// @param key A string represents the BPMF reading or a symbol key. For
|
/// @param key A std::string represents the BPMF reading or a symbol key. For
|
||||||
/// example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on.
|
/// example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on.
|
||||||
const vector<Unigram> unigramsForKey(const string& key);
|
const std::vector<Taiyan::Gramambular::Unigram> unigramsForKey(const std::string& key);
|
||||||
/// If the model has unigrams for the given key.
|
/// If the model has unigrams for the given key.
|
||||||
/// @param key The key.
|
/// @param key The key.
|
||||||
bool hasUnigramsForKey(const string& key);
|
bool hasUnigramsForKey(const std::string& key);
|
||||||
|
|
||||||
/// Enables or disables phrase replacement.
|
/// Enables or disables phrase replacement.
|
||||||
void setPhraseReplacementEnabled(bool enabled);
|
void setPhraseReplacementEnabled(bool enabled);
|
||||||
|
@ -107,10 +107,10 @@ public:
|
||||||
/// Whether the external converter is enabled or not.
|
/// Whether the external converter is enabled or not.
|
||||||
bool externalConverterEnabled();
|
bool externalConverterEnabled();
|
||||||
/// Sets a lambda that is used to convert the values of unigrams.
|
/// Sets a lambda that is used to convert the values of unigrams.
|
||||||
void setExternalConverter(std::function<string(string)> externalConverter);
|
void setExternalConverter(std::function<std::string(std::string)> externalConverter);
|
||||||
|
|
||||||
const vector<std::string> associatedPhrasesForKey(const string& key);
|
const std::vector<std::string> associatedPhrasesForKey(const std::string& key);
|
||||||
bool hasAssociatedPhrasesForKey(const string& key);
|
bool hasAssociatedPhrasesForKey(const std::string& key);
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -121,9 +121,9 @@ protected:
|
||||||
/// @param insertedValues The values for unigrams already in the results.
|
/// @param insertedValues The values for unigrams already in the results.
|
||||||
/// It helps to prevent duplicated unigrams. Please note that the method
|
/// It helps to prevent duplicated unigrams. Please note that the method
|
||||||
/// has a side effect that it inserts values to `insertedValues`.
|
/// has a side effect that it inserts values to `insertedValues`.
|
||||||
const vector<Unigram> filterAndTransformUnigrams(const vector<Unigram> unigrams,
|
const std::vector<Taiyan::Gramambular::Unigram> filterAndTransformUnigrams(const std::vector<Taiyan::Gramambular::Unigram> unigrams,
|
||||||
const std::unordered_set<string>& excludedValues,
|
const std::unordered_set<std::string>& excludedValues,
|
||||||
std::unordered_set<string>& insertedValues);
|
std::unordered_set<std::string>& insertedValues);
|
||||||
|
|
||||||
ParselessLM m_languageModel;
|
ParselessLM m_languageModel;
|
||||||
CNSLM m_cnsModel;
|
CNSLM m_cnsModel;
|
||||||
|
@ -134,7 +134,7 @@ protected:
|
||||||
bool m_phraseReplacementEnabled;
|
bool m_phraseReplacementEnabled;
|
||||||
bool m_cnsEnabled;
|
bool m_cnsEnabled;
|
||||||
bool m_externalConverterEnabled;
|
bool m_externalConverterEnabled;
|
||||||
std::function<string(string)> m_externalConverter;
|
std::function<std::string(std::string)> m_externalConverter;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,7 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
|
||||||
using namespace vChewing;
|
namespace vChewing {
|
||||||
|
|
||||||
LMInstantiator::LMInstantiator()
|
LMInstantiator::LMInstantiator()
|
||||||
{
|
{
|
||||||
|
@ -92,49 +92,49 @@ void LMInstantiator::loadPhraseReplacementMap(const char* phraseReplacementPath)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<Bigram> LMInstantiator::bigramsForKeys(const string& preceedingKey, const string& key)
|
const std::vector<Taiyan::Gramambular::Bigram> LMInstantiator::bigramsForKeys(const std::string& preceedingKey, const std::string& key)
|
||||||
{
|
{
|
||||||
return vector<Bigram>();
|
return std::vector<Taiyan::Gramambular::Bigram>();
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<Unigram> LMInstantiator::unigramsForKey(const string& key)
|
const std::vector<Taiyan::Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string& key)
|
||||||
{
|
{
|
||||||
if (key == " ") {
|
if (key == " ") {
|
||||||
vector<Unigram> spaceUnigrams;
|
std::vector<Taiyan::Gramambular::Unigram> spaceUnigrams;
|
||||||
Unigram g;
|
Taiyan::Gramambular::Unigram g;
|
||||||
g.keyValue.key = " ";
|
g.keyValue.key = " ";
|
||||||
g.keyValue.value= " ";
|
g.keyValue.value = " ";
|
||||||
g.score = 0;
|
g.score = 0;
|
||||||
spaceUnigrams.push_back(g);
|
spaceUnigrams.push_back(g);
|
||||||
return spaceUnigrams;
|
return spaceUnigrams;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<Unigram> allUnigrams;
|
std::vector<Taiyan::Gramambular::Unigram> allUnigrams;
|
||||||
vector<Unigram> userUnigrams;
|
std::vector<Taiyan::Gramambular::Unigram> userUnigrams;
|
||||||
vector<Unigram> cnsUnigrams;
|
std::vector<Taiyan::Gramambular::Unigram> cnsUnigrams;
|
||||||
|
|
||||||
unordered_set<string> excludedValues;
|
std::unordered_set<std::string> excludedValues;
|
||||||
unordered_set<string> insertedValues;
|
std::unordered_set<std::string> insertedValues;
|
||||||
|
|
||||||
if (m_excludedPhrases.hasUnigramsForKey(key)) {
|
if (m_excludedPhrases.hasUnigramsForKey(key)) {
|
||||||
vector<Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
|
std::vector<Taiyan::Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
|
||||||
transform(excludedUnigrams.begin(), excludedUnigrams.end(),
|
transform(excludedUnigrams.begin(), excludedUnigrams.end(),
|
||||||
inserter(excludedValues, excludedValues.end()),
|
inserter(excludedValues, excludedValues.end()),
|
||||||
[](const Unigram& u) { return u.keyValue.value; });
|
[](const Taiyan::Gramambular::Unigram& u) { return u.keyValue.value; });
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_userPhrases.hasUnigramsForKey(key)) {
|
if (m_userPhrases.hasUnigramsForKey(key)) {
|
||||||
vector<Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
|
std::vector<Taiyan::Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
|
||||||
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
|
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_languageModel.hasUnigramsForKey(key)) {
|
if (m_languageModel.hasUnigramsForKey(key)) {
|
||||||
vector<Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
std::vector<Taiyan::Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
||||||
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
|
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) {
|
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) {
|
||||||
vector<Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
|
std::vector<Taiyan::Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
|
||||||
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
|
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,7 +143,7 @@ const vector<Unigram> LMInstantiator::unigramsForKey(const string& key)
|
||||||
return allUnigrams;
|
return allUnigrams;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LMInstantiator::hasUnigramsForKey(const string& key)
|
bool LMInstantiator::hasUnigramsForKey(const std::string& key)
|
||||||
{
|
{
|
||||||
if (key == " ") {
|
if (key == " ") {
|
||||||
return true;
|
return true;
|
||||||
|
@ -185,36 +185,36 @@ bool LMInstantiator::externalConverterEnabled()
|
||||||
return m_externalConverterEnabled;
|
return m_externalConverterEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LMInstantiator::setExternalConverter(std::function<string(string)> externalConverter)
|
void LMInstantiator::setExternalConverter(std::function<std::string(std::string)> externalConverter)
|
||||||
{
|
{
|
||||||
m_externalConverter = externalConverter;
|
m_externalConverter = externalConverter;
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<Unigram> LMInstantiator::filterAndTransformUnigrams(const vector<Unigram> unigrams, const unordered_set<string>& excludedValues, unordered_set<string>& insertedValues)
|
const std::vector<Taiyan::Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(const std::vector<Taiyan::Gramambular::Unigram> unigrams, const std::unordered_set<std::string>& excludedValues, std::unordered_set<std::string>& insertedValues)
|
||||||
{
|
{
|
||||||
vector<Unigram> results;
|
std::vector<Taiyan::Gramambular::Unigram> results;
|
||||||
|
|
||||||
for (auto&& unigram : unigrams) {
|
for (auto&& unigram : unigrams) {
|
||||||
// excludedValues filters out the unigrams with the original value.
|
// excludedValues filters out the unigrams with the original value.
|
||||||
// insertedValues filters out the ones with the converted value
|
// insertedValues filters out the ones with the converted value
|
||||||
string originalValue = unigram.keyValue.value;
|
std::string originalValue = unigram.keyValue.value;
|
||||||
if (excludedValues.find(originalValue) != excludedValues.end()) {
|
if (excludedValues.find(originalValue) != excludedValues.end()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
string value = originalValue;
|
std::string value = originalValue;
|
||||||
if (m_phraseReplacementEnabled) {
|
if (m_phraseReplacementEnabled) {
|
||||||
string replacement = m_phraseReplacement.valueForKey(value);
|
std::string replacement = m_phraseReplacement.valueForKey(value);
|
||||||
if (replacement != "") {
|
if (replacement != "") {
|
||||||
value = replacement;
|
value = replacement;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (m_externalConverterEnabled && m_externalConverter) {
|
if (m_externalConverterEnabled && m_externalConverter) {
|
||||||
string replacement = m_externalConverter(value);
|
std::string replacement = m_externalConverter(value);
|
||||||
value = replacement;
|
value = replacement;
|
||||||
}
|
}
|
||||||
if (insertedValues.find(value) == insertedValues.end()) {
|
if (insertedValues.find(value) == insertedValues.end()) {
|
||||||
Unigram g;
|
Taiyan::Gramambular::Unigram g;
|
||||||
g.keyValue.value = value;
|
g.keyValue.value = value;
|
||||||
g.keyValue.key = unigram.keyValue.key;
|
g.keyValue.key = unigram.keyValue.key;
|
||||||
g.score = unigram.score;
|
g.score = unigram.score;
|
||||||
|
@ -225,12 +225,14 @@ const vector<Unigram> LMInstantiator::filterAndTransformUnigrams(const vector<Un
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
const vector<std::string> LMInstantiator::associatedPhrasesForKey(const string& key)
|
const std::vector<std::string> LMInstantiator::associatedPhrasesForKey(const std::string& key)
|
||||||
{
|
{
|
||||||
return m_associatedPhrases.valuesForKey(key);
|
return m_associatedPhrases.valuesForKey(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LMInstantiator::hasAssociatedPhrasesForKey(const string& key)
|
bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string& key)
|
||||||
{
|
{
|
||||||
return m_associatedPhrases.hasValuesForKey(key);
|
return m_associatedPhrases.hasValuesForKey(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace vChewing
|
||||||
|
|
Loading…
Reference in New Issue