LMInstantiator // Dealing with Namespace Pollution.
This commit is contained in:
parent
5b07811b02
commit
3903ac79a7
|
@ -20,12 +20,12 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
|||
#ifndef LMInstantiator_H
|
||||
#define LMInstantiator_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "UserPhrasesLM.h"
|
||||
#include "ParselessLM.h"
|
||||
#include "CNSLM.h"
|
||||
#include "PhraseReplacementMap.h"
|
||||
#include "AssociatedPhrases.h"
|
||||
#include "CNSLM.h"
|
||||
#include "ParselessLM.h"
|
||||
#include "PhraseReplacementMap.h"
|
||||
#include "UserPhrasesLM.h"
|
||||
#include <stdio.h>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace vChewing {
|
||||
|
@ -54,7 +54,7 @@ using namespace Taiyan::Gramambular;
|
|||
/// model while launching and to load the user phrases anytime if the custom
|
||||
/// files are modified. It does not keep references to the data paths, so
|
||||
/// you have to pass the paths when you ask it to do loading.
|
||||
class LMInstantiator : public LanguageModel {
|
||||
class LMInstantiator : public Taiyan::Gramambular::LanguageModel {
|
||||
public:
|
||||
LMInstantiator();
|
||||
~LMInstantiator();
|
||||
|
@ -83,14 +83,14 @@ public:
|
|||
void loadPhraseReplacementMap(const char* phraseReplacementPath);
|
||||
|
||||
/// Not implemented since we do not have data to provide bigram function.
|
||||
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
|
||||
const std::vector<Taiyan::Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key);
|
||||
/// Returns a list of available unigram for the given key.
|
||||
/// @param key A string represents the BPMF reading or a symbol key. For
|
||||
/// @param key A std::string that represents the BPMF reading or a symbol key. For
|
||||
/// example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on.
|
||||
const vector<Unigram> unigramsForKey(const string& key);
|
||||
const std::vector<Taiyan::Gramambular::Unigram> unigramsForKey(const std::string& key);
|
||||
/// If the model has unigrams for the given key.
|
||||
/// @param key The key.
|
||||
bool hasUnigramsForKey(const string& key);
|
||||
bool hasUnigramsForKey(const std::string& key);
|
||||
|
||||
/// Enables or disables phrase replacement.
|
||||
void setPhraseReplacementEnabled(bool enabled);
|
||||
|
@ -107,10 +107,10 @@ public:
|
|||
/// Whether the external converter is enabled or not.
|
||||
bool externalConverterEnabled();
|
||||
/// Sets a lambda that is used to convert the values of unigrams.
|
||||
void setExternalConverter(std::function<string(string)> externalConverter);
|
||||
void setExternalConverter(std::function<std::string(std::string)> externalConverter);
|
||||
|
||||
const vector<std::string> associatedPhrasesForKey(const string& key);
|
||||
bool hasAssociatedPhrasesForKey(const string& key);
|
||||
const std::vector<std::string> associatedPhrasesForKey(const std::string& key);
|
||||
bool hasAssociatedPhrasesForKey(const std::string& key);
|
||||
|
||||
|
||||
protected:
|
||||
|
@ -121,9 +121,9 @@ protected:
|
|||
/// @param insertedValues The values for unigrams already in the results.
|
||||
/// It helps to prevent duplicated unigrams. Please note that the method
|
||||
/// has a side effect that it inserts values to `insertedValues`.
|
||||
const vector<Unigram> filterAndTransformUnigrams(const vector<Unigram> unigrams,
|
||||
const std::unordered_set<string>& excludedValues,
|
||||
std::unordered_set<string>& insertedValues);
|
||||
const std::vector<Taiyan::Gramambular::Unigram> filterAndTransformUnigrams(const std::vector<Taiyan::Gramambular::Unigram> unigrams,
|
||||
const std::unordered_set<std::string>& excludedValues,
|
||||
std::unordered_set<std::string>& insertedValues);
|
||||
|
||||
ParselessLM m_languageModel;
|
||||
CNSLM m_cnsModel;
|
||||
|
@ -134,7 +134,7 @@ protected:
|
|||
bool m_phraseReplacementEnabled;
|
||||
bool m_cnsEnabled;
|
||||
bool m_externalConverterEnabled;
|
||||
std::function<string(string)> m_externalConverter;
|
||||
std::function<std::string(std::string)> m_externalConverter;
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
|||
#include <algorithm>
|
||||
#include <iterator>
|
||||
|
||||
using namespace vChewing;
|
||||
namespace vChewing {
|
||||
|
||||
LMInstantiator::LMInstantiator()
|
||||
{
|
||||
|
@ -92,16 +92,16 @@ void LMInstantiator::loadPhraseReplacementMap(const char* phraseReplacementPath)
|
|||
}
|
||||
}
|
||||
|
||||
const vector<Bigram> LMInstantiator::bigramsForKeys(const string& preceedingKey, const string& key)
|
||||
const std::vector<Taiyan::Gramambular::Bigram> LMInstantiator::bigramsForKeys(const std::string& preceedingKey, const std::string& key)
|
||||
{
|
||||
return vector<Bigram>();
|
||||
return std::vector<Taiyan::Gramambular::Bigram>();
|
||||
}
|
||||
|
||||
const vector<Unigram> LMInstantiator::unigramsForKey(const string& key)
|
||||
const std::vector<Taiyan::Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string& key)
|
||||
{
|
||||
if (key == " ") {
|
||||
vector<Unigram> spaceUnigrams;
|
||||
Unigram g;
|
||||
std::vector<Taiyan::Gramambular::Unigram> spaceUnigrams;
|
||||
Taiyan::Gramambular::Unigram g;
|
||||
g.keyValue.key = " ";
|
||||
g.keyValue.value = " ";
|
||||
g.score = 0;
|
||||
|
@ -109,32 +109,32 @@ const vector<Unigram> LMInstantiator::unigramsForKey(const string& key)
|
|||
return spaceUnigrams;
|
||||
}
|
||||
|
||||
vector<Unigram> allUnigrams;
|
||||
vector<Unigram> userUnigrams;
|
||||
vector<Unigram> cnsUnigrams;
|
||||
std::vector<Taiyan::Gramambular::Unigram> allUnigrams;
|
||||
std::vector<Taiyan::Gramambular::Unigram> userUnigrams;
|
||||
std::vector<Taiyan::Gramambular::Unigram> cnsUnigrams;
|
||||
|
||||
unordered_set<string> excludedValues;
|
||||
unordered_set<string> insertedValues;
|
||||
std::unordered_set<std::string> excludedValues;
|
||||
std::unordered_set<std::string> insertedValues;
|
||||
|
||||
if (m_excludedPhrases.hasUnigramsForKey(key)) {
|
||||
vector<Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
|
||||
std::vector<Taiyan::Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
|
||||
transform(excludedUnigrams.begin(), excludedUnigrams.end(),
|
||||
inserter(excludedValues, excludedValues.end()),
|
||||
[](const Unigram& u) { return u.keyValue.value; });
|
||||
[](const Taiyan::Gramambular::Unigram& u) { return u.keyValue.value; });
|
||||
}
|
||||
|
||||
if (m_userPhrases.hasUnigramsForKey(key)) {
|
||||
vector<Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
|
||||
std::vector<Taiyan::Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
|
||||
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
if (m_languageModel.hasUnigramsForKey(key)) {
|
||||
vector<Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
||||
std::vector<Taiyan::Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
||||
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) {
|
||||
vector<Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
|
||||
std::vector<Taiyan::Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
|
||||
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
|
@ -143,7 +143,7 @@ const vector<Unigram> LMInstantiator::unigramsForKey(const string& key)
|
|||
return allUnigrams;
|
||||
}
|
||||
|
||||
bool LMInstantiator::hasUnigramsForKey(const string& key)
|
||||
bool LMInstantiator::hasUnigramsForKey(const std::string& key)
|
||||
{
|
||||
if (key == " ") {
|
||||
return true;
|
||||
|
@ -185,36 +185,36 @@ bool LMInstantiator::externalConverterEnabled()
|
|||
return m_externalConverterEnabled;
|
||||
}
|
||||
|
||||
void LMInstantiator::setExternalConverter(std::function<string(string)> externalConverter)
|
||||
void LMInstantiator::setExternalConverter(std::function<std::string(std::string)> externalConverter)
|
||||
{
|
||||
m_externalConverter = externalConverter;
|
||||
}
|
||||
|
||||
const vector<Unigram> LMInstantiator::filterAndTransformUnigrams(const vector<Unigram> unigrams, const unordered_set<string>& excludedValues, unordered_set<string>& insertedValues)
|
||||
const std::vector<Taiyan::Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(const std::vector<Taiyan::Gramambular::Unigram> unigrams, const std::unordered_set<std::string>& excludedValues, std::unordered_set<std::string>& insertedValues)
|
||||
{
|
||||
vector<Unigram> results;
|
||||
std::vector<Taiyan::Gramambular::Unigram> results;
|
||||
|
||||
for (auto&& unigram : unigrams) {
|
||||
// excludedValues filters out the unigrams with the original value.
|
||||
// insertedValues filters out the ones with the converted value
|
||||
string originalValue = unigram.keyValue.value;
|
||||
std::string originalValue = unigram.keyValue.value;
|
||||
if (excludedValues.find(originalValue) != excludedValues.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
string value = originalValue;
|
||||
std::string value = originalValue;
|
||||
if (m_phraseReplacementEnabled) {
|
||||
string replacement = m_phraseReplacement.valueForKey(value);
|
||||
std::string replacement = m_phraseReplacement.valueForKey(value);
|
||||
if (replacement != "") {
|
||||
value = replacement;
|
||||
}
|
||||
}
|
||||
if (m_externalConverterEnabled && m_externalConverter) {
|
||||
string replacement = m_externalConverter(value);
|
||||
std::string replacement = m_externalConverter(value);
|
||||
value = replacement;
|
||||
}
|
||||
if (insertedValues.find(value) == insertedValues.end()) {
|
||||
Unigram g;
|
||||
Taiyan::Gramambular::Unigram g;
|
||||
g.keyValue.value = value;
|
||||
g.keyValue.key = unigram.keyValue.key;
|
||||
g.score = unigram.score;
|
||||
|
@ -225,12 +225,14 @@ const vector<Unigram> LMInstantiator::filterAndTransformUnigrams(const vector<Un
|
|||
return results;
|
||||
}
|
||||
|
||||
const vector<std::string> LMInstantiator::associatedPhrasesForKey(const string& key)
|
||||
const std::vector<std::string> LMInstantiator::associatedPhrasesForKey(const std::string& key)
|
||||
{
|
||||
return m_associatedPhrases.valuesForKey(key);
|
||||
}
|
||||
|
||||
bool LMInstantiator::hasAssociatedPhrasesForKey(const string& key)
|
||||
bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string& key)
|
||||
{
|
||||
return m_associatedPhrases.hasValuesForKey(key);
|
||||
}
|
||||
|
||||
} // namespace vChewing
|
||||
|
|
Loading…
Reference in New Issue