CNS // Phase 7: Modify AllUnigrams to let CNSLM work.
This commit is contained in:
parent
50cdb6be6c
commit
2320c2657b
|
@ -19,6 +19,7 @@ vChewingLM::vChewingLM()
|
||||||
vChewingLM::~vChewingLM()
|
vChewingLM::~vChewingLM()
|
||||||
{
|
{
|
||||||
m_languageModel.close();
|
m_languageModel.close();
|
||||||
|
m_cnsModel.close();
|
||||||
m_userPhrases.close();
|
m_userPhrases.close();
|
||||||
m_excludedPhrases.close();
|
m_excludedPhrases.close();
|
||||||
m_phraseReplacement.close();
|
m_phraseReplacement.close();
|
||||||
|
@ -35,8 +36,8 @@ void vChewingLM::loadLanguageModel(const char* languageModelDataPath)
|
||||||
void vChewingLM::loadCNSData(const char* cnsDataPath)
|
void vChewingLM::loadCNSData(const char* cnsDataPath)
|
||||||
{
|
{
|
||||||
if (cnsDataPath) {
|
if (cnsDataPath) {
|
||||||
m_cnsData.close();
|
m_cnsModel.close();
|
||||||
m_cnsData.open(cnsDataPath);
|
m_cnsModel.open(cnsDataPath);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,7 +71,8 @@ const vector<Unigram> vChewingLM::unigramsForKey(const string& key)
|
||||||
{
|
{
|
||||||
vector<Unigram> allUnigrams;
|
vector<Unigram> allUnigrams;
|
||||||
vector<Unigram> userUnigrams;
|
vector<Unigram> userUnigrams;
|
||||||
|
vector<Unigram> cnsUnigrams;
|
||||||
|
|
||||||
unordered_set<string> excludedValues;
|
unordered_set<string> excludedValues;
|
||||||
unordered_set<string> insertedValues;
|
unordered_set<string> insertedValues;
|
||||||
|
|
||||||
|
@ -90,8 +92,14 @@ const vector<Unigram> vChewingLM::unigramsForKey(const string& key)
|
||||||
vector<Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
vector<Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
||||||
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
|
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_cnsModel.hasUnigramsForKey(key)) {
|
||||||
|
vector<Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
|
||||||
|
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
|
||||||
|
}
|
||||||
|
|
||||||
allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end());
|
allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end());
|
||||||
|
allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end());
|
||||||
return allUnigrams;
|
return allUnigrams;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,6 +122,16 @@ bool vChewingLM::phraseReplacementEnabled()
|
||||||
return m_phraseReplacementEnabled;
|
return m_phraseReplacementEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void vChewingLM::setCNSEnabled(bool enabled)
|
||||||
|
{
|
||||||
|
m_CNSEnabled = enabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool vChewingLM::CNSEnabled()
|
||||||
|
{
|
||||||
|
return m_CNSEnabled;
|
||||||
|
}
|
||||||
|
|
||||||
const vector<Unigram> vChewingLM::filterAndTransformUnigrams(vector<Unigram> unigrams, const unordered_set<string>& excludedValues, unordered_set<string>& insertedValues)
|
const vector<Unigram> vChewingLM::filterAndTransformUnigrams(vector<Unigram> unigrams, const unordered_set<string>& excludedValues, unordered_set<string>& insertedValues)
|
||||||
{
|
{
|
||||||
vector<Unigram> results;
|
vector<Unigram> results;
|
||||||
|
|
|
@ -24,31 +24,34 @@ class vChewingLM : public LanguageModel {
|
||||||
public:
|
public:
|
||||||
vChewingLM();
|
vChewingLM();
|
||||||
~vChewingLM();
|
~vChewingLM();
|
||||||
|
|
||||||
void loadLanguageModel(const char* languageModelPath);
|
void loadLanguageModel(const char* languageModelPath);
|
||||||
void loadCNSData(const char* cnsDataPath);
|
void loadCNSData(const char* cnsDataPath);
|
||||||
void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath);
|
void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath);
|
||||||
|
|
||||||
void loadPhraseReplacementMap(const char* phraseReplacementPath);
|
void loadPhraseReplacementMap(const char* phraseReplacementPath);
|
||||||
|
|
||||||
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
|
const vector<Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
|
||||||
const vector<Unigram> unigramsForKey(const string& key);
|
const vector<Unigram> unigramsForKey(const string& key);
|
||||||
bool hasUnigramsForKey(const string& key);
|
bool hasUnigramsForKey(const string& key);
|
||||||
|
|
||||||
void setPhraseReplacementEnabled(bool enabled);
|
void setPhraseReplacementEnabled(bool enabled);
|
||||||
bool phraseReplacementEnabled();
|
bool phraseReplacementEnabled();
|
||||||
|
|
||||||
|
void setCNSEnabled(bool enabled);
|
||||||
|
bool CNSEnabled();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const vector<Unigram> filterAndTransformUnigrams(vector<Unigram> unigrams,
|
const vector<Unigram> filterAndTransformUnigrams(vector<Unigram> unigrams,
|
||||||
const std::unordered_set<string>& excludedValues,
|
const std::unordered_set<string>& excludedValues,
|
||||||
std::unordered_set<string>& insertedValues);
|
std::unordered_set<string>& insertedValues);
|
||||||
|
|
||||||
FastLM m_languageModel;
|
FastLM m_languageModel;
|
||||||
CNSLM m_cnsData;
|
CNSLM m_cnsModel;
|
||||||
UserPhrasesLM m_userPhrases;
|
UserPhrasesLM m_userPhrases;
|
||||||
UserPhrasesLM m_excludedPhrases;
|
UserPhrasesLM m_excludedPhrases;
|
||||||
PhraseReplacementMap m_phraseReplacement;
|
PhraseReplacementMap m_phraseReplacement;
|
||||||
bool m_phraseReplacementEnabled;
|
bool m_phraseReplacementEnabled;
|
||||||
|
bool m_CNSEnabled;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue