Xcode // Trash useless Cpp files, etc.

This commit is contained in:
ShikiSuen 2022-05-01 23:58:01 +08:00
parent 466bc6fc23
commit 12e13d9524
24 changed files with 3 additions and 2293 deletions

View File

@ -1,51 +0,0 @@
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef LMConsolidator_hpp
#define LMConsolidator_hpp
#include <fstream>
#include <iostream>
#include <map>
#include <regex>
#include <set>
#include <sstream>
#include <stdio.h>
#include <string>
#include <syslog.h>
using namespace std;
namespace vChewing
{
class LMConsolidator
{
public:
static bool CheckPragma(const char *path);
static bool FixEOF(const char *path);
static bool ConsolidateContent(const char *path, bool shouldCheckPragma);
};
} // namespace vChewing
#endif /* LMConsolidator_hpp */

View File

@ -1,176 +0,0 @@
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "LMConsolidator.h"
#include "vChewing-Swift.h"
namespace vChewing
{
constexpr std::string_view FORMATTED_PRAGMA_HEADER =
"# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍";
// HEADER VERIFIER. CREDIT: Shiki Suen
bool LMConsolidator::CheckPragma(const char *path)
{
ifstream zfdCheckPragma(path);
if (zfdCheckPragma.good())
{
string firstLine;
getline(zfdCheckPragma, firstLine);
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str());
if (firstLine != FORMATTED_PRAGMA_HEADER)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS.");
return false;
}
}
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL.");
return true;
}
// EOF FIXER. CREDIT: Shiki Suen.
bool LMConsolidator::FixEOF(const char *path)
{
std::fstream zfdEOFFixerIncomingStream(path);
zfdEOFFixerIncomingStream.seekg(-1, std::ios_base::end);
char z;
zfdEOFFixerIncomingStream.get(z);
if (z != '\n')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n");
std::ofstream zfdEOFFixerOutput(path, std::ios_base::app);
zfdEOFFixerOutput << std::endl;
zfdEOFFixerOutput.close();
if (zfdEOFFixerOutput.fail())
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false;
}
}
zfdEOFFixerIncomingStream.close();
if (zfdEOFFixerIncomingStream.fail())
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS,
"// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false;
}
return true;
} // END: EOF FIXER.
// CONTENT CONSOLIDATOR. CREDIT: Shiki Suen.
bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma)
{
bool pragmaCheckResult = LMConsolidator::CheckPragma(path);
if (pragmaCheckResult && shouldCheckPragma)
{
return true;
}
ifstream zfdContentConsolidatorIncomingStream(path);
vector<string> vecEntry;
while (!zfdContentConsolidatorIncomingStream.eof())
{ // Xcode 13 能用的 ObjCpp 與 Cpp 並無原生支援「\h」這個 Regex 參數的能力,只能逐行處理。
string zfdBuffer;
getline(zfdContentConsolidatorIncomingStream, zfdBuffer);
vecEntry.push_back(zfdBuffer);
}
// 第一遍 for 用來統整每行內的內容。
// regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"),
// sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp /
// objCpp 可能對某些 Regex 寫法有相容性問題。 regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "),
// sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。
regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)");
for (int i = 0; i < vecEntry.size(); i++)
{ // 第一遍 for 用來統整每行內的內容。
if (vecEntry[i].size() != 0)
{ // 不要理會空行,否則給空行加上 endl 等於再加空行。
// RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab
// 等),最後去除每行首尾空格。 vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); //
// 中日韓全形空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, "
// ").c_str(); // Non-Break 型空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedWhiteSpace,
// " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。 vecEntry[i] =
// regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。 vecEntry[i] =
// regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。
// 上述命令分步驟執行容易產生效能問題,故濃縮為下述兩句。
vecEntry[i] = regex_replace(vecEntry[i], sedToConsolidate, " ").c_str();
vecEntry[i] = regex_replace(vecEntry[i], sedToTrim, "").c_str();
}
}
// 在第二遍 for 運算之前,針對 vecEntry 去除重複條目。
std::reverse(vecEntry.begin(), vecEntry.end()); // 先首尾顛倒,免得破壞最新的 override 資訊。
vecEntry.erase(unique(vecEntry.begin(), vecEntry.end()), vecEntry.end()); // 去重複。
std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。
// 統整完畢。開始將統整過的內容寫入檔案。
ofstream zfdContentConsolidatorOutput(path); // 這裡是要從頭開始重寫檔案內容,所以不需要「 ios_base::app 」。
if (!pragmaCheckResult)
{
zfdContentConsolidatorOutput << FORMATTED_PRAGMA_HEADER << endl; // 寫入經過整理處理的 HEADER。
}
for (int i = 0; i < vecEntry.size(); i++)
{ // 第二遍 for 用來寫入統整過的內容。
if (vecEntry[i].size() != 0)
{ // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。
zfdContentConsolidatorOutput << vecEntry[i]
<< endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。
}
}
zfdContentConsolidatorOutput.close();
if (zfdContentConsolidatorOutput.fail())
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS,
"// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false;
}
zfdContentConsolidatorIncomingStream.close();
if (zfdContentConsolidatorIncomingStream.fail())
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. "
"Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false;
}
return true;
} // END: CONTENT CONSOLIDATOR.
} // namespace vChewing

View File

@ -1,155 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "KeyValueBlobReader.h"
namespace vChewing
{
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue *out)
{
static auto new_line = [](char c) { return c == '\n' || c == '\r'; };
static auto blank = [](char c) { return c == ' ' || c == '\t'; };
static auto blank_or_newline = [](char c) { return blank(c) || new_line(c); };
static auto content_char = [](char c) { return !blank(c) && !new_line(c); };
if (state_ == State::ERROR)
{
return state_;
}
const char *key_begin = nullptr;
size_t key_length = 0;
const char *value_begin = nullptr;
size_t value_length = 0;
while (true)
{
state_ = SkipUntilNot(blank_or_newline);
if (state_ != State::CAN_CONTINUE)
{
return state_;
}
// Check if it's a comment line; if so, read until end of line.
if (*current_ != '#')
{
break;
}
state_ = SkipUntil(new_line);
if (state_ != State::CAN_CONTINUE)
{
return state_;
}
}
// No need to check whether* current_ is a content_char, since content_char
// is defined as not blank and not new_line.
key_begin = current_;
state_ = SkipUntilNot(content_char);
if (state_ != State::CAN_CONTINUE)
{
goto error;
}
key_length = current_ - key_begin;
// There should be at least one blank character after the key string.
if (!blank(*current_))
{
goto error;
}
state_ = SkipUntilNot(blank);
if (state_ != State::CAN_CONTINUE)
{
goto error;
}
if (!content_char(*current_))
{
goto error;
}
value_begin = current_;
// value must only contain content characters, blanks not are allowed.
// also, there's no need to check the state after this, since we will always
// emit the value. This also avoids the situation where trailing spaces in a
// line would become part of the value.
SkipUntilNot(content_char);
value_length = current_ - value_begin;
// Unconditionally skip until the end of the line. This prevents the case
// like "foo bar baz\n" where baz should not be treated as the Next key.
SkipUntil(new_line);
if (out != nullptr)
{
*out = KeyValue{std::string_view{key_begin, key_length}, std::string_view{value_begin, value_length}};
}
state_ = State::HAS_PAIR;
return state_;
error:
state_ = State::ERROR;
return state_;
}
KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(const std::function<bool(char)> &f)
{
while (current_ != end_ && *current_)
{
if (!f(*current_))
{
return State::CAN_CONTINUE;
}
++current_;
}
return State::END;
}
KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(const std::function<bool(char)> &f)
{
while (current_ != end_ && *current_)
{
if (f(*current_))
{
return State::CAN_CONTINUE;
}
++current_;
}
return State::END;
}
std::ostream &operator<<(std::ostream &os, const KeyValueBlobReader::KeyValue &kv)
{
os << "(key: " << kv.key << ", value: " << kv.value << ")";
return os;
}
} // namespace vChewing

View File

@ -1,107 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
#define SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
#include <cstddef>
#include <functional>
#include <iostream>
#include <string_view>
// A reader for text-based, blank-separated key-value pairs in a binary blob.
//
// This reader is suitable for reading language model files that entirely
// consist of key-value pairs. Leading or trailing spaces are ignored.
// Lines that start with "#" are treated as comments. Values cannot contain
// spaces. Any space after the value string is parsed is ignored. This implies
// that after a blank, anything that comes after the value can be used as
// comment. Both ' ' and '\t' are treated as blank characters, and the parser
// is agnostic to how lines are ended, and so LF, CR LF, and CR are all valid
// line endings.
//
// std::string_view is used to allow returning results efficiently. As a result,
// the blob is a const char* and will never be mutated. This implies, for
// example, read-only mmap can be used to parse large files.
namespace vChewing
{
class KeyValueBlobReader
{
public:
enum class State : int
{
// There are no more key-value pairs in this blob.
END = 0,
// The reader has produced a new key-value pair.
HAS_PAIR = 1,
// An error is encountered and the parsing stopped.
ERROR = -1,
// Internal-only state: the parser can continue parsing.
CAN_CONTINUE = 2
};
struct KeyValue
{
constexpr KeyValue() : key(""), value("")
{
}
constexpr KeyValue(std::string_view k, std::string_view v) : key(k), value(v)
{
}
bool operator==(const KeyValue &another) const
{
return key == another.key && value == another.value;
}
std::string_view key;
std::string_view value;
};
KeyValueBlobReader(const char *blob, size_t size) : current_(blob), end_(blob + size)
{
}
// Parse the next key-value pair and return the state of the reader. If
// `out` is passed, out will be set to the produced key-value pair if there
// is one.
State Next(KeyValue *out = nullptr);
private:
State SkipUntil(const std::function<bool(char)> &f);
State SkipUntilNot(const std::function<bool(char)> &f);
const char *current_;
const char *end_;
State state_ = State::CAN_CONTINUE;
};
std::ostream &operator<<(std::ostream &, const KeyValueBlobReader::KeyValue &);
} // namespace vChewing
#endif // SOURCE_ENGINE_KEYVALUEBLOBREADER_H_

View File

@ -1,167 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef LMInstantiator_H
#define LMInstantiator_H
#include "AssociatedPhrases.h"
#include "CNSLM.h"
#include "CoreLM.h"
#include "ParselessLM.h"
#include "PhraseReplacementMap.h"
#include "SymbolLM.h"
#include "UserPhrasesLM.h"
#include "UserSymbolLM.h"
#include <stdio.h>
#include <unordered_set>
namespace vChewing
{
using namespace Gramambular;
/// LMInstantiator is a facade for managing a set of models including
/// the input method language model, user phrases and excluded phrases.
///
/// It is the primary model class that the input controller and grammar builder
/// of vChewing talks to. When the grammar builder starts to build a sentence
/// from a series of BPMF readings, it passes the readings to the model to see
/// if there are valid unigrams, and use returned unigrams to produce the final
/// results.
///
/// LMInstantiator combine and transform the unigrams from the primary language
/// model and user phrases. The process is
///
/// 1) Get the original unigrams.
/// 2) Drop the unigrams whose value is contained in the exclusion map.
/// 3) Replace the values of the unigrams using the phrase replacement map.
/// 4) Replace the values of the unigrams using an external converter lambda.
/// 5) Drop the duplicated phrases.
///
/// The controller can ask the model to load the primary input method language
/// model while launching and to load the user phrases anytime if the custom
/// files are modified. It does not keep the reference of the data pathes but
/// you have to pass the paths when you ask it to do loading.
class LMInstantiator : public Gramambular::LanguageModel
{
public:
LMInstantiator();
~LMInstantiator();
/// Asks to load the primary language model at the given path.
/// @param languageModelPath The path of the language model.
void loadLanguageModel(const char *languageModelPath);
/// If the data model is already loaded.
bool isDataModelLoaded();
/// Asks to load the primary language model at the given path.
/// @param miscDataPath The path of the misc data model.
void loadMiscData(const char *miscDataPath);
/// If the data model is already loaded.
bool isMiscDataLoaded();
/// Asks to load the primary language model at the given path.
/// @param symbolDataPath The path of the symbol data model.
void loadSymbolData(const char *symbolDataPath);
/// If the data model is already loaded.
bool isSymbolDataLoaded();
/// Asks to load the primary language model at the given path.
/// @param cnsDataPath The path of the CNS data model.
void loadCNSData(const char *cnsDataPath);
/// If the data model is already loaded.
bool isCNSDataLoaded();
/// Asks to load the user phrases and excluded phrases at the given path.
/// @param userPhrasesPath The path of user phrases.
/// @param excludedPhrasesPath The path of excluded phrases.
void loadUserPhrases(const char *userPhrasesPath, const char *excludedPhrasesPath);
/// Asks to load the user symbol data at the given path.
/// @param userSymbolDataPath The path of user symbol data.
void loadUserSymbolData(const char *userPhrasesPath);
/// Asks to load the user associated phrases at the given path.
/// @param userAssociatedPhrasesPath The path of the user associated phrases.
void loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath);
/// Asks to load the phrase replacement table at the given path.
/// @param phraseReplacementPath The path of the phrase replacement table.
void loadPhraseReplacementMap(const char *phraseReplacementPath);
/// Not implemented since we do not have data to provide bigram function.
const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey, const std::string &key);
/// Returns a list of available unigram for the given key.
/// @param key A std::string represents the BPMF reading or a symbol key. For
/// example, it you pass "ㄇㄚ", it returns "嗎", "媽", and so on.
const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
/// If the model has unigrams for the given key.
/// @param key The key.
bool hasUnigramsForKey(const std::string &key);
/// Enables or disables phrase replacement.
void setPhraseReplacementEnabled(bool enabled);
/// If phrase replacement is enabled or not.
bool phraseReplacementEnabled();
/// Enables or disables symbol input.
void setSymbolEnabled(bool enabled);
/// If symbol input is enabled or not.
bool symbolEnabled();
/// Enables or disables CNS11643 input.
void setCNSEnabled(bool enabled);
/// If CNS11643 input is enabled or not.
bool cnsEnabled();
const std::vector<std::string> associatedPhrasesForKey(const std::string &key);
bool hasAssociatedPhrasesForKey(const std::string &key);
protected:
/// Filters and converts the input unigrams and return a new list of unigrams.
///
/// @param unigrams The unigrams to be processed.
/// @param excludedValues The values to excluded unigrams.
/// @param insertedValues The values for unigrams already in the results.
/// It helps to prevent duplicated unigrams. Please note that the method
/// has a side effect that it inserts values to `insertedValues`.
const std::vector<Gramambular::Unigram> filterAndTransformUnigrams(
const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
std::unordered_set<std::string> &insertedValues);
ParselessLM m_languageModel;
CoreLM m_miscModel;
SymbolLM m_symbolModel;
CNSLM m_cnsModel;
UserPhrasesLM m_userPhrases;
UserPhrasesLM m_excludedPhrases;
UserSymbolLM m_userSymbolModel;
PhraseReplacementMap m_phraseReplacement;
AssociatedPhrases m_associatedPhrases;
bool m_phraseReplacementEnabled;
bool m_cnsEnabled;
bool m_symbolEnabled;
};
}; // namespace vChewing
#endif

View File

@ -1,323 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "LMInstantiator.h"
#include <algorithm>
#include <iterator>
namespace vChewing
{
LMInstantiator::LMInstantiator()
{
}
LMInstantiator::~LMInstantiator()
{
m_languageModel.close();
m_miscModel.close();
m_userPhrases.close();
m_userSymbolModel.close();
m_cnsModel.close();
m_excludedPhrases.close();
m_phraseReplacement.close();
m_associatedPhrases.close();
}
void LMInstantiator::loadLanguageModel(const char *languageModelDataPath)
{
if (languageModelDataPath)
{
m_languageModel.close();
m_languageModel.open(languageModelDataPath);
}
}
bool LMInstantiator::isDataModelLoaded()
{
return m_languageModel.isLoaded();
}
void LMInstantiator::loadCNSData(const char *cnsDataPath)
{
if (cnsDataPath)
{
m_cnsModel.close();
m_cnsModel.open(cnsDataPath);
}
}
bool LMInstantiator::isCNSDataLoaded()
{
return m_cnsModel.isLoaded();
}
void LMInstantiator::loadMiscData(const char *miscDataPath)
{
if (miscDataPath)
{
m_miscModel.close();
m_miscModel.open(miscDataPath);
}
}
bool LMInstantiator::isMiscDataLoaded()
{
return m_miscModel.isLoaded();
}
void LMInstantiator::loadSymbolData(const char *symbolDataPath)
{
if (symbolDataPath)
{
m_symbolModel.close();
m_symbolModel.open(symbolDataPath);
}
}
bool LMInstantiator::isSymbolDataLoaded()
{
return m_symbolModel.isLoaded();
}
void LMInstantiator::loadUserPhrases(const char *userPhrasesDataPath, const char *excludedPhrasesDataPath)
{
if (userPhrasesDataPath)
{
m_userPhrases.close();
m_userPhrases.open(userPhrasesDataPath);
}
if (excludedPhrasesDataPath)
{
m_excludedPhrases.close();
m_excludedPhrases.open(excludedPhrasesDataPath);
}
}
void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath)
{
if (userSymbolDataPath)
{
m_userSymbolModel.close();
m_userSymbolModel.open(userSymbolDataPath);
}
}
void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath)
{
if (userAssociatedPhrasesPath)
{
m_associatedPhrases.close();
m_associatedPhrases.open(userAssociatedPhrasesPath);
}
}
void LMInstantiator::loadPhraseReplacementMap(const char *phraseReplacementPath)
{
if (phraseReplacementPath)
{
m_phraseReplacement.close();
m_phraseReplacement.open(phraseReplacementPath);
}
}
const std::vector<Gramambular::Bigram> LMInstantiator::bigramsForKeys(const std::string &preceedingKey,
const std::string &key)
{
return std::vector<Gramambular::Bigram>();
}
const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string &key)
{
if (key == " ")
{
std::vector<Gramambular::Unigram> spaceUnigrams;
Gramambular::Unigram g;
g.keyValue.key = " ";
g.keyValue.value = " ";
g.score = 0;
spaceUnigrams.push_back(g);
return spaceUnigrams;
}
std::vector<Gramambular::Unigram> allUnigrams;
std::vector<Gramambular::Unigram> miscUnigrams;
std::vector<Gramambular::Unigram> symbolUnigrams;
std::vector<Gramambular::Unigram> userUnigrams;
std::vector<Gramambular::Unigram> userSymbolUnigrams;
std::vector<Gramambular::Unigram> cnsUnigrams;
std::unordered_set<std::string> excludedValues;
std::unordered_set<std::string> insertedValues;
if (m_excludedPhrases.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
transform(excludedUnigrams.begin(), excludedUnigrams.end(), inserter(excludedValues, excludedValues.end()),
[](const Gramambular::Unigram &u) { return u.keyValue.value; });
}
if (m_userPhrases.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
// 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。
// 這樣一來就可以在就地新增語彙時徹底複寫優先權。
std::reverse(rawUserUnigrams.begin(), rawUserUnigrams.end());
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
}
if (m_languageModel.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
}
if (m_miscModel.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> rawMiscUnigrams = m_miscModel.unigramsForKey(key);
miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues);
}
if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
{
std::vector<Gramambular::Unigram> rawSymbolUnigrams = m_symbolModel.unigramsForKey(key);
symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues);
}
if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
{
std::vector<Gramambular::Unigram> rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key);
userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues);
}
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled)
{
std::vector<Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
}
allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end());
allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end());
allUnigrams.insert(allUnigrams.begin(), miscUnigrams.begin(), miscUnigrams.end());
allUnigrams.insert(allUnigrams.end(), userSymbolUnigrams.begin(), userSymbolUnigrams.end());
allUnigrams.insert(allUnigrams.end(), symbolUnigrams.begin(), symbolUnigrams.end());
return allUnigrams;
}
bool LMInstantiator::hasUnigramsForKey(const std::string &key)
{
if (key == " ")
{
return true;
}
if (!m_excludedPhrases.hasUnigramsForKey(key))
{
return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key);
}
return unigramsForKey(key).size() > 0;
}
void LMInstantiator::setPhraseReplacementEnabled(bool enabled)
{
m_phraseReplacementEnabled = enabled;
}
bool LMInstantiator::phraseReplacementEnabled()
{
return m_phraseReplacementEnabled;
}
void LMInstantiator::setCNSEnabled(bool enabled)
{
m_cnsEnabled = enabled;
}
bool LMInstantiator::cnsEnabled()
{
return m_cnsEnabled;
}
void LMInstantiator::setSymbolEnabled(bool enabled)
{
m_symbolEnabled = enabled;
}
bool LMInstantiator::symbolEnabled()
{
return m_symbolEnabled;
}
const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(
const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
std::unordered_set<std::string> &insertedValues)
{
std::vector<Gramambular::Unigram> results;
for (auto &&unigram : unigrams)
{
// excludedValues filters out the unigrams with the original value.
// insertedValues filters out the ones with the converted value
std::string originalValue = unigram.keyValue.value;
if (excludedValues.find(originalValue) != excludedValues.end())
{
continue;
}
std::string value = originalValue;
if (m_phraseReplacementEnabled)
{
std::string replacement = m_phraseReplacement.valueForKey(value);
if (replacement != "")
{
value = replacement;
}
}
if (insertedValues.find(value) == insertedValues.end())
{
Gramambular::Unigram g;
g.keyValue.value = value;
g.keyValue.key = unigram.keyValue.key;
g.score = unigram.score;
results.push_back(g);
insertedValues.insert(value);
}
}
return results;
}
const std::vector<std::string> LMInstantiator::associatedPhrasesForKey(const std::string &key)
{
return m_associatedPhrases.valuesForKey(key);
}
bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string &key)
{
return m_associatedPhrases.hasValuesForKey(key);
}
} // namespace vChewing

View File

@ -1,69 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ASSOCIATEDPHRASES_H
#define ASSOCIATEDPHRASES_H
#include <iostream>
#include <map>
#include <string>
#include <vector>
namespace vChewing
{
class AssociatedPhrases
{
public:
AssociatedPhrases();
~AssociatedPhrases();
const bool isLoaded();
bool open(const char *path);
void close();
const std::vector<std::string> valuesForKey(const std::string &key);
const bool hasValuesForKey(const std::string &key);
protected:
struct Row
{
Row(std::string_view &k, std::string_view &v) : key(k), value(v)
{
}
std::string_view key;
std::string_view value;
};
std::map<std::string_view, std::vector<Row>> keyRowMap;
int fd;
void *data;
size_t length;
};
} // namespace vChewing
#endif /* AssociatedPhrases_hpp */

View File

@ -1,146 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "AssociatedPhrases.h"
#include "vChewing-Swift.h"
#include <fcntl.h>
#include <fstream>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include "KeyValueBlobReader.h"
#include "LMConsolidator.h"
namespace vChewing
{
AssociatedPhrases::AssociatedPhrases() : fd(-1), data(0), length(0)
{
}
AssociatedPhrases::~AssociatedPhrases()
{
if (data)
{
close();
}
}
const bool AssociatedPhrases::isLoaded()
{
if (data)
{
return true;
}
return false;
}
bool AssociatedPhrases::open(const char *path)
{
if (data)
{
return false;
}
LMConsolidator::FixEOF(path);
LMConsolidator::ConsolidateContent(path, true);
fd = ::open(path, O_RDONLY);
if (fd == -1)
{
printf("open:: file not exist");
return false;
}
struct stat sb;
if (fstat(fd, &sb) == -1)
{
printf("open:: cannot open file");
return false;
}
length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (!data)
{
::close(fd);
return false;
}
KeyValueBlobReader reader(static_cast<char *>(data), length);
KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
{
keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value);
}
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
if (state == KeyValueBlobReader::State::ERROR)
{
// close();
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n");
// return false;
}
return true;
}
void AssociatedPhrases::close()
{
if (data)
{
munmap(data, length);
::close(fd);
data = 0;
}
keyRowMap.clear();
}
const std::vector<std::string> AssociatedPhrases::valuesForKey(const std::string &key)
{
std::vector<std::string> v;
auto iter = keyRowMap.find(key);
if (iter != keyRowMap.end())
{
const std::vector<Row> &rows = iter->second;
for (const auto &row : rows)
{
std::string_view value = row.value;
v.push_back({value.data(), value.size()});
}
}
return v;
}
const bool AssociatedPhrases::hasValuesForKey(const std::string &key)
{
return keyRowMap.find(key) != keyRowMap.end();
}
}; // namespace vChewing

View File

@ -1,85 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CoreLM_H
#define CoreLM_H
#include "LanguageModel.h"
#include <iostream>
#include <map>
#include <string>
#include <vector>
// this class relies on the fact that we have a space-separated data
// format, and we use mmap and zero-out the separators and line feeds
// to avoid creating new string objects; the parser is a simple DFA
using namespace std;
using namespace Gramambular;
namespace vChewing
{
class CoreLM : public Gramambular::LanguageModel
{
public:
CoreLM();
~CoreLM();
bool isLoaded();
bool open(const char *path);
void close();
void dump();
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const string &preceedingKey, const string &key);
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const string &key);
virtual bool hasUnigramsForKey(const string &key);
protected:
struct CStringCmp
{
bool operator()(const char *s1, const char *s2) const
{
return strcmp(s1, s2) < 0;
}
};
struct Row
{
const char *key;
const char *value;
const char *logProbability;
};
map<const char *, vector<Row>, CStringCmp> keyRowMap;
int fd;
void *data;
size_t length;
};
}; // namespace vChewing
#endif

View File

@ -1,365 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CoreLM.h"
#include "vChewing-Swift.h"
#include <fcntl.h>
#include <fstream>
#include <sys/mman.h>
#include <sys/stat.h>
#include <syslog.h>
#include <unistd.h>
using namespace Gramambular;
vChewing::CoreLM::CoreLM() : fd(-1), data(0), length(0)
{
}
vChewing::CoreLM::~CoreLM()
{
if (data)
{
close();
}
}
bool vChewing::CoreLM::isLoaded()
{
if (data)
{
return true;
}
return false;
}
bool vChewing::CoreLM::open(const char *path)
{
if (data)
{
return false;
}
fd = ::open(path, O_RDONLY);
if (fd == -1)
{
return false;
}
struct stat sb;
if (fstat(fd, &sb) == -1)
{
return false;
}
length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0);
if (!data)
{
::close(fd);
return false;
}
// Regular expression for parsing:
// (\n*\w\w*\s\w\w*\s\w\w*)*$
//
// Expanded as DFA (in Graphviz):
//
// digraph finite_state_machine {
// rankdir = LR;
// size = "10";
//
// node [shape = doublecircle]; End;
// node [shape = circle];
//
// Start -> End [ label = "EOF"];
// Start -> Error [ label = "\\s" ];
// Start -> Start [ label = "\\n" ];
// Start -> 1 [ label = "\\w" ];
//
// 1 -> Error [ label = "\\n, EOF" ];
// 1 -> 2 [ label = "\\s" ];
// 1 -> 1 [ label = "\\w" ];
//
// 2 -> Error [ label = "\\n, \\s, EOF" ];
// 2 -> 3 [ label = "\\w" ];
//
// 3 -> Error [ label = "\\n, EOF "];
// 3 -> 4 [ label = "\\s" ];
// 3 -> 3 [ label = "\\w" ];
//
// 4 -> Error [ label = "\\n, \\s, EOF" ];
// 4 -> 5 [ label = "\\w" ];
//
// 5 -> Error [ label = "\\s, EOF" ];
// 5 -> Start [ label = "\\n" ];
// 5 -> 5 [ label = "\\w" ];
// }
char *head = (char *)data;
char *end = (char *)data + length;
char c;
Row row;
start:
// EOF -> end
if (head == end)
{
goto end;
}
c = *head;
// \s -> error
if (c == ' ')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error");
goto error;
}
// \n -> start
else if (c == '\n')
{
head++;
goto start;
}
// \w -> record column star, state1
row.value = head;
head++;
// fall through to state 1
state1:
// EOF -> error
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error");
goto error;
}
c = *head;
// \n -> error
if (c == '\n')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error");
goto error;
}
// \s -> state2 + zero out ending + record column start
else if (c == ' ')
{
*head = 0;
head++;
row.key = head;
goto state2;
}
// \w -> state1
head++;
goto state1;
state2:
// eof -> error
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error");
goto error;
}
c = *head;
// \n, \s -> error
if (c == '\n' || c == ' ')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error");
goto error;
}
// \w -> state3
head++;
// fall through to state 3
state3:
// eof -> error
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error");
goto error;
}
c = *head;
// \n -> error
if (c == '\n')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error");
goto error;
}
// \s -> state4 + zero out ending + record column start
else if (c == ' ')
{
*head = 0;
head++;
row.logProbability = head;
goto state4;
}
// \w -> state3
head++;
goto state3;
state4:
// eof -> error
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error");
goto error;
}
c = *head;
// \n, \s -> error
if (c == '\n' || c == ' ')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error");
goto error;
}
// \w -> state5
head++;
// fall through to state 5
state5:
// eof -> error
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error");
goto error;
}
c = *head;
// \s -> error
if (c == ' ')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error");
goto error;
}
// \n -> start
else if (c == '\n')
{
*head = 0;
head++;
keyRowMap[row.key].push_back(row);
goto start;
}
// \w -> state 5
head++;
goto state5;
error:
close();
return false;
end:
static const char *space = " ";
static const char *zero = "0.0";
Row emptyRow;
emptyRow.key = space;
emptyRow.value = space;
emptyRow.logProbability = zero;
keyRowMap[space].push_back(emptyRow);
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete.");
return true;
}
void vChewing::CoreLM::close()
{
if (data)
{
munmap(data, length);
::close(fd);
data = 0;
}
keyRowMap.clear();
}
void vChewing::CoreLM::dump()
{
size_t rows = 0;
for (map<const char *, vector<Row>>::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i)
{
const vector<Row> &r = (*i).second;
for (vector<Row>::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri)
{
const Row &row = *ri;
cerr << row.key << " " << row.value << " " << row.logProbability << "\n";
rows++;
}
}
}
const std::vector<Gramambular::Bigram> vChewing::CoreLM::bigramsForKeys(const string &preceedingKey, const string &key)
{
return std::vector<Gramambular::Bigram>();
}
const std::vector<Gramambular::Unigram> vChewing::CoreLM::unigramsForKey(const string &key)
{
std::vector<Gramambular::Unigram> v;
map<const char *, vector<Row>>::const_iterator i = keyRowMap.find(key.c_str());
if (i != keyRowMap.end())
{
for (vector<Row>::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri)
{
Unigram g;
const Row &r = *ri;
g.keyValue.key = r.key;
g.keyValue.value = r.value;
g.score = atof(r.logProbability);
v.push_back(g);
}
}
return v;
}
bool vChewing::CoreLM::hasUnigramsForKey(const string &key)
{
return keyRowMap.find(key.c_str()) != keyRowMap.end();
}

View File

@ -1,54 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CNSLM_H
#define CNSLM_H
#include "LanguageModel.h"
#include "UserPhrasesLM.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing
{
class CNSLM : public UserPhrasesLM
{
public:
bool allowConsolidation() override
{
return false;
}
float overridedValue() override
{
return -11.0;
}
};
} // namespace vChewing
#endif

View File

@ -1,54 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SYMBOLLM_H
#define SYMBOLLM_H
#include "LanguageModel.h"
#include "UserPhrasesLM.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing
{
class SymbolLM : public UserPhrasesLM
{
public:
bool allowConsolidation() override
{
return false;
}
float overridedValue() override
{
return -13.0;
}
};
} // namespace vChewing
#endif

View File

@ -1,54 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef USERSYMBOLLM_H
#define USERSYMBOLLM_H
#include "LanguageModel.h"
#include "UserPhrasesLM.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing
{
class UserSymbolLM : public UserPhrasesLM
{
public:
bool allowConsolidation() override
{
return true;
}
float overridedValue() override
{
return -12.0;
}
};
} // namespace vChewing
#endif

View File

@ -1,56 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef PHRASEREPLACEMENTMAP_H
#define PHRASEREPLACEMENTMAP_H
#include <iostream>
#include <map>
#include <string>
namespace vChewing
{
class PhraseReplacementMap
{
public:
PhraseReplacementMap();
~PhraseReplacementMap();
bool open(const char *path);
void close();
const std::string valueForKey(const std::string &key);
protected:
std::map<std::string_view, std::string_view> keyValueMap;
int fd;
void *data;
size_t length;
};
} // namespace vChewing
#endif

View File

@ -1,130 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "PhraseReplacementMap.h"
#include "vChewing-Swift.h"
#include <fcntl.h>
#include <fstream>
#include <sys/mman.h>
#include <sys/stat.h>
#include <syslog.h>
#include <unistd.h>
#include "KeyValueBlobReader.h"
#include "LMConsolidator.h"
namespace vChewing
{
using std::string;
PhraseReplacementMap::PhraseReplacementMap() : fd(-1), data(0), length(0)
{
}
PhraseReplacementMap::~PhraseReplacementMap()
{
if (data)
{
close();
}
}
bool PhraseReplacementMap::open(const char *path)
{
if (data)
{
return false;
}
LMConsolidator::FixEOF(path);
LMConsolidator::ConsolidateContent(path, true);
fd = ::open(path, O_RDONLY);
if (fd == -1)
{
printf("open:: file not exist");
return false;
}
struct stat sb;
if (fstat(fd, &sb) == -1)
{
printf("open:: cannot open file");
return false;
}
length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (!data)
{
::close(fd);
return false;
}
KeyValueBlobReader reader(static_cast<char *>(data), length);
KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
{
keyValueMap[keyValue.key] = keyValue.value;
}
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
if (state == KeyValueBlobReader::State::ERROR)
{
// close();
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n");
// return false;
}
return true;
}
void PhraseReplacementMap::close()
{
if (data)
{
munmap(data, length);
::close(fd);
data = 0;
}
keyValueMap.clear();
}
const std::string PhraseReplacementMap::valueForKey(const std::string &key)
{
auto iter = keyValueMap.find(key);
if (iter != keyValueMap.end())
{
const std::string_view v = iter->second;
return {v.data(), v.size()};
}
return string("");
}
}

View File

@ -1,82 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef USERPHRASESLM_H
#define USERPHRASESLM_H
#include "LanguageModel.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing
{
class UserPhrasesLM : public Gramambular::LanguageModel
{
public:
UserPhrasesLM();
~UserPhrasesLM();
bool isLoaded();
bool open(const char *path);
void close();
void dump();
virtual bool allowConsolidation()
{
return true;
}
virtual float overridedValue()
{
return 0.0;
}
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey,
const std::string &key);
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
virtual bool hasUnigramsForKey(const std::string &key);
protected:
struct Row
{
Row(std::string_view &k, std::string_view &v) : key(k), value(v)
{
}
std::string_view key;
std::string_view value;
};
std::map<std::string_view, std::vector<Row>> keyRowMap;
int fd;
void *data;
size_t length;
};
} // namespace vChewing
#endif

View File

@ -1,174 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "UserPhrasesLM.h"
#include "vChewing-Swift.h"
#include <fcntl.h>
#include <fstream>
#include <sys/mman.h>
#include <sys/stat.h>
#include <syslog.h>
#include <unistd.h>
#include "KeyValueBlobReader.h"
#include "LMConsolidator.h"
namespace vChewing
{
UserPhrasesLM::UserPhrasesLM() : fd(-1), data(0), length(0)
{
}
UserPhrasesLM::~UserPhrasesLM()
{
if (data)
{
close();
}
}
bool UserPhrasesLM::isLoaded()
{
if (data)
{
return true;
}
return false;
}
bool UserPhrasesLM::open(const char *path)
{
if (data)
{
return false;
}
if (allowConsolidation())
{
LMConsolidator::FixEOF(path);
LMConsolidator::ConsolidateContent(path, true);
}
fd = ::open(path, O_RDONLY);
if (fd == -1)
{
printf("open:: file not exist");
return false;
}
struct stat sb;
if (fstat(fd, &sb) == -1)
{
printf("open:: cannot open file");
return false;
}
length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (!data)
{
::close(fd);
return false;
}
KeyValueBlobReader reader(static_cast<char *>(data), length);
KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
{
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF
// reading.
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
}
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
if (state == KeyValueBlobReader::State::ERROR)
{
// close();
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n");
// return false;
}
return true;
}
void UserPhrasesLM::close()
{
if (data)
{
munmap(data, length);
::close(fd);
data = 0;
}
keyRowMap.clear();
}
void UserPhrasesLM::dump()
{
for (const auto &entry : keyRowMap)
{
const std::vector<Row> &rows = entry.second;
for (const auto &row : rows)
{
std::cerr << row.key << " " << row.value << "\n";
}
}
}
const std::vector<Gramambular::Bigram> UserPhrasesLM::bigramsForKeys(const std::string &preceedingKey,
const std::string &key)
{
return std::vector<Gramambular::Bigram>();
}
const std::vector<Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std::string &key)
{
std::vector<Gramambular::Unigram> v;
auto iter = keyRowMap.find(key);
if (iter != keyRowMap.end())
{
const std::vector<Row> &rows = iter->second;
for (const auto &row : rows)
{
Gramambular::Unigram g;
g.keyValue.key = row.key;
g.keyValue.value = row.value;
g.score = overridedValue();
v.push_back(g);
}
}
return v;
}
bool UserPhrasesLM::hasUnigramsForKey(const std::string &key)
{
return keyRowMap.find(key) != keyRowMap.end();
}
}; // namespace vChewing

View File

@ -206,12 +206,7 @@
5B5D28AB281EA1E800523D4D /* lmLite.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmLite.swift; sourceTree = "<group>"; };
5B5E535127EF261400C6AA1E /* IME.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = IME.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
5B61B0C9280BEFD4002E3CFA /* KeyHandler_Misc.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_Misc.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
5B62A32627AE77BB00A19448 /* LMConsolidator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = LMConsolidator.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
5B62A32727AE77BB00A19448 /* LMConsolidator.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = LMConsolidator.mm; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = FSEventStreamHelper.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
5B62A32B27AE78B000A19448 /* CNSLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = CNSLM.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
5B62A32C27AE78B000A19448 /* CoreLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = CoreLM.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
5B62A32D27AE78B000A19448 /* CoreLM.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = CoreLM.mm; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
5B62A33127AE792F00A19448 /* InputSourceHelper.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputSourceHelper.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
5B62A33527AE795800A19448 /* mgrPrefs.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = mgrPrefs.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
5B62A33727AE79CD00A19448 /* NSStringUtils.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = NSStringUtils.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
@ -223,14 +218,12 @@
5B62A34527AE7CD900A19448 /* NotifierController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = NotifierController.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
5B707CE527D9F3A10099EF99 /* SwiftyOpenCC */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = SwiftyOpenCC; path = Packages/SwiftyOpenCC; sourceTree = "<group>"; };
5B707CE727D9F4590099EF99 /* OpenCCBridge.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = OpenCCBridge.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
5B7111C727DEF9FF00444310 /* UserSymbolLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserSymbolLM.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
5B73FB5427B2BD6900E9BF49 /* PhraseEditor-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = "PhraseEditor-Info.plist"; path = "UserPhraseEditor/PhraseEditor-Info.plist"; sourceTree = SOURCE_ROOT; };
5B73FB5F27B2BE1300E9BF49 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = "<group>"; };
5B782EC3280C243C007276DE /* KeyHandler_HandleCandidate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_HandleCandidate.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
5B7BC4AF27AFFBE800F66C24 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/frmPrefWindow.xib; sourceTree = "<group>"; };
5B7BC4B227AFFC0B00F66C24 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/frmPrefWindow.strings; sourceTree = "<group>"; };
5B7F225C2808501000DDD3CB /* KeyHandler_HandleInput.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_HandleInput.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
5B8F43ED27C9BC220069AC27 /* SymbolLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = SymbolLM.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
5B949BD82816DC5400D87B5D /* LineReader.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LineReader.swift; sourceTree = "<group>"; usesTabs = 1; };
5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LMConsolidator.swift; sourceTree = "<group>"; usesTabs = 1; };
5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmUserOverride.swift; sourceTree = "<group>"; usesTabs = 1; };
@ -324,25 +317,15 @@
6ACA41EF15FC1D9000935EF6 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = "<group>"; };
6ACA41F215FC1D9000935EF6 /* Installer-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = "Installer-Info.plist"; path = "Installer/Installer-Info.plist"; sourceTree = SOURCE_ROOT; };
6ACA41F315FC1D9000935EF6 /* Installer-Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "Installer-Prefix.pch"; path = "Installer/Installer-Prefix.pch"; sourceTree = SOURCE_ROOT; };
6ACC3D3C27914AAB00F1B140 /* KeyValueBlobReader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = KeyValueBlobReader.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
6ACC3D3E27914F2400F1B140 /* KeyValueBlobReader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = KeyValueBlobReader.cpp; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessPhraseDB.cpp; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessPhraseDB.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
6ACC3D422793701600F1B140 /* ParselessLM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessLM.cpp; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
6ACC3D432793701600F1B140 /* ParselessLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessLM.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D41355D9278E6D17005E5CBD /* LMInstantiator.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = LMInstantiator.mm; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D41355DA278E6D17005E5CBD /* LMInstantiator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = LMInstantiator.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D41355DC278EA3ED005E5CBD /* UserPhrasesLM.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = UserPhrasesLM.mm; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D41355DD278EA3ED005E5CBD /* UserPhrasesLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserPhrasesLM.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D427A9BF25ED28CC005D43E0 /* vChewing-Bridging-Header.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = "vChewing-Bridging-Header.h"; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D427F76B278CA1BA004A2160 /* AppDelegate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = AppDelegate.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
D44FB74B2792189A003C80A6 /* PhraseReplacementMap.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = PhraseReplacementMap.mm; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D44FB74C2792189A003C80A6 /* PhraseReplacementMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = PhraseReplacementMap.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D456576D279E4F7B00DF6BC9 /* InputHandler.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputHandler.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
D461B791279DAC010070E734 /* InputState.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputState.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
D47B92BF27972AC800458394 /* main.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = main.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
D47D73AA27A6CAE600255A50 /* AssociatedPhrases.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = AssociatedPhrases.mm; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D47D73AB27A6CAE600255A50 /* AssociatedPhrases.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = AssociatedPhrases.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D47F7DCD278BFB57002F9DD7 /* ctlPrefWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlPrefWindow.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
D47F7DCF278C0897002F9DD7 /* ctlNonModalAlertWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlNonModalAlertWindow.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 1; };
D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserOverrideModel.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
@ -417,16 +400,6 @@
path = SubLMs;
sourceTree = "<group>";
};
5B4D47B627C9186900220DDC /* InstantiatedModels */ = {
isa = PBXGroup;
children = (
5B62A32B27AE78B000A19448 /* CNSLM.h */,
5B8F43ED27C9BC220069AC27 /* SymbolLM.h */,
5B7111C727DEF9FF00444310 /* UserSymbolLM.h */,
);
path = InstantiatedModels;
sourceTree = "<group>";
};
5B62A30127AE732800A19448 /* 3rdParty */ = {
isa = PBXGroup;
children = (
@ -489,8 +462,6 @@
isa = PBXGroup;
children = (
5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */,
5B62A32627AE77BB00A19448 /* LMConsolidator.h */,
5B62A32727AE77BB00A19448 /* LMConsolidator.mm */,
5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */,
);
path = FileHandlers;
@ -520,39 +491,26 @@
5B62A32427AE757300A19448 /* LangModelRelated */ = {
isa = PBXGroup;
children = (
5B62A32527AE758000A19448 /* SubLanguageModels */,
5B62A32527AE758000A19448 /* OldFileReferences */,
5B407308281672610023DFFF /* SubLMs */,
6ACC3D3E27914F2400F1B140 /* KeyValueBlobReader.cpp */,
6ACC3D3C27914AAB00F1B140 /* KeyValueBlobReader.h */,
5BE33BEC28169B5D00CE5BB0 /* KeyValueStructs.swift */,
5BD0113A28180D6100609769 /* LMInstantiator.swift */,
D41355DA278E6D17005E5CBD /* LMInstantiator.h */,
D41355D9278E6D17005E5CBD /* LMInstantiator.mm */,
5BAEFACF28012565001F42C9 /* mgrLangModel.swift */,
);
path = LangModelRelated;
sourceTree = "<group>";
};
5B62A32527AE758000A19448 /* SubLanguageModels */ = {
5B62A32527AE758000A19448 /* OldFileReferences */ = {
isa = PBXGroup;
children = (
5B4D47B627C9186900220DDC /* InstantiatedModels */,
D47D73AB27A6CAE600255A50 /* AssociatedPhrases.h */,
D47D73AA27A6CAE600255A50 /* AssociatedPhrases.mm */,
5B62A32C27AE78B000A19448 /* CoreLM.h */,
5B62A32D27AE78B000A19448 /* CoreLM.mm */,
6ACC3D422793701600F1B140 /* ParselessLM.cpp */,
6ACC3D432793701600F1B140 /* ParselessLM.h */,
6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */,
6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */,
D44FB74C2792189A003C80A6 /* PhraseReplacementMap.h */,
D44FB74B2792189A003C80A6 /* PhraseReplacementMap.mm */,
D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */,
D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */,
D41355DD278EA3ED005E5CBD /* UserPhrasesLM.h */,
D41355DC278EA3ED005E5CBD /* UserPhrasesLM.mm */,
);
path = SubLanguageModels;
path = OldFileReferences;
sourceTree = "<group>";
};
5B62A33027AE78E500A19448 /* Resources */ = {