CNSLM // Instantiation with -11.0 value.
This commit is contained in:
parent
2cf6690da8
commit
a6692413fa
|
@ -1,150 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
||||
except as required to fulfill notice requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "CNSLM.h"
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
#include <unistd.h>
|
||||
#include <syslog.h>
|
||||
|
||||
#include "KeyValueBlobReader.h"
|
||||
|
||||
namespace vChewing {
|
||||
|
||||
CNSLM::CNSLM()
|
||||
: fd(-1)
|
||||
, data(0)
|
||||
, length(0)
|
||||
{
|
||||
}
|
||||
|
||||
/// Releases the loaded file through close() when one is still mapped.
CNSLM::~CNSLM()
{
    if (!data) {
        return;
    }
    close();
}
|
||||
|
||||
bool CNSLM::isLoaded()
|
||||
{
|
||||
if (data) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CNSLM::open(const char *path)
|
||||
{
|
||||
if (data) {
|
||||
syslog(LOG_CONS, "CNSLM: Failed at Open Step 1.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
fd = ::open(path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
syslog(LOG_CONS, "CNSLM: Failed at Open Step 2.\n");
|
||||
printf("open:: file not exist");
|
||||
return false;
|
||||
}
|
||||
|
||||
struct stat sb;
|
||||
if (fstat(fd, &sb) == -1) {
|
||||
syslog(LOG_CONS, "CNSLM: Failed at Open Step 3.\n");
|
||||
printf("open:: cannot open file");
|
||||
return false;
|
||||
}
|
||||
|
||||
length = (size_t)sb.st_size;
|
||||
|
||||
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (!data) {
|
||||
::close(fd);
|
||||
syslog(LOG_CONS, "CNSLM: Failed at Open Step 4.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
KeyValueBlobReader reader(static_cast<char*>(data), length);
|
||||
KeyValueBlobReader::KeyValue keyValue;
|
||||
KeyValueBlobReader::State state;
|
||||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
|
||||
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading.
|
||||
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
|
||||
}
|
||||
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
||||
if (state == KeyValueBlobReader::State::ERROR) {
|
||||
// close();
|
||||
syslog(LOG_CONS, "CNSLM: Failed at Open Step 5. On Error Resume Next.\n");
|
||||
// return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void CNSLM::close()
|
||||
{
|
||||
if (data) {
|
||||
munmap(data, length);
|
||||
::close(fd);
|
||||
data = 0;
|
||||
}
|
||||
|
||||
keyRowMap.clear();
|
||||
}
|
||||
|
||||
void CNSLM::dump()
|
||||
{
|
||||
for (const auto& entry : keyRowMap) {
|
||||
const std::vector<Row>& rows = entry.second;
|
||||
for (const auto& row : rows) {
|
||||
std::cerr << row.key << " " << row.value << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Bigram lookup. This model ignores both keys and always returns an empty list.
const std::vector<Taiyan::Gramambular::Bigram> CNSLM::bigramsForKeys(const std::string& preceedingKey, const std::string& key)
{
    return {};
}
|
||||
|
||||
const std::vector<Taiyan::Gramambular::Unigram> CNSLM::unigramsForKey(const std::string& key)
|
||||
{
|
||||
std::vector<Taiyan::Gramambular::Unigram> v;
|
||||
auto iter = keyRowMap.find(key);
|
||||
if (iter != keyRowMap.end()) {
|
||||
const std::vector<Row>& rows = iter->second;
|
||||
for (const auto& row : rows) {
|
||||
Taiyan::Gramambular::Unigram g;
|
||||
g.keyValue.key = row.key;
|
||||
g.keyValue.value = row.value;
|
||||
g.score = -17.0;
|
||||
v.push_back(g);
|
||||
}
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
bool CNSLM::hasUnigramsForKey(const std::string& key)
|
||||
{
|
||||
return keyRowMap.find(key) != keyRowMap.end();
|
||||
}
|
||||
|
||||
}; // namespace vChewing
|
|
@ -24,35 +24,19 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
|||
#include <map>
|
||||
#include <iostream>
|
||||
#include "LanguageModel.h"
|
||||
#include "UserPhrasesLM.h"
|
||||
|
||||
namespace vChewing {
|
||||
|
||||
class CNSLM : public Taiyan::Gramambular::LanguageModel
|
||||
class CNSLM: public UserPhrasesLM
|
||||
{
|
||||
public:
|
||||
CNSLM();
|
||||
~CNSLM();
|
||||
|
||||
bool isLoaded();
|
||||
bool open(const char *path);
|
||||
void close();
|
||||
void dump();
|
||||
|
||||
virtual const std::vector<Taiyan::Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key);
|
||||
virtual const std::vector<Taiyan::Gramambular::Unigram> unigramsForKey(const std::string& key);
|
||||
virtual bool hasUnigramsForKey(const std::string& key);
|
||||
|
||||
protected:
|
||||
struct Row {
|
||||
Row(std::string_view& k, std::string_view& v) : key(k), value(v) {}
|
||||
std::string_view key;
|
||||
std::string_view value;
|
||||
};
|
||||
|
||||
std::map<std::string_view, std::vector<Row>> keyRowMap;
|
||||
int fd;
|
||||
void *data;
|
||||
size_t length;
|
||||
/// Override for this model: always answers false.
virtual bool allowConsolidation() override { return false; }
|
||||
/// Override for this model: the value reported is fixed at -11.0.
virtual float overridedValue() override { return -11.0f; }
|
||||
};
|
||||
|
||||
}
|
|
@ -38,6 +38,14 @@ public:
|
|||
void close();
|
||||
void dump();
|
||||
|
||||
/// Default answer is true; subclasses may override to return false.
virtual bool allowConsolidation() { return true; }
|
||||
|
||||
/// Default value is 0.0; subclasses may override to supply a different one.
virtual float overridedValue() { return 0.0f; }
|
||||
|
||||
virtual const std::vector<Taiyan::Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key);
|
||||
virtual const std::vector<Taiyan::Gramambular::Unigram> unigramsForKey(const std::string& key);
|
||||
virtual bool hasUnigramsForKey(const std::string& key);
|
||||
|
|
|
@ -59,8 +59,10 @@ bool UserPhrasesLM::open(const char *path)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (allowConsolidation()) {
|
||||
LMConsolidator::FixEOF(path);
|
||||
LMConsolidator::ConsolidateContent(path, true);
|
||||
}
|
||||
|
||||
fd = ::open(path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
|
@ -134,7 +136,7 @@ const std::vector<Taiyan::Gramambular::Unigram> UserPhrasesLM::unigramsForKey(co
|
|||
Taiyan::Gramambular::Unigram g;
|
||||
g.keyValue.key = row.key;
|
||||
g.keyValue.value = row.value;
|
||||
g.score = 0.0;
|
||||
g.score = overridedValue();
|
||||
v.push_back(g);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
5B62A31B27AE73A700A19448 /* SSZipArchive.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A31327AE73A700A19448 /* SSZipArchive.m */; };
|
||||
5B62A31C27AE73A700A19448 /* AWFileHash.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A31627AE73A700A19448 /* AWFileHash.m */; };
|
||||
5B62A32927AE77D100A19448 /* FSEventStreamHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */; };
|
||||
5B62A32E27AE78B000A19448 /* CNSLM.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32A27AE78B000A19448 /* CNSLM.mm */; };
|
||||
5B62A32F27AE78B000A19448 /* CoreLM.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32D27AE78B000A19448 /* CoreLM.mm */; };
|
||||
5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A33127AE792F00A19448 /* InputSourceHelper.swift */; };
|
||||
5B62A33627AE795800A19448 /* PreferencesModule.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A33527AE795800A19448 /* PreferencesModule.swift */; };
|
||||
|
@ -191,7 +190,6 @@
|
|||
5B62A32627AE77BB00A19448 /* LMConsolidator.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LMConsolidator.h; sourceTree = "<group>"; };
|
||||
5B62A32727AE77BB00A19448 /* LMConsolidator.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = LMConsolidator.mm; sourceTree = "<group>"; };
|
||||
5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FSEventStreamHelper.swift; sourceTree = "<group>"; };
|
||||
5B62A32A27AE78B000A19448 /* CNSLM.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CNSLM.mm; sourceTree = "<group>"; };
|
||||
5B62A32B27AE78B000A19448 /* CNSLM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CNSLM.h; sourceTree = "<group>"; };
|
||||
5B62A32C27AE78B000A19448 /* CoreLM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CoreLM.h; sourceTree = "<group>"; };
|
||||
5B62A32D27AE78B000A19448 /* CoreLM.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CoreLM.mm; sourceTree = "<group>"; };
|
||||
|
@ -364,6 +362,14 @@
|
|||
name = MiscRootFiles;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
5B4D47B627C9186900220DDC /* InstantiatedModels */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
5B62A32B27AE78B000A19448 /* CNSLM.h */,
|
||||
);
|
||||
path = InstantiatedModels;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
5B62A30127AE732800A19448 /* 3rdParty */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
|
@ -495,8 +501,7 @@
|
|||
5B62A32527AE758000A19448 /* SubLanguageModels */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
5B62A32B27AE78B000A19448 /* CNSLM.h */,
|
||||
5B62A32A27AE78B000A19448 /* CNSLM.mm */,
|
||||
5B4D47B627C9186900220DDC /* InstantiatedModels */,
|
||||
5B62A32C27AE78B000A19448 /* CoreLM.h */,
|
||||
5B62A32D27AE78B000A19448 /* CoreLM.mm */,
|
||||
D41355DC278EA3ED005E5CBD /* UserPhrasesLM.mm */,
|
||||
|
@ -1054,7 +1059,6 @@
|
|||
5B62A34A27AE7CD900A19448 /* NotifierController.swift in Sources */,
|
||||
5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */,
|
||||
5B62A31827AE73A700A19448 /* zip.m in Sources */,
|
||||
5B62A32E27AE78B000A19448 /* CNSLM.mm in Sources */,
|
||||
D41355DB278E6D17005E5CBD /* LMInstantiator.mm in Sources */,
|
||||
5B62A31A27AE73A700A19448 /* mztools.m in Sources */,
|
||||
5B62A32927AE77D100A19448 /* FSEventStreamHelper.swift in Sources */,
|
||||
|
|
Loading…
Reference in New Issue