Repo // Remove remained ParselessLM files.

This commit is contained in:
ShikiSuen 2022-05-12 15:10:14 +08:00
parent 07a6a51954
commit 3b93f2addb
5 changed files with 0 additions and 463 deletions

View File

@ -1,168 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "ParselessLM.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <memory>
vChewing::ParselessLM::~ParselessLM()
{
close();
}
bool vChewing::ParselessLM::isLoaded()
{
if (data_)
{
return true;
}
return false;
}
bool vChewing::ParselessLM::open(const std::string_view &path)
{
if (data_)
{
return false;
}
fd_ = ::open(path.data(), O_RDONLY);
if (fd_ == -1)
{
return false;
}
struct stat sb;
if (fstat(fd_, &sb) == -1)
{
::close(fd_);
fd_ = -1;
return false;
}
length_ = static_cast<size_t>(sb.st_size);
data_ = mmap(NULL, length_, PROT_READ, MAP_SHARED, fd_, 0);
if (data_ == nullptr)
{
::close(fd_);
fd_ = -1;
length_ = 0;
return false;
}
db_ = std::unique_ptr<ParselessPhraseDB>(new ParselessPhraseDB(static_cast<char *>(data_), length_));
return true;
}
void vChewing::ParselessLM::close()
{
if (data_ != nullptr)
{
munmap(data_, length_);
::close(fd_);
fd_ = -1;
length_ = 0;
data_ = nullptr;
}
}
const std::vector<Gramambular::Bigram> vChewing::ParselessLM::bigramsForKeys(const std::string &preceedingKey,
const std::string &key)
{
return std::vector<Gramambular::Bigram>();
}
const std::vector<Gramambular::Unigram> vChewing::ParselessLM::unigramsForKey(const std::string &key)
{
if (db_ == nullptr)
{
return std::vector<Gramambular::Unigram>();
}
std::vector<Gramambular::Unigram> results;
for (const auto &row : db_->findRows(key + " "))
{
Gramambular::Unigram unigram;
// Move ahead until we encounter the first space. This is the key.
auto it = row.begin();
while (it != row.end() && *it != ' ')
{
++it;
}
unigram.keyValue.key = std::string(row.begin(), it);
// Read past the space.
if (it != row.end())
{
++it;
}
if (it != row.end())
{
// Now it is the start of the value portion.
auto value_begin = it;
// Move ahead until we encounter the second space. This is the
// value.
while (it != row.end() && *it != ' ')
{
++it;
}
unigram.keyValue.value = std::string(value_begin, it);
}
// Read past the space. The remainder, if it exists, is the score.
if (it != row.end())
{
++it;
}
if (it != row.end())
{
unigram.score = std::stod(std::string(it, row.end()));
}
results.push_back(unigram);
}
return results;
}
bool vChewing::ParselessLM::hasUnigramsForKey(const std::string &key)
{
if (db_ == nullptr)
{
return false;
}
return db_->findFirstMatchingLine(key + " ") != nullptr;
}

View File

@ -1,63 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SOURCE_ENGINE_PARSELESSLM_H_
#define SOURCE_ENGINE_PARSELESSLM_H_
#include <memory>
#include <string>
#include <vector>
#include "LanguageModel.h"
#include "ParselessPhraseDB.h"
namespace vChewing
{
class ParselessLM : public Gramambular::LanguageModel
{
public:
~ParselessLM() override;
bool isLoaded();
bool open(const std::string_view &path);
void close();
const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey,
const std::string &key) override;
const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key) override;
bool hasUnigramsForKey(const std::string &key) override;
private:
int fd_ = -1;
void *data_ = nullptr;
size_t length_ = 0;
std::unique_ptr<ParselessPhraseDB> db_;
};
}; // namespace vChewing
#endif // SOURCE_ENGINE_PARSELESSLM_H_

View File

@ -1,163 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "ParselessPhraseDB.h"
#include <cassert>
#include <cstring>
namespace vChewing
{
ParselessPhraseDB::ParselessPhraseDB(const char *buf, size_t length) : begin_(buf), end_(buf + length)
{
}
std::vector<std::string_view> ParselessPhraseDB::findRows(const std::string_view &key)
{
std::vector<std::string_view> rows;
const char *ptr = findFirstMatchingLine(key);
if (ptr == nullptr)
{
return rows;
}
while (ptr + key.length() <= end_ && memcmp(ptr, key.data(), key.length()) == 0)
{
const char *eol = ptr;
while (eol != end_ && *eol != '\n')
{
++eol;
}
rows.emplace_back(ptr, eol - ptr);
if (eol == end_)
{
break;
}
ptr = ++eol;
}
return rows;
}
// Implements a binary search that returns the pointer to the first matching
// row. In its core it's just a standard binary search, but we use backtracking
// to locate the line start. We also check the previous line to see if the
// current line is actually the first matching line: if the previous line is
// less to the key and the current line starts exactly with the key, then
// the current line is the first matching line.
const char *ParselessPhraseDB::findFirstMatchingLine(const std::string_view &key)
{
if (key.empty())
{
return begin_;
}
const char *top = begin_;
const char *bottom = end_;
while (top < bottom)
{
const char *mid = top + (bottom - top) / 2;
const char *ptr = mid;
if (ptr != begin_)
{
--ptr;
}
while (ptr != begin_ && *ptr != '\n')
{
--ptr;
}
const char *prev = nullptr;
if (*ptr == '\n')
{
prev = ptr;
++ptr;
}
// ptr is now in the "current" line we're interested in.
if (ptr + key.length() > end_)
{
// not enough data to compare at this point, bail.
break;
}
int current_cmp = memcmp(ptr, key.data(), key.length());
if (current_cmp > 0)
{
bottom = mid - 1;
continue;
}
if (current_cmp < 0)
{
top = mid + 1;
continue;
}
if (!prev)
{
return ptr;
}
// Move the prev so that it reaches the previous line.
if (prev != begin_)
{
--prev;
}
while (prev != begin_ && *prev != '\n')
{
--prev;
}
if (*prev == '\n')
{
++prev;
}
int prev_cmp = memcmp(prev, key.data(), key.length());
// This is the first occurrence.
if (prev_cmp < 0 && current_cmp == 0)
{
return ptr;
}
// This is not, which means ptr is "larger" than the keyData.
bottom = mid - 1;
}
return nullptr;
}
}; // namespace vChewing

View File

@ -1,61 +0,0 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SOURCE_ENGINE_PARSELESSPHRASEDB_H_
#define SOURCE_ENGINE_PARSELESSPHRASEDB_H_
#include <cstddef>
#include <string>
#include <vector>
namespace vChewing
{
// Defines phrase database that consists of (key, value, score) rows that are
// pre-sorted by the byte value of the keys. It is way faster than FastLM
// because it does not need to parse anything. Instead, it relies on the fact
// that the database is already sorted, and binary search is used to find the
// rows.
class ParselessPhraseDB
{
public:
ParselessPhraseDB(const char *buf, size_t length);
// Find the rows that match the key. Note that prefix match is used. If you
// need exact match, the key will need to have a delimiter (usually a space)
// at the end.
std::vector<std::string_view> findRows(const std::string_view &key);
const char *findFirstMatchingLine(const std::string_view &key);
private:
const char *begin_;
const char *end_;
};
}; // namespace vChewing
#endif // SOURCE_ENGINE_PARSELESSPHRASEDB_H_

View File

@ -315,10 +315,6 @@
6ACA41EF15FC1D9000935EF6 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = "<group>"; };
6ACA41F215FC1D9000935EF6 /* Installer-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = "Installer-Info.plist"; path = "Installer/Installer-Info.plist"; sourceTree = SOURCE_ROOT; };
6ACA41F315FC1D9000935EF6 /* Installer-Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "Installer-Prefix.pch"; path = "Installer/Installer-Prefix.pch"; sourceTree = SOURCE_ROOT; };
6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessPhraseDB.cpp; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessPhraseDB.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
6ACC3D422793701600F1B140 /* ParselessLM.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessLM.cpp; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
6ACC3D432793701600F1B140 /* ParselessLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessLM.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D427A9BF25ED28CC005D43E0 /* vChewing-Bridging-Header.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = "vChewing-Bridging-Header.h"; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
D427F76B278CA1BA004A2160 /* AppDelegate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = AppDelegate.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
D456576D279E4F7B00DF6BC9 /* InputHandler.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputHandler.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
@ -501,10 +497,6 @@
5B62A32527AE758000A19448 /* OldFileReferences */ = {
isa = PBXGroup;
children = (
6ACC3D422793701600F1B140 /* ParselessLM.cpp */,
6ACC3D432793701600F1B140 /* ParselessLM.h */,
6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */,
6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */,
D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */,
D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */,
);