vChewing-macOS/Source/Engine/UserPhrasesLM.cpp

136 lines
3.6 KiB
C++

// Copyright (c) 2022 and onwards The McBopomofo Authors.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "UserPhrasesLM.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <fstream>
#include <unistd.h>
#include "KeyValueBlobReader.h"
namespace McBopomofo {
UserPhrasesLM::UserPhrasesLM()
: fd(-1)
, data(0)
, length(0)
{
}
UserPhrasesLM::~UserPhrasesLM()
{
if (data) {
close();
}
}
bool UserPhrasesLM::open(const char *path)
{
if (data) {
return false;
}
fd = ::open(path, O_RDONLY);
if (fd == -1) {
printf("open:: file not exist");
return false;
}
struct stat sb;
if (fstat(fd, &sb) == -1) {
printf("open:: cannot open file");
return false;
}
length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (!data) {
::close(fd);
return false;
}
KeyValueBlobReader reader(static_cast<char*>(data), length);
KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading.
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
}
return true;
}
void UserPhrasesLM::close()
{
if (data) {
munmap(data, length);
::close(fd);
data = 0;
}
keyRowMap.clear();
}
void UserPhrasesLM::dump()
{
for (const auto& entry : keyRowMap) {
const std::vector<Row>& rows = entry.second;
for (const auto& row : rows) {
std::cerr << row.key << " " << row.value << "\n";
}
}
}
const std::vector<Formosa::Gramambular::Bigram> UserPhrasesLM::bigramsForKeys(const std::string& preceedingKey, const std::string& key)
{
return std::vector<Formosa::Gramambular::Bigram>();
}
const std::vector<Formosa::Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std::string& key)
{
std::vector<Formosa::Gramambular::Unigram> v;
auto iter = keyRowMap.find(key);
if (iter != keyRowMap.end()) {
const std::vector<Row>& rows = iter->second;
for (const auto& row : rows) {
Formosa::Gramambular::Unigram g;
g.keyValue.key = row.key;
g.keyValue.value = row.value;
g.score = 0.0;
v.push_back(g);
}
}
return v;
}
bool UserPhrasesLM::hasUnigramsForKey(const std::string& key)
{
return keyRowMap.find(key) != keyRowMap.end();
}
}; // namespace McBopomofo