Merge pull request #246 from lukhnos/more-tolerant-userphraseslm

Make UserPhrasesLM more tolerant
This commit is contained in:
Lukhnos Liu 2022-01-18 23:01:45 -08:00 committed by GitHub
commit b7ef7b5fb7
8 changed files with 154 additions and 61 deletions

View File

@ -9,14 +9,12 @@ jobs:
DEVELOPER_DIR: /Applications/Xcode.app/Contents/Developer
steps:
- uses: actions/checkout@v1
- name: Clean McBopomofo
run: xcodebuild -scheme McBopomofo -configuration Release clean
- name: Clean McBopomofoInstaller
run: xcodebuild -scheme McBopomofoInstaller -configuration Release clean
- name: Build McBopomofo
run: xcodebuild -scheme McBopomofo -configuration Release build
- name: Build McBopomofoInstaller
run: xcodebuild -scheme McBopomofoInstaller -configuration Release build
- name: Build McBopomofoLMLibTest
run: cmake -S . -B build
working-directory: Source/Engine
- name: Run McBopomofoLMLibTest
run: make runTest
working-directory: Source/Engine/build
- name: Test McBopomofo App Bundle
run: xcodebuild -scheme McBopomofo -configuration Debug test
- name: Test CandidateUI
@ -28,3 +26,12 @@ jobs:
- name: Test VXHanConvert
run: swift test
working-directory: Packages/VXHanConvert
- name: Clean McBopomofo
run: xcodebuild -scheme McBopomofo -configuration Release clean
- name: Clean McBopomofoInstaller
run: xcodebuild -scheme McBopomofoInstaller -configuration Release clean
- name: Build McBopomofo
run: xcodebuild -scheme McBopomofo -configuration Release build
- name: Build McBopomofoInstaller
run: xcodebuild -scheme McBopomofoInstaller -configuration Release build

View File

@ -1,8 +0,0 @@
language: objective-c
before_script: travis/before_script.sh
script: travis/script.sh
sudo: false
git:
depth: 1

View File

@ -10,7 +10,11 @@ add_library(McBopomofoLMLib
ParselessPhraseDB.cpp
ParselessPhraseDB.h
ParselessLM.cpp
ParselessLM.h)
ParselessLM.h
PhraseReplacementMap.h
PhraseReplacementMap.cpp
UserPhrasesLM.h
UserPhrasesLM.cpp)
# Let CMake fetch Google Test for us.
# https://github.com/google/googletest/tree/main/googletest#incorporating-into-an-existing-cmake-project
@ -29,14 +33,22 @@ FetchContent_MakeAvailable(googletest)
add_executable(McBopomofoLMLibTest
KeyValueBlobReaderTest.cpp
ParselessLMTest.cpp
ParselessPhraseDBTest.cpp)
ParselessPhraseDBTest.cpp
PhraseReplacementMapTest.cpp
UserPhrasesLMTest.cpp)
target_link_libraries(McBopomofoLMLibTest gtest_main McBopomofoLMLib)
include(GoogleTest)
gtest_discover_tests(McBopomofoLMLibTest)
# Benchmark target.
find_package(benchmark REQUIRED)
add_executable(ParselessLMBenchmark
FastLM.cpp
ParselessLMBenchmark.cpp)
target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark)
add_custom_target(
runTest
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/McBopomofoLMLibTest
)
add_dependencies(runTest McBopomofoLMLibTest)
# Benchmark target; to run, manually uncomment the lines below.
#
# find_package(benchmark)
# add_executable(ParselessLMBenchmark
# ParselessLMBenchmark.cpp)
# target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark)

View File

@ -26,16 +26,13 @@
#include <cassert>
#include <filesystem>
#include "FastLM.h"
#include "ParselessLM.h"
namespace {
using FastLM = Formosa::Gramambular::FastLM;
using ParselessLM = McBopomofo::ParselessLM;
static const char* kDataPath = "data.txt";
static const char* kLegacyDataPath = "data-legacy.txt";
static const char* kUnigramSearchKey = "ㄕˋ-ㄕˊ";
static void BM_ParselessLMOpenClose(benchmark::State& state)
@ -49,17 +46,6 @@ static void BM_ParselessLMOpenClose(benchmark::State& state)
}
BENCHMARK(BM_ParselessLMOpenClose);
// Benchmarks one open/close cycle of FastLM on the legacy-format data file.
static void BM_FastLMOpenClose(benchmark::State& state)
{
    // assert() is compiled out when NDEBUG is defined, so a missing data file
    // would otherwise go unnoticed and the loop would time a failed open.
    // Report the problem through the benchmark API instead.
    if (!std::filesystem::exists(kLegacyDataPath)) {
        state.SkipWithError("legacy data file not found");
        return;
    }

    for (auto _ : state) {
        FastLM lm;
        lm.open(kLegacyDataPath);
        lm.close();
    }
}
BENCHMARK(BM_FastLMOpenClose);
static void BM_ParselessLMFindUnigrams(benchmark::State& state)
{
assert(std::filesystem::exists(kDataPath));
@ -72,18 +58,6 @@ static void BM_ParselessLMFindUnigrams(benchmark::State& state)
}
BENCHMARK(BM_ParselessLMFindUnigrams);
// Benchmarks a single unigram lookup in FastLM. The data file is opened once
// outside the timed loop so only unigramsForKey() is measured.
static void BM_FastLMFindUnigrams(benchmark::State& state)
{
    // assert() is compiled out when NDEBUG is defined, so a missing data file
    // would otherwise go unnoticed and the loop would time lookups on an
    // unopened model. Report the problem through the benchmark API instead.
    if (!std::filesystem::exists(kLegacyDataPath)) {
        state.SkipWithError("legacy data file not found");
        return;
    }

    FastLM lm;
    lm.open(kLegacyDataPath);
    for (auto _ : state) {
        // Keep the result observable so the optimizer cannot discard the
        // lookup being measured.
        auto unigrams = lm.unigramsForKey(kUnigramSearchKey);
        benchmark::DoNotOptimize(unigrams);
    }
    lm.close();
}
BENCHMARK(BM_FastLMFindUnigrams);
}; // namespace
BENCHMARK_MAIN();

View File

@ -58,11 +58,6 @@ bool PhraseReplacementMap::open(const char *path)
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
keyValueMap[keyValue.key] = keyValue.value;
}
if (state == KeyValueBlobReader::State::ERROR) {
close();
return false;
}
return true;
}

View File

@ -0,0 +1,59 @@
// Copyright (c) 2022 and onwards The McBopomofo Authors.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <cstdio>
#include <filesystem>
#include <string>
#include "PhraseReplacementMap.h"
#include "gtest/gtest.h"
namespace McBopomofo {
// Verifies that PhraseReplacementMap::open() tolerates a malformed line:
// pairs read before the bad line are kept, while the bad line and everything
// after it are dropped.
TEST(PhraseReplacementMapTest, LenientReading)
{
    // Join with operator/ so a separator is inserted when the temp directory
    // has no trailing one (e.g. "/tmp"); plain string concatenation would
    // produce "/tmptest.txt". The file name is specific to this test so it
    // cannot collide with other tests that write to the temp directory.
    const std::string tmp_name = (std::filesystem::temp_directory_path()
        / "PhraseReplacementMapTest-LenientReading.txt")
                                     .string();

    FILE* f = fopen(tmp_name.c_str(), "w");
    ASSERT_NE(f, nullptr);

    fprintf(f, "key value\n");
    fprintf(f, "key2\n"); // error line
    fprintf(f, "key3 value2\n");
    int r = fclose(f);
    ASSERT_EQ(r, 0);

    // From here on use non-fatal EXPECT_* checks so that the temp file is
    // always removed, even when an expectation fails.
    PhraseReplacementMap map;
    // Lenient reading means open() itself still reports success.
    EXPECT_TRUE(map.open(tmp_name.c_str()));
    EXPECT_EQ(map.valueForKey("key"), "value");
    EXPECT_EQ(map.valueForKey("key2"), "");

    // key2 causes parsing error, and the line that has key3 won't be parsed.
    EXPECT_EQ(map.valueForKey("key3"), "");

    r = remove(tmp_name.c_str());
    ASSERT_EQ(r, 0);
}
} // namespace McBopomofo

View File

@ -78,12 +78,7 @@ bool UserPhrasesLM::open(const char *path)
KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading.
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key );
}
if (state == KeyValueBlobReader::State::ERROR) {
close();
return false;
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
}
return true;
}

View File

@ -0,0 +1,59 @@
// Copyright (c) 2022 and onwards The McBopomofo Authors.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <cstdio>
#include <filesystem>
#include <string>
#include "UserPhrasesLM.h"
#include "gtest/gtest.h"
namespace McBopomofo {
// Verifies that UserPhrasesLM::open() tolerates a malformed line: entries
// read before the bad line are kept, while the bad line and everything after
// it are dropped.
//
// NOTE(review): the suite name is misspelled ("Phreases"); it is left as-is
// here so the registered test ID does not change.
TEST(UserPhreasesLMTest, LenientReading)
{
    // Join with operator/ so a separator is inserted when the temp directory
    // has no trailing one (e.g. "/tmp"); plain string concatenation would
    // produce "/tmptest.txt". The file name is specific to this test so it
    // cannot collide with other tests that write to the temp directory.
    const std::string tmp_name = (std::filesystem::temp_directory_path()
        / "UserPhrasesLMTest-LenientReading.txt")
                                     .string();

    FILE* f = fopen(tmp_name.c_str(), "w");
    ASSERT_NE(f, nullptr);

    // Each line is "<phrase> <reading>"; lookups are done by reading.
    fprintf(f, "value1 reading1\n");
    fprintf(f, "value2 \n"); // error line
    fprintf(f, "value3 reading2\n");
    int r = fclose(f);
    ASSERT_EQ(r, 0);

    // From here on use non-fatal EXPECT_* checks so that the temp file is
    // always removed, even when an expectation fails.
    UserPhrasesLM lm;
    // Lenient reading means open() itself still reports success.
    EXPECT_TRUE(lm.open(tmp_name.c_str()));
    EXPECT_TRUE(lm.hasUnigramsForKey("reading1"));
    EXPECT_FALSE(lm.hasUnigramsForKey("value2"));

    // Anything after the error won't be parsed, so reading2 won't be found.
    EXPECT_FALSE(lm.hasUnigramsForKey("reading2"));

    r = remove(tmp_name.c_str());
    ASSERT_EQ(r, 0);
}
} // namespace McBopomofo