Make UserPhrasesLM more tolerant
This lets UserPhrasesLM consumes as much user data as possible before bailing. This makes it more tolerant to data errors and will not fail entirely just because the user has one faulty line in a data file. Also removes FastFM from the benchmarking suite. This also runs the CMake-based C++ tests as part of the GitHub CI.
This commit is contained in:
parent
00f110f101
commit
c8f65580bb
|
@ -9,6 +9,12 @@ jobs:
|
|||
DEVELOPER_DIR: /Applications/Xcode.app/Contents/Developer
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Build McBopomofoLMLibTest
|
||||
run: cmake -S . -B build
|
||||
working-directory: Source/Engine
|
||||
- name: Run McBopomofoLMLibTest
|
||||
run: make runTest
|
||||
working-directory: Source/Engine/build
|
||||
- name: Clean
|
||||
run: xcodebuild -scheme McBopomofo -configuration Release clean
|
||||
- name: Clean
|
||||
|
@ -17,4 +23,3 @@ jobs:
|
|||
run: xcodebuild -scheme McBopomofo -configuration Release build
|
||||
- name: Build
|
||||
run: xcodebuild -scheme McBopomofoInstaller -configuration Release build
|
||||
|
||||
|
|
|
@ -10,7 +10,9 @@ add_library(McBopomofoLMLib
|
|||
ParselessPhraseDB.cpp
|
||||
ParselessPhraseDB.h
|
||||
ParselessLM.cpp
|
||||
ParselessLM.h)
|
||||
ParselessLM.h
|
||||
UserPhrasesLM.h
|
||||
UserPhrasesLM.cpp)
|
||||
|
||||
# Let CMake fetch Google Test for us.
|
||||
# https://github.com/google/googletest/tree/main/googletest#incorporating-into-an-existing-cmake-project
|
||||
|
@ -29,14 +31,21 @@ FetchContent_MakeAvailable(googletest)
|
|||
add_executable(McBopomofoLMLibTest
|
||||
KeyValueBlobReaderTest.cpp
|
||||
ParselessLMTest.cpp
|
||||
ParselessPhraseDBTest.cpp)
|
||||
ParselessPhraseDBTest.cpp
|
||||
UserPhrasesLMTest.cpp)
|
||||
target_link_libraries(McBopomofoLMLibTest gtest_main McBopomofoLMLib)
|
||||
include(GoogleTest)
|
||||
gtest_discover_tests(McBopomofoLMLibTest)
|
||||
|
||||
# Benchmark target.
|
||||
find_package(benchmark REQUIRED)
|
||||
add_executable(ParselessLMBenchmark
|
||||
FastLM.cpp
|
||||
ParselessLMBenchmark.cpp)
|
||||
target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark)
|
||||
add_custom_target(
|
||||
runTest
|
||||
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/McBopomofoLMLibTest
|
||||
)
|
||||
add_dependencies(runTest McBopomofoLMLibTest)
|
||||
|
||||
# Benchmark target; to run, manually uncomment the lines below.
|
||||
#
|
||||
# find_package(benchmark)
|
||||
# add_executable(ParselessLMBenchmark
|
||||
# ParselessLMBenchmark.cpp)
|
||||
# target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark)
|
||||
|
|
|
@ -26,16 +26,13 @@
|
|||
#include <cassert>
|
||||
#include <filesystem>
|
||||
|
||||
#include "FastLM.h"
|
||||
#include "ParselessLM.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using FastLM = Formosa::Gramambular::FastLM;
|
||||
using ParselessLM = McBopomofo::ParselessLM;
|
||||
|
||||
static const char* kDataPath = "data.txt";
|
||||
static const char* kLegacyDataPath = "data-legacy.txt";
|
||||
static const char* kUnigramSearchKey = "ㄕˋ-ㄕˊ";
|
||||
|
||||
static void BM_ParselessLMOpenClose(benchmark::State& state)
|
||||
|
@ -49,17 +46,6 @@ static void BM_ParselessLMOpenClose(benchmark::State& state)
|
|||
}
|
||||
BENCHMARK(BM_ParselessLMOpenClose);
|
||||
|
||||
static void BM_FastLMOpenClose(benchmark::State& state)
|
||||
{
|
||||
assert(std::filesystem::exists(kLegacyDataPath));
|
||||
for (auto _ : state) {
|
||||
FastLM lm;
|
||||
lm.open(kLegacyDataPath);
|
||||
lm.close();
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_FastLMOpenClose);
|
||||
|
||||
static void BM_ParselessLMFindUnigrams(benchmark::State& state)
|
||||
{
|
||||
assert(std::filesystem::exists(kDataPath));
|
||||
|
@ -72,18 +58,6 @@ static void BM_ParselessLMFindUnigrams(benchmark::State& state)
|
|||
}
|
||||
BENCHMARK(BM_ParselessLMFindUnigrams);
|
||||
|
||||
static void BM_FastLMFindUnigrams(benchmark::State& state)
|
||||
{
|
||||
assert(std::filesystem::exists(kLegacyDataPath));
|
||||
FastLM lm;
|
||||
lm.open(kLegacyDataPath);
|
||||
for (auto _ : state) {
|
||||
lm.unigramsForKey(kUnigramSearchKey);
|
||||
}
|
||||
lm.close();
|
||||
}
|
||||
BENCHMARK(BM_FastLMFindUnigrams);
|
||||
|
||||
}; // namespace
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
|
|
|
@ -78,12 +78,7 @@ bool UserPhrasesLM::open(const char *path)
|
|||
KeyValueBlobReader::State state;
|
||||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
|
||||
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading.
|
||||
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key );
|
||||
}
|
||||
|
||||
if (state == KeyValueBlobReader::State::ERROR) {
|
||||
close();
|
||||
return false;
|
||||
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright (c) 2022 and onwards The McBopomofo Authors.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <cstdio>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
|
||||
#include "UserPhrasesLM.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace McBopomofo {
|
||||
|
||||
TEST(UserPhreasesLMTest, LenientReading)
|
||||
{
|
||||
std::string tmp_name
|
||||
= std::string(std::filesystem::temp_directory_path()) + "test.txt";
|
||||
|
||||
FILE* f = fopen(tmp_name.c_str(), "w");
|
||||
ASSERT_NE(f, nullptr);
|
||||
|
||||
fprintf(f, "bar foo\n");
|
||||
fprintf(f, "bar \n"); // error line
|
||||
fprintf(f, "argh baz\n");
|
||||
int r = fclose(f);
|
||||
ASSERT_EQ(r, 0);
|
||||
|
||||
UserPhrasesLM lm;
|
||||
lm.open(tmp_name.c_str());
|
||||
ASSERT_TRUE(lm.hasUnigramsForKey("foo"));
|
||||
ASSERT_FALSE(lm.hasUnigramsForKey("bar"));
|
||||
ASSERT_FALSE(lm.hasUnigramsForKey("baz"));
|
||||
|
||||
r = remove(tmp_name.c_str());
|
||||
ASSERT_EQ(r, 0);
|
||||
}
|
||||
|
||||
} // namespace McBopomofo
|
Loading…
Reference in New Issue