From c8f65580bb01e3484be5bf758fb88ffef2aab8cb Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Tue, 18 Jan 2022 15:52:02 -0800 Subject: [PATCH 1/3] Make UserPhrasesLM more tolerant This lets UserPhrasesLM consumes as much user data as possible before bailing. This makes it more tolerant to data errors and will not fail entirely just because the user has one faulty line in a data file. Also removes FastFM from the benchmarking suite. This also runs the CMake-based C++ tests as part of the GitHub CI. --- .../continuous-integration-workflow.yml | 7 ++- Source/Engine/CMakeLists.txt | 25 +++++--- Source/Engine/ParselessLMBenchmark.cpp | 26 --------- Source/Engine/UserPhrasesLM.cpp | 7 +-- Source/Engine/UserPhrasesLMTest.cpp | 57 +++++++++++++++++++ 5 files changed, 81 insertions(+), 41 deletions(-) create mode 100644 Source/Engine/UserPhrasesLMTest.cpp diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml index b70b3abe..4b4304c5 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/continuous-integration-workflow.yml @@ -9,6 +9,12 @@ jobs: DEVELOPER_DIR: /Applications/Xcode.app/Contents/Developer steps: - uses: actions/checkout@v1 + - name: Build McBopomofoLMLibTest + run: cmake -S . -B build + working-directory: Source/Engine + - name: Run McBopomofoLMLibTest + run: make runTest + working-directory: Source/Engine/build - name: Clean run: xcodebuild -scheme McBopomofo -configuration Release clean - name: Clean @@ -17,4 +23,3 @@ jobs: run: xcodebuild -scheme McBopomofo -configuration Release build - name: Build run: xcodebuild -scheme McBopomofoInstaller -configuration Release build - diff --git a/Source/Engine/CMakeLists.txt b/Source/Engine/CMakeLists.txt index 6e06f2ad..5778bacb 100644 --- a/Source/Engine/CMakeLists.txt +++ b/Source/Engine/CMakeLists.txt @@ -10,7 +10,9 @@ add_library(McBopomofoLMLib ParselessPhraseDB.cpp ParselessPhraseDB.h ParselessLM.cpp - ParselessLM.h) + ParselessLM.h + UserPhrasesLM.h + UserPhrasesLM.cpp) # Let CMake fetch Google Test for us. # https://github.com/google/googletest/tree/main/googletest#incorporating-into-an-existing-cmake-project @@ -29,14 +31,21 @@ FetchContent_MakeAvailable(googletest) add_executable(McBopomofoLMLibTest KeyValueBlobReaderTest.cpp ParselessLMTest.cpp - ParselessPhraseDBTest.cpp) + ParselessPhraseDBTest.cpp + UserPhrasesLMTest.cpp) target_link_libraries(McBopomofoLMLibTest gtest_main McBopomofoLMLib) include(GoogleTest) gtest_discover_tests(McBopomofoLMLibTest) -# Benchmark target. -find_package(benchmark REQUIRED) -add_executable(ParselessLMBenchmark - FastLM.cpp - ParselessLMBenchmark.cpp) -target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark) \ No newline at end of file +add_custom_target( + runTest + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/McBopomofoLMLibTest +) +add_dependencies(runTest McBopomofoLMLibTest) + +# Benchmark target; to run, manually uncomment the lines below. +# +# find_package(benchmark) +# add_executable(ParselessLMBenchmark +# ParselessLMBenchmark.cpp) +# target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark) diff --git a/Source/Engine/ParselessLMBenchmark.cpp b/Source/Engine/ParselessLMBenchmark.cpp index d47b27ea..7664feaf 100644 --- a/Source/Engine/ParselessLMBenchmark.cpp +++ b/Source/Engine/ParselessLMBenchmark.cpp @@ -26,16 +26,13 @@ #include #include -#include "FastLM.h" #include "ParselessLM.h" namespace { -using FastLM = Formosa::Gramambular::FastLM; using ParselessLM = McBopomofo::ParselessLM; static const char* kDataPath = "data.txt"; -static const char* kLegacyDataPath = "data-legacy.txt"; static const char* kUnigramSearchKey = "ㄕˋ-ㄕˊ"; static void BM_ParselessLMOpenClose(benchmark::State& state) @@ -49,17 +46,6 @@ static void BM_ParselessLMOpenClose(benchmark::State& state) } BENCHMARK(BM_ParselessLMOpenClose); -static void BM_FastLMOpenClose(benchmark::State& state) -{ - assert(std::filesystem::exists(kLegacyDataPath)); - for (auto _ : state) { - FastLM lm; - lm.open(kLegacyDataPath); - lm.close(); - } -} -BENCHMARK(BM_FastLMOpenClose); - static void BM_ParselessLMFindUnigrams(benchmark::State& state) { assert(std::filesystem::exists(kDataPath)); @@ -72,18 +58,6 @@ static void BM_ParselessLMFindUnigrams(benchmark::State& state) } BENCHMARK(BM_ParselessLMFindUnigrams); -static void BM_FastLMFindUnigrams(benchmark::State& state) -{ - assert(std::filesystem::exists(kLegacyDataPath)); - FastLM lm; - lm.open(kLegacyDataPath); - for (auto _ : state) { - lm.unigramsForKey(kUnigramSearchKey); - } - lm.close(); -} -BENCHMARK(BM_FastLMFindUnigrams); - }; // namespace BENCHMARK_MAIN(); diff --git a/Source/Engine/UserPhrasesLM.cpp b/Source/Engine/UserPhrasesLM.cpp index cbb6b9cd..31f2f1e7 100644 --- a/Source/Engine/UserPhrasesLM.cpp +++ b/Source/Engine/UserPhrasesLM.cpp @@ -78,12 +78,7 @@ bool UserPhrasesLM::open(const char *path) KeyValueBlobReader::State state; while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { // We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading. - keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key ); - } - - if (state == KeyValueBlobReader::State::ERROR) { - close(); - return false; + keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key); } return true; } diff --git a/Source/Engine/UserPhrasesLMTest.cpp b/Source/Engine/UserPhrasesLMTest.cpp new file mode 100644 index 00000000..b923fd97 --- /dev/null +++ b/Source/Engine/UserPhrasesLMTest.cpp @@ -0,0 +1,57 @@ +// Copyright (c) 2022 and onwards The McBopomofo Authors. +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include +#include +#include + +#include "UserPhrasesLM.h" +#include "gtest/gtest.h" + +namespace McBopomofo { + +TEST(UserPhreasesLMTest, LenientReading) +{ + std::string tmp_name + = std::string(std::filesystem::temp_directory_path()) + "test.txt"; + + FILE* f = fopen(tmp_name.c_str(), "w"); + ASSERT_NE(f, nullptr); + + fprintf(f, "bar foo\n"); + fprintf(f, "bar \n"); // error line + fprintf(f, "argh baz\n"); + int r = fclose(f); + ASSERT_EQ(r, 0); + + UserPhrasesLM lm; + lm.open(tmp_name.c_str()); + ASSERT_TRUE(lm.hasUnigramsForKey("foo")); + ASSERT_FALSE(lm.hasUnigramsForKey("bar")); + ASSERT_FALSE(lm.hasUnigramsForKey("baz")); + + r = remove(tmp_name.c_str()); + ASSERT_EQ(r, 0); +} + +} // namespace McBopomofo From 202b1fa05808210881b19b9723ca7a50a9f52385 Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Tue, 18 Jan 2022 22:46:26 -0800 Subject: [PATCH 2/3] Also make PhraseReplacementMap more tolerant This also clarifies the test expectations and how parsing errors are handled. --- Source/Engine/CMakeLists.txt | 3 ++ Source/Engine/PhraseReplacementMap.cpp | 5 -- Source/Engine/PhraseReplacementMapTest.cpp | 59 ++++++++++++++++++++++ Source/Engine/UserPhrasesLMTest.cpp | 14 ++--- 4 files changed, 70 insertions(+), 11 deletions(-) create mode 100644 Source/Engine/PhraseReplacementMapTest.cpp diff --git a/Source/Engine/CMakeLists.txt b/Source/Engine/CMakeLists.txt index 5778bacb..d0e5b05f 100644 --- a/Source/Engine/CMakeLists.txt +++ b/Source/Engine/CMakeLists.txt @@ -11,6 +11,8 @@ add_library(McBopomofoLMLib ParselessPhraseDB.h ParselessLM.cpp ParselessLM.h + PhraseReplacementMap.h + PhraseReplacementMap.cpp UserPhrasesLM.h UserPhrasesLM.cpp) @@ -32,6 +34,7 @@ add_executable(McBopomofoLMLibTest KeyValueBlobReaderTest.cpp ParselessLMTest.cpp ParselessPhraseDBTest.cpp + PhraseReplacementMapTest.cpp UserPhrasesLMTest.cpp) target_link_libraries(McBopomofoLMLibTest gtest_main McBopomofoLMLib) include(GoogleTest) diff --git a/Source/Engine/PhraseReplacementMap.cpp b/Source/Engine/PhraseReplacementMap.cpp index c0c3abbe..ed5831fb 100644 --- a/Source/Engine/PhraseReplacementMap.cpp +++ b/Source/Engine/PhraseReplacementMap.cpp @@ -58,11 +58,6 @@ bool PhraseReplacementMap::open(const char *path) while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { keyValueMap[keyValue.key] = keyValue.value; } - - if (state == KeyValueBlobReader::State::ERROR) { - close(); - return false; - } return true; } diff --git a/Source/Engine/PhraseReplacementMapTest.cpp b/Source/Engine/PhraseReplacementMapTest.cpp new file mode 100644 index 00000000..239ea6e0 --- /dev/null +++ b/Source/Engine/PhraseReplacementMapTest.cpp @@ -0,0 +1,59 @@ +// Copyright (c) 2022 and onwards The McBopomofo Authors. +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include +#include +#include + +#include "PhraseReplacementMap.h" +#include "gtest/gtest.h" + +namespace McBopomofo { + +TEST(PhraseReplacementMapTest, LenientReading) +{ + std::string tmp_name + = std::string(std::filesystem::temp_directory_path()) + "test.txt"; + + FILE* f = fopen(tmp_name.c_str(), "w"); + ASSERT_NE(f, nullptr); + + fprintf(f, "key value\n"); + fprintf(f, "key2\n"); // error line + fprintf(f, "key3 value2\n"); + int r = fclose(f); + ASSERT_EQ(r, 0); + + PhraseReplacementMap map; + map.open(tmp_name.c_str()); + ASSERT_EQ(map.valueForKey("key"), "value"); + ASSERT_EQ(map.valueForKey("key2"), ""); + + // key2 causes parsing error, and the line that has key3 won't be parsed. + ASSERT_EQ(map.valueForKey("key3"), ""); + + r = remove(tmp_name.c_str()); + ASSERT_EQ(r, 0); +} + +} // namespace McBopomofo diff --git a/Source/Engine/UserPhrasesLMTest.cpp b/Source/Engine/UserPhrasesLMTest.cpp index b923fd97..402d618f 100644 --- a/Source/Engine/UserPhrasesLMTest.cpp +++ b/Source/Engine/UserPhrasesLMTest.cpp @@ -38,17 +38,19 @@ TEST(UserPhreasesLMTest, LenientReading) FILE* f = fopen(tmp_name.c_str(), "w"); ASSERT_NE(f, nullptr); - fprintf(f, "bar foo\n"); - fprintf(f, "bar \n"); // error line - fprintf(f, "argh baz\n"); + fprintf(f, "value1 reading1\n"); + fprintf(f, "value2 \n"); // error line + fprintf(f, "value3 reading2\n"); int r = fclose(f); ASSERT_EQ(r, 0); UserPhrasesLM lm; lm.open(tmp_name.c_str()); - ASSERT_TRUE(lm.hasUnigramsForKey("foo")); - ASSERT_FALSE(lm.hasUnigramsForKey("bar")); - ASSERT_FALSE(lm.hasUnigramsForKey("baz")); + ASSERT_TRUE(lm.hasUnigramsForKey("reading1")); + ASSERT_FALSE(lm.hasUnigramsForKey("value2")); + + // Anything after the error won't be parsed, so reading2 won't be found. + ASSERT_FALSE(lm.hasUnigramsForKey("reading2")); r = remove(tmp_name.c_str()); ASSERT_EQ(r, 0); From a2e551d000b17ec872039fa85df14a92c8c8ebd6 Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Tue, 18 Jan 2022 22:47:08 -0800 Subject: [PATCH 3/3] Clean up Travis CI settings This was missed from #247. --- .travis.yml | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e907fe3a..00000000 --- a/.travis.yml +++ /dev/null @@ -1,8 +0,0 @@ -language: objective-c - -before_script: travis/before_script.sh -script: travis/script.sh - -sudo: false -git: - depth: 1