diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml index b70b3abe..4b4304c5 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/continuous-integration-workflow.yml @@ -9,6 +9,12 @@ jobs: DEVELOPER_DIR: /Applications/Xcode.app/Contents/Developer steps: - uses: actions/checkout@v1 + - name: Build McBopomofoLMLibTest + run: cmake -S . -B build + working-directory: Source/Engine + - name: Run McBopomofoLMLibTest + run: make runTest + working-directory: Source/Engine/build - name: Clean run: xcodebuild -scheme McBopomofo -configuration Release clean - name: Clean @@ -17,4 +23,3 @@ jobs: run: xcodebuild -scheme McBopomofo -configuration Release build - name: Build run: xcodebuild -scheme McBopomofoInstaller -configuration Release build - diff --git a/Source/Engine/CMakeLists.txt b/Source/Engine/CMakeLists.txt index 6e06f2ad..5778bacb 100644 --- a/Source/Engine/CMakeLists.txt +++ b/Source/Engine/CMakeLists.txt @@ -10,7 +10,9 @@ add_library(McBopomofoLMLib ParselessPhraseDB.cpp ParselessPhraseDB.h ParselessLM.cpp - ParselessLM.h) + ParselessLM.h + UserPhrasesLM.h + UserPhrasesLM.cpp) # Let CMake fetch Google Test for us. # https://github.com/google/googletest/tree/main/googletest#incorporating-into-an-existing-cmake-project @@ -29,14 +31,21 @@ FetchContent_MakeAvailable(googletest) add_executable(McBopomofoLMLibTest KeyValueBlobReaderTest.cpp ParselessLMTest.cpp - ParselessPhraseDBTest.cpp) + ParselessPhraseDBTest.cpp + UserPhrasesLMTest.cpp) target_link_libraries(McBopomofoLMLibTest gtest_main McBopomofoLMLib) include(GoogleTest) gtest_discover_tests(McBopomofoLMLibTest) -# Benchmark target. -find_package(benchmark REQUIRED) -add_executable(ParselessLMBenchmark - FastLM.cpp - ParselessLMBenchmark.cpp) -target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark) \ No newline at end of file +add_custom_target( + runTest + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/McBopomofoLMLibTest +) +add_dependencies(runTest McBopomofoLMLibTest) + +# Benchmark target; to run, manually uncomment the lines below. +# +# find_package(benchmark) +# add_executable(ParselessLMBenchmark +# ParselessLMBenchmark.cpp) +# target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark) diff --git a/Source/Engine/ParselessLMBenchmark.cpp b/Source/Engine/ParselessLMBenchmark.cpp index d47b27ea..7664feaf 100644 --- a/Source/Engine/ParselessLMBenchmark.cpp +++ b/Source/Engine/ParselessLMBenchmark.cpp @@ -26,16 +26,13 @@ #include #include -#include "FastLM.h" #include "ParselessLM.h" namespace { -using FastLM = Formosa::Gramambular::FastLM; using ParselessLM = McBopomofo::ParselessLM; static const char* kDataPath = "data.txt"; -static const char* kLegacyDataPath = "data-legacy.txt"; static const char* kUnigramSearchKey = "ㄕˋ-ㄕˊ"; static void BM_ParselessLMOpenClose(benchmark::State& state) @@ -49,17 +46,6 @@ static void BM_ParselessLMOpenClose(benchmark::State& state) } BENCHMARK(BM_ParselessLMOpenClose); -static void BM_FastLMOpenClose(benchmark::State& state) -{ - assert(std::filesystem::exists(kLegacyDataPath)); - for (auto _ : state) { - FastLM lm; - lm.open(kLegacyDataPath); - lm.close(); - } -} -BENCHMARK(BM_FastLMOpenClose); - static void BM_ParselessLMFindUnigrams(benchmark::State& state) { assert(std::filesystem::exists(kDataPath)); @@ -72,18 +58,6 @@ static void BM_ParselessLMFindUnigrams(benchmark::State& state) } BENCHMARK(BM_ParselessLMFindUnigrams); -static void BM_FastLMFindUnigrams(benchmark::State& state) -{ - assert(std::filesystem::exists(kLegacyDataPath)); - FastLM lm; - lm.open(kLegacyDataPath); - for (auto _ : state) { - lm.unigramsForKey(kUnigramSearchKey); - } - lm.close(); -} -BENCHMARK(BM_FastLMFindUnigrams); - }; // namespace BENCHMARK_MAIN(); diff --git a/Source/Engine/UserPhrasesLM.cpp b/Source/Engine/UserPhrasesLM.cpp index cbb6b9cd..31f2f1e7 100644 --- a/Source/Engine/UserPhrasesLM.cpp +++ b/Source/Engine/UserPhrasesLM.cpp @@ -78,12 +78,7 @@ bool UserPhrasesLM::open(const char *path) KeyValueBlobReader::State state; while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { // We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading. - keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key ); - } - - if (state == KeyValueBlobReader::State::ERROR) { - close(); - return false; + keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key); } return true; } diff --git a/Source/Engine/UserPhrasesLMTest.cpp b/Source/Engine/UserPhrasesLMTest.cpp new file mode 100644 index 00000000..b923fd97 --- /dev/null +++ b/Source/Engine/UserPhrasesLMTest.cpp @@ -0,0 +1,57 @@ +// Copyright (c) 2022 and onwards The McBopomofo Authors. +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include +#include +#include + +#include "UserPhrasesLM.h" +#include "gtest/gtest.h" + +namespace McBopomofo { + +TEST(UserPhreasesLMTest, LenientReading) +{ + std::string tmp_name + = std::string(std::filesystem::temp_directory_path()) + "test.txt"; + + FILE* f = fopen(tmp_name.c_str(), "w"); + ASSERT_NE(f, nullptr); + + fprintf(f, "bar foo\n"); + fprintf(f, "bar \n"); // error line + fprintf(f, "argh baz\n"); + int r = fclose(f); + ASSERT_EQ(r, 0); + + UserPhrasesLM lm; + lm.open(tmp_name.c_str()); + ASSERT_TRUE(lm.hasUnigramsForKey("foo")); + ASSERT_FALSE(lm.hasUnigramsForKey("bar")); + ASSERT_FALSE(lm.hasUnigramsForKey("baz")); + + r = remove(tmp_name.c_str()); + ASSERT_EQ(r, 0); +} + +} // namespace McBopomofo