Merge pull request #246 from lukhnos/more-tolerant-userphraseslm
Make UserPhrasesLM more tolerant
This commit is contained in:
commit
b7ef7b5fb7
|
@ -9,14 +9,12 @@ jobs:
|
|||
DEVELOPER_DIR: /Applications/Xcode.app/Contents/Developer
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Clean McBopomofo
|
||||
run: xcodebuild -scheme McBopomofo -configuration Release clean
|
||||
- name: Clean McBopomofoInstaller
|
||||
run: xcodebuild -scheme McBopomofoInstaller -configuration Release clean
|
||||
- name: Build McBopomofo
|
||||
run: xcodebuild -scheme McBopomofo -configuration Release build
|
||||
- name: Build McBopomofoInstaller
|
||||
run: xcodebuild -scheme McBopomofoInstaller -configuration Release build
|
||||
- name: Build McBopomofoLMLibTest
|
||||
run: cmake -S . -B build
|
||||
working-directory: Source/Engine
|
||||
- name: Run McBopomofoLMLibTest
|
||||
run: make runTest
|
||||
working-directory: Source/Engine/build
|
||||
- name: Test McBopomofo App Bundle
|
||||
run: xcodebuild -scheme McBopomofo -configuration Debug test
|
||||
- name: Test CandidateUI
|
||||
|
@ -28,3 +26,12 @@ jobs:
|
|||
- name: Test VXHanConvert
|
||||
run: swift test
|
||||
working-directory: Packages/VXHanConvert
|
||||
- name: Clean McBopomofo
|
||||
run: xcodebuild -scheme McBopomofo -configuration Release clean
|
||||
- name: Clean McBopomofoInstaller
|
||||
run: xcodebuild -scheme McBopomofoInstaller -configuration Release clean
|
||||
- name: Build McBopomofo
|
||||
run: xcodebuild -scheme McBopomofo -configuration Release build
|
||||
- name: Build McBopomofoInstaller
|
||||
run: xcodebuild -scheme McBopomofoInstaller -configuration Release build
|
||||
|
||||
|
|
|
@ -1,8 +0,0 @@
|
|||
language: objective-c
|
||||
|
||||
before_script: travis/before_script.sh
|
||||
script: travis/script.sh
|
||||
|
||||
sudo: false
|
||||
git:
|
||||
depth: 1
|
|
@ -10,7 +10,11 @@ add_library(McBopomofoLMLib
|
|||
ParselessPhraseDB.cpp
|
||||
ParselessPhraseDB.h
|
||||
ParselessLM.cpp
|
||||
ParselessLM.h)
|
||||
ParselessLM.h
|
||||
PhraseReplacementMap.h
|
||||
PhraseReplacementMap.cpp
|
||||
UserPhrasesLM.h
|
||||
UserPhrasesLM.cpp)
|
||||
|
||||
# Let CMake fetch Google Test for us.
|
||||
# https://github.com/google/googletest/tree/main/googletest#incorporating-into-an-existing-cmake-project
|
||||
|
@ -29,14 +33,22 @@ FetchContent_MakeAvailable(googletest)
|
|||
add_executable(McBopomofoLMLibTest
|
||||
KeyValueBlobReaderTest.cpp
|
||||
ParselessLMTest.cpp
|
||||
ParselessPhraseDBTest.cpp)
|
||||
ParselessPhraseDBTest.cpp
|
||||
PhraseReplacementMapTest.cpp
|
||||
UserPhrasesLMTest.cpp)
|
||||
target_link_libraries(McBopomofoLMLibTest gtest_main McBopomofoLMLib)
|
||||
include(GoogleTest)
|
||||
gtest_discover_tests(McBopomofoLMLibTest)
|
||||
|
||||
# Benchmark target.
|
||||
find_package(benchmark REQUIRED)
|
||||
add_executable(ParselessLMBenchmark
|
||||
FastLM.cpp
|
||||
ParselessLMBenchmark.cpp)
|
||||
target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark)
|
||||
add_custom_target(
|
||||
runTest
|
||||
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/McBopomofoLMLibTest
|
||||
)
|
||||
add_dependencies(runTest McBopomofoLMLibTest)
|
||||
|
||||
# Benchmark target; to run, manually uncomment the lines below.
|
||||
#
|
||||
# find_package(benchmark)
|
||||
# add_executable(ParselessLMBenchmark
|
||||
# ParselessLMBenchmark.cpp)
|
||||
# target_link_libraries(ParselessLMBenchmark McBopomofoLMLib benchmark::benchmark)
|
||||
|
|
|
@ -26,16 +26,13 @@
|
|||
#include <cassert>
|
||||
#include <filesystem>
|
||||
|
||||
#include "FastLM.h"
|
||||
#include "ParselessLM.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using FastLM = Formosa::Gramambular::FastLM;
|
||||
using ParselessLM = McBopomofo::ParselessLM;
|
||||
|
||||
static const char* kDataPath = "data.txt";
|
||||
static const char* kLegacyDataPath = "data-legacy.txt";
|
||||
static const char* kUnigramSearchKey = "ㄕˋ-ㄕˊ";
|
||||
|
||||
static void BM_ParselessLMOpenClose(benchmark::State& state)
|
||||
|
@ -49,17 +46,6 @@ static void BM_ParselessLMOpenClose(benchmark::State& state)
|
|||
}
|
||||
BENCHMARK(BM_ParselessLMOpenClose);
|
||||
|
||||
// Benchmark: each iteration of the `state` loop constructs a FastLM, opens
// the file at kLegacyDataPath, and closes it again.
// The existence check uses assert(), so it is compiled out when NDEBUG is
// defined; in that case a missing data file is not detected here.
static void BM_FastLMOpenClose(benchmark::State& state)
|
||||
{
|
||||
assert(std::filesystem::exists(kLegacyDataPath));
|
||||
for (auto _ : state) {
|
||||
FastLM lm;
|
||||
lm.open(kLegacyDataPath);
|
||||
lm.close();
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_FastLMOpenClose);
|
||||
|
||||
static void BM_ParselessLMFindUnigrams(benchmark::State& state)
|
||||
{
|
||||
assert(std::filesystem::exists(kDataPath));
|
||||
|
@ -72,18 +58,6 @@ static void BM_ParselessLMFindUnigrams(benchmark::State& state)
|
|||
}
|
||||
BENCHMARK(BM_ParselessLMFindUnigrams);
|
||||
|
||||
// Benchmark: opens a FastLM on kLegacyDataPath once, outside the loop, then
// calls unigramsForKey(kUnigramSearchKey) on every iteration of the `state`
// loop and closes the model afterwards. The lookup result is discarded.
// The existence check uses assert(), so it is compiled out when NDEBUG is
// defined; in that case a missing data file is not detected here.
static void BM_FastLMFindUnigrams(benchmark::State& state)
|
||||
{
|
||||
assert(std::filesystem::exists(kLegacyDataPath));
|
||||
FastLM lm;
|
||||
lm.open(kLegacyDataPath);
|
||||
for (auto _ : state) {
|
||||
lm.unigramsForKey(kUnigramSearchKey);
|
||||
}
|
||||
lm.close();
|
||||
}
|
||||
BENCHMARK(BM_FastLMFindUnigrams);
|
||||
|
||||
}; // namespace
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
|
|
|
@ -58,11 +58,6 @@ bool PhraseReplacementMap::open(const char *path)
|
|||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
|
||||
keyValueMap[keyValue.key] = keyValue.value;
|
||||
}
|
||||
|
||||
if (state == KeyValueBlobReader::State::ERROR) {
|
||||
close();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright (c) 2022 and onwards The McBopomofo Authors.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <cstdio>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
|
||||
#include "PhraseReplacementMap.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace McBopomofo {
|
||||
|
||||
// Writes a small key/value file whose second line is malformed (a key with
// no value), opens it with PhraseReplacementMap, and checks which keys can
// be looked up afterwards: the pair before the malformed line is available,
// while the malformed key and the pair after it yield empty values.
TEST(PhraseReplacementMapTest, LenientReading)
{
    // Build the path with operator/ so a separator is always present
    // (temp_directory_path() is not guaranteed to end with one), and use a
    // test-specific file name so it cannot collide with other tests that
    // also write into the temp directory.
    const std::string tmp_name
        = (std::filesystem::temp_directory_path()
              / "PhraseReplacementMapTest-LenientReading.txt")
              .string();

    FILE* f = std::fopen(tmp_name.c_str(), "w");
    ASSERT_NE(f, nullptr);

    std::fprintf(f, "key value\n");
    std::fprintf(f, "key2\n"); // error line
    std::fprintf(f, "key3 value2\n");
    int r = std::fclose(f);
    ASSERT_EQ(r, 0);

    PhraseReplacementMap map;
    map.open(tmp_name.c_str());

    // EXPECT_* (not ASSERT_*) so that a failed check still reaches the
    // cleanup below instead of returning early and leaving the file behind.
    EXPECT_EQ(map.valueForKey("key"), "value");
    EXPECT_EQ(map.valueForKey("key2"), "");

    // key2 causes parsing error, and the line that has key3 won't be parsed.
    EXPECT_EQ(map.valueForKey("key3"), "");

    r = std::remove(tmp_name.c_str());
    ASSERT_EQ(r, 0);
}
|
||||
|
||||
} // namespace McBopomofo
|
|
@ -78,12 +78,7 @@ bool UserPhrasesLM::open(const char *path)
|
|||
KeyValueBlobReader::State state;
|
||||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
|
||||
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading.
|
||||
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key );
|
||||
}
|
||||
|
||||
if (state == KeyValueBlobReader::State::ERROR) {
|
||||
close();
|
||||
return false;
|
||||
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright (c) 2022 and onwards The McBopomofo Authors.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
#include <cstdio>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
|
||||
#include "UserPhrasesLM.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace McBopomofo {
|
||||
|
||||
// Writes a small user-phrases file whose second line is malformed (a phrase
// with no reading), opens it with UserPhrasesLM, and checks which readings
// have unigrams afterwards: the reading before the malformed line is found,
// while the malformed entry and the reading after it are not.
//
// The suite name was previously misspelled "UserPhreasesLMTest"; it now
// matches the class under test.
TEST(UserPhrasesLMTest, LenientReading)
{
    // Build the path with operator/ so a separator is always present
    // (temp_directory_path() is not guaranteed to end with one), and use a
    // test-specific file name so it cannot collide with other tests that
    // also write into the temp directory.
    const std::string tmp_name
        = (std::filesystem::temp_directory_path()
              / "UserPhrasesLMTest-LenientReading.txt")
              .string();

    FILE* f = std::fopen(tmp_name.c_str(), "w");
    ASSERT_NE(f, nullptr);

    std::fprintf(f, "value1 reading1\n");
    std::fprintf(f, "value2 \n"); // error line
    std::fprintf(f, "value3 reading2\n");
    int r = std::fclose(f);
    ASSERT_EQ(r, 0);

    UserPhrasesLM lm;
    lm.open(tmp_name.c_str());

    // EXPECT_* (not ASSERT_*) so that a failed check still reaches the
    // cleanup below instead of returning early and leaving the file behind.
    EXPECT_TRUE(lm.hasUnigramsForKey("reading1"));
    EXPECT_FALSE(lm.hasUnigramsForKey("value2"));

    // Anything after the error won't be parsed, so reading2 won't be found.
    EXPECT_FALSE(lm.hasUnigramsForKey("reading2"));

    r = std::remove(tmp_name.c_str());
    ASSERT_EQ(r, 0);
}
|
||||
|
||||
} // namespace McBopomofo
|
Loading…
Reference in New Issue