Remove OpenVanilla dependencies
This removes one overengineered method from BopomofoSyllable and rewrites a helper using a simpler UTF-8 heuristic. It also adds the CMake project file and a unit test suite.
This commit is contained in:
parent
0700e0fc60
commit
4ebe1a1a11
|
@ -0,0 +1,31 @@
|
||||||
|
cmake_minimum_required(VERSION 3.17)
|
||||||
|
project(Mandarin)
|
||||||
|
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
|
add_library(MandarinLib Mandarin.h Mandarin.cpp)
|
||||||
|
|
||||||
|
# Let CMake fetch Google Test for us.
|
||||||
|
# https://github.com/google/googletest/tree/main/googletest#incorporating-into-an-existing-cmake-project
|
||||||
|
include(FetchContent)
|
||||||
|
|
||||||
|
FetchContent_Declare(
|
||||||
|
googletest
|
||||||
|
# Specify the commit you depend on and update it regularly.
|
||||||
|
URL https://github.com/google/googletest/archive/609281088cfefc76f9d0ce82e1ff6c30cc3591e5.zip
|
||||||
|
)
|
||||||
|
# For Windows: Prevent overriding the parent project's compiler/linker settings
|
||||||
|
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||||
|
FetchContent_MakeAvailable(googletest)
|
||||||
|
|
||||||
|
# Test target declarations.
|
||||||
|
add_executable(MandarinTest MandarinTest.cpp)
|
||||||
|
target_link_libraries(MandarinTest gtest_main MandarinLib)
|
||||||
|
include(GoogleTest)
|
||||||
|
gtest_discover_tests(MandarinTest)
|
||||||
|
|
||||||
|
add_custom_target(
|
||||||
|
runTest
|
||||||
|
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/MandarinTest
|
||||||
|
)
|
||||||
|
add_dependencies(runTest MandarinTest)
|
|
@ -29,14 +29,9 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "Mandarin.h"
|
#include "Mandarin.h"
|
||||||
|
|
||||||
#include "OVUTF8Helper.h"
|
|
||||||
#include "OVWildcard.h"
|
|
||||||
|
|
||||||
namespace Formosa {
|
namespace Formosa {
|
||||||
namespace Mandarin {
|
namespace Mandarin {
|
||||||
|
|
||||||
using namespace OpenVanilla;
|
|
||||||
|
|
||||||
class PinyinParseHelper {
|
class PinyinParseHelper {
|
||||||
public:
|
public:
|
||||||
static const bool ConsumePrefix(string &target, const string &prefix)
|
static const bool ConsumePrefix(string &target, const string &prefix)
|
||||||
|
@ -591,15 +586,43 @@ const BPMF BPMF::FromPHT(const string& str)
|
||||||
const BPMF BPMF::FromComposedString(const string& str)
|
const BPMF BPMF::FromComposedString(const string& str)
|
||||||
{
|
{
|
||||||
BPMF syllable;
|
BPMF syllable;
|
||||||
vector<string> components = OVUTF8Helper::SplitStringByCodePoint(str);
|
auto iter = str.begin();
|
||||||
for (vector<string>::iterator iter = components.begin() ; iter != components.end() ; ++iter) {
|
while (iter != str.end()) {
|
||||||
|
// This is a naive implementation and we bail early at anything we don't recognize.
|
||||||
|
// A sound implementation would require either using a trie for the Bopomofo character map
|
||||||
|
// or splitting the input by code points. This suffices for now.
|
||||||
|
|
||||||
const map<string, BPMF::Component>& charToComp = BopomofoCharacterMap::SharedInstance().characterToComponent;
|
// Illegal.
|
||||||
map<string, BPMF::Component>::const_iterator result = charToComp.find(*iter);
|
if (!(*iter & 0x80)) {
|
||||||
if (result != charToComp.end())
|
break;
|
||||||
syllable += BPMF((*result).second);
|
}
|
||||||
|
|
||||||
|
size_t utf8_length = -1;
|
||||||
|
|
||||||
|
// Derive the UTF-8 sequence length from the lead byte: the tone markers are two-byte sequences and the Bopomofo letters are three-byte sequences.
|
||||||
|
if ((*iter & (0x80 | 0x40)) && !(*iter & 0x20)) {
|
||||||
|
utf8_length = 2;
|
||||||
|
} else if ((*iter & (0x80 | 0x40 | 0x20)) && !(*iter & 0x10)) {
|
||||||
|
utf8_length = 3;
|
||||||
|
} else {
|
||||||
|
// Illegal.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iter + (utf8_length - 1) == str.end()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
string component = string(iter, iter + utf8_length);
|
||||||
|
const map<string, BPMF::Component>& charToComp = BopomofoCharacterMap::SharedInstance().characterToComponent;
|
||||||
|
map<string, BPMF::Component>::const_iterator result = charToComp.find(component);
|
||||||
|
if (result == charToComp.end()) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
syllable += BPMF((*result).second);
|
||||||
|
}
|
||||||
|
iter += utf8_length;
|
||||||
}
|
}
|
||||||
|
|
||||||
return syllable;
|
return syllable;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -736,29 +759,6 @@ void BopomofoKeyboardLayout::FinalizeLayouts()
|
||||||
#undef FL
|
#undef FL
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoKeyboardLayout* BopomofoKeyboardLayout::LayoutForName(const string& name)
|
|
||||||
{
|
|
||||||
if (OVWildcard::Match(name, "standard"))
|
|
||||||
return StandardLayout();
|
|
||||||
|
|
||||||
if (OVWildcard::Match(name, "eten"))
|
|
||||||
return ETenLayout();
|
|
||||||
|
|
||||||
if (OVWildcard::Match(name, "hsu"))
|
|
||||||
return HsuLayout();
|
|
||||||
|
|
||||||
if (OVWildcard::Match(name, "eten26"))
|
|
||||||
return ETen26Layout();
|
|
||||||
|
|
||||||
if (OVWildcard::Match(name, "IBM"))
|
|
||||||
return IBMLayout();
|
|
||||||
|
|
||||||
if (OVWildcard::Match(name, "hanyupinyin") || OVWildcard::Match(name, "hanyu pinyin") || OVWildcard::Match(name, "hanyu-pinyin") || OVWildcard::Match(name, "pinyin"))
|
|
||||||
return HanyuPinyinLayout();
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define ASSIGNKEY1(m, vec, k, val) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val), vec)
|
#define ASSIGNKEY1(m, vec, k, val) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val), vec)
|
||||||
#define ASSIGNKEY2(m, vec, k, val1, val2) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), vec)
|
#define ASSIGNKEY2(m, vec, k, val1, val2) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), vec)
|
||||||
#define ASSIGNKEY3(m, vec, k, val1, val2, val3) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), vec.push_back((BPMF::Component)val3), vec)
|
#define ASSIGNKEY3(m, vec, k, val1, val2, val3) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), vec.push_back((BPMF::Component)val3), vec)
|
||||||
|
|
|
@ -271,9 +271,6 @@ namespace Formosa {
|
||||||
static const BopomofoKeyboardLayout* IBMLayout();
|
static const BopomofoKeyboardLayout* IBMLayout();
|
||||||
static const BopomofoKeyboardLayout* HanyuPinyinLayout();
|
static const BopomofoKeyboardLayout* HanyuPinyinLayout();
|
||||||
|
|
||||||
// recognizes (case-insensitive): standard, eten, hsu, eten26, ibm
|
|
||||||
static const BopomofoKeyboardLayout* LayoutForName(const string& name);
|
|
||||||
|
|
||||||
BopomofoKeyboardLayout(const BopomofoKeyToComponentMap& ktcm, const string& name)
|
BopomofoKeyboardLayout(const BopomofoKeyToComponentMap& ktcm, const string& name)
|
||||||
: m_keyToComponent(ktcm)
|
: m_keyToComponent(ktcm)
|
||||||
, m_name(name)
|
, m_name(name)
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
// Copyright (c) 2022 and onwards Lukhnos Liu
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person
|
||||||
|
// obtaining a copy of this software and associated documentation
|
||||||
|
// files (the "Software"), to deal in the Software without
|
||||||
|
// restriction, including without limitation the rights to use,
|
||||||
|
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the
|
||||||
|
// Software is furnished to do so, subject to the following
|
||||||
|
// conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be
|
||||||
|
// included in all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
// OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
#include "Mandarin.h"
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
namespace Formosa {
|
||||||
|
namespace Mandarin {
|
||||||
|
|
||||||
|
static std::string RoundTrip(const std::string& composedString) {
|
||||||
|
return BopomofoSyllable::FromComposedString(composedString).composedString();
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(MandarinTest, FromComposedString) {
|
||||||
|
ASSERT_EQ(RoundTrip("ㄅ"), "ㄅ");
|
||||||
|
ASSERT_EQ(RoundTrip("ㄅㄧ"), "ㄅㄧ");
|
||||||
|
ASSERT_EQ(RoundTrip("ㄅㄧˇ"), "ㄅㄧˇ");
|
||||||
|
ASSERT_EQ(RoundTrip("ㄅㄧˇㄆ"), "ㄆㄧˇ");
|
||||||
|
ASSERT_EQ(RoundTrip("ㄅㄧˇㄆ"), "ㄆㄧˇ");
|
||||||
|
ASSERT_EQ(RoundTrip("e"), "");
|
||||||
|
ASSERT_EQ(RoundTrip("é"), "");
|
||||||
|
ASSERT_EQ(RoundTrip("ㄅéㄆ"), "ㄅ");
|
||||||
|
ASSERT_EQ(RoundTrip("ㄅeㄆ"), "ㄅ");
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Mandarin
|
||||||
|
} // namespace Formosa
|
Loading…
Reference in New Issue