From 502e8f1ea97fe81e62ee78f31599025b76fb1c84 Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Mon, 7 Feb 2022 20:07:46 -0800 Subject: [PATCH 1/2] Remove an unused Romanization scheme --- Source/Engine/Mandarin/Mandarin.cpp | 323 ---------------------------- Source/Engine/Mandarin/Mandarin.h | 6 - 2 files changed, 329 deletions(-) diff --git a/Source/Engine/Mandarin/Mandarin.cpp b/Source/Engine/Mandarin/Mandarin.cpp index 996603af..c5f76039 100644 --- a/Source/Engine/Mandarin/Mandarin.cpp +++ b/Source/Engine/Mandarin/Mandarin.cpp @@ -553,329 +553,6 @@ const std::string BPMF::HanyuPinyinString(bool includesTone, return consonant + middle + vowel + tone; } -const std::string BPMF::PHTString(bool includesTone) const { - std::string consonant, middle, vowel, tone; - - Component cc = consonantComponent(), mvc = middleVowelComponent(), - vc = vowelComponent(); - bool hasNoMVCOrVC = !(mvc || vc); - - switch (cc) { - case B: - consonant = "p"; - break; - case P: - consonant = "ph"; - break; - case M: - consonant = "m"; - break; - case F: - consonant = "f"; - break; - case D: - consonant = "t"; - break; - case T: - consonant = "th"; - break; - case N: - consonant = "n"; - break; - case L: - consonant = "l"; - break; - case G: - consonant = "k"; - break; - case K: - consonant = "kh"; - break; - case H: - consonant = "h"; - break; - case J: - consonant = "ch"; - if (mvc != I) middle = "i"; - break; - case Q: - consonant = "chh"; - if (mvc != I) middle = "i"; - break; - case X: - consonant = "hs"; - if (mvc != I) middle = "i"; - break; - case ZH: - consonant = "ch"; - if (hasNoMVCOrVC) middle = "i"; - break; - case CH: - consonant = "chh"; - if (hasNoMVCOrVC) middle = "i"; - break; - case SH: - consonant = "sh"; - if (hasNoMVCOrVC) middle = "i"; - break; - case R: - consonant = "r"; - if (hasNoMVCOrVC) middle = "i"; - break; - case Z: - consonant = "ts"; - if (hasNoMVCOrVC) middle = "i"; - break; - case C: - consonant = "tsh"; - if (hasNoMVCOrVC) middle = "i"; - break; - case 
S: - consonant = "s"; - if (hasNoMVCOrVC) middle = "i"; - break; - } - - switch (mvc) { - case I: - middle = "i"; - break; - case U: - middle = "u"; - break; - case UE: - middle = "uu"; - break; - } - - switch (vc) { - case A: - vowel = "a"; - break; - case O: - vowel = "o"; - break; - case ER: - vowel = "e"; - break; - case E: - vowel = (!(cc || mvc)) ? "eh" : "e"; - break; - case AI: - vowel = "ai"; - break; - case EI: - vowel = "ei"; - break; - case AO: - vowel = "ao"; - break; - case OU: - vowel = "ou"; - break; - case AN: - vowel = "an"; - break; - case EN: - vowel = "en"; - break; - case ANG: - vowel = "ang"; - break; - case ENG: - vowel = "eng"; - break; - case ERR: - vowel = "err"; - break; - } - - // ieng -> ing - if (mvc == I && vc == ENG) { - middle = ""; - vowel = "ing"; - } - - // zh/ch + i without third component -> append h - if (cc == BPMF::ZH || cc == BPMF::CH) { - if (!mvc && !vc) { - vowel = "h"; - } - } - - if (includesTone) { - switch (toneMarkerComponent()) { - case Tone2: - tone = "2"; - break; - case Tone3: - tone = "3"; - break; - case Tone4: - tone = "4"; - break; - case Tone5: - tone = "5"; - break; - } - } - - return consonant + middle + vowel + tone; -} - -const BPMF BPMF::FromPHT(const std::string& str) { - if (!str.length()) { - return BPMF(); - } - - std::string pht = str; - transform(pht.begin(), pht.end(), pht.begin(), ::tolower); - - BPMF::Component firstComponent = 0; - BPMF::Component secondComponent = 0; - BPMF::Component thirdComponent = 0; - BPMF::Component toneComponent = 0; - -#define IF_CONSUME1(k, v) \ - else if (PinyinParseHelper::ConsumePrefix(pht, k)) { \ - firstComponent = v; \ - } - - // consume the first part - if (0) { - } - IF_CONSUME1("ph", BPMF::P) - IF_CONSUME1("p", BPMF::B) - IF_CONSUME1("m", BPMF::M) - IF_CONSUME1("f", BPMF::F) - IF_CONSUME1("th", BPMF::T) - IF_CONSUME1("n", BPMF::N) - IF_CONSUME1("l", BPMF::L) - IF_CONSUME1("kh", BPMF::K) - IF_CONSUME1("k", BPMF::G) - IF_CONSUME1("chh", BPMF::Q) - 
IF_CONSUME1("ch", BPMF::J) - IF_CONSUME1("hs", BPMF::X) - IF_CONSUME1("sh", BPMF::SH) - IF_CONSUME1("r", BPMF::R) - IF_CONSUME1("tsh", BPMF::C) - IF_CONSUME1("ts", BPMF::Z) - IF_CONSUME1("s", BPMF::S) - IF_CONSUME1("t", BPMF::D) - IF_CONSUME1("h", BPMF::H) - -#define IF_CONSUME2(k, v) \ - else if (PinyinParseHelper::ConsumePrefix(pht, k)) { \ - secondComponent = v; \ - } - // consume the second part - if (0) { - } else if (PinyinParseHelper::ConsumePrefix(pht, "ing")) { - secondComponent = BPMF::I; - thirdComponent = BPMF::ENG; - } else if (PinyinParseHelper::ConsumePrefix(pht, "ih")) { - if (firstComponent == BPMF::J) { - firstComponent = BPMF::ZH; - } else if (firstComponent == BPMF::Q) { - firstComponent = BPMF::CH; - } - } - IF_CONSUME2("i", BPMF::I) - IF_CONSUME2("uu", BPMF::UE) - IF_CONSUME2("u", BPMF::U) - -#undef IF_CONSUME1 -#undef IF_CONSUME2 - - // the vowels, longer sequence takes precedence - if (0) { - } else if (PinyinParseHelper::ConsumePrefix(pht, "ang")) { - thirdComponent = BPMF::ANG; - } else if (PinyinParseHelper::ConsumePrefix(pht, "eng")) { - thirdComponent = BPMF::ENG; - } else if (PinyinParseHelper::ConsumePrefix(pht, "err")) { - thirdComponent = BPMF::ERR; - } else if (PinyinParseHelper::ConsumePrefix(pht, "ai")) { - thirdComponent = BPMF::AI; - } else if (PinyinParseHelper::ConsumePrefix(pht, "ei")) { - thirdComponent = BPMF::EI; - } else if (PinyinParseHelper::ConsumePrefix(pht, "ao")) { - thirdComponent = BPMF::AO; - } else if (PinyinParseHelper::ConsumePrefix(pht, "ou")) { - thirdComponent = BPMF::OU; - } else if (PinyinParseHelper::ConsumePrefix(pht, "an")) { - thirdComponent = BPMF::AN; - } else if (PinyinParseHelper::ConsumePrefix(pht, "en")) { - thirdComponent = BPMF::EN; - } else if (PinyinParseHelper::ConsumePrefix(pht, "er")) { - thirdComponent = BPMF::ERR; - } else if (PinyinParseHelper::ConsumePrefix(pht, "a")) { - thirdComponent = BPMF::A; - } else if (PinyinParseHelper::ConsumePrefix(pht, "o")) { - thirdComponent = BPMF::O; 
- } else if (PinyinParseHelper::ConsumePrefix(pht, "eh")) { - thirdComponent = BPMF::E; - } else if (PinyinParseHelper::ConsumePrefix(pht, "e")) { - if (secondComponent) { - thirdComponent = BPMF::E; - } else { - thirdComponent = BPMF::ER; - } - } - - // fix ch/chh mappings - Component corresponding = 0; - if (firstComponent == BPMF::J) { - corresponding = BPMF::ZH; - } else if (firstComponent == BPMF::Q) { - corresponding = BPMF::CH; - } - - if (corresponding) { - if (secondComponent == BPMF::I && !thirdComponent) { - // if the second component is I and there's no third component, we use the - // corresponding part firstComponent = corresponding; - } else if (secondComponent == BPMF::U) { - // if second component is U, we use the corresponding part - firstComponent = corresponding; - } else if (!secondComponent) { - // if there's no second component, it must be a corresponding part - firstComponent = corresponding; - } - } - - if (secondComponent == BPMF::I) { - // fixes a few impossible occurances - switch (firstComponent) { - case BPMF::ZH: - case BPMF::CH: - case BPMF::SH: - case BPMF::R: - case BPMF::Z: - case BPMF::C: - case BPMF::S: - secondComponent = 0; - } - } - - // at last! 
- if (0) { - } else if (PinyinParseHelper::ConsumePrefix(pht, "1")) { - toneComponent = BPMF::Tone1; - } else if (PinyinParseHelper::ConsumePrefix(pht, "2")) { - toneComponent = BPMF::Tone2; - } else if (PinyinParseHelper::ConsumePrefix(pht, "3")) { - toneComponent = BPMF::Tone3; - } else if (PinyinParseHelper::ConsumePrefix(pht, "4")) { - toneComponent = BPMF::Tone4; - } else if (PinyinParseHelper::ConsumePrefix(pht, "5")) { - toneComponent = BPMF::Tone5; - } - - return BPMF(firstComponent | secondComponent | thirdComponent | - toneComponent); -} - const BPMF BPMF::FromComposedString(const std::string& str) { BPMF syllable; auto iter = str.begin(); diff --git a/Source/Engine/Mandarin/Mandarin.h b/Source/Engine/Mandarin/Mandarin.h index 8192c719..c13ef334 100644 --- a/Source/Engine/Mandarin/Mandarin.h +++ b/Source/Engine/Mandarin/Mandarin.h @@ -50,12 +50,6 @@ class BopomofoSyllable { // TO DO: Support accented vowels const std::string HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const; - // const std::string HanyuPinyinString(bool includesTone, bool useVForUUmlaut, - // bool composeAccentedVowel) const; - - // PHT = Pai-hua-tsi - static const BopomofoSyllable FromPHT(const std::string& str); - const std::string PHTString(bool includesTone) const; static const BopomofoSyllable FromComposedString(const std::string& str); const std::string composedString() const; From a55c0a4b60665180a124705a05a8e227461d1f61 Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Mon, 7 Feb 2022 20:11:57 -0800 Subject: [PATCH 2/2] Remove the absolute order format The "absolute order" is a compact representation, originally a historical carry-over from VanillaInput (2004). Modern input methods no longer need such a compact form. It is therefore now removed. 
--- Source/Engine/Mandarin/Mandarin.h | 36 ------------------------------- 1 file changed, 36 deletions(-) diff --git a/Source/Engine/Mandarin/Mandarin.h b/Source/Engine/Mandarin/Mandarin.h index c13ef334..23e77020 100644 --- a/Source/Engine/Mandarin/Mandarin.h +++ b/Source/Engine/Mandarin/Mandarin.h @@ -138,38 +138,6 @@ class BopomofoSyllable { return *this; } - uint16_t absoluteOrder() const { - // turn BPMF syllable into a 4*14*4*22 number - return (uint16_t)(syllable_ & ConsonantMask) + - (uint16_t)((syllable_ & MiddleVowelMask) >> 5) * 22 + - (uint16_t)((syllable_ & VowelMask) >> 7) * 22 * 4 + - (uint16_t)((syllable_ & ToneMarkerMask) >> 11) * 22 * 4 * 14; - } - - const std::string absoluteOrderString() const { - // 5*14*4*22 = 6160, we use a 79*79 encoding to represent that - uint16_t order = absoluteOrder(); - char low = 48 + static_cast<char>(order % 79); - char high = 48 + static_cast<char>(order / 79); - std::string result(2, ' '); - result[0] = low; - result[1] = high; - return result; - } - - static BopomofoSyllable FromAbsoluteOrder(uint16_t order) { - return BopomofoSyllable((order % 22) | ((order / 22) % 4) << 5 | - ((order / (22 * 4)) % 14) << 7 | - ((order / (22 * 4 * 14)) % 5) << 11); - } - - static BopomofoSyllable FromAbsoluteOrderString(const std::string& str) { - if (str.length() != 2) return BopomofoSyllable(); - - return FromAbsoluteOrder((uint16_t)(str[1] - 48) * 79 + - (uint16_t)(str[0] - 48)); - } - friend std::ostream& operator<<(std::ostream& stream, const BopomofoSyllable& syllable); @@ -496,10 +464,6 @@ class BopomofoReadingBuffer { syllable_); } - const std::string absoluteOrderQueryString() const { - return syllable_.absoluteOrderString(); - } - bool hasToneMarker() const { return syllable_.hasToneMarker(); } protected: