From 71b69cae509e84c0c8da7b090afa53a16e2d1a85 Mon Sep 17 00:00:00 2001 From: Lukhnos Liu Date: Mon, 31 Jan 2022 21:57:04 -0800 Subject: [PATCH] Reformat with clang-format --- Source/Engine/Mandarin/Mandarin.cpp | 2125 +++++++++++++++------------ Source/Engine/Mandarin/Mandarin.h | 1036 ++++++------- 2 files changed, 1701 insertions(+), 1460 deletions(-) diff --git a/Source/Engine/Mandarin/Mandarin.cpp b/Source/Engine/Mandarin/Mandarin.cpp index bfd2cb55..9c91ee3e 100644 --- a/Source/Engine/Mandarin/Mandarin.cpp +++ b/Source/Engine/Mandarin/Mandarin.cpp @@ -25,719 +25,1028 @@ // OTHER DEALINGS IN THE SOFTWARE. // -#include -#include #include "Mandarin.h" +#include +#include + namespace Formosa { namespace Mandarin { - + class PinyinParseHelper { -public: - static const bool ConsumePrefix(string &target, const string &prefix) - { - if (target.length() < prefix.length()) { - return false; - } - - if (target.substr(0, prefix.length()) == prefix) { - target = target.substr(prefix.length(), target.length() - prefix.length()); - return true; - } - - return false; + public: + static const bool ConsumePrefix(string& target, const string& prefix) { + if (target.length() < prefix.length()) { + return false; } + + if (target.substr(0, prefix.length()) == prefix) { + target = + target.substr(prefix.length(), target.length() - prefix.length()); + return true; + } + + return false; + } }; class BopomofoCharacterMap { -public: - static const BopomofoCharacterMap& SharedInstance(); + public: + static const BopomofoCharacterMap& SharedInstance(); - map componentToCharacter; - map characterToComponent; + map componentToCharacter; + map characterToComponent; -protected: - BopomofoCharacterMap(); - static BopomofoCharacterMap* c_map; + protected: + BopomofoCharacterMap(); + static BopomofoCharacterMap* c_map; }; -const BPMF BPMF::FromHanyuPinyin(const string& str) -{ - if (!str.length()) { - return BPMF(); - } - - string pinyin = str; - transform(pinyin.begin(), pinyin.end(), pinyin.begin(), ::tolower); - - BPMF::Component firstComponent = 0; - BPMF::Component secondComponent = 0; - BPMF::Component thirdComponent = 0; - BPMF::Component toneComponent = 0; - - // lookup consonants and consume them - bool independentConsonant = false; +const BPMF BPMF::FromHanyuPinyin(const string& str) { + if (!str.length()) { + return BPMF(); + } - // the y exceptions fist - if (0) {} - else if (PinyinParseHelper::ConsumePrefix(pinyin, "yuan")) { secondComponent = BPMF::UE; thirdComponent = BPMF::AN; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ying")) { secondComponent = BPMF::I; thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "yung")) { secondComponent = BPMF::UE; thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "yong")) { secondComponent = BPMF::UE; thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "yue")) { secondComponent = BPMF::UE; thirdComponent = BPMF::E; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "yun")) { secondComponent = BPMF::UE; thirdComponent = BPMF::EN; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "you")) { secondComponent = BPMF::I; thirdComponent = BPMF::OU; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "yu")) { secondComponent = BPMF::UE; } - - - // try the first character - char c = pinyin.length() ? pinyin[0] : 0; - switch (c) { - case 'b': firstComponent = BPMF::B; pinyin = pinyin.substr(1); break; - case 'p': firstComponent = BPMF::P; pinyin = pinyin.substr(1); break; - case 'm': firstComponent = BPMF::M; pinyin = pinyin.substr(1); break; - case 'f': firstComponent = BPMF::F; pinyin = pinyin.substr(1); break; - case 'd': firstComponent = BPMF::D; pinyin = pinyin.substr(1); break; - case 't': firstComponent = BPMF::T; pinyin = pinyin.substr(1); break; - case 'n': firstComponent = BPMF::N; pinyin = pinyin.substr(1); break; - case 'l': firstComponent = BPMF::L; pinyin = pinyin.substr(1); break; - case 'g': firstComponent = BPMF::G; pinyin = pinyin.substr(1); break; - case 'k': firstComponent = BPMF::K; pinyin = pinyin.substr(1); break; - case 'h': firstComponent = BPMF::H; pinyin = pinyin.substr(1); break; - case 'j': firstComponent = BPMF::J; pinyin = pinyin.substr(1); break; - case 'q': firstComponent = BPMF::Q; pinyin = pinyin.substr(1); break; - case 'x': firstComponent = BPMF::X; pinyin = pinyin.substr(1); break; - - // special hanlding for w and y - case 'w': secondComponent = BPMF::U; pinyin = pinyin.substr(1); break; - case 'y': - if (!secondComponent && !thirdComponent) { - secondComponent = BPMF::I; - } - pinyin = pinyin.substr(1); - break; - } - - // then we try ZH, CH, SH, R, Z, C, S (in that order) - if (0) {} - else if (PinyinParseHelper::ConsumePrefix(pinyin, "zh")) { firstComponent = BPMF::ZH; independentConsonant = true; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ch")) { firstComponent = BPMF::CH; independentConsonant = true; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "sh")) { firstComponent = BPMF::SH; independentConsonant = true; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "r")) { firstComponent = BPMF::R; independentConsonant = true; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "z")) { firstComponent = BPMF::Z; independentConsonant = true; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "c")) { firstComponent = BPMF::C; independentConsonant = true; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "s")) { firstComponent = BPMF::S; independentConsonant = true; } + string pinyin = str; + transform(pinyin.begin(), pinyin.end(), pinyin.begin(), ::tolower); - // consume exceptions first: (ien, in), (iou, iu), (uen, un), (veng, iong), (ven, vn), (uei, ui), ung - // but longer sequence takes precedence - if (0) {} - else if (PinyinParseHelper::ConsumePrefix(pinyin, "veng")) { secondComponent = BPMF::UE; thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "iong")) { secondComponent = BPMF::UE; thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ing")) { secondComponent = BPMF::I; thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ien")) { secondComponent = BPMF::I; thirdComponent = BPMF::EN; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "iou")) { secondComponent = BPMF::I; thirdComponent = BPMF::OU; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "uen")) { secondComponent = BPMF::U; thirdComponent = BPMF::EN; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ven")) { secondComponent = BPMF::UE; thirdComponent = BPMF::EN; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "uei")) { secondComponent = BPMF::U; thirdComponent = BPMF::EI; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ung")) { - // f exception - if (firstComponent == BPMF::F) { - thirdComponent = BPMF::ENG; - } - else { - secondComponent = BPMF::U; thirdComponent = BPMF::ENG; - } - } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ong")) { - // f exception - if (firstComponent == BPMF::F) { - thirdComponent = BPMF::ENG; - } - else { - secondComponent = BPMF::U; - thirdComponent = BPMF::ENG; - } - } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "un")) { - if (firstComponent == BPMF::J || firstComponent == BPMF::Q || firstComponent == BPMF::X) { - secondComponent = BPMF::UE; - } - else { - secondComponent = BPMF::U; - } - thirdComponent = BPMF::EN; - } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "iu")) { secondComponent = BPMF::I; thirdComponent = BPMF::OU; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "in")) { secondComponent = BPMF::I; thirdComponent = BPMF::EN; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "vn")) { secondComponent = BPMF::UE; thirdComponent = BPMF::EN; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ui")) { secondComponent = BPMF::U; thirdComponent = BPMF::EI; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ue")) - { - secondComponent = BPMF::UE; thirdComponent = BPMF::E; - } - - #ifndef _MSC_VER - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ü")) { secondComponent = BPMF::UE; } - #else - else if (PinyinParseHelper::ConsumePrefix(pinyin, "\xc3\xbc")) { secondComponent = BPMF::UE; } - #endif + BPMF::Component firstComponent = 0; + BPMF::Component secondComponent = 0; + BPMF::Component thirdComponent = 0; + BPMF::Component toneComponent = 0; - // then consume the middle component... - if (0) {} - else if (PinyinParseHelper::ConsumePrefix(pinyin, "i")) { secondComponent = independentConsonant ? 0 : BPMF::I; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "u")) { - if (firstComponent == BPMF::J || firstComponent == BPMF::Q || firstComponent == BPMF::X) { - secondComponent = BPMF::UE; - } - else { - secondComponent = BPMF::U; - } + // lookup consonants and consume them + bool independentConsonant = false; + + // the y exceptions fist + if (0) { + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yuan")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::AN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ying")) { + secondComponent = BPMF::I; + thirdComponent = BPMF::ENG; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yung")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::ENG; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yong")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::ENG; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yue")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::E; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yun")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::EN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "you")) { + secondComponent = BPMF::I; + thirdComponent = BPMF::OU; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yu")) { + secondComponent = BPMF::UE; + } + + // try the first character + char c = pinyin.length() ? pinyin[0] : 0; + switch (c) { + case 'b': + firstComponent = BPMF::B; + pinyin = pinyin.substr(1); + break; + case 'p': + firstComponent = BPMF::P; + pinyin = pinyin.substr(1); + break; + case 'm': + firstComponent = BPMF::M; + pinyin = pinyin.substr(1); + break; + case 'f': + firstComponent = BPMF::F; + pinyin = pinyin.substr(1); + break; + case 'd': + firstComponent = BPMF::D; + pinyin = pinyin.substr(1); + break; + case 't': + firstComponent = BPMF::T; + pinyin = pinyin.substr(1); + break; + case 'n': + firstComponent = BPMF::N; + pinyin = pinyin.substr(1); + break; + case 'l': + firstComponent = BPMF::L; + pinyin = pinyin.substr(1); + break; + case 'g': + firstComponent = BPMF::G; + pinyin = pinyin.substr(1); + break; + case 'k': + firstComponent = BPMF::K; + pinyin = pinyin.substr(1); + break; + case 'h': + firstComponent = BPMF::H; + pinyin = pinyin.substr(1); + break; + case 'j': + firstComponent = BPMF::J; + pinyin = pinyin.substr(1); + break; + case 'q': + firstComponent = BPMF::Q; + pinyin = pinyin.substr(1); + break; + case 'x': + firstComponent = BPMF::X; + pinyin = pinyin.substr(1); + break; + + // special hanlding for w and y + case 'w': + secondComponent = BPMF::U; + pinyin = pinyin.substr(1); + break; + case 'y': + if (!secondComponent && !thirdComponent) { + secondComponent = BPMF::I; + } + pinyin = pinyin.substr(1); + break; + } + + // then we try ZH, CH, SH, R, Z, C, S (in that order) + if (0) { + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "zh")) { + firstComponent = BPMF::ZH; + independentConsonant = true; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ch")) { + firstComponent = BPMF::CH; + independentConsonant = true; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "sh")) { + firstComponent = BPMF::SH; + independentConsonant = true; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "r")) { + firstComponent = BPMF::R; + independentConsonant = true; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "z")) { + firstComponent = BPMF::Z; + independentConsonant = true; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "c")) { + firstComponent = BPMF::C; + independentConsonant = true; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "s")) { + firstComponent = BPMF::S; + independentConsonant = true; + } + + // consume exceptions first: (ien, in), (iou, iu), (uen, un), (veng, iong), + // (ven, vn), (uei, ui), ung but longer sequence takes precedence + if (0) { + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "veng")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::ENG; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "iong")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::ENG; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ing")) { + secondComponent = BPMF::I; + thirdComponent = BPMF::ENG; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ien")) { + secondComponent = BPMF::I; + thirdComponent = BPMF::EN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "iou")) { + secondComponent = BPMF::I; + thirdComponent = BPMF::OU; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "uen")) { + secondComponent = BPMF::U; + thirdComponent = BPMF::EN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ven")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::EN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "uei")) { + secondComponent = BPMF::U; + thirdComponent = BPMF::EI; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ung")) { + // f exception + if (firstComponent == BPMF::F) { + thirdComponent = BPMF::ENG; + } else { + secondComponent = BPMF::U; + thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "v")) { secondComponent = BPMF::UE; } - - // the vowels, longer sequence takes precedence - if (0) {} - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ang")) { thirdComponent = BPMF::ANG; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "eng")) { thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "err")) { thirdComponent = BPMF::ERR; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ai")) { thirdComponent = BPMF::AI; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ei")) { thirdComponent = BPMF::EI; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ao")) { thirdComponent = BPMF::AO; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "ou")) { thirdComponent = BPMF::OU; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "an")) { thirdComponent = BPMF::AN; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "en")) { thirdComponent = BPMF::EN; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "er")) { thirdComponent = BPMF::ERR; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "a")) { thirdComponent = BPMF::A; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "o")) { thirdComponent = BPMF::O; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "e")) { - if (secondComponent) { - thirdComponent = BPMF::E; - } - else { - thirdComponent = BPMF::ER; - } + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ong")) { + // f exception + if (firstComponent == BPMF::F) { + thirdComponent = BPMF::ENG; + } else { + secondComponent = BPMF::U; + thirdComponent = BPMF::ENG; } + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "un")) { + if (firstComponent == BPMF::J || firstComponent == BPMF::Q || + firstComponent == BPMF::X) { + secondComponent = BPMF::UE; + } else { + secondComponent = BPMF::U; + } + thirdComponent = BPMF::EN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "iu")) { + secondComponent = BPMF::I; + thirdComponent = BPMF::OU; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "in")) { + secondComponent = BPMF::I; + thirdComponent = BPMF::EN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "vn")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::EN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ui")) { + secondComponent = BPMF::U; + thirdComponent = BPMF::EI; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ue")) { + secondComponent = BPMF::UE; + thirdComponent = BPMF::E; + } +#ifndef _MSC_VER + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ü")) { + secondComponent = BPMF::UE; + } +#else + else if (PinyinParseHelper::ConsumePrefix(pinyin, "\xc3\xbc")) { + secondComponent = BPMF::UE; + } +#endif - // at last! - if (0) {} - else if (PinyinParseHelper::ConsumePrefix(pinyin, "1")) { toneComponent = BPMF::Tone1; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "2")) { toneComponent = BPMF::Tone2; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "3")) { toneComponent = BPMF::Tone3; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "4")) { toneComponent = BPMF::Tone4; } - else if (PinyinParseHelper::ConsumePrefix(pinyin, "5")) { toneComponent = BPMF::Tone5; } + // then consume the middle component... + if (0) { + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "i")) { + secondComponent = independentConsonant ? 0 : BPMF::I; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "u")) { + if (firstComponent == BPMF::J || firstComponent == BPMF::Q || + firstComponent == BPMF::X) { + secondComponent = BPMF::UE; + } else { + secondComponent = BPMF::U; + } + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "v")) { + secondComponent = BPMF::UE; + } - return BPMF(firstComponent | secondComponent | thirdComponent | toneComponent); + // the vowels, longer sequence takes precedence + if (0) { + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ang")) { + thirdComponent = BPMF::ANG; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "eng")) { + thirdComponent = BPMF::ENG; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "err")) { + thirdComponent = BPMF::ERR; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ai")) { + thirdComponent = BPMF::AI; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ei")) { + thirdComponent = BPMF::EI; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ao")) { + thirdComponent = BPMF::AO; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ou")) { + thirdComponent = BPMF::OU; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "an")) { + thirdComponent = BPMF::AN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "en")) { + thirdComponent = BPMF::EN; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "er")) { + thirdComponent = BPMF::ERR; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "a")) { + thirdComponent = BPMF::A; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "o")) { + thirdComponent = BPMF::O; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "e")) { + if (secondComponent) { + thirdComponent = BPMF::E; + } else { + thirdComponent = BPMF::ER; + } + } + + // at last! + if (0) { + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "1")) { + toneComponent = BPMF::Tone1; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "2")) { + toneComponent = BPMF::Tone2; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "3")) { + toneComponent = BPMF::Tone3; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "4")) { + toneComponent = BPMF::Tone4; + } else if (PinyinParseHelper::ConsumePrefix(pinyin, "5")) { + toneComponent = BPMF::Tone5; + } + + return BPMF(firstComponent | secondComponent | thirdComponent | + toneComponent); } -const string BPMF::HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const -{ - string consonant, middle, vowel, tone; - - Component cc = consonantComponent(), mvc = middleVowelComponent(), vc = vowelComponent(); - bool hasNoMVCOrVC = !(mvc || vc); - - - switch (cc) { - case B: consonant = "b"; break; - case P: consonant = "p"; break; - case M: consonant = "m"; break; - case F: consonant = "f"; break; - case D: consonant = "d"; break; - case T: consonant = "t"; break; - case N: consonant = "n"; break; - case L: consonant = "l"; break; - case G: consonant = "g"; break; - case K: consonant = "k"; break; - case H: consonant = "h"; break; - case J: consonant = "j"; if (hasNoMVCOrVC) middle = "i"; break; - case Q: consonant = "q"; if (hasNoMVCOrVC) middle = "i"; break; - case X: consonant = "x"; if (hasNoMVCOrVC) middle = "i"; break; - case ZH: consonant = "zh"; if (hasNoMVCOrVC) middle = "i"; break; - case CH: consonant = "ch"; if (hasNoMVCOrVC) middle = "i"; break; - case SH: consonant = "sh"; if (hasNoMVCOrVC) middle = "i"; break; - case R: consonant = "r"; if (hasNoMVCOrVC) middle = "i"; break; - case Z: consonant = "z"; if (hasNoMVCOrVC) middle = "i"; break; - case C: consonant = "c"; if (hasNoMVCOrVC) middle = "i"; break; - case S: consonant = "s"; if (hasNoMVCOrVC) middle = "i"; break; - } - - switch (mvc) { - case I: - if (!cc) { - consonant = "y"; - - } - - middle = (!vc || cc) ? "i" : ""; - break; - case U: - if (!cc) { - consonant = "w"; - } - middle = (!vc || cc) ? "u" : ""; - break; - case UE: - if (!cc) { - consonant = "y"; - } +const string BPMF::HanyuPinyinString(bool includesTone, + bool useVForUUmlaut) const { + string consonant, middle, vowel, tone; - if ((cc == N || cc == L) && vc != E) { - middle = useVForUUmlaut ? "v" : "ü"; - } - else { - middle = "u"; - } - - break; - } - - switch (vc) { - case A: vowel = "a"; break; - case O: vowel = "o"; break; - case ER: vowel = "e"; break; - case E: vowel = "e"; break; - case AI: vowel = "ai"; break; - case EI: vowel = "ei"; break; - case AO: vowel = "ao"; break; - case OU: vowel = "ou"; break; - case AN: vowel = "an"; break; - case EN: vowel = "en"; break; - case ANG: vowel = "ang"; break; - case ENG: vowel = "eng"; break; - case ERR: vowel = "er"; break; - } - - // combination rules - - // ueng -> ong, but note "weng" - if ((mvc == U || mvc == UE) && vc == ENG) { - middle = ""; - vowel = (cc == J || cc == Q || cc == X) ? "iong" : ((!cc && mvc == U) ? "eng" : "ong"); - } + Component cc = consonantComponent(), mvc = middleVowelComponent(), + vc = vowelComponent(); + bool hasNoMVCOrVC = !(mvc || vc); - // ien, uen, üen -> in, un, ün ; but note "wen", "yin" and "yun" - if (mvc && vc == EN) { - if (cc) { - vowel = "n"; - } - else { - if (mvc == UE) { - vowel = "n"; // yun - } - else if (mvc == U) { - vowel = "en"; // wen - } - else { - vowel = "in"; // yin - } - } - } + switch (cc) { + case B: + consonant = "b"; + break; + case P: + consonant = "p"; + break; + case M: + consonant = "m"; + break; + case F: + consonant = "f"; + break; + case D: + consonant = "d"; + break; + case T: + consonant = "t"; + break; + case N: + consonant = "n"; + break; + case L: + consonant = "l"; + break; + case G: + consonant = "g"; + break; + case K: + consonant = "k"; + break; + case H: + consonant = "h"; + break; + case J: + consonant = "j"; + if (hasNoMVCOrVC) middle = "i"; + break; + case Q: + consonant = "q"; + if (hasNoMVCOrVC) middle = "i"; + break; + case X: + consonant = "x"; + if (hasNoMVCOrVC) middle = "i"; + break; + case ZH: + consonant = "zh"; + if (hasNoMVCOrVC) middle = "i"; + break; + case CH: + consonant = "ch"; + if (hasNoMVCOrVC) middle = "i"; + break; + case SH: + consonant = "sh"; + if (hasNoMVCOrVC) middle = "i"; + break; + case R: + consonant = "r"; + if (hasNoMVCOrVC) middle = "i"; + break; + case Z: + consonant = "z"; + if (hasNoMVCOrVC) middle = "i"; + break; + case C: + consonant = "c"; + if (hasNoMVCOrVC) middle = "i"; + break; + case S: + consonant = "s"; + if (hasNoMVCOrVC) middle = "i"; + break; + } - // iou -> iu - if (cc && mvc == I && vc == OU) { - middle = ""; - vowel = "iu"; - } - - // ieng -> ing - if (mvc == I && vc == ENG) { - middle = ""; - vowel = "ing"; - } - - // uei -> ui - if (cc && mvc == U && vc == EI) { - middle = ""; - vowel = "ui"; - } + switch (mvc) { + case I: + if (!cc) { + consonant = "y"; + } - - if (includesTone) { - switch (toneMarkerComponent()) { - case Tone2: tone = "2"; break; - case Tone3: tone = "3"; break; - case Tone4: tone = "4"; break; - case Tone5: tone = "5"; break; - } + middle = (!vc || cc) ? "i" : ""; + break; + case U: + if (!cc) { + consonant = "w"; + } + middle = (!vc || cc) ? "u" : ""; + break; + case UE: + if (!cc) { + consonant = "y"; + } + + if ((cc == N || cc == L) && vc != E) { + middle = useVForUUmlaut ? "v" : "ü"; + } else { + middle = "u"; + } + + break; + } + + switch (vc) { + case A: + vowel = "a"; + break; + case O: + vowel = "o"; + break; + case ER: + vowel = "e"; + break; + case E: + vowel = "e"; + break; + case AI: + vowel = "ai"; + break; + case EI: + vowel = "ei"; + break; + case AO: + vowel = "ao"; + break; + case OU: + vowel = "ou"; + break; + case AN: + vowel = "an"; + break; + case EN: + vowel = "en"; + break; + case ANG: + vowel = "ang"; + break; + case ENG: + vowel = "eng"; + break; + case ERR: + vowel = "er"; + break; + } + + // combination rules + + // ueng -> ong, but note "weng" + if ((mvc == U || mvc == UE) && vc == ENG) { + middle = ""; + vowel = (cc == J || cc == Q || cc == X) + ? "iong" + : ((!cc && mvc == U) ? "eng" : "ong"); + } + + // ien, uen, üen -> in, un, ün ; but note "wen", "yin" and "yun" + if (mvc && vc == EN) { + if (cc) { + vowel = "n"; + } else { + if (mvc == UE) { + vowel = "n"; // yun + } else if (mvc == U) { + vowel = "en"; // wen + } else { + vowel = "in"; // yin + } } - - return consonant + middle + vowel + tone; + } + + // iou -> iu + if (cc && mvc == I && vc == OU) { + middle = ""; + vowel = "iu"; + } + + // ieng -> ing + if (mvc == I && vc == ENG) { + middle = ""; + vowel = "ing"; + } + + // uei -> ui + if (cc && mvc == U && vc == EI) { + middle = ""; + vowel = "ui"; + } + + if (includesTone) { + switch (toneMarkerComponent()) { + case Tone2: + tone = "2"; + break; + case Tone3: + tone = "3"; + break; + case Tone4: + tone = "4"; + break; + case Tone5: + tone = "5"; + break; + } + } + + return consonant + middle + vowel + tone; } +const string BPMF::PHTString(bool includesTone) const { + string consonant, middle, vowel, tone; + Component cc = consonantComponent(), mvc = middleVowelComponent(), + vc = vowelComponent(); + bool hasNoMVCOrVC = !(mvc || vc); -const string BPMF::PHTString(bool includesTone) const -{ - string consonant, middle, vowel, tone; - - Component cc = consonantComponent(), mvc = middleVowelComponent(), vc = vowelComponent(); - bool hasNoMVCOrVC = !(mvc || vc); - - switch (cc) { - case B: consonant = "p"; break; - case P: consonant = "ph"; break; - case M: consonant = "m"; break; - case F: consonant = "f"; break; - case D: consonant = "t"; break; - case T: consonant = "th"; break; - case N: consonant = "n"; break; - case L: consonant = "l"; break; - case G: consonant = "k"; break; - case K: consonant = "kh"; break; - case H: consonant = "h"; break; - case J: consonant = "ch"; if (mvc != I) middle = "i"; break; - case Q: consonant = "chh"; if (mvc != I) middle = "i"; break; - case X: consonant = "hs"; if (mvc != I) middle = "i"; break; - case ZH: consonant = "ch"; if (hasNoMVCOrVC) middle = "i"; break; - case CH: consonant = "chh"; if (hasNoMVCOrVC) middle = "i"; break; - case SH: consonant = "sh"; if (hasNoMVCOrVC) middle = "i"; break; - case R: consonant = "r"; if (hasNoMVCOrVC) middle = "i"; break; - case Z: consonant = "ts"; if (hasNoMVCOrVC) middle = "i"; break; - case C: consonant = "tsh"; if (hasNoMVCOrVC) middle = "i"; break; - case S: consonant = "s"; if (hasNoMVCOrVC) middle = "i"; break; - } - - switch (mvc) { - case I: - middle = "i"; - break; - case U: - middle = "u"; - break; - case UE: - middle = "uu"; - break; - } - - switch (vc) { - case A: vowel = "a"; break; - case O: vowel = "o"; break; - case ER: vowel = "e"; break; - case E: vowel = (!(cc || mvc)) ? "eh" : "e"; break; - case AI: vowel = "ai"; break; - case EI: vowel = "ei"; break; - case AO: vowel = "ao"; break; - case OU: vowel = "ou"; break; - case AN: vowel = "an"; break; - case EN: vowel = "en"; break; - case ANG: vowel = "ang"; break; - case ENG: vowel = "eng"; break; - case ERR: vowel = "err"; break; - } + switch (cc) { + case B: + consonant = "p"; + break; + case P: + consonant = "ph"; + break; + case M: + consonant = "m"; + break; + case F: + consonant = "f"; + break; + case D: + consonant = "t"; + break; + case T: + consonant = "th"; + break; + case N: + consonant = "n"; + break; + case L: + consonant = "l"; + break; + case G: + consonant = "k"; + break; + case K: + consonant = "kh"; + break; + case H: + consonant = "h"; + break; + case J: + consonant = "ch"; + if (mvc != I) middle = "i"; + break; + case Q: + consonant = "chh"; + if (mvc != I) middle = "i"; + break; + case X: + consonant = "hs"; + if (mvc != I) middle = "i"; + break; + case ZH: + consonant = "ch"; + if (hasNoMVCOrVC) middle = "i"; + break; + case CH: + consonant = "chh"; + if (hasNoMVCOrVC) middle = "i"; + break; + case SH: + consonant = "sh"; + if (hasNoMVCOrVC) middle = "i"; + break; + case R: + consonant = "r"; + if (hasNoMVCOrVC) middle = "i"; + break; + case Z: + consonant = "ts"; + if (hasNoMVCOrVC) middle = "i"; + break; + case C: + consonant = "tsh"; + if (hasNoMVCOrVC) middle = "i"; + break; + case S: + consonant = "s"; + if (hasNoMVCOrVC) middle = "i"; + break; + } - // ieng -> ing - if (mvc == I && vc == ENG) { - middle = ""; - vowel = "ing"; + switch (mvc) { + case I: + middle = "i"; + break; + case U: + middle = "u"; + break; + case UE: + middle = "uu"; + break; + } + + switch (vc) { + case A: + vowel = "a"; + break; + case O: + vowel = "o"; + break; + case ER: + vowel = "e"; + break; + case E: + vowel = (!(cc || mvc)) ? "eh" : "e"; + break; + case AI: + vowel = "ai"; + break; + case EI: + vowel = "ei"; + break; + case AO: + vowel = "ao"; + break; + case OU: + vowel = "ou"; + break; + case AN: + vowel = "an"; + break; + case EN: + vowel = "en"; + break; + case ANG: + vowel = "ang"; + break; + case ENG: + vowel = "eng"; + break; + case ERR: + vowel = "err"; + break; + } + + // ieng -> ing + if (mvc == I && vc == ENG) { + middle = ""; + vowel = "ing"; + } + + // zh/ch + i without third component -> append h + if (cc == BPMF::ZH || cc == BPMF::CH) { + if (!mvc && !vc) { + vowel = "h"; } - - // zh/ch + i without third component -> append h - if (cc == BPMF::ZH || cc == BPMF::CH) { - if (!mvc && !vc) { - vowel = "h"; - } + } + + if (includesTone) { + switch (toneMarkerComponent()) { + case Tone2: + tone = "2"; + break; + case Tone3: + tone = "3"; + break; + case Tone4: + tone = "4"; + break; + case Tone5: + tone = "5"; + break; } - - - if (includesTone) { - switch (toneMarkerComponent()) { - case Tone2: tone = "2"; break; - case Tone3: tone = "3"; break; - case Tone4: tone = "4"; break; - case Tone5: tone = "5"; break; - } - } - - return consonant + middle + vowel + tone; - + } + + return consonant + middle + vowel + tone; } -const BPMF BPMF::FromPHT(const string& str) -{ - if (!str.length()) { - return BPMF(); - } - - string pht = str; - transform(pht.begin(), pht.end(), pht.begin(), ::tolower); - - BPMF::Component firstComponent = 0; - BPMF::Component secondComponent = 0; - BPMF::Component thirdComponent = 0; - BPMF::Component toneComponent = 0; +const BPMF BPMF::FromPHT(const string& str) { + if (!str.length()) { + return BPMF(); + } - #define IF_CONSUME1(k, v) else if (PinyinParseHelper::ConsumePrefix(pht, k)) { firstComponent = v; } - - // consume the first part - if (0) {} - IF_CONSUME1("ph", BPMF::P) - IF_CONSUME1("p", BPMF::B) - IF_CONSUME1("m", BPMF::M) - IF_CONSUME1("f", BPMF::F) - IF_CONSUME1("th", BPMF::T) - IF_CONSUME1("n", BPMF::N) - IF_CONSUME1("l", BPMF::L) - IF_CONSUME1("kh", BPMF::K) - IF_CONSUME1("k", BPMF::G) - IF_CONSUME1("chh", BPMF::Q) - IF_CONSUME1("ch", BPMF::J) - IF_CONSUME1("hs", BPMF::X) - IF_CONSUME1("sh", BPMF::SH) - IF_CONSUME1("r", BPMF::R) - IF_CONSUME1("tsh", BPMF::C) - IF_CONSUME1("ts", BPMF::Z) - IF_CONSUME1("s", BPMF::S) - IF_CONSUME1("t", BPMF::D) - IF_CONSUME1("h", BPMF::H) - - #define IF_CONSUME2(k, v) else if (PinyinParseHelper::ConsumePrefix(pht, k)) { secondComponent = v; } - // consume the second part - if (0) {} - else if (PinyinParseHelper::ConsumePrefix(pht, "ing")) { secondComponent = BPMF::I; thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pht, "ih")) { - if (firstComponent == BPMF::J) { - firstComponent = BPMF::ZH; - } - else if (firstComponent == BPMF::Q) { - firstComponent = BPMF::CH; - } - } - IF_CONSUME2("i", BPMF::I) - IF_CONSUME2("uu", BPMF::UE) - IF_CONSUME2("u", BPMF::U) - - #undef IF_CONSUME1 - #undef IF_CONSUME2 - - // the vowels, longer sequence takes precedence - if (0) {} - else if (PinyinParseHelper::ConsumePrefix(pht, "ang")) { thirdComponent = BPMF::ANG; } - else if (PinyinParseHelper::ConsumePrefix(pht, "eng")) { thirdComponent = BPMF::ENG; } - else if (PinyinParseHelper::ConsumePrefix(pht, "err")) { thirdComponent = BPMF::ERR; } - else if (PinyinParseHelper::ConsumePrefix(pht, "ai")) { thirdComponent = BPMF::AI; } - else if (PinyinParseHelper::ConsumePrefix(pht, "ei")) { thirdComponent = BPMF::EI; } - else if (PinyinParseHelper::ConsumePrefix(pht, "ao")) { thirdComponent = BPMF::AO; } - else if (PinyinParseHelper::ConsumePrefix(pht, "ou")) { thirdComponent = BPMF::OU; } - else if (PinyinParseHelper::ConsumePrefix(pht, "an")) { thirdComponent = BPMF::AN; } - else if (PinyinParseHelper::ConsumePrefix(pht, "en")) { thirdComponent = BPMF::EN; } - else if (PinyinParseHelper::ConsumePrefix(pht, "er")) { thirdComponent = BPMF::ERR; } - else if (PinyinParseHelper::ConsumePrefix(pht, "a")) { thirdComponent = BPMF::A; } - else if (PinyinParseHelper::ConsumePrefix(pht, "o")) { thirdComponent = BPMF::O; } - else if (PinyinParseHelper::ConsumePrefix(pht, "eh")) { thirdComponent = BPMF::E; } - else if (PinyinParseHelper::ConsumePrefix(pht, "e")) { - if (secondComponent) { - thirdComponent = BPMF::E; - } - else { - thirdComponent = BPMF::ER; - } - } + string pht = str; + transform(pht.begin(), pht.end(), pht.begin(), ::tolower); - // fix ch/chh mappings - Component corresponding = 0; + BPMF::Component firstComponent = 0; + BPMF::Component secondComponent = 0; + BPMF::Component thirdComponent = 0; + BPMF::Component toneComponent = 0; + +#define IF_CONSUME1(k, v) \ + else if (PinyinParseHelper::ConsumePrefix(pht, k)) { \ + firstComponent = v; \ + } + + // consume the first part + if (0) { + } + IF_CONSUME1("ph", BPMF::P) + IF_CONSUME1("p", BPMF::B) + IF_CONSUME1("m", BPMF::M) + IF_CONSUME1("f", BPMF::F) + IF_CONSUME1("th", BPMF::T) + IF_CONSUME1("n", BPMF::N) + IF_CONSUME1("l", BPMF::L) + IF_CONSUME1("kh", BPMF::K) + IF_CONSUME1("k", BPMF::G) + IF_CONSUME1("chh", BPMF::Q) + IF_CONSUME1("ch", BPMF::J) + IF_CONSUME1("hs", BPMF::X) + IF_CONSUME1("sh", BPMF::SH) + IF_CONSUME1("r", BPMF::R) + IF_CONSUME1("tsh", BPMF::C) + IF_CONSUME1("ts", BPMF::Z) + IF_CONSUME1("s", BPMF::S) + IF_CONSUME1("t", BPMF::D) + IF_CONSUME1("h", BPMF::H) + +#define IF_CONSUME2(k, v) \ + else if (PinyinParseHelper::ConsumePrefix(pht, k)) { \ + secondComponent = v; \ + } + // consume the second part + if (0) { + } else if (PinyinParseHelper::ConsumePrefix(pht, "ing")) { + secondComponent = BPMF::I; + thirdComponent = BPMF::ENG; + } else if (PinyinParseHelper::ConsumePrefix(pht, "ih")) { if (firstComponent == BPMF::J) { - corresponding = BPMF::ZH; + firstComponent = BPMF::ZH; + } else if (firstComponent == BPMF::Q) { + firstComponent = BPMF::CH; } - else if (firstComponent == BPMF::Q) { - corresponding = BPMF::CH; - } - - if (corresponding) { - if (secondComponent == BPMF::I && !thirdComponent) { - // if the second component is I and there's no third component, we use the corresponding part - // firstComponent = corresponding; - } - else if (secondComponent == BPMF::U) { - // if second component is U, we use the corresponding part - firstComponent = corresponding; - } - else if (!secondComponent) { - // if there's no second component, it must be a corresponding part - firstComponent = corresponding; - } - } - - if (secondComponent == BPMF::I) { - // fixes a few impossible occurances - switch(firstComponent) { - case BPMF::ZH: - case BPMF::CH: - case BPMF::SH: - case BPMF::R: - case BPMF::Z: - case BPMF::C: - case BPMF::S: - secondComponent = 0; - } - } - + } + IF_CONSUME2("i", BPMF::I) + IF_CONSUME2("uu", BPMF::UE) + IF_CONSUME2("u", BPMF::U) - // at last! - if (0) {} - else if (PinyinParseHelper::ConsumePrefix(pht, "1")) { toneComponent = BPMF::Tone1; } - else if (PinyinParseHelper::ConsumePrefix(pht, "2")) { toneComponent = BPMF::Tone2; } - else if (PinyinParseHelper::ConsumePrefix(pht, "3")) { toneComponent = BPMF::Tone3; } - else if (PinyinParseHelper::ConsumePrefix(pht, "4")) { toneComponent = BPMF::Tone4; } - else if (PinyinParseHelper::ConsumePrefix(pht, "5")) { toneComponent = BPMF::Tone5; } +#undef IF_CONSUME1 +#undef IF_CONSUME2 - return BPMF(firstComponent | secondComponent | thirdComponent | toneComponent); + // the vowels, longer sequence takes precedence + if (0) { + } else if (PinyinParseHelper::ConsumePrefix(pht, "ang")) { + thirdComponent = BPMF::ANG; + } else if (PinyinParseHelper::ConsumePrefix(pht, "eng")) { + thirdComponent = BPMF::ENG; + } else if (PinyinParseHelper::ConsumePrefix(pht, "err")) { + thirdComponent = BPMF::ERR; + } else if (PinyinParseHelper::ConsumePrefix(pht, "ai")) { + thirdComponent = BPMF::AI; + } else if (PinyinParseHelper::ConsumePrefix(pht, "ei")) { + thirdComponent = BPMF::EI; + } else if (PinyinParseHelper::ConsumePrefix(pht, "ao")) { + thirdComponent = BPMF::AO; + } else if (PinyinParseHelper::ConsumePrefix(pht, "ou")) { + thirdComponent = BPMF::OU; + } else if (PinyinParseHelper::ConsumePrefix(pht, "an")) { + thirdComponent = BPMF::AN; + } else if (PinyinParseHelper::ConsumePrefix(pht, "en")) { + thirdComponent = BPMF::EN; + } else if (PinyinParseHelper::ConsumePrefix(pht, "er")) { + thirdComponent = BPMF::ERR; + } else if (PinyinParseHelper::ConsumePrefix(pht, "a")) { + thirdComponent = BPMF::A; + } else if (PinyinParseHelper::ConsumePrefix(pht, "o")) { + thirdComponent = BPMF::O; + } else if (PinyinParseHelper::ConsumePrefix(pht, "eh")) { + thirdComponent = BPMF::E; + } else if (PinyinParseHelper::ConsumePrefix(pht, "e")) { + if (secondComponent) { + thirdComponent = BPMF::E; + } else { + thirdComponent = BPMF::ER; + } + } + + // fix ch/chh mappings + Component corresponding = 0; + if (firstComponent == BPMF::J) { + corresponding = BPMF::ZH; + } else if (firstComponent == BPMF::Q) { + corresponding = BPMF::CH; + } + + if (corresponding) { + if (secondComponent == BPMF::I && !thirdComponent) { + // if the second component is I and there's no third component, we use the + // corresponding part firstComponent = corresponding; + } else if (secondComponent == BPMF::U) { + // if second component is U, we use the corresponding part + firstComponent = corresponding; + } else if (!secondComponent) { + // if there's no second component, it must be a corresponding part + firstComponent = corresponding; + } + } + + if (secondComponent == BPMF::I) { + // fixes a few impossible occurances + switch (firstComponent) { + case BPMF::ZH: + case BPMF::CH: + case BPMF::SH: + case BPMF::R: + case BPMF::Z: + case BPMF::C: + case BPMF::S: + secondComponent = 0; + } + } + + // at last! + if (0) { + } else if (PinyinParseHelper::ConsumePrefix(pht, "1")) { + toneComponent = BPMF::Tone1; + } else if (PinyinParseHelper::ConsumePrefix(pht, "2")) { + toneComponent = BPMF::Tone2; + } else if (PinyinParseHelper::ConsumePrefix(pht, "3")) { + toneComponent = BPMF::Tone3; + } else if (PinyinParseHelper::ConsumePrefix(pht, "4")) { + toneComponent = BPMF::Tone4; + } else if (PinyinParseHelper::ConsumePrefix(pht, "5")) { + toneComponent = BPMF::Tone5; + } + + return BPMF(firstComponent | secondComponent | thirdComponent | + toneComponent); } -const BPMF BPMF::FromComposedString(const string& str) -{ - BPMF syllable; - auto iter = str.begin(); - while (iter != str.end()) { - // This is a naive implementation and we bail early at anything we don't recognize. - // A sound implementation would require to either use a trie for the Bopomofo character map - // or to split the input by codepoints. This suffices for now. +const BPMF BPMF::FromComposedString(const string& str) { + BPMF syllable; + auto iter = str.begin(); + while (iter != str.end()) { + // This is a naive implementation and we bail early at anything we don't + // recognize. A sound implementation would require to either use a trie for + // the Bopomofo character map or to split the input by codepoints. This + // suffices for now. + // Illegal. + if (!(*iter & 0x80)) { + break; + } + + size_t utf8_length = -1; + + // These are the code points for the tone markers. + if ((*iter & (0x80 | 0x40)) && !(*iter & 0x20)) { + utf8_length = 2; + } else if ((*iter & (0x80 | 0x40 | 0x20)) && !(*iter & 0x10)) { + utf8_length = 3; + } else { // Illegal. - if (!(*iter & 0x80)) { - break; - } - - size_t utf8_length = -1; - - // These are the code points for the tone markers. - if ((*iter & (0x80 | 0x40)) && !(*iter & 0x20)) { - utf8_length = 2; - } else if ((*iter & (0x80 | 0x40 | 0x20)) && !(*iter & 0x10)) { - utf8_length = 3; - } else { - // Illegal. - break; - } - - if (iter + (utf8_length - 1) == str.end()) { - break; - } - - string component = string(iter, iter + utf8_length); - const map& charToComp = BopomofoCharacterMap::SharedInstance().characterToComponent; - map::const_iterator result = charToComp.find(component); - if (result == charToComp.end()) { - break; - } else { - syllable += BPMF((*result).second); - } - iter += utf8_length; + break; } - return syllable; + + if (iter + (utf8_length - 1) == str.end()) { + break; + } + + string component = string(iter, iter + utf8_length); + const map& charToComp = + BopomofoCharacterMap::SharedInstance().characterToComponent; + map::const_iterator result = + charToComp.find(component); + if (result == charToComp.end()) { + break; + } else { + syllable += BPMF((*result).second); + } + iter += utf8_length; + } + return syllable; } -const string BPMF::composedString() const -{ - string result; - #define APPEND(c) if (m_syllable & c) result += (*BopomofoCharacterMap::SharedInstance().componentToCharacter.find(m_syllable & c)).second - APPEND(ConsonantMask); - APPEND(MiddleVowelMask); - APPEND(VowelMask); - APPEND(ToneMarkerMask); - #undef APPEND - return result; +const string BPMF::composedString() const { + string result; +#define APPEND(c) \ + if (m_syllable & c) \ + result += \ + (*BopomofoCharacterMap::SharedInstance().componentToCharacter.find( \ + m_syllable & c)) \ + .second + APPEND(ConsonantMask); + APPEND(MiddleVowelMask); + APPEND(VowelMask); + APPEND(ToneMarkerMask); +#undef APPEND + return result; } BopomofoCharacterMap* BopomofoCharacterMap::c_map = 0; -const BopomofoCharacterMap& BopomofoCharacterMap::SharedInstance() -{ - if (!c_map) - c_map = new BopomofoCharacterMap(); - - return *c_map; +const BopomofoCharacterMap& BopomofoCharacterMap::SharedInstance() { + if (!c_map) c_map = new BopomofoCharacterMap(); + + return *c_map; } -BopomofoCharacterMap::BopomofoCharacterMap() -{ +BopomofoCharacterMap::BopomofoCharacterMap() { #ifndef _MSC_VER - characterToComponent["ㄅ"] = BPMF::B; - characterToComponent["ㄆ"] = BPMF::P; - characterToComponent["ㄇ"] = BPMF::M; - characterToComponent["ㄈ"] = BPMF::F; - characterToComponent["ㄉ"] = BPMF::D; - characterToComponent["ㄊ"] = BPMF::T; - characterToComponent["ㄋ"] = BPMF::N; - characterToComponent["ㄌ"] = BPMF::L; - characterToComponent["ㄎ"] = BPMF::K; - characterToComponent["ㄍ"] = BPMF::G; - characterToComponent["ㄏ"] = BPMF::H; - characterToComponent["ㄐ"] = BPMF::J; - characterToComponent["ㄑ"] = BPMF::Q; - characterToComponent["ㄒ"] = BPMF::X; - characterToComponent["ㄓ"] = BPMF::ZH; - characterToComponent["ㄔ"] = BPMF::CH; - characterToComponent["ㄕ"] = BPMF::SH; - characterToComponent["ㄖ"] = BPMF::R; - characterToComponent["ㄗ"] = BPMF::Z; - characterToComponent["ㄘ"] = BPMF::C; - characterToComponent["ㄙ"] = BPMF::S; - characterToComponent["ㄧ"] = BPMF::I; - characterToComponent["ㄨ"] = BPMF::U; - characterToComponent["ㄩ"] = BPMF::UE; - characterToComponent["ㄚ"] = BPMF::A; - characterToComponent["ㄛ"] = BPMF::O; - characterToComponent["ㄜ"] = BPMF::ER; - characterToComponent["ㄝ"] = BPMF::E; - characterToComponent["ㄞ"] = BPMF::AI; - characterToComponent["ㄟ"] = BPMF::EI; - characterToComponent["ㄠ"] = BPMF::AO; - characterToComponent["ㄡ"] = BPMF::OU; - characterToComponent["ㄢ"] = BPMF::AN; - characterToComponent["ㄣ"] = BPMF::EN; - characterToComponent["ㄤ"] = BPMF::ANG; - characterToComponent["ㄥ"] = BPMF::ENG; - characterToComponent["ㄦ"] = BPMF::ERR; - characterToComponent["ˊ"] = BPMF::Tone2; - characterToComponent["ˇ"] = BPMF::Tone3; - characterToComponent["ˋ"] = BPMF::Tone4; - characterToComponent["˙"] = BPMF::Tone5; + characterToComponent["ㄅ"] = BPMF::B; + characterToComponent["ㄆ"] = BPMF::P; + characterToComponent["ㄇ"] = BPMF::M; + characterToComponent["ㄈ"] = BPMF::F; + characterToComponent["ㄉ"] = BPMF::D; + characterToComponent["ㄊ"] = BPMF::T; + characterToComponent["ㄋ"] = BPMF::N; + characterToComponent["ㄌ"] = BPMF::L; + characterToComponent["ㄎ"] = BPMF::K; + characterToComponent["ㄍ"] = BPMF::G; + characterToComponent["ㄏ"] = BPMF::H; + characterToComponent["ㄐ"] = BPMF::J; + characterToComponent["ㄑ"] = BPMF::Q; + characterToComponent["ㄒ"] = BPMF::X; + characterToComponent["ㄓ"] = BPMF::ZH; + characterToComponent["ㄔ"] = BPMF::CH; + characterToComponent["ㄕ"] = BPMF::SH; + characterToComponent["ㄖ"] = BPMF::R; + characterToComponent["ㄗ"] = BPMF::Z; + characterToComponent["ㄘ"] = BPMF::C; + characterToComponent["ㄙ"] = BPMF::S; + characterToComponent["ㄧ"] = BPMF::I; + characterToComponent["ㄨ"] = BPMF::U; + characterToComponent["ㄩ"] = BPMF::UE; + characterToComponent["ㄚ"] = BPMF::A; + characterToComponent["ㄛ"] = BPMF::O; + characterToComponent["ㄜ"] = BPMF::ER; + characterToComponent["ㄝ"] = BPMF::E; + characterToComponent["ㄞ"] = BPMF::AI; + characterToComponent["ㄟ"] = BPMF::EI; + characterToComponent["ㄠ"] = BPMF::AO; + characterToComponent["ㄡ"] = BPMF::OU; + characterToComponent["ㄢ"] = BPMF::AN; + characterToComponent["ㄣ"] = BPMF::EN; + characterToComponent["ㄤ"] = BPMF::ANG; + characterToComponent["ㄥ"] = BPMF::ENG; + characterToComponent["ㄦ"] = BPMF::ERR; + characterToComponent["ˊ"] = BPMF::Tone2; + characterToComponent["ˇ"] = BPMF::Tone3; + characterToComponent["ˋ"] = BPMF::Tone4; + characterToComponent["˙"] = BPMF::Tone5; #else - characterToComponent["\xe3\x84\x85"] = BPMF::B; - characterToComponent["\xe3\x84\x86"] = BPMF::P; - characterToComponent["\xe3\x84\x87"] = BPMF::M; - characterToComponent["\xe3\x84\x88"] = BPMF::F; - characterToComponent["\xe3\x84\x89"] = BPMF::D; - characterToComponent["\xe3\x84\x8a"] = BPMF::T; - characterToComponent["\xe3\x84\x8b"] = BPMF::N; - characterToComponent["\xe3\x84\x8c"] = BPMF::L; - characterToComponent["\xe3\x84\x8e"] = BPMF::K; - characterToComponent["\xe3\x84\x8d"] = BPMF::G; - characterToComponent["\xe3\x84\x8f"] = BPMF::H; - characterToComponent["\xe3\x84\x90"] = BPMF::J; - characterToComponent["\xe3\x84\x91"] = BPMF::Q; - characterToComponent["\xe3\x84\x92"] = BPMF::X; - characterToComponent["\xe3\x84\x93"] = BPMF::ZH; - characterToComponent["\xe3\x84\x94"] = BPMF::CH; - characterToComponent["\xe3\x84\x95"] = BPMF::SH; - characterToComponent["\xe3\x84\x96"] = BPMF::R; - characterToComponent["\xe3\x84\x97"] = BPMF::Z; - characterToComponent["\xe3\x84\x98"] = BPMF::C; - characterToComponent["\xe3\x84\x99"] = BPMF::S; - characterToComponent["\xe3\x84\xa7"] = BPMF::I; - characterToComponent["\xe3\x84\xa8"] = BPMF::U; - characterToComponent["\xe3\x84\xa9"] = BPMF::UE; - characterToComponent["\xe3\x84\x9a"] = BPMF::A; - characterToComponent["\xe3\x84\x9b"] = BPMF::O; - characterToComponent["\xe3\x84\x9c"] = BPMF::ER; - characterToComponent["\xe3\x84\x9d"] = BPMF::E; - characterToComponent["\xe3\x84\x9e"] = BPMF::AI; - characterToComponent["\xe3\x84\x9f"] = BPMF::EI; - characterToComponent["\xe3\x84\xa0"] = BPMF::AO; - characterToComponent["\xe3\x84\xa1"] = BPMF::OU; - characterToComponent["\xe3\x84\xa2"] = BPMF::AN; - characterToComponent["\xe3\x84\xa3"] = BPMF::EN; - characterToComponent["\xe3\x84\xa4"] = BPMF::ANG; - characterToComponent["\xe3\x84\xa5"] = BPMF::ENG; - characterToComponent["\xe3\x84\xa6"] = BPMF::ERR; - characterToComponent["\xcb\x8a"] = BPMF::Tone2; - characterToComponent["\xcb\x87"] = BPMF::Tone3; - characterToComponent["\xcb\x8b"] = BPMF::Tone4; - characterToComponent["\xcb\x99"] = BPMF::Tone5; + characterToComponent["\xe3\x84\x85"] = BPMF::B; + characterToComponent["\xe3\x84\x86"] = BPMF::P; + characterToComponent["\xe3\x84\x87"] = BPMF::M; + characterToComponent["\xe3\x84\x88"] = BPMF::F; + characterToComponent["\xe3\x84\x89"] = BPMF::D; + characterToComponent["\xe3\x84\x8a"] = BPMF::T; + characterToComponent["\xe3\x84\x8b"] = BPMF::N; + characterToComponent["\xe3\x84\x8c"] = BPMF::L; + characterToComponent["\xe3\x84\x8e"] = BPMF::K; + characterToComponent["\xe3\x84\x8d"] = BPMF::G; + characterToComponent["\xe3\x84\x8f"] = BPMF::H; + characterToComponent["\xe3\x84\x90"] = BPMF::J; + characterToComponent["\xe3\x84\x91"] = BPMF::Q; + characterToComponent["\xe3\x84\x92"] = BPMF::X; + characterToComponent["\xe3\x84\x93"] = BPMF::ZH; + characterToComponent["\xe3\x84\x94"] = BPMF::CH; + characterToComponent["\xe3\x84\x95"] = BPMF::SH; + characterToComponent["\xe3\x84\x96"] = BPMF::R; + characterToComponent["\xe3\x84\x97"] = BPMF::Z; + characterToComponent["\xe3\x84\x98"] = BPMF::C; + characterToComponent["\xe3\x84\x99"] = BPMF::S; + characterToComponent["\xe3\x84\xa7"] = BPMF::I; + characterToComponent["\xe3\x84\xa8"] = BPMF::U; + characterToComponent["\xe3\x84\xa9"] = BPMF::UE; + characterToComponent["\xe3\x84\x9a"] = BPMF::A; + characterToComponent["\xe3\x84\x9b"] = BPMF::O; + characterToComponent["\xe3\x84\x9c"] = BPMF::ER; + characterToComponent["\xe3\x84\x9d"] = BPMF::E; + characterToComponent["\xe3\x84\x9e"] = BPMF::AI; + characterToComponent["\xe3\x84\x9f"] = BPMF::EI; + characterToComponent["\xe3\x84\xa0"] = BPMF::AO; + characterToComponent["\xe3\x84\xa1"] = BPMF::OU; + characterToComponent["\xe3\x84\xa2"] = BPMF::AN; + characterToComponent["\xe3\x84\xa3"] = BPMF::EN; + characterToComponent["\xe3\x84\xa4"] = BPMF::ANG; + characterToComponent["\xe3\x84\xa5"] = BPMF::ENG; + characterToComponent["\xe3\x84\xa6"] = BPMF::ERR; + characterToComponent["\xcb\x8a"] = BPMF::Tone2; + characterToComponent["\xcb\x87"] = BPMF::Tone3; + characterToComponent["\xcb\x8b"] = BPMF::Tone4; + characterToComponent["\xcb\x99"] = BPMF::Tone5; #endif - - for (map::iterator iter = characterToComponent.begin() ; iter != characterToComponent.end() ; ++iter) - componentToCharacter[(*iter).second] = (*iter).first; + + for (map::iterator iter = + characterToComponent.begin(); + iter != characterToComponent.end(); ++iter) + componentToCharacter[(*iter).second] = (*iter).first; } const BopomofoKeyboardLayout* BopomofoKeyboardLayout::c_StandardLayout = 0; @@ -747,267 +1056,269 @@ const BopomofoKeyboardLayout* BopomofoKeyboardLayout::c_ETen26Layout = 0; const BopomofoKeyboardLayout* BopomofoKeyboardLayout::c_IBMLayout = 0; const BopomofoKeyboardLayout* BopomofoKeyboardLayout::c_HanyuPinyinLayout = 0; -void BopomofoKeyboardLayout::FinalizeLayouts() -{ - #define FL(x) if (x) { delete x; } x = 0 - FL(c_StandardLayout); - FL(c_ETenLayout); - FL(c_HsuLayout); - FL(c_ETen26Layout); - FL(c_IBMLayout); - FL(c_HanyuPinyinLayout); - #undef FL +void BopomofoKeyboardLayout::FinalizeLayouts() { +#define FL(x) \ + if (x) { \ + delete x; \ + } \ + x = 0 + FL(c_StandardLayout); + FL(c_ETenLayout); + FL(c_HsuLayout); + FL(c_ETen26Layout); + FL(c_IBMLayout); + FL(c_HanyuPinyinLayout); +#undef FL } -#define ASSIGNKEY1(m, vec, k, val) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val), vec) -#define ASSIGNKEY2(m, vec, k, val1, val2) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), vec) -#define ASSIGNKEY3(m, vec, k, val1, val2, val3) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), vec.push_back((BPMF::Component)val3), vec) +#define ASSIGNKEY1(m, vec, k, val) \ + m[k] = (vec.clear(), vec.push_back((BPMF::Component)val), vec) +#define ASSIGNKEY2(m, vec, k, val1, val2) \ + m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), \ + vec.push_back((BPMF::Component)val2), vec) +#define ASSIGNKEY3(m, vec, k, val1, val2, val3) \ + m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), \ + vec.push_back((BPMF::Component)val2), \ + vec.push_back((BPMF::Component)val3), vec) -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::StandardLayout() -{ - if (!c_StandardLayout) { - vector vec; - BopomofoKeyToComponentMap ktcm; - - ASSIGNKEY1(ktcm, vec, '1', BPMF::B); - ASSIGNKEY1(ktcm, vec, 'q', BPMF::P); - ASSIGNKEY1(ktcm, vec, 'a', BPMF::M); - ASSIGNKEY1(ktcm, vec, 'z', BPMF::F); - ASSIGNKEY1(ktcm, vec, '2', BPMF::D); - ASSIGNKEY1(ktcm, vec, 'w', BPMF::T); - ASSIGNKEY1(ktcm, vec, 's', BPMF::N); - ASSIGNKEY1(ktcm, vec, 'x', BPMF::L); - ASSIGNKEY1(ktcm, vec, 'e', BPMF::G); - ASSIGNKEY1(ktcm, vec, 'd', BPMF::K); - ASSIGNKEY1(ktcm, vec, 'c', BPMF::H); - ASSIGNKEY1(ktcm, vec, 'r', BPMF::J); - ASSIGNKEY1(ktcm, vec, 'f', BPMF::Q); - ASSIGNKEY1(ktcm, vec, 'v', BPMF::X); - ASSIGNKEY1(ktcm, vec, '5', BPMF::ZH); - ASSIGNKEY1(ktcm, vec, 't', BPMF::CH); - ASSIGNKEY1(ktcm, vec, 'g', BPMF::SH); - ASSIGNKEY1(ktcm, vec, 'b', BPMF::R); - ASSIGNKEY1(ktcm, vec, 'y', BPMF::Z); - ASSIGNKEY1(ktcm, vec, 'h', BPMF::C); - ASSIGNKEY1(ktcm, vec, 'n', BPMF::S); - ASSIGNKEY1(ktcm, vec, 'u', BPMF::I); - ASSIGNKEY1(ktcm, vec, 'j', BPMF::U); - ASSIGNKEY1(ktcm, vec, 'm', BPMF::UE); - ASSIGNKEY1(ktcm, vec, '8', BPMF::A); - ASSIGNKEY1(ktcm, vec, 'i', BPMF::O); - ASSIGNKEY1(ktcm, vec, 'k', BPMF::ER); - ASSIGNKEY1(ktcm, vec, ',', BPMF::E); - ASSIGNKEY1(ktcm, vec, '9', BPMF::AI); - ASSIGNKEY1(ktcm, vec, 'o', BPMF::EI); - ASSIGNKEY1(ktcm, vec, 'l', BPMF::AO); - ASSIGNKEY1(ktcm, vec, '.', BPMF::OU); - ASSIGNKEY1(ktcm, vec, '0', BPMF::AN); - ASSIGNKEY1(ktcm, vec, 'p', BPMF::EN); - ASSIGNKEY1(ktcm, vec, ';', BPMF::ANG); - ASSIGNKEY1(ktcm, vec, '/', BPMF::ENG); - ASSIGNKEY1(ktcm, vec, '-', BPMF::ERR); - ASSIGNKEY1(ktcm, vec, '3', BPMF::Tone3); - ASSIGNKEY1(ktcm, vec, '4', BPMF::Tone4); - ASSIGNKEY1(ktcm, vec, '6', BPMF::Tone2); - ASSIGNKEY1(ktcm, vec, '7', BPMF::Tone5); - - c_StandardLayout = new BopomofoKeyboardLayout(ktcm, "Standard"); - } - - return c_StandardLayout; +const BopomofoKeyboardLayout* BopomofoKeyboardLayout::StandardLayout() { + if (!c_StandardLayout) { + vector vec; + BopomofoKeyToComponentMap ktcm; + + ASSIGNKEY1(ktcm, vec, '1', BPMF::B); + ASSIGNKEY1(ktcm, vec, 'q', BPMF::P); + ASSIGNKEY1(ktcm, vec, 'a', BPMF::M); + ASSIGNKEY1(ktcm, vec, 'z', BPMF::F); + ASSIGNKEY1(ktcm, vec, '2', BPMF::D); + ASSIGNKEY1(ktcm, vec, 'w', BPMF::T); + ASSIGNKEY1(ktcm, vec, 's', BPMF::N); + ASSIGNKEY1(ktcm, vec, 'x', BPMF::L); + ASSIGNKEY1(ktcm, vec, 'e', BPMF::G); + ASSIGNKEY1(ktcm, vec, 'd', BPMF::K); + ASSIGNKEY1(ktcm, vec, 'c', BPMF::H); + ASSIGNKEY1(ktcm, vec, 'r', BPMF::J); + ASSIGNKEY1(ktcm, vec, 'f', BPMF::Q); + ASSIGNKEY1(ktcm, vec, 'v', BPMF::X); + ASSIGNKEY1(ktcm, vec, '5', BPMF::ZH); + ASSIGNKEY1(ktcm, vec, 't', BPMF::CH); + ASSIGNKEY1(ktcm, vec, 'g', BPMF::SH); + ASSIGNKEY1(ktcm, vec, 'b', BPMF::R); + ASSIGNKEY1(ktcm, vec, 'y', BPMF::Z); + ASSIGNKEY1(ktcm, vec, 'h', BPMF::C); + ASSIGNKEY1(ktcm, vec, 'n', BPMF::S); + ASSIGNKEY1(ktcm, vec, 'u', BPMF::I); + ASSIGNKEY1(ktcm, vec, 'j', BPMF::U); + ASSIGNKEY1(ktcm, vec, 'm', BPMF::UE); + ASSIGNKEY1(ktcm, vec, '8', BPMF::A); + ASSIGNKEY1(ktcm, vec, 'i', BPMF::O); + ASSIGNKEY1(ktcm, vec, 'k', BPMF::ER); + ASSIGNKEY1(ktcm, vec, ',', BPMF::E); + ASSIGNKEY1(ktcm, vec, '9', BPMF::AI); + ASSIGNKEY1(ktcm, vec, 'o', BPMF::EI); + ASSIGNKEY1(ktcm, vec, 'l', BPMF::AO); + ASSIGNKEY1(ktcm, vec, '.', BPMF::OU); + ASSIGNKEY1(ktcm, vec, '0', BPMF::AN); + ASSIGNKEY1(ktcm, vec, 'p', BPMF::EN); + ASSIGNKEY1(ktcm, vec, ';', BPMF::ANG); + ASSIGNKEY1(ktcm, vec, '/', BPMF::ENG); + ASSIGNKEY1(ktcm, vec, '-', BPMF::ERR); + ASSIGNKEY1(ktcm, vec, '3', BPMF::Tone3); + ASSIGNKEY1(ktcm, vec, '4', BPMF::Tone4); + ASSIGNKEY1(ktcm, vec, '6', BPMF::Tone2); + ASSIGNKEY1(ktcm, vec, '7', BPMF::Tone5); + + c_StandardLayout = new BopomofoKeyboardLayout(ktcm, "Standard"); + } + + return c_StandardLayout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::IBMLayout() -{ - if (!c_IBMLayout) { - vector vec; - BopomofoKeyToComponentMap ktcm; - - ASSIGNKEY1(ktcm, vec, '1', BPMF::B); - ASSIGNKEY1(ktcm, vec, '2', BPMF::P); - ASSIGNKEY1(ktcm, vec, '3', BPMF::M); - ASSIGNKEY1(ktcm, vec, '4', BPMF::F); - ASSIGNKEY1(ktcm, vec, '5', BPMF::D); - ASSIGNKEY1(ktcm, vec, '6', BPMF::T); - ASSIGNKEY1(ktcm, vec, '7', BPMF::N); - ASSIGNKEY1(ktcm, vec, '8', BPMF::L); - ASSIGNKEY1(ktcm, vec, '9', BPMF::G); - ASSIGNKEY1(ktcm, vec, '0', BPMF::K); - ASSIGNKEY1(ktcm, vec, '-', BPMF::H); - ASSIGNKEY1(ktcm, vec, 'q', BPMF::J); - ASSIGNKEY1(ktcm, vec, 'w', BPMF::Q); - ASSIGNKEY1(ktcm, vec, 'e', BPMF::X); - ASSIGNKEY1(ktcm, vec, 'r', BPMF::ZH); - ASSIGNKEY1(ktcm, vec, 't', BPMF::CH); - ASSIGNKEY1(ktcm, vec, 'y', BPMF::SH); - ASSIGNKEY1(ktcm, vec, 'u', BPMF::R); - ASSIGNKEY1(ktcm, vec, 'i', BPMF::Z); - ASSIGNKEY1(ktcm, vec, 'o', BPMF::C); - ASSIGNKEY1(ktcm, vec, 'p', BPMF::S); - ASSIGNKEY1(ktcm, vec, 'a', BPMF::I); - ASSIGNKEY1(ktcm, vec, 's', BPMF::U); - ASSIGNKEY1(ktcm, vec, 'd', BPMF::UE); - ASSIGNKEY1(ktcm, vec, 'f', BPMF::A); - ASSIGNKEY1(ktcm, vec, 'g', BPMF::O); - ASSIGNKEY1(ktcm, vec, 'h', BPMF::ER); - ASSIGNKEY1(ktcm, vec, 'j', BPMF::E); - ASSIGNKEY1(ktcm, vec, 'k', BPMF::AI); - ASSIGNKEY1(ktcm, vec, 'l', BPMF::EI); - ASSIGNKEY1(ktcm, vec, ';', BPMF::AO); - ASSIGNKEY1(ktcm, vec, 'z', BPMF::OU); - ASSIGNKEY1(ktcm, vec, 'x', BPMF::AN); - ASSIGNKEY1(ktcm, vec, 'c', BPMF::EN); - ASSIGNKEY1(ktcm, vec, 'v', BPMF::ANG); - ASSIGNKEY1(ktcm, vec, 'b', BPMF::ENG); - ASSIGNKEY1(ktcm, vec, 'n', BPMF::ERR); - ASSIGNKEY1(ktcm, vec, 'm', BPMF::Tone2); - ASSIGNKEY1(ktcm, vec, ',', BPMF::Tone3); - ASSIGNKEY1(ktcm, vec, '.', BPMF::Tone4); - ASSIGNKEY1(ktcm, vec, '/', BPMF::Tone5); - - c_IBMLayout = new BopomofoKeyboardLayout(ktcm, "IBM"); - } - - return c_IBMLayout; +const BopomofoKeyboardLayout* BopomofoKeyboardLayout::IBMLayout() { + if (!c_IBMLayout) { + vector vec; + BopomofoKeyToComponentMap ktcm; + + ASSIGNKEY1(ktcm, vec, '1', BPMF::B); + ASSIGNKEY1(ktcm, vec, '2', BPMF::P); + ASSIGNKEY1(ktcm, vec, '3', BPMF::M); + ASSIGNKEY1(ktcm, vec, '4', BPMF::F); + ASSIGNKEY1(ktcm, vec, '5', BPMF::D); + ASSIGNKEY1(ktcm, vec, '6', BPMF::T); + ASSIGNKEY1(ktcm, vec, '7', BPMF::N); + ASSIGNKEY1(ktcm, vec, '8', BPMF::L); + ASSIGNKEY1(ktcm, vec, '9', BPMF::G); + ASSIGNKEY1(ktcm, vec, '0', BPMF::K); + ASSIGNKEY1(ktcm, vec, '-', BPMF::H); + ASSIGNKEY1(ktcm, vec, 'q', BPMF::J); + ASSIGNKEY1(ktcm, vec, 'w', BPMF::Q); + ASSIGNKEY1(ktcm, vec, 'e', BPMF::X); + ASSIGNKEY1(ktcm, vec, 'r', BPMF::ZH); + ASSIGNKEY1(ktcm, vec, 't', BPMF::CH); + ASSIGNKEY1(ktcm, vec, 'y', BPMF::SH); + ASSIGNKEY1(ktcm, vec, 'u', BPMF::R); + ASSIGNKEY1(ktcm, vec, 'i', BPMF::Z); + ASSIGNKEY1(ktcm, vec, 'o', BPMF::C); + ASSIGNKEY1(ktcm, vec, 'p', BPMF::S); + ASSIGNKEY1(ktcm, vec, 'a', BPMF::I); + ASSIGNKEY1(ktcm, vec, 's', BPMF::U); + ASSIGNKEY1(ktcm, vec, 'd', BPMF::UE); + ASSIGNKEY1(ktcm, vec, 'f', BPMF::A); + ASSIGNKEY1(ktcm, vec, 'g', BPMF::O); + ASSIGNKEY1(ktcm, vec, 'h', BPMF::ER); + ASSIGNKEY1(ktcm, vec, 'j', BPMF::E); + ASSIGNKEY1(ktcm, vec, 'k', BPMF::AI); + ASSIGNKEY1(ktcm, vec, 'l', BPMF::EI); + ASSIGNKEY1(ktcm, vec, ';', BPMF::AO); + ASSIGNKEY1(ktcm, vec, 'z', BPMF::OU); + ASSIGNKEY1(ktcm, vec, 'x', BPMF::AN); + ASSIGNKEY1(ktcm, vec, 'c', BPMF::EN); + ASSIGNKEY1(ktcm, vec, 'v', BPMF::ANG); + ASSIGNKEY1(ktcm, vec, 'b', BPMF::ENG); + ASSIGNKEY1(ktcm, vec, 'n', BPMF::ERR); + ASSIGNKEY1(ktcm, vec, 'm', BPMF::Tone2); + ASSIGNKEY1(ktcm, vec, ',', BPMF::Tone3); + ASSIGNKEY1(ktcm, vec, '.', BPMF::Tone4); + ASSIGNKEY1(ktcm, vec, '/', BPMF::Tone5); + + c_IBMLayout = new BopomofoKeyboardLayout(ktcm, "IBM"); + } + + return c_IBMLayout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::ETenLayout() -{ - if (!c_ETenLayout) { - vector vec; - BopomofoKeyToComponentMap ktcm; - - ASSIGNKEY1(ktcm, vec, 'b', BPMF::B); - ASSIGNKEY1(ktcm, vec, 'p', BPMF::P); - ASSIGNKEY1(ktcm, vec, 'm', BPMF::M); - ASSIGNKEY1(ktcm, vec, 'f', BPMF::F); - ASSIGNKEY1(ktcm, vec, 'd', BPMF::D); - ASSIGNKEY1(ktcm, vec, 't', BPMF::T); - ASSIGNKEY1(ktcm, vec, 'n', BPMF::N); - ASSIGNKEY1(ktcm, vec, 'l', BPMF::L); - ASSIGNKEY1(ktcm, vec, 'v', BPMF::G); - ASSIGNKEY1(ktcm, vec, 'k', BPMF::K); - ASSIGNKEY1(ktcm, vec, 'h', BPMF::H); - ASSIGNKEY1(ktcm, vec, 'g', BPMF::J); - ASSIGNKEY1(ktcm, vec, '7', BPMF::Q); - ASSIGNKEY1(ktcm, vec, 'c', BPMF::X); - ASSIGNKEY1(ktcm, vec, ',', BPMF::ZH); - ASSIGNKEY1(ktcm, vec, '.', BPMF::CH); - ASSIGNKEY1(ktcm, vec, '/', BPMF::SH); - ASSIGNKEY1(ktcm, vec, 'j', BPMF::R); - ASSIGNKEY1(ktcm, vec, ';', BPMF::Z); - ASSIGNKEY1(ktcm, vec, '\'', BPMF::C); - ASSIGNKEY1(ktcm, vec, 's', BPMF::S); - ASSIGNKEY1(ktcm, vec, 'e', BPMF::I); - ASSIGNKEY1(ktcm, vec, 'x', BPMF::U); - ASSIGNKEY1(ktcm, vec, 'u', BPMF::UE); - ASSIGNKEY1(ktcm, vec, 'a', BPMF::A); - ASSIGNKEY1(ktcm, vec, 'o', BPMF::O); - ASSIGNKEY1(ktcm, vec, 'r', BPMF::ER); - ASSIGNKEY1(ktcm, vec, 'w', BPMF::E); - ASSIGNKEY1(ktcm, vec, 'i', BPMF::AI); - ASSIGNKEY1(ktcm, vec, 'q', BPMF::EI); - ASSIGNKEY1(ktcm, vec, 'z', BPMF::AO); - ASSIGNKEY1(ktcm, vec, 'y', BPMF::OU); - ASSIGNKEY1(ktcm, vec, '8', BPMF::AN); - ASSIGNKEY1(ktcm, vec, '9', BPMF::EN); - ASSIGNKEY1(ktcm, vec, '0', BPMF::ANG); - ASSIGNKEY1(ktcm, vec, '-', BPMF::ENG); - ASSIGNKEY1(ktcm, vec, '=', BPMF::ERR); - ASSIGNKEY1(ktcm, vec, '2', BPMF::Tone2); - ASSIGNKEY1(ktcm, vec, '3', BPMF::Tone3); - ASSIGNKEY1(ktcm, vec, '4', BPMF::Tone4); - ASSIGNKEY1(ktcm, vec, '1', BPMF::Tone5); - - c_ETenLayout = new BopomofoKeyboardLayout(ktcm, "ETen"); - } - - return c_ETenLayout; +const BopomofoKeyboardLayout* BopomofoKeyboardLayout::ETenLayout() { + if (!c_ETenLayout) { + vector vec; + BopomofoKeyToComponentMap ktcm; + + ASSIGNKEY1(ktcm, vec, 'b', BPMF::B); + ASSIGNKEY1(ktcm, vec, 'p', BPMF::P); + ASSIGNKEY1(ktcm, vec, 'm', BPMF::M); + ASSIGNKEY1(ktcm, vec, 'f', BPMF::F); + ASSIGNKEY1(ktcm, vec, 'd', BPMF::D); + ASSIGNKEY1(ktcm, vec, 't', BPMF::T); + ASSIGNKEY1(ktcm, vec, 'n', BPMF::N); + ASSIGNKEY1(ktcm, vec, 'l', BPMF::L); + ASSIGNKEY1(ktcm, vec, 'v', BPMF::G); + ASSIGNKEY1(ktcm, vec, 'k', BPMF::K); + ASSIGNKEY1(ktcm, vec, 'h', BPMF::H); + ASSIGNKEY1(ktcm, vec, 'g', BPMF::J); + ASSIGNKEY1(ktcm, vec, '7', BPMF::Q); + ASSIGNKEY1(ktcm, vec, 'c', BPMF::X); + ASSIGNKEY1(ktcm, vec, ',', BPMF::ZH); + ASSIGNKEY1(ktcm, vec, '.', BPMF::CH); + ASSIGNKEY1(ktcm, vec, '/', BPMF::SH); + ASSIGNKEY1(ktcm, vec, 'j', BPMF::R); + ASSIGNKEY1(ktcm, vec, ';', BPMF::Z); + ASSIGNKEY1(ktcm, vec, '\'', BPMF::C); + ASSIGNKEY1(ktcm, vec, 's', BPMF::S); + ASSIGNKEY1(ktcm, vec, 'e', BPMF::I); + ASSIGNKEY1(ktcm, vec, 'x', BPMF::U); + ASSIGNKEY1(ktcm, vec, 'u', BPMF::UE); + ASSIGNKEY1(ktcm, vec, 'a', BPMF::A); + ASSIGNKEY1(ktcm, vec, 'o', BPMF::O); + ASSIGNKEY1(ktcm, vec, 'r', BPMF::ER); + ASSIGNKEY1(ktcm, vec, 'w', BPMF::E); + ASSIGNKEY1(ktcm, vec, 'i', BPMF::AI); + ASSIGNKEY1(ktcm, vec, 'q', BPMF::EI); + ASSIGNKEY1(ktcm, vec, 'z', BPMF::AO); + ASSIGNKEY1(ktcm, vec, 'y', BPMF::OU); + ASSIGNKEY1(ktcm, vec, '8', BPMF::AN); + ASSIGNKEY1(ktcm, vec, '9', BPMF::EN); + ASSIGNKEY1(ktcm, vec, '0', BPMF::ANG); + ASSIGNKEY1(ktcm, vec, '-', BPMF::ENG); + ASSIGNKEY1(ktcm, vec, '=', BPMF::ERR); + ASSIGNKEY1(ktcm, vec, '2', BPMF::Tone2); + ASSIGNKEY1(ktcm, vec, '3', BPMF::Tone3); + ASSIGNKEY1(ktcm, vec, '4', BPMF::Tone4); + ASSIGNKEY1(ktcm, vec, '1', BPMF::Tone5); + + c_ETenLayout = new BopomofoKeyboardLayout(ktcm, "ETen"); + } + + return c_ETenLayout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::HsuLayout() -{ - if (!c_HsuLayout) { - vector vec; - BopomofoKeyToComponentMap ktcm; - - ASSIGNKEY1(ktcm, vec, 'b', BPMF::B); - ASSIGNKEY1(ktcm, vec, 'p', BPMF::P); - ASSIGNKEY2(ktcm, vec, 'm', BPMF::M, BPMF::AN); - ASSIGNKEY2(ktcm, vec, 'f', BPMF::F, BPMF::Tone3); - ASSIGNKEY2(ktcm, vec, 'd', BPMF::D, BPMF::Tone2); - ASSIGNKEY1(ktcm, vec, 't', BPMF::T); - ASSIGNKEY2(ktcm, vec, 'n', BPMF::N, BPMF::EN); - ASSIGNKEY3(ktcm, vec, 'l', BPMF::L, BPMF::ENG, BPMF::ERR); - ASSIGNKEY2(ktcm, vec, 'g', BPMF::G, BPMF::ER); - ASSIGNKEY2(ktcm, vec, 'k', BPMF::K, BPMF::ANG); - ASSIGNKEY2(ktcm, vec, 'h', BPMF::H, BPMF::O); - ASSIGNKEY3(ktcm, vec, 'j', BPMF::J, BPMF::ZH, BPMF::Tone4); - ASSIGNKEY2(ktcm, vec, 'v', BPMF::Q, BPMF::CH); - ASSIGNKEY2(ktcm, vec, 'c', BPMF::X, BPMF::SH); - ASSIGNKEY1(ktcm, vec, 'r', BPMF::R); - ASSIGNKEY1(ktcm, vec, 'z', BPMF::Z); - ASSIGNKEY2(ktcm, vec, 'a', BPMF::C, BPMF::EI); - ASSIGNKEY2(ktcm, vec, 's', BPMF::S, BPMF::Tone5); - ASSIGNKEY2(ktcm, vec, 'e', BPMF::I, BPMF::E); - ASSIGNKEY1(ktcm, vec, 'x', BPMF::U); - ASSIGNKEY1(ktcm, vec, 'u', BPMF::UE); - ASSIGNKEY1(ktcm, vec, 'y', BPMF::A); - ASSIGNKEY1(ktcm, vec, 'i', BPMF::AI); - ASSIGNKEY1(ktcm, vec, 'w', BPMF::AO); - ASSIGNKEY1(ktcm, vec, 'o', BPMF::OU); - - c_HsuLayout = new BopomofoKeyboardLayout(ktcm, "Hsu"); - } - - return c_HsuLayout; +const BopomofoKeyboardLayout* BopomofoKeyboardLayout::HsuLayout() { + if (!c_HsuLayout) { + vector vec; + BopomofoKeyToComponentMap ktcm; + + ASSIGNKEY1(ktcm, vec, 'b', BPMF::B); + ASSIGNKEY1(ktcm, vec, 'p', BPMF::P); + ASSIGNKEY2(ktcm, vec, 'm', BPMF::M, BPMF::AN); + ASSIGNKEY2(ktcm, vec, 'f', BPMF::F, BPMF::Tone3); + ASSIGNKEY2(ktcm, vec, 'd', BPMF::D, BPMF::Tone2); + ASSIGNKEY1(ktcm, vec, 't', BPMF::T); + ASSIGNKEY2(ktcm, vec, 'n', BPMF::N, BPMF::EN); + ASSIGNKEY3(ktcm, vec, 'l', BPMF::L, BPMF::ENG, BPMF::ERR); + ASSIGNKEY2(ktcm, vec, 'g', BPMF::G, BPMF::ER); + ASSIGNKEY2(ktcm, vec, 'k', BPMF::K, BPMF::ANG); + ASSIGNKEY2(ktcm, vec, 'h', BPMF::H, BPMF::O); + ASSIGNKEY3(ktcm, vec, 'j', BPMF::J, BPMF::ZH, BPMF::Tone4); + ASSIGNKEY2(ktcm, vec, 'v', BPMF::Q, BPMF::CH); + ASSIGNKEY2(ktcm, vec, 'c', BPMF::X, BPMF::SH); + ASSIGNKEY1(ktcm, vec, 'r', BPMF::R); + ASSIGNKEY1(ktcm, vec, 'z', BPMF::Z); + ASSIGNKEY2(ktcm, vec, 'a', BPMF::C, BPMF::EI); + ASSIGNKEY2(ktcm, vec, 's', BPMF::S, BPMF::Tone5); + ASSIGNKEY2(ktcm, vec, 'e', BPMF::I, BPMF::E); + ASSIGNKEY1(ktcm, vec, 'x', BPMF::U); + ASSIGNKEY1(ktcm, vec, 'u', BPMF::UE); + ASSIGNKEY1(ktcm, vec, 'y', BPMF::A); + ASSIGNKEY1(ktcm, vec, 'i', BPMF::AI); + ASSIGNKEY1(ktcm, vec, 'w', BPMF::AO); + ASSIGNKEY1(ktcm, vec, 'o', BPMF::OU); + + c_HsuLayout = new BopomofoKeyboardLayout(ktcm, "Hsu"); + } + + return c_HsuLayout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::ETen26Layout() -{ - if (!c_ETen26Layout) { - vector vec; - BopomofoKeyToComponentMap ktcm; - - ASSIGNKEY1(ktcm, vec, 'b', BPMF::B); - ASSIGNKEY2(ktcm, vec, 'p', BPMF::P, BPMF::OU); - ASSIGNKEY2(ktcm, vec, 'm', BPMF::M, BPMF::AN); - ASSIGNKEY2(ktcm, vec, 'f', BPMF::F, BPMF::Tone2); - ASSIGNKEY2(ktcm, vec, 'd', BPMF::D, BPMF::Tone5); - ASSIGNKEY2(ktcm, vec, 't', BPMF::T, BPMF::ANG); - ASSIGNKEY2(ktcm, vec, 'n', BPMF::N, BPMF::EN); - ASSIGNKEY2(ktcm, vec, 'l', BPMF::L, BPMF::ENG); - ASSIGNKEY2(ktcm, vec, 'v', BPMF::G, BPMF::Q); - ASSIGNKEY2(ktcm, vec, 'k', BPMF::K, BPMF::Tone4); - ASSIGNKEY2(ktcm, vec, 'h', BPMF::H, BPMF::ERR); - ASSIGNKEY2(ktcm, vec, 'g', BPMF::ZH, BPMF::J); - ASSIGNKEY2(ktcm, vec, 'c', BPMF::SH, BPMF::X); - ASSIGNKEY1(ktcm, vec, 'y', BPMF::CH); - ASSIGNKEY2(ktcm, vec, 'j', BPMF::R, BPMF::Tone3); - ASSIGNKEY2(ktcm, vec, 'q', BPMF::Z, BPMF::EI); - ASSIGNKEY2(ktcm, vec, 'w', BPMF::C, BPMF::E); - ASSIGNKEY1(ktcm, vec, 's', BPMF::S); - ASSIGNKEY1(ktcm, vec, 'e', BPMF::I); - ASSIGNKEY1(ktcm, vec, 'x', BPMF::U); - ASSIGNKEY1(ktcm, vec, 'u', BPMF::UE); - ASSIGNKEY1(ktcm, vec, 'a', BPMF::A); - ASSIGNKEY1(ktcm, vec, 'o', BPMF::O); - ASSIGNKEY1(ktcm, vec, 'r', BPMF::ER); - ASSIGNKEY1(ktcm, vec, 'i', BPMF::AI); - ASSIGNKEY1(ktcm, vec, 'z', BPMF::AO); - - c_ETen26Layout = new BopomofoKeyboardLayout(ktcm, "ETen26"); - } - - return c_ETen26Layout; +const BopomofoKeyboardLayout* BopomofoKeyboardLayout::ETen26Layout() { + if (!c_ETen26Layout) { + vector vec; + BopomofoKeyToComponentMap ktcm; + + ASSIGNKEY1(ktcm, vec, 'b', BPMF::B); + ASSIGNKEY2(ktcm, vec, 'p', BPMF::P, BPMF::OU); + ASSIGNKEY2(ktcm, vec, 'm', BPMF::M, BPMF::AN); + ASSIGNKEY2(ktcm, vec, 'f', BPMF::F, BPMF::Tone2); + ASSIGNKEY2(ktcm, vec, 'd', BPMF::D, BPMF::Tone5); + ASSIGNKEY2(ktcm, vec, 't', BPMF::T, BPMF::ANG); + ASSIGNKEY2(ktcm, vec, 'n', BPMF::N, BPMF::EN); + ASSIGNKEY2(ktcm, vec, 'l', BPMF::L, BPMF::ENG); + ASSIGNKEY2(ktcm, vec, 'v', BPMF::G, BPMF::Q); + ASSIGNKEY2(ktcm, vec, 'k', BPMF::K, BPMF::Tone4); + ASSIGNKEY2(ktcm, vec, 'h', BPMF::H, BPMF::ERR); + ASSIGNKEY2(ktcm, vec, 'g', BPMF::ZH, BPMF::J); + ASSIGNKEY2(ktcm, vec, 'c', BPMF::SH, BPMF::X); + ASSIGNKEY1(ktcm, vec, 'y', BPMF::CH); + ASSIGNKEY2(ktcm, vec, 'j', BPMF::R, BPMF::Tone3); + ASSIGNKEY2(ktcm, vec, 'q', BPMF::Z, BPMF::EI); + ASSIGNKEY2(ktcm, vec, 'w', BPMF::C, BPMF::E); + ASSIGNKEY1(ktcm, vec, 's', BPMF::S); + ASSIGNKEY1(ktcm, vec, 'e', BPMF::I); + ASSIGNKEY1(ktcm, vec, 'x', BPMF::U); + ASSIGNKEY1(ktcm, vec, 'u', BPMF::UE); + ASSIGNKEY1(ktcm, vec, 'a', BPMF::A); + ASSIGNKEY1(ktcm, vec, 'o', BPMF::O); + ASSIGNKEY1(ktcm, vec, 'r', BPMF::ER); + ASSIGNKEY1(ktcm, vec, 'i', BPMF::AI); + ASSIGNKEY1(ktcm, vec, 'z', BPMF::AO); + + c_ETen26Layout = new BopomofoKeyboardLayout(ktcm, "ETen26"); + } + + return c_ETen26Layout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::HanyuPinyinLayout() -{ - if (!c_HanyuPinyinLayout) { - BopomofoKeyToComponentMap ktcm; - c_HanyuPinyinLayout = new BopomofoKeyboardLayout(ktcm, "HanyuPinyin"); - } - return c_HanyuPinyinLayout; +const BopomofoKeyboardLayout* BopomofoKeyboardLayout::HanyuPinyinLayout() { + if (!c_HanyuPinyinLayout) { + BopomofoKeyToComponentMap ktcm; + c_HanyuPinyinLayout = new BopomofoKeyboardLayout(ktcm, "HanyuPinyin"); + } + return c_HanyuPinyinLayout; } - -} // namespace Mandarin -} // namespace Formosa +} // namespace Mandarin +} // namespace Formosa diff --git a/Source/Engine/Mandarin/Mandarin.h b/Source/Engine/Mandarin/Mandarin.h index 40df0133..2e68d162 100644 --- a/Source/Engine/Mandarin/Mandarin.h +++ b/Source/Engine/Mandarin/Mandarin.h @@ -29,576 +29,506 @@ #define Mandarin_h #include +#include #include #include -#include namespace Formosa { - namespace Mandarin { - using namespace std; - - class BopomofoSyllable { - public: - typedef unsigned int Component; - BopomofoSyllable(Component syllable = 0) - : m_syllable(syllable) - { - } - - BopomofoSyllable(const BopomofoSyllable& another) - : m_syllable(another.m_syllable) - { - } - - ~BopomofoSyllable() - { - } - - BopomofoSyllable& operator=(const BopomofoSyllable& another) - { - m_syllable = another.m_syllable; - return *this; - } - - // takes the ASCII-form, "v"-tolerant, TW-style Hanyu Pinyin (fong, pong, bong acceptable) - static const BopomofoSyllable FromHanyuPinyin(const string& str); - - // TO DO: Support accented vowels - const string HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const; - // const string HanyuPinyinString(bool includesTone, bool useVForUUmlaut, bool composeAccentedVowel) const; - - // PHT = Pai-hua-tsi - static const BopomofoSyllable FromPHT(const string& str); - const string PHTString(bool includesTone) const; - - static const BopomofoSyllable FromComposedString(const string& str); - const string composedString() const; - - void clear() - { - m_syllable = 0; - } - - bool isEmpty() const - { - return !m_syllable; - } - - bool hasConsonant() const - { - return !!(m_syllable & ConsonantMask); - } - - bool hasMiddleVowel() const - { - return !!(m_syllable & MiddleVowelMask); - } - bool hasVowel() const - { - return !!(m_syllable & VowelMask); - } - - bool hasToneMarker() const - { - return !!(m_syllable & ToneMarkerMask); - } - - Component consonantComponent() const - { - return m_syllable & ConsonantMask; - } - - Component middleVowelComponent() const - { - return m_syllable & MiddleVowelMask; - } - - Component vowelComponent() const - { - return m_syllable & VowelMask; - } - - Component toneMarkerComponent() const - { - return m_syllable & ToneMarkerMask; - } - - bool operator==(const BopomofoSyllable& another) const - { - return m_syllable == another.m_syllable; - } +namespace Mandarin { +using namespace std; - bool operator!=(const BopomofoSyllable& another) const - { - return m_syllable != another.m_syllable; - } - - bool isOverlappingWith(const BopomofoSyllable& another) const - { - #define IOW_SAND(mask) ((m_syllable & mask) && (another.m_syllable & mask)) - return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) || IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask); - #undef IOW_SAND - } - - // consonants J, Q, X all require the existence of vowel I or UE - bool belongsToJQXClass() const - { - Component consonant = m_syllable & ConsonantMask; - return (consonant == J || consonant == Q || consonant == X); - } - - // zi, ci, si, chi, chi, shi, ri - bool belongsToZCSRClass() const - { - Component consonant = m_syllable & ConsonantMask; - return (consonant >= ZH && consonant <= S); - } - - Component maskType() const - { - Component mask = 0; - mask |= (m_syllable & ConsonantMask) ? ConsonantMask : 0; - mask |= (m_syllable & MiddleVowelMask) ? MiddleVowelMask : 0; - mask |= (m_syllable & VowelMask) ? VowelMask : 0; - mask |= (m_syllable & ToneMarkerMask) ? ToneMarkerMask : 0; - return mask; - } - - const BopomofoSyllable operator+(const BopomofoSyllable& another) const - { - Component newSyllable = m_syllable; - #define OP_SOVER(mask) if (another.m_syllable & mask) newSyllable = (newSyllable & ~mask) | (another.m_syllable & mask) - OP_SOVER(ConsonantMask); - OP_SOVER(MiddleVowelMask); - OP_SOVER(VowelMask); - OP_SOVER(ToneMarkerMask); - #undef OP_SOVER - return BopomofoSyllable(newSyllable); - } - - BopomofoSyllable& operator+=(const BopomofoSyllable& another) - { - #define OPE_SOVER(mask) if (another.m_syllable & mask) m_syllable = (m_syllable & ~mask) | (another.m_syllable & mask) - OPE_SOVER(ConsonantMask); - OPE_SOVER(MiddleVowelMask); - OPE_SOVER(VowelMask); - OPE_SOVER(ToneMarkerMask); - #undef OPE_SOVER - return *this; - } +class BopomofoSyllable { + public: + typedef unsigned int Component; + BopomofoSyllable(Component syllable = 0) : m_syllable(syllable) {} - short absoluteOrder() const - { - // turn BPMF syllable into a 4*14*4*22 number - return (short)(m_syllable & ConsonantMask) + - (short)((m_syllable & MiddleVowelMask) >> 5) * 22 + - (short)((m_syllable & VowelMask) >> 7) * 22 * 4 + - (short)((m_syllable & ToneMarkerMask) >> 11) * 22 * 4 * 14; - } - - const string absoluteOrderString() const - { - // 5*14*4*22 = 6160, we use a 79*79 encoding to represent that - short order = absoluteOrder(); - char low = 48 + (char)(order % 79); - char high = 48 + (char)(order / 79); - string result(2, ' '); - result[0] = low; - result[1] = high; - return result; - } - - static BopomofoSyllable FromAbsoluteOrder(short order) - { - return BopomofoSyllable( - (order % 22) | - ((order / 22) % 4) << 5 | - ((order / (22 * 4)) % 14) << 7 | - ((order / (22 * 4 * 14)) % 5) << 11 - ); - } - - static BopomofoSyllable FromAbsoluteOrderString(const string& str) - { - if (str.length() != 2) - return BopomofoSyllable(); - - return FromAbsoluteOrder((short)(str[1] - 48) * 79 + (short)(str[0] - 48)); - } + BopomofoSyllable(const BopomofoSyllable& another) + : m_syllable(another.m_syllable) {} - friend ostream& operator<<(ostream& stream, const BopomofoSyllable& syllable); + ~BopomofoSyllable() {} - static const Component - ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants - MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels - VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels - ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00) - B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, - D = 0x0005, T = 0x0006, N = 0x0007, L = 0x0008, - G = 0x0009, K = 0x000a, H = 0x000b, - J = 0x000c, Q = 0x000d, X = 0x000e, - ZH = 0x000f, CH = 0x0010, SH = 0x0011, R = 0x0012, - Z = 0x0013, C = 0x0014, S = 0x0015, - I = 0x0020, U = 0x0040, UE = 0x0060, // ue = u umlaut (we use the German convention here as an ersatz to the /ju:/ sound) - A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, - AI = 0x0280, EI = 0x0300, AO = 0x0380, OU = 0x0400, - AN = 0x0480, EN = 0x0500, ANG = 0x0580, ENG = 0x0600, - ERR = 0x0680, - Tone1 = 0x0000, Tone2 = 0x0800, Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000; - - protected: - Component m_syllable; - }; - - inline ostream& operator<<(ostream& stream, const BopomofoSyllable& syllable) - { - stream << syllable.composedString(); - return stream; - } - - typedef BopomofoSyllable BPMF; - - typedef map > BopomofoKeyToComponentMap; - typedef map BopomofoComponentToKeyMap; - - class BopomofoKeyboardLayout { - public: - static void FinalizeLayouts(); - static const BopomofoKeyboardLayout* StandardLayout(); - static const BopomofoKeyboardLayout* ETenLayout(); - static const BopomofoKeyboardLayout* HsuLayout(); - static const BopomofoKeyboardLayout* ETen26Layout(); - static const BopomofoKeyboardLayout* IBMLayout(); - static const BopomofoKeyboardLayout* HanyuPinyinLayout(); - - BopomofoKeyboardLayout(const BopomofoKeyToComponentMap& ktcm, const string& name) - : m_keyToComponent(ktcm) - , m_name(name) - { - for (BopomofoKeyToComponentMap::const_iterator miter = m_keyToComponent.begin() ; miter != m_keyToComponent.end() ; ++miter) - for (vector::const_iterator viter = (*miter).second.begin() ; viter != (*miter).second.end() ; ++viter) - m_componentToKey[*viter] = (*miter).first; - } - - const string name() const - { - return m_name; - } - - char componentToKey(BPMF::Component component) const - { - BopomofoComponentToKeyMap::const_iterator iter = m_componentToKey.find(component); - return (iter == m_componentToKey.end()) ? 0 : (*iter).second; - } - - const vector keyToComponents(char key) const - { - BopomofoKeyToComponentMap::const_iterator iter = m_keyToComponent.find(key); - return (iter == m_keyToComponent.end()) ? vector() : (*iter).second; - } - - const string keySequenceFromSyllable(BPMF syllable) const - { - string sequence; - - BPMF::Component c; - char k; - #define STKS_COMBINE(component) if ((c = component)) { if ((k = componentToKey(c))) sequence += string(1, k); } - STKS_COMBINE(syllable.consonantComponent()); - STKS_COMBINE(syllable.middleVowelComponent()); - STKS_COMBINE(syllable.vowelComponent()); - STKS_COMBINE(syllable.toneMarkerComponent()); - #undef STKS_COMBINE - return sequence; - } - - const BPMF syllableFromKeySequence(const string& sequence) const - { - BPMF syllable; - - for (string::const_iterator iter = sequence.begin() ; iter != sequence.end() ; ++iter) - { - bool beforeSeqHasIorUE = sequenceContainsIorUE(sequence.begin(), iter); - bool aheadSeqHasIorUE = sequenceContainsIorUE(iter + 1, sequence.end()); - - vector components = keyToComponents(*iter); + BopomofoSyllable& operator=(const BopomofoSyllable& another) { + m_syllable = another.m_syllable; + return *this; + } - if (!components.size()) - continue; - - if (components.size() == 1) { - syllable += BPMF(components[0]); - continue; - } - - BPMF head = BPMF(components[0]); - BPMF follow = BPMF(components[1]); - BPMF ending = components.size() > 2 ? BPMF(components[2]) : follow; - - // apply the I/UE + E rule - if (head.vowelComponent() == BPMF::E && follow.vowelComponent() != BPMF::E) - { - syllable += beforeSeqHasIorUE ? head : follow; - continue; - } - - if (head.vowelComponent() != BPMF::E && follow.vowelComponent() == BPMF::E) - { - syllable += beforeSeqHasIorUE ? follow : head; - continue; - } - - // apply the J/Q/X + I/UE rule, only two components are allowed in the components vector here - if (head.belongsToJQXClass() && !follow.belongsToJQXClass()) { - if (!syllable.isEmpty()) { - if (ending != follow) - syllable += ending; - } - else { - syllable += aheadSeqHasIorUE ? head : follow; - } - - continue; - } + // takes the ASCII-form, "v"-tolerant, TW-style Hanyu Pinyin (fong, pong, bong + // acceptable) + static const BopomofoSyllable FromHanyuPinyin(const string& str); - if (!head.belongsToJQXClass() && follow.belongsToJQXClass()) { - if (!syllable.isEmpty()) { - if (ending != follow) - syllable += ending; - } - else { - syllable += aheadSeqHasIorUE ? follow : head; - } - - continue; - } + // TO DO: Support accented vowels + const string HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const; + // const string HanyuPinyinString(bool includesTone, bool useVForUUmlaut, bool + // composeAccentedVowel) const; - // the nasty issue of only one char in the buffer - if (iter == sequence.begin() && iter + 1 == sequence.end()) { - if (head.hasVowel() || follow.hasToneMarker() || head.belongsToZCSRClass()) - syllable += head; - else { - if (follow.hasVowel() || ending.hasToneMarker()) - syllable += follow; - else - syllable += ending; - } - - - continue; - } - - if (!(syllable.maskType() & head.maskType()) && !endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end())) { - syllable += head; - } - else { - if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) && head.belongsToZCSRClass() && syllable.isEmpty()) { - syllable += head; - } - else if (syllable.maskType() < follow.maskType()) { - syllable += follow; - } - else { - syllable += ending; - } - } - } - - // heuristics for Hsu keyboard layout - if (this == HsuLayout()) { - // fix the left out L to ERR when it has sound, and GI, GUE -> JI, JUE - if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() && !syllable.hasMiddleVowel()) { - syllable += BPMF(BPMF::ERR); - } - else if (syllable.consonantComponent() == BPMF::G && (syllable.middleVowelComponent() == BPMF::I || syllable.middleVowelComponent() == BPMF::UE)) { - syllable += BPMF(BPMF::J); - } - } - - - return syllable; - } - - - protected: - bool endAheadOrAheadHasToneMarkKey(string::const_iterator ahead, string::const_iterator end) const - { - if (ahead == end) - return true; - - char tone1 = componentToKey(BPMF::Tone1); - char tone2 = componentToKey(BPMF::Tone2); - char tone3 = componentToKey(BPMF::Tone3); - char tone4 = componentToKey(BPMF::Tone4); - char tone5 = componentToKey(BPMF::Tone5); - - if (tone1) - if (*ahead == tone1) return true; - - if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 || *ahead == tone5) - return true; - - return false; - } - - bool sequenceContainsIorUE(string::const_iterator start, string::const_iterator end) const - { - char iChar = componentToKey(BPMF::I); - char ueChar = componentToKey(BPMF::UE); - - for (; start != end; ++start) - if (*start == iChar || *start == ueChar) - return true; - return false; - } + // PHT = Pai-hua-tsi + static const BopomofoSyllable FromPHT(const string& str); + const string PHTString(bool includesTone) const; - string m_name; - BopomofoKeyToComponentMap m_keyToComponent; - BopomofoComponentToKeyMap m_componentToKey; + static const BopomofoSyllable FromComposedString(const string& str); + const string composedString() const; - static const BopomofoKeyboardLayout* c_StandardLayout; - static const BopomofoKeyboardLayout* c_ETenLayout; - static const BopomofoKeyboardLayout* c_HsuLayout; - static const BopomofoKeyboardLayout* c_ETen26Layout; - static const BopomofoKeyboardLayout* c_IBMLayout; + void clear() { m_syllable = 0; } - // this is essentially an empty layout, but we use pointer semantic to tell the differences--and pass on the responsibility to BopomofoReadingBuffer - static const BopomofoKeyboardLayout* c_HanyuPinyinLayout; - }; - - class BopomofoReadingBuffer { - public: - BopomofoReadingBuffer(const BopomofoKeyboardLayout* layout) - : m_layout(layout) - , m_pinyinMode(false) - { - if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) { - m_pinyinMode = true; - m_pinyinSequence = ""; - } - } - - void setKeyboardLayout(const BopomofoKeyboardLayout* layout) - { - m_layout = layout; + bool isEmpty() const { return !m_syllable; } - if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) { - m_pinyinMode = true; - m_pinyinSequence = ""; - } - } - - bool isValidKey(char k) const - { - if (!m_pinyinMode) { - return m_layout ? (m_layout->keyToComponents(k)).size() > 0 : false; - } - - char lk = tolower(k); - if (lk >= 'a' && lk <= 'z') { - // if a tone marker is already in place - if (m_pinyinSequence.length()) { - char lastc = m_pinyinSequence[m_pinyinSequence.length() - 1]; - if (lastc >= '2' && lastc <= '5') { - return false; - } - return true; - } - return true; - } - - if (m_pinyinSequence.length() && (lk >= '2' && lk <= '5')) { - return true; - } - - return false; - } - - bool combineKey(char k) - { - if (!isValidKey(k)) - return false; - - if (m_pinyinMode) { - m_pinyinSequence += string(1, tolower(k)); - m_syllable = BPMF::FromHanyuPinyin(m_pinyinSequence); - return true; - } + bool hasConsonant() const { return !!(m_syllable & ConsonantMask); } - string sequence = m_layout->keySequenceFromSyllable(m_syllable) + string(1, k); - m_syllable = m_layout->syllableFromKeySequence(sequence); - return true; - } - - void clear() - { - m_pinyinSequence.clear(); - m_syllable.clear(); - } - - void backspace() - { - if (!m_layout) - return; - - if (m_pinyinMode) { - if (m_pinyinSequence.length()) { - m_pinyinSequence = m_pinyinSequence.substr(0, m_pinyinSequence.length() - 1); - } - - m_syllable = BPMF::FromHanyuPinyin(m_pinyinSequence); - return; - } - - string sequence = m_layout->keySequenceFromSyllable(m_syllable); - if (sequence.length()) { - sequence = sequence.substr(0, sequence.length() - 1); - m_syllable = m_layout->syllableFromKeySequence(sequence); - } - } - - bool isEmpty() const - { - return m_syllable.isEmpty(); - } - - const string composedString() const - { - if (m_pinyinMode) { - return m_pinyinSequence; - } - - return m_syllable.composedString(); - } - - const BPMF syllable() const - { - return m_syllable; - } - - const string standardLayoutQueryString() const - { - return BopomofoKeyboardLayout::StandardLayout()->keySequenceFromSyllable(m_syllable); - } + bool hasMiddleVowel() const { return !!(m_syllable & MiddleVowelMask); } + bool hasVowel() const { return !!(m_syllable & VowelMask); } - const string absoluteOrderQueryString() const - { - return m_syllable.absoluteOrderString(); - } - - bool hasToneMarker() const - { - return m_syllable.hasToneMarker(); - } - - protected: - const BopomofoKeyboardLayout* m_layout; - BPMF m_syllable; - - bool m_pinyinMode; - string m_pinyinSequence; - }; - } + bool hasToneMarker() const { return !!(m_syllable & ToneMarkerMask); } + + Component consonantComponent() const { return m_syllable & ConsonantMask; } + + Component middleVowelComponent() const { + return m_syllable & MiddleVowelMask; + } + + Component vowelComponent() const { return m_syllable & VowelMask; } + + Component toneMarkerComponent() const { return m_syllable & ToneMarkerMask; } + + bool operator==(const BopomofoSyllable& another) const { + return m_syllable == another.m_syllable; + } + + bool operator!=(const BopomofoSyllable& another) const { + return m_syllable != another.m_syllable; + } + + bool isOverlappingWith(const BopomofoSyllable& another) const { +#define IOW_SAND(mask) ((m_syllable & mask) && (another.m_syllable & mask)) + return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) || + IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask); +#undef IOW_SAND + } + + // consonants J, Q, X all require the existence of vowel I or UE + bool belongsToJQXClass() const { + Component consonant = m_syllable & ConsonantMask; + return (consonant == J || consonant == Q || consonant == X); + } + + // zi, ci, si, chi, chi, shi, ri + bool belongsToZCSRClass() const { + Component consonant = m_syllable & ConsonantMask; + return (consonant >= ZH && consonant <= S); + } + + Component maskType() const { + Component mask = 0; + mask |= (m_syllable & ConsonantMask) ? ConsonantMask : 0; + mask |= (m_syllable & MiddleVowelMask) ? MiddleVowelMask : 0; + mask |= (m_syllable & VowelMask) ? VowelMask : 0; + mask |= (m_syllable & ToneMarkerMask) ? ToneMarkerMask : 0; + return mask; + } + + const BopomofoSyllable operator+(const BopomofoSyllable& another) const { + Component newSyllable = m_syllable; +#define OP_SOVER(mask) \ + if (another.m_syllable & mask) \ + newSyllable = (newSyllable & ~mask) | (another.m_syllable & mask) + OP_SOVER(ConsonantMask); + OP_SOVER(MiddleVowelMask); + OP_SOVER(VowelMask); + OP_SOVER(ToneMarkerMask); +#undef OP_SOVER + return BopomofoSyllable(newSyllable); + } + + BopomofoSyllable& operator+=(const BopomofoSyllable& another) { +#define OPE_SOVER(mask) \ + if (another.m_syllable & mask) \ + m_syllable = (m_syllable & ~mask) | (another.m_syllable & mask) + OPE_SOVER(ConsonantMask); + OPE_SOVER(MiddleVowelMask); + OPE_SOVER(VowelMask); + OPE_SOVER(ToneMarkerMask); +#undef OPE_SOVER + return *this; + } + + short absoluteOrder() const { + // turn BPMF syllable into a 4*14*4*22 number + return (short)(m_syllable & ConsonantMask) + + (short)((m_syllable & MiddleVowelMask) >> 5) * 22 + + (short)((m_syllable & VowelMask) >> 7) * 22 * 4 + + (short)((m_syllable & ToneMarkerMask) >> 11) * 22 * 4 * 14; + } + + const string absoluteOrderString() const { + // 5*14*4*22 = 6160, we use a 79*79 encoding to represent that + short order = absoluteOrder(); + char low = 48 + (char)(order % 79); + char high = 48 + (char)(order / 79); + string result(2, ' '); + result[0] = low; + result[1] = high; + return result; + } + + static BopomofoSyllable FromAbsoluteOrder(short order) { + return BopomofoSyllable((order % 22) | ((order / 22) % 4) << 5 | + ((order / (22 * 4)) % 14) << 7 | + ((order / (22 * 4 * 14)) % 5) << 11); + } + + static BopomofoSyllable FromAbsoluteOrderString(const string& str) { + if (str.length() != 2) return BopomofoSyllable(); + + return FromAbsoluteOrder((short)(str[1] - 48) * 79 + (short)(str[0] - 48)); + } + + friend ostream& operator<<(ostream& stream, const BopomofoSyllable& syllable); + + static const Component + ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants + MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels + VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels + ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00) + B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, D = 0x0005, T = 0x0006, + N = 0x0007, L = 0x0008, G = 0x0009, K = 0x000a, H = 0x000b, J = 0x000c, + Q = 0x000d, X = 0x000e, ZH = 0x000f, CH = 0x0010, SH = 0x0011, R = 0x0012, + Z = 0x0013, C = 0x0014, S = 0x0015, I = 0x0020, U = 0x0040, + UE = 0x0060, // ue = u umlaut (we use the German convention here as an + // ersatz to the /ju:/ sound) + A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, AI = 0x0280, EI = 0x0300, + AO = 0x0380, OU = 0x0400, AN = 0x0480, EN = 0x0500, ANG = 0x0580, + ENG = 0x0600, ERR = 0x0680, Tone1 = 0x0000, Tone2 = 0x0800, + Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000; + + protected: + Component m_syllable; +}; + +inline ostream& operator<<(ostream& stream, const BopomofoSyllable& syllable) { + stream << syllable.composedString(); + return stream; } +typedef BopomofoSyllable BPMF; + +typedef map > BopomofoKeyToComponentMap; +typedef map BopomofoComponentToKeyMap; + +class BopomofoKeyboardLayout { + public: + static void FinalizeLayouts(); + static const BopomofoKeyboardLayout* StandardLayout(); + static const BopomofoKeyboardLayout* ETenLayout(); + static const BopomofoKeyboardLayout* HsuLayout(); + static const BopomofoKeyboardLayout* ETen26Layout(); + static const BopomofoKeyboardLayout* IBMLayout(); + static const BopomofoKeyboardLayout* HanyuPinyinLayout(); + + BopomofoKeyboardLayout(const BopomofoKeyToComponentMap& ktcm, + const string& name) + : m_keyToComponent(ktcm), m_name(name) { + for (BopomofoKeyToComponentMap::const_iterator miter = + m_keyToComponent.begin(); + miter != m_keyToComponent.end(); ++miter) + for (vector::const_iterator viter = + (*miter).second.begin(); + viter != (*miter).second.end(); ++viter) + m_componentToKey[*viter] = (*miter).first; + } + + const string name() const { return m_name; } + + char componentToKey(BPMF::Component component) const { + BopomofoComponentToKeyMap::const_iterator iter = + m_componentToKey.find(component); + return (iter == m_componentToKey.end()) ? 0 : (*iter).second; + } + + const vector keyToComponents(char key) const { + BopomofoKeyToComponentMap::const_iterator iter = m_keyToComponent.find(key); + return (iter == m_keyToComponent.end()) ? vector() + : (*iter).second; + } + + const string keySequenceFromSyllable(BPMF syllable) const { + string sequence; + + BPMF::Component c; + char k; +#define STKS_COMBINE(component) \ + if ((c = component)) { \ + if ((k = componentToKey(c))) sequence += string(1, k); \ + } + STKS_COMBINE(syllable.consonantComponent()); + STKS_COMBINE(syllable.middleVowelComponent()); + STKS_COMBINE(syllable.vowelComponent()); + STKS_COMBINE(syllable.toneMarkerComponent()); +#undef STKS_COMBINE + return sequence; + } + + const BPMF syllableFromKeySequence(const string& sequence) const { + BPMF syllable; + + for (string::const_iterator iter = sequence.begin(); iter != sequence.end(); + ++iter) { + bool beforeSeqHasIorUE = sequenceContainsIorUE(sequence.begin(), iter); + bool aheadSeqHasIorUE = sequenceContainsIorUE(iter + 1, sequence.end()); + + vector components = keyToComponents(*iter); + + if (!components.size()) continue; + + if (components.size() == 1) { + syllable += BPMF(components[0]); + continue; + } + + BPMF head = BPMF(components[0]); + BPMF follow = BPMF(components[1]); + BPMF ending = components.size() > 2 ? BPMF(components[2]) : follow; + + // apply the I/UE + E rule + if (head.vowelComponent() == BPMF::E && + follow.vowelComponent() != BPMF::E) { + syllable += beforeSeqHasIorUE ? head : follow; + continue; + } + + if (head.vowelComponent() != BPMF::E && + follow.vowelComponent() == BPMF::E) { + syllable += beforeSeqHasIorUE ? follow : head; + continue; + } + + // apply the J/Q/X + I/UE rule, only two components are allowed in the + // components vector here + if (head.belongsToJQXClass() && !follow.belongsToJQXClass()) { + if (!syllable.isEmpty()) { + if (ending != follow) syllable += ending; + } else { + syllable += aheadSeqHasIorUE ? head : follow; + } + + continue; + } + + if (!head.belongsToJQXClass() && follow.belongsToJQXClass()) { + if (!syllable.isEmpty()) { + if (ending != follow) syllable += ending; + } else { + syllable += aheadSeqHasIorUE ? follow : head; + } + + continue; + } + + // the nasty issue of only one char in the buffer + if (iter == sequence.begin() && iter + 1 == sequence.end()) { + if (head.hasVowel() || follow.hasToneMarker() || + head.belongsToZCSRClass()) + syllable += head; + else { + if (follow.hasVowel() || ending.hasToneMarker()) + syllable += follow; + else + syllable += ending; + } + + continue; + } + + if (!(syllable.maskType() & head.maskType()) && + !endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end())) { + syllable += head; + } else { + if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) && + head.belongsToZCSRClass() && syllable.isEmpty()) { + syllable += head; + } else if (syllable.maskType() < follow.maskType()) { + syllable += follow; + } else { + syllable += ending; + } + } + } + + // heuristics for Hsu keyboard layout + if (this == HsuLayout()) { + // fix the left out L to ERR when it has sound, and GI, GUE -> JI, JUE + if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() && + !syllable.hasMiddleVowel()) { + syllable += BPMF(BPMF::ERR); + } else if (syllable.consonantComponent() == BPMF::G && + (syllable.middleVowelComponent() == BPMF::I || + syllable.middleVowelComponent() == BPMF::UE)) { + syllable += BPMF(BPMF::J); + } + } + + return syllable; + } + + protected: + bool endAheadOrAheadHasToneMarkKey(string::const_iterator ahead, + string::const_iterator end) const { + if (ahead == end) return true; + + char tone1 = componentToKey(BPMF::Tone1); + char tone2 = componentToKey(BPMF::Tone2); + char tone3 = componentToKey(BPMF::Tone3); + char tone4 = componentToKey(BPMF::Tone4); + char tone5 = componentToKey(BPMF::Tone5); + + if (tone1) + if (*ahead == tone1) return true; + + if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 || + *ahead == tone5) + return true; + + return false; + } + + bool sequenceContainsIorUE(string::const_iterator start, + string::const_iterator end) const { + char iChar = componentToKey(BPMF::I); + char ueChar = componentToKey(BPMF::UE); + + for (; start != end; ++start) + if (*start == iChar || *start == ueChar) return true; + return false; + } + + string m_name; + BopomofoKeyToComponentMap m_keyToComponent; + BopomofoComponentToKeyMap m_componentToKey; + + static const BopomofoKeyboardLayout* c_StandardLayout; + static const BopomofoKeyboardLayout* c_ETenLayout; + static const BopomofoKeyboardLayout* c_HsuLayout; + static const BopomofoKeyboardLayout* c_ETen26Layout; + static const BopomofoKeyboardLayout* c_IBMLayout; + + // this is essentially an empty layout, but we use pointer semantic to tell + // the differences--and pass on the responsibility to BopomofoReadingBuffer + static const BopomofoKeyboardLayout* c_HanyuPinyinLayout; +}; + +class BopomofoReadingBuffer { + public: + BopomofoReadingBuffer(const BopomofoKeyboardLayout* layout) + : m_layout(layout), m_pinyinMode(false) { + if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) { + m_pinyinMode = true; + m_pinyinSequence = ""; + } + } + + void setKeyboardLayout(const BopomofoKeyboardLayout* layout) { + m_layout = layout; + + if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) { + m_pinyinMode = true; + m_pinyinSequence = ""; + } + } + + bool isValidKey(char k) const { + if (!m_pinyinMode) { + return m_layout ? (m_layout->keyToComponents(k)).size() > 0 : false; + } + + char lk = tolower(k); + if (lk >= 'a' && lk <= 'z') { + // if a tone marker is already in place + if (m_pinyinSequence.length()) { + char lastc = m_pinyinSequence[m_pinyinSequence.length() - 1]; + if (lastc >= '2' && lastc <= '5') { + return false; + } + return true; + } + return true; + } + + if (m_pinyinSequence.length() && (lk >= '2' && lk <= '5')) { + return true; + } + + return false; + } + + bool combineKey(char k) { + if (!isValidKey(k)) return false; + + if (m_pinyinMode) { + m_pinyinSequence += string(1, tolower(k)); + m_syllable = BPMF::FromHanyuPinyin(m_pinyinSequence); + return true; + } + + string sequence = + m_layout->keySequenceFromSyllable(m_syllable) + string(1, k); + m_syllable = m_layout->syllableFromKeySequence(sequence); + return true; + } + + void clear() { + m_pinyinSequence.clear(); + m_syllable.clear(); + } + + void backspace() { + if (!m_layout) return; + + if (m_pinyinMode) { + if (m_pinyinSequence.length()) { + m_pinyinSequence = + m_pinyinSequence.substr(0, m_pinyinSequence.length() - 1); + } + + m_syllable = BPMF::FromHanyuPinyin(m_pinyinSequence); + return; + } + + string sequence = m_layout->keySequenceFromSyllable(m_syllable); + if (sequence.length()) { + sequence = sequence.substr(0, sequence.length() - 1); + m_syllable = m_layout->syllableFromKeySequence(sequence); + } + } + + bool isEmpty() const { return m_syllable.isEmpty(); } + + const string composedString() const { + if (m_pinyinMode) { + return m_pinyinSequence; + } + + return m_syllable.composedString(); + } + + const BPMF syllable() const { return m_syllable; } + + const string standardLayoutQueryString() const { + return BopomofoKeyboardLayout::StandardLayout()->keySequenceFromSyllable( + m_syllable); + } + + const string absoluteOrderQueryString() const { + return m_syllable.absoluteOrderString(); + } + + bool hasToneMarker() const { return m_syllable.hasToneMarker(); } + + protected: + const BopomofoKeyboardLayout* m_layout; + BPMF m_syllable; + + bool m_pinyinMode; + string m_pinyinSequence; +}; +} // namespace Mandarin +} // namespace Formosa + #endif