Merge pull request #283 from lukhnos/bopomofo-refactoring

Bopomofo refactoring
This commit is contained in:
Weizhong Yang a.k.a zonble 2022-02-14 02:04:18 +08:00 committed by GitHub
commit 6122eeee15
2 changed files with 0 additions and 365 deletions

View File

@ -553,329 +553,6 @@ const std::string BPMF::HanyuPinyinString(bool includesTone,
return consonant + middle + vowel + tone;
}
const std::string BPMF::PHTString(bool includesTone) const {
std::string consonant, middle, vowel, tone;
Component cc = consonantComponent(), mvc = middleVowelComponent(),
vc = vowelComponent();
bool hasNoMVCOrVC = !(mvc || vc);
switch (cc) {
case B:
consonant = "p";
break;
case P:
consonant = "ph";
break;
case M:
consonant = "m";
break;
case F:
consonant = "f";
break;
case D:
consonant = "t";
break;
case T:
consonant = "th";
break;
case N:
consonant = "n";
break;
case L:
consonant = "l";
break;
case G:
consonant = "k";
break;
case K:
consonant = "kh";
break;
case H:
consonant = "h";
break;
case J:
consonant = "ch";
if (mvc != I) middle = "i";
break;
case Q:
consonant = "chh";
if (mvc != I) middle = "i";
break;
case X:
consonant = "hs";
if (mvc != I) middle = "i";
break;
case ZH:
consonant = "ch";
if (hasNoMVCOrVC) middle = "i";
break;
case CH:
consonant = "chh";
if (hasNoMVCOrVC) middle = "i";
break;
case SH:
consonant = "sh";
if (hasNoMVCOrVC) middle = "i";
break;
case R:
consonant = "r";
if (hasNoMVCOrVC) middle = "i";
break;
case Z:
consonant = "ts";
if (hasNoMVCOrVC) middle = "i";
break;
case C:
consonant = "tsh";
if (hasNoMVCOrVC) middle = "i";
break;
case S:
consonant = "s";
if (hasNoMVCOrVC) middle = "i";
break;
}
switch (mvc) {
case I:
middle = "i";
break;
case U:
middle = "u";
break;
case UE:
middle = "uu";
break;
}
switch (vc) {
case A:
vowel = "a";
break;
case O:
vowel = "o";
break;
case ER:
vowel = "e";
break;
case E:
vowel = (!(cc || mvc)) ? "eh" : "e";
break;
case AI:
vowel = "ai";
break;
case EI:
vowel = "ei";
break;
case AO:
vowel = "ao";
break;
case OU:
vowel = "ou";
break;
case AN:
vowel = "an";
break;
case EN:
vowel = "en";
break;
case ANG:
vowel = "ang";
break;
case ENG:
vowel = "eng";
break;
case ERR:
vowel = "err";
break;
}
// ieng -> ing
if (mvc == I && vc == ENG) {
middle = "";
vowel = "ing";
}
// zh/ch + i without third component -> append h
if (cc == BPMF::ZH || cc == BPMF::CH) {
if (!mvc && !vc) {
vowel = "h";
}
}
if (includesTone) {
switch (toneMarkerComponent()) {
case Tone2:
tone = "2";
break;
case Tone3:
tone = "3";
break;
case Tone4:
tone = "4";
break;
case Tone5:
tone = "5";
break;
}
}
return consonant + middle + vowel + tone;
}
// Parses a PHT romanization string into a syllable.
//
// The input is matched case-insensitively and consumed left to right as
// initial, medial, final and tone digit ("1".."5"). Every part is optional;
// whatever remains unparsed after the tone digit is ignored. An empty string
// yields a default-constructed BPMF.
//
// NOTE(review): this relies on PinyinParseHelper::ConsumePrefix removing the
// prefix from its first argument when it reports a match -- confirm against
// the helper's definition.
const BPMF BPMF::FromPHT(const std::string& str) {
  if (str.empty()) {
    return BPMF();
  }

  // Lower-case a working copy. tolower() must be given a value representable
  // as unsigned char, so cast first: plain char may be signed and a non-ASCII
  // byte would otherwise be passed as a negative number.
  std::string pht = str;
  for (char& ch : pht) {
    ch = static_cast<char>(::tolower(static_cast<unsigned char>(ch)));
  }

  // A spelling and the component it stands for; a null prefix ends a table.
  struct PrefixRule {
    const char* prefix;
    BPMF::Component component;
  };

  // Tries the rules in order and stores the component of the first one whose
  // prefix could be consumed. Leaves `out` untouched when nothing matches.
  const auto consumeFirst = [&pht](const PrefixRule* rule,
                                   BPMF::Component& out) -> bool {
    for (; rule->prefix != nullptr; ++rule) {
      if (PinyinParseHelper::ConsumePrefix(pht, rule->prefix)) {
        out = rule->component;
        return true;
      }
    }
    return false;
  };

  // Order matters in every table: a spelling that is a prefix of another one
  // must come after it.
  static const PrefixRule kInitials[] = {
      {"ph", BPMF::P},  {"p", BPMF::B},   {"m", BPMF::M},   {"f", BPMF::F},
      {"th", BPMF::T},  {"n", BPMF::N},   {"l", BPMF::L},   {"kh", BPMF::K},
      {"k", BPMF::G},   {"chh", BPMF::Q}, {"ch", BPMF::J},  {"hs", BPMF::X},
      {"sh", BPMF::SH}, {"r", BPMF::R},   {"tsh", BPMF::C}, {"ts", BPMF::Z},
      {"s", BPMF::S},   {"t", BPMF::D},   {"h", BPMF::H},   {nullptr, 0}};
  static const PrefixRule kMedials[] = {
      {"i", BPMF::I}, {"uu", BPMF::UE}, {"u", BPMF::U}, {nullptr, 0}};
  static const PrefixRule kFinals[] = {
      {"ang", BPMF::ANG}, {"eng", BPMF::ENG}, {"err", BPMF::ERR},
      {"ai", BPMF::AI},   {"ei", BPMF::EI},   {"ao", BPMF::AO},
      {"ou", BPMF::OU},   {"an", BPMF::AN},   {"en", BPMF::EN},
      {"er", BPMF::ERR},  {"a", BPMF::A},     {"o", BPMF::O},
      {"eh", BPMF::E},    {nullptr, 0}};
  static const PrefixRule kTones[] = {
      {"1", BPMF::Tone1}, {"2", BPMF::Tone2}, {"3", BPMF::Tone3},
      {"4", BPMF::Tone4}, {"5", BPMF::Tone5}, {nullptr, 0}};

  BPMF::Component firstComponent = 0;
  BPMF::Component secondComponent = 0;
  BPMF::Component thirdComponent = 0;
  BPMF::Component toneComponent = 0;

  // consume the first part
  consumeFirst(kInitials, firstComponent);

  // consume the second part
  if (PinyinParseHelper::ConsumePrefix(pht, "ing")) {
    // "ing" is the contracted spelling of I + ENG.
    secondComponent = BPMF::I;
    thirdComponent = BPMF::ENG;
  } else if (PinyinParseHelper::ConsumePrefix(pht, "ih")) {
    // "chih" / "chhih" spell the bare ZH / CH initials.
    if (firstComponent == BPMF::J) {
      firstComponent = BPMF::ZH;
    } else if (firstComponent == BPMF::Q) {
      firstComponent = BPMF::CH;
    }
  } else {
    consumeFirst(kMedials, secondComponent);
  }

  // the vowels, longer sequence takes precedence
  if (!consumeFirst(kFinals, thirdComponent)) {
    if (PinyinParseHelper::ConsumePrefix(pht, "e")) {
      // A plain "e" is E after a medial and ER otherwise.
      if (secondComponent) {
        thirdComponent = BPMF::E;
      } else {
        thirdComponent = BPMF::ER;
      }
    }
  }

  // fix ch/chh mappings: "ch" and "chh" spell both J/Q and ZH/CH, and were
  // parsed as J/Q above.
  BPMF::Component corresponding = 0;
  if (firstComponent == BPMF::J) {
    corresponding = BPMF::ZH;
  } else if (firstComponent == BPMF::Q) {
    corresponding = BPMF::CH;
  }
  if (corresponding) {
    // With a lone I medial ("chi", "chhi") the initial deliberately stays J/Q.
    const bool loneIMedial = (secondComponent == BPMF::I && !thirdComponent);
    // Followed by U, or by no medial at all, it must be ZH/CH instead.
    if (!loneIMedial && (secondComponent == BPMF::U || !secondComponent)) {
      firstComponent = corresponding;
    }
  }

  if (secondComponent == BPMF::I) {
    // fixes a few impossible occurrences: these initials never take an I
    // medial, so the "i" was only part of the spelling of the bare initial.
    switch (firstComponent) {
      case BPMF::ZH:
      case BPMF::CH:
      case BPMF::SH:
      case BPMF::R:
      case BPMF::Z:
      case BPMF::C:
      case BPMF::S:
        secondComponent = 0;
        break;
      default:
        break;
    }
  }

  // at last, the tone digit
  consumeFirst(kTones, toneComponent);

  return BPMF(firstComponent | secondComponent | thirdComponent |
              toneComponent);
}
const BPMF BPMF::FromComposedString(const std::string& str) {
BPMF syllable;
auto iter = str.begin();

View File

@ -50,12 +50,6 @@ class BopomofoSyllable {
// TODO: Support accented vowels
// NOTE(review): presumably renders this syllable as Hanyu Pinyin; the exact
// effect of includesTone / useVForUUmlaut is inferred from the parameter names
// only -- confirm against the definition.
const std::string HanyuPinyinString(bool includesTone,
bool useVForUUmlaut) const;
// const std::string HanyuPinyinString(bool includesTone, bool useVForUUmlaut,
// bool composeAccentedVowel) const;
// PHT = Pai-hua-tsi
// Parses a PHT romanization into a syllable. Matching is case-insensitive, an
// empty string yields a default-constructed syllable, and a digit "1"-"5"
// following the spelling selects the tone.
static const BopomofoSyllable FromPHT(const std::string& str);
// Spells this syllable in PHT. When includesTone is true a digit "2"-"5" is
// appended for Tone2..Tone5; nothing is appended for any other tone value.
const std::string PHTString(bool includesTone) const;
// NOTE(review): the "composed string" format handled by the next two members
// is defined outside this excerpt -- see their definitions.
static const BopomofoSyllable FromComposedString(const std::string& str);
const std::string composedString() const;
@ -144,38 +138,6 @@ class BopomofoSyllable {
return *this;
}
uint16_t absoluteOrder() const {
// turn BPMF syllable into a 4*14*4*22 number
return (uint16_t)(syllable_ & ConsonantMask) +
(uint16_t)((syllable_ & MiddleVowelMask) >> 5) * 22 +
(uint16_t)((syllable_ & VowelMask) >> 7) * 22 * 4 +
(uint16_t)((syllable_ & ToneMarkerMask) >> 11) * 22 * 4 * 14;
}
// Encodes absoluteOrder() as two characters: the base-79 digits of the order,
// low digit first, each offset by 48 (ASCII '0').
// 5*14*4*22 = 6160 distinct orders fit, since 79*79 = 6241.
const std::string absoluteOrderString() const {
  const uint16_t order = absoluteOrder();

  std::string encoded;
  encoded += static_cast<char>(48 + static_cast<char>(order % 79));
  encoded += static_cast<char>(48 + static_cast<char>(order / 79));
  return encoded;
}
// Rebuilds a syllable from an ordinal by splitting it into mixed-radix digits
// (radices 22, 4, 14 and 5, least significant first) and shifting each digit
// to bit offsets 0, 5, 7 and 11 respectively.
static BopomofoSyllable FromAbsoluteOrder(uint16_t order) {
  const int consonantDigit = order % 22;
  const int middleVowelDigit = (order / 22) % 4;
  const int vowelDigit = (order / (22 * 4)) % 14;
  const int toneDigit = (order / (22 * 4 * 14)) % 5;

  return BopomofoSyllable(consonantDigit | (middleVowelDigit << 5) |
                          (vowelDigit << 7) | (toneDigit << 11));
}
// Decodes a two-character string in the form produced by
// absoluteOrderString(): str[0] is the low base-79 digit and str[1] the high
// one, each offset by 48.
//
// Returns a default-constructed syllable when the string is not exactly two
// characters long or when either character lies outside the 79-symbol
// alphabet. Without the range check a character below 48 would give a
// negative digit that wraps around when narrowed to uint16_t and decode to an
// unrelated syllable.
static BopomofoSyllable FromAbsoluteOrderString(const std::string& str) {
  if (str.length() != 2) return BopomofoSyllable();

  const int lowDigit = str[0] - 48;
  const int highDigit = str[1] - 48;
  if (lowDigit < 0 || lowDigit >= 79 || highDigit < 0 || highDigit >= 79) {
    return BopomofoSyllable();
  }

  return FromAbsoluteOrder(static_cast<uint16_t>(highDigit * 79 + lowDigit));
}
friend std::ostream& operator<<(std::ostream& stream,
const BopomofoSyllable& syllable);
@ -502,10 +464,6 @@ class BopomofoReadingBuffer {
syllable_);
}
// Returns the absolute-order string of the syllable currently held by this
// object, as produced by syllable_.absoluteOrderString().
const std::string absoluteOrderQueryString() const {
  std::string query = syllable_.absoluteOrderString();
  return query;
}
// Reports whether the syllable currently held by this object has a tone
// marker, as answered by syllable_.hasToneMarker().
bool hasToneMarker() const {
  const bool tonePresent = syllable_.hasToneMarker();
  return tonePresent;
}
protected: