Merge pull request #283 from lukhnos/bopomofo-refactoring
Bopomofo refactoring
This commit is contained in:
commit
6122eeee15
|
@ -553,329 +553,6 @@ const std::string BPMF::HanyuPinyinString(bool includesTone,
|
|||
return consonant + middle + vowel + tone;
|
||||
}
|
||||
|
||||
const std::string BPMF::PHTString(bool includesTone) const {
|
||||
std::string consonant, middle, vowel, tone;
|
||||
|
||||
Component cc = consonantComponent(), mvc = middleVowelComponent(),
|
||||
vc = vowelComponent();
|
||||
bool hasNoMVCOrVC = !(mvc || vc);
|
||||
|
||||
switch (cc) {
|
||||
case B:
|
||||
consonant = "p";
|
||||
break;
|
||||
case P:
|
||||
consonant = "ph";
|
||||
break;
|
||||
case M:
|
||||
consonant = "m";
|
||||
break;
|
||||
case F:
|
||||
consonant = "f";
|
||||
break;
|
||||
case D:
|
||||
consonant = "t";
|
||||
break;
|
||||
case T:
|
||||
consonant = "th";
|
||||
break;
|
||||
case N:
|
||||
consonant = "n";
|
||||
break;
|
||||
case L:
|
||||
consonant = "l";
|
||||
break;
|
||||
case G:
|
||||
consonant = "k";
|
||||
break;
|
||||
case K:
|
||||
consonant = "kh";
|
||||
break;
|
||||
case H:
|
||||
consonant = "h";
|
||||
break;
|
||||
case J:
|
||||
consonant = "ch";
|
||||
if (mvc != I) middle = "i";
|
||||
break;
|
||||
case Q:
|
||||
consonant = "chh";
|
||||
if (mvc != I) middle = "i";
|
||||
break;
|
||||
case X:
|
||||
consonant = "hs";
|
||||
if (mvc != I) middle = "i";
|
||||
break;
|
||||
case ZH:
|
||||
consonant = "ch";
|
||||
if (hasNoMVCOrVC) middle = "i";
|
||||
break;
|
||||
case CH:
|
||||
consonant = "chh";
|
||||
if (hasNoMVCOrVC) middle = "i";
|
||||
break;
|
||||
case SH:
|
||||
consonant = "sh";
|
||||
if (hasNoMVCOrVC) middle = "i";
|
||||
break;
|
||||
case R:
|
||||
consonant = "r";
|
||||
if (hasNoMVCOrVC) middle = "i";
|
||||
break;
|
||||
case Z:
|
||||
consonant = "ts";
|
||||
if (hasNoMVCOrVC) middle = "i";
|
||||
break;
|
||||
case C:
|
||||
consonant = "tsh";
|
||||
if (hasNoMVCOrVC) middle = "i";
|
||||
break;
|
||||
case S:
|
||||
consonant = "s";
|
||||
if (hasNoMVCOrVC) middle = "i";
|
||||
break;
|
||||
}
|
||||
|
||||
switch (mvc) {
|
||||
case I:
|
||||
middle = "i";
|
||||
break;
|
||||
case U:
|
||||
middle = "u";
|
||||
break;
|
||||
case UE:
|
||||
middle = "uu";
|
||||
break;
|
||||
}
|
||||
|
||||
switch (vc) {
|
||||
case A:
|
||||
vowel = "a";
|
||||
break;
|
||||
case O:
|
||||
vowel = "o";
|
||||
break;
|
||||
case ER:
|
||||
vowel = "e";
|
||||
break;
|
||||
case E:
|
||||
vowel = (!(cc || mvc)) ? "eh" : "e";
|
||||
break;
|
||||
case AI:
|
||||
vowel = "ai";
|
||||
break;
|
||||
case EI:
|
||||
vowel = "ei";
|
||||
break;
|
||||
case AO:
|
||||
vowel = "ao";
|
||||
break;
|
||||
case OU:
|
||||
vowel = "ou";
|
||||
break;
|
||||
case AN:
|
||||
vowel = "an";
|
||||
break;
|
||||
case EN:
|
||||
vowel = "en";
|
||||
break;
|
||||
case ANG:
|
||||
vowel = "ang";
|
||||
break;
|
||||
case ENG:
|
||||
vowel = "eng";
|
||||
break;
|
||||
case ERR:
|
||||
vowel = "err";
|
||||
break;
|
||||
}
|
||||
|
||||
// ieng -> ing
|
||||
if (mvc == I && vc == ENG) {
|
||||
middle = "";
|
||||
vowel = "ing";
|
||||
}
|
||||
|
||||
// zh/ch + i without third component -> append h
|
||||
if (cc == BPMF::ZH || cc == BPMF::CH) {
|
||||
if (!mvc && !vc) {
|
||||
vowel = "h";
|
||||
}
|
||||
}
|
||||
|
||||
if (includesTone) {
|
||||
switch (toneMarkerComponent()) {
|
||||
case Tone2:
|
||||
tone = "2";
|
||||
break;
|
||||
case Tone3:
|
||||
tone = "3";
|
||||
break;
|
||||
case Tone4:
|
||||
tone = "4";
|
||||
break;
|
||||
case Tone5:
|
||||
tone = "5";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return consonant + middle + vowel + tone;
|
||||
}
|
||||
|
||||
// Parses a Pai-hua-tsi (PHT) romanized syllable into a BPMF value.
//
// str: the romanized syllable; matching is case-insensitive. An optional
//      trailing tone digit "1".."5" selects Tone1..Tone5.
//
// Returns a default-constructed BPMF when str is empty. Otherwise returns the
// syllable built from whichever initial, medial, final and tone spellings were
// recognized, in that order. Text left over after the tone step is not
// inspected, so unrecognized input is ignored rather than reported.
//
// NOTE(review): this relies on PinyinParseHelper::ConsumePrefix removing the
// prefix from its first argument when it matches - confirm against the helper.
const BPMF BPMF::FromPHT(const std::string& str) {
  if (str.empty()) {
    return BPMF();
  }

  // Work on a lower-cased copy. Each character is routed through
  // unsigned char because passing a negative char value (any non-ASCII byte
  // where char is signed) to tolower is undefined behaviour.
  std::string pht = str;
  std::transform(pht.begin(), pht.end(), pht.begin(), [](unsigned char ch) {
    return static_cast<char>(::tolower(ch));
  });

  // One romanized spelling and the component it stands for.
  struct Spelling {
    const char* text;
    BPMF::Component component;
  };

  auto consume = [&pht](const char* prefix) -> bool {
    return PinyinParseHelper::ConsumePrefix(pht, prefix);
  };

  BPMF::Component firstComponent = 0;
  BPMF::Component secondComponent = 0;
  BPMF::Component thirdComponent = 0;
  BPMF::Component toneComponent = 0;

  // Initials. Order is significant: the first matching entry wins, so every
  // spelling that is a prefix of another ("p"/"ph", "ch"/"chh", "s"/"sh",
  // "t"/"th"/"ts"/"tsh", "h"/"hs") is listed after the longer one.
  static const Spelling kInitials[] = {
      {"ph", BPMF::P},  {"p", BPMF::B},   {"m", BPMF::M},   {"f", BPMF::F},
      {"th", BPMF::T},  {"n", BPMF::N},   {"l", BPMF::L},   {"kh", BPMF::K},
      {"k", BPMF::G},   {"chh", BPMF::Q}, {"ch", BPMF::J},  {"hs", BPMF::X},
      {"sh", BPMF::SH}, {"r", BPMF::R},   {"tsh", BPMF::C}, {"ts", BPMF::Z},
      {"s", BPMF::S},   {"t", BPMF::D},   {"h", BPMF::H},
  };
  for (const Spelling& initial : kInitials) {
    if (consume(initial.text)) {
      firstComponent = initial.component;
      break;
    }
  }

  // Medials, again longest spelling first.
  if (consume("ing")) {
    // "ing" is the contracted form of I + ENG.
    secondComponent = BPMF::I;
    thirdComponent = BPMF::ENG;
  } else if (consume("ih")) {
    // "chih"/"chhih" spell the retroflex initials; "ih" itself contributes no
    // medial. After any other initial the "ih" is consumed and dropped.
    if (firstComponent == BPMF::J) {
      firstComponent = BPMF::ZH;
    } else if (firstComponent == BPMF::Q) {
      firstComponent = BPMF::CH;
    }
  } else if (consume("i")) {
    secondComponent = BPMF::I;
  } else if (consume("uu")) {
    secondComponent = BPMF::UE;
  } else if (consume("u")) {
    secondComponent = BPMF::U;
  }

  // Finals; a longer sequence takes precedence over its prefixes. Both "err"
  // and "er" map to ERR. A plain "e" is handled separately below because its
  // meaning depends on whether a medial was seen.
  static const Spelling kFinals[] = {
      {"ang", BPMF::ANG}, {"eng", BPMF::ENG}, {"err", BPMF::ERR},
      {"ai", BPMF::AI},   {"ei", BPMF::EI},   {"ao", BPMF::AO},
      {"ou", BPMF::OU},   {"an", BPMF::AN},   {"en", BPMF::EN},
      {"er", BPMF::ERR},  {"a", BPMF::A},     {"o", BPMF::O},
      {"eh", BPMF::E},
  };
  bool finalConsumed = false;
  for (const Spelling& final_ : kFinals) {
    if (consume(final_.text)) {
      thirdComponent = final_.component;
      finalConsumed = true;
      break;
    }
  }
  if (!finalConsumed && consume("e")) {
    if (secondComponent) {
      thirdComponent = BPMF::E;
    } else {
      thirdComponent = BPMF::ER;
    }
  }

  // "ch"/"chh" are ambiguous between J/Q and ZH/CH. They were parsed as J/Q;
  // switch to the retroflex counterpart when the medial is U or absent.
  // J/Q followed by I is deliberately left alone: "chi" is J + I, and the
  // retroflex reading is spelled "chih", which was handled above.
  BPMF::Component corresponding = 0;
  if (firstComponent == BPMF::J) {
    corresponding = BPMF::ZH;
  } else if (firstComponent == BPMF::Q) {
    corresponding = BPMF::CH;
  }
  if (corresponding) {
    if (secondComponent == BPMF::U || !secondComponent) {
      firstComponent = corresponding;
    }
  }

  // Fix a few impossible occurrences: these initials never combine with the
  // medial I, so an "i" parsed after them was only orthographic ("shi",
  // "tsi", ...) and is dropped.
  if (secondComponent == BPMF::I) {
    switch (firstComponent) {
      case BPMF::ZH:
      case BPMF::CH:
      case BPMF::SH:
      case BPMF::R:
      case BPMF::Z:
      case BPMF::C:
      case BPMF::S:
        secondComponent = 0;
    }
  }

  // Finally, the optional tone digit.
  static const Spelling kTones[] = {
      {"1", BPMF::Tone1}, {"2", BPMF::Tone2}, {"3", BPMF::Tone3},
      {"4", BPMF::Tone4}, {"5", BPMF::Tone5},
  };
  for (const Spelling& tone : kTones) {
    if (consume(tone.text)) {
      toneComponent = tone.component;
      break;
    }
  }

  return BPMF(firstComponent | secondComponent | thirdComponent |
              toneComponent);
}
|
||||
|
||||
const BPMF BPMF::FromComposedString(const std::string& str) {
|
||||
BPMF syllable;
|
||||
auto iter = str.begin();
|
||||
|
|
|
@ -50,12 +50,6 @@ class BopomofoSyllable {
|
|||
// TO DO: Support accented vowels
|
||||
const std::string HanyuPinyinString(bool includesTone,
|
||||
bool useVForUUmlaut) const;
|
||||
// const std::string HanyuPinyinString(bool includesTone, bool useVForUUmlaut,
|
||||
// bool composeAccentedVowel) const;
|
||||
|
||||
// PHT = Pai-hua-tsi
|
||||
static const BopomofoSyllable FromPHT(const std::string& str);
|
||||
const std::string PHTString(bool includesTone) const;
|
||||
|
||||
static const BopomofoSyllable FromComposedString(const std::string& str);
|
||||
const std::string composedString() const;
|
||||
|
@ -144,38 +138,6 @@ class BopomofoSyllable {
|
|||
return *this;
|
||||
}
|
||||
|
||||
uint16_t absoluteOrder() const {
|
||||
// turn BPMF syllable into a 4*14*4*22 number
|
||||
return (uint16_t)(syllable_ & ConsonantMask) +
|
||||
(uint16_t)((syllable_ & MiddleVowelMask) >> 5) * 22 +
|
||||
(uint16_t)((syllable_ & VowelMask) >> 7) * 22 * 4 +
|
||||
(uint16_t)((syllable_ & ToneMarkerMask) >> 11) * 22 * 4 * 14;
|
||||
}
|
||||
|
||||
const std::string absoluteOrderString() const {
|
||||
// 5*14*4*22 = 6160, we use a 79*79 encoding to represent that
|
||||
uint16_t order = absoluteOrder();
|
||||
char low = 48 + static_cast<char>(order % 79);
|
||||
char high = 48 + static_cast<char>(order / 79);
|
||||
std::string result(2, ' ');
|
||||
result[0] = low;
|
||||
result[1] = high;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Rebuilds a syllable from a mixed-radix order number (the layout produced by
// absoluteOrder()): successive digits in radix 22, 4, 14 and 5 are extracted
// and shifted back to bit positions 0, 5, 7 and 11 respectively.
static BopomofoSyllable FromAbsoluteOrder(uint16_t order) {
  const int consonantIndex = order % 22;
  const int middleVowelIndex = (order / 22) % 4;
  const int vowelIndex = (order / (22 * 4)) % 14;
  const int toneIndex = (order / (22 * 4 * 14)) % 5;

  return BopomofoSyllable(consonantIndex | (middleVowelIndex << 5) |
                          (vowelIndex << 7) | (toneIndex << 11));
}
|
||||
|
||||
// Decodes the two-character form produced by absoluteOrderString().
//
// str: exactly two chars, low base-79 digit first, each digit offset by 48.
//
// Returns a default-constructed syllable when str is not a valid encoding:
// wrong length, a character outside the 79-symbol digit range, or a decoded
// order beyond the 5*14*4*22 = 6160 values the encoder can produce.
// Previously such input yielded negative digits that wrapped through
// uint16_t and decoded to an arbitrary syllable.
static BopomofoSyllable FromAbsoluteOrderString(const std::string& str) {
  if (str.length() != 2) return BopomofoSyllable();

  const int lowDigit = str[0] - 48;
  const int highDigit = str[1] - 48;
  if (lowDigit < 0 || lowDigit >= 79 || highDigit < 0 || highDigit >= 79) {
    return BopomofoSyllable();
  }

  const int order = highDigit * 79 + lowDigit;
  if (order >= 22 * 4 * 14 * 5) return BopomofoSyllable();

  return FromAbsoluteOrder(static_cast<uint16_t>(order));
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& stream,
|
||||
const BopomofoSyllable& syllable);
|
||||
|
||||
|
@ -502,10 +464,6 @@ class BopomofoReadingBuffer {
|
|||
syllable_);
|
||||
}
|
||||
|
||||
const std::string absoluteOrderQueryString() const {
|
||||
return syllable_.absoluteOrderString();
|
||||
}
|
||||
|
||||
// Reports whether the buffered syllable has a tone marker, as determined by
// the syllable's own hasToneMarker().
bool hasToneMarker() const {
  const bool toneMarkerPresent = syllable_.hasToneMarker();
  return toneMarkerPresent;
}
|
||||
|
||||
protected:
|
||||
|
|
Loading…
Reference in New Issue