From 3842dc5013521567bf2d9e48fe35ddae00cedbdf Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sun, 3 Apr 2022 14:05:04 +0800 Subject: [PATCH] (Obj)C(pp) // Clang-Format. --- .clang-format | 171 +++ Installer/Chronosphere.h | 31 +- Installer/Chronosphere.m | 40 +- Installer/Installer-Prefix.pch | 31 +- Installer/vChewingInstaller-Bridging-Header.h | 31 +- Source/3rdParty/OVMandarin/Mandarin.cpp | 1095 ++++++++------- Source/3rdParty/OVMandarin/Mandarin.h | 632 +++++---- Source/Headers/vChewing-Bridging-Header.h | 31 +- Source/Headers/vChewing-Prefix.pch | 31 +- Source/Modules/ControllerModules/KeyHandler.h | 42 +- .../Modules/ControllerModules/KeyHandler.mm | 1172 +++++++++++------ .../ControllerModules/KeyValueBlobReader.cpp | 97 +- .../ControllerModules/KeyValueBlobReader.h | 73 +- Source/Modules/FileHandlers/LMConsolidator.h | 43 +- Source/Modules/FileHandlers/LMConsolidator.mm | 153 ++- .../Modules/LangModelRelated/LMInstantiator.h | 80 +- .../LangModelRelated/LMInstantiator.mm | 143 +- .../SubLanguageModels/AssociatedPhrases.h | 55 +- .../SubLanguageModels/AssociatedPhrases.mm | 89 +- .../SubLanguageModels/CoreLM.h | 56 +- .../SubLanguageModels/CoreLM.mm | 178 ++- .../InstantiatedModels/CNSLM.h | 54 +- .../InstantiatedModels/SymbolLM.h | 54 +- .../InstantiatedModels/UserSymbolLM.h | 54 +- .../SubLanguageModels/ParselessLM.cpp | 96 +- .../SubLanguageModels/ParselessLM.h | 54 +- .../SubLanguageModels/ParselessPhraseDB.cpp | 111 +- .../SubLanguageModels/ParselessPhraseDB.h | 52 +- .../SubLanguageModels/PhraseReplacementMap.h | 46 +- .../SubLanguageModels/PhraseReplacementMap.mm | 80 +- .../SubLanguageModels/UserOverrideModel.cpp | 187 +-- .../SubLanguageModels/UserOverrideModel.h | 68 +- .../SubLanguageModels/UserPhrasesLM.h | 70 +- .../SubLanguageModels/UserPhrasesLM.mm | 108 +- .../Modules/LangModelRelated/mgrLangModel.h | 42 +- .../Modules/LangModelRelated/mgrLangModel.mm | 267 ++-- .../LangModelRelated/mgrLangModel_Privates.h | 43 +- .../LanguageParsers/Gramambular/Bigram.h | 104 +- .../Gramambular/BlockReadingBuilder.h | 215 +-- .../LanguageParsers/Gramambular/Gramambular.h | 31 +- .../LanguageParsers/Gramambular/Grid.h | 289 ++-- .../Gramambular/KeyValuePair.h | 65 +- .../Gramambular/LanguageModel.h | 55 +- .../LanguageParsers/Gramambular/Node.h | 240 ++-- .../LanguageParsers/Gramambular/NodeAnchor.h | 66 +- .../LanguageParsers/Gramambular/Span.h | 100 +- .../LanguageParsers/Gramambular/Unigram.h | 94 +- .../LanguageParsers/Gramambular/Walker.h | 106 +- 48 files changed, 4318 insertions(+), 2707 deletions(-) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..41173776 --- /dev/null +++ b/.clang-format @@ -0,0 +1,171 @@ +--- +Language: Cpp +# BasedOnStyle: Microsoft +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^' + Priority: 2 + SortPriority: 0 + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + - Regex: '.*' + Priority: 3 + SortPriority: 0 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: true +IndentGotoLabels: true +IndentPPDirectives: None +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +RawStringFormats: + - Language: Cpp + Delimiters: + - h + - m + - hh + - mm + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: Microsoft + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + CanonicalDelimiter: '' + BasedOnStyle: Microsoft +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: Auto +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseCRLF: false +UseTab: Always +... diff --git a/Installer/Chronosphere.h b/Installer/Chronosphere.h index b1021f22..4f2cc7a0 100644 --- a/Installer/Chronosphere.h +++ b/Installer/Chronosphere.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ @import Cocoa; diff --git a/Installer/Chronosphere.m b/Installer/Chronosphere.m index 7077b2b6..12ba786d 100644 --- a/Installer/Chronosphere.m +++ b/Installer/Chronosphere.m @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #import "Chronosphere.h" @@ -27,15 +34,18 @@ BOOL appBundleChronoshiftedToARandomizedPath(NSString *bundle) int entrySize = sizeof(struct statfs); struct statfs *bufs = (struct statfs *)calloc(entryCount, entrySize); entryCount = getfsstat(bufs, entryCount * entrySize, MNT_NOWAIT); - for (int i = 0; i < entryCount; i++) { - if (!strcmp(bundleAbsPath, bufs[i].f_mntfromname)) { + for (int i = 0; i < entryCount; i++) + { + if (!strcmp(bundleAbsPath, bufs[i].f_mntfromname)) + { free(bufs); // getfsstat() may return us a cached result, and so we need to get the stat of the mounted fs. // If statfs() returns an error, the mounted fs is already gone. struct statfs stat; int checkResult = statfs(bundleAbsPath, &stat); - if (checkResult != 0) { + if (checkResult != 0) + { // Meaning the app's bundle is not mounted, that is it's not translocated. // It also means that the app is not loaded. return NO; diff --git a/Installer/Installer-Prefix.pch b/Installer/Installer-Prefix.pch index 6426721c..1f3727bc 100644 --- a/Installer/Installer-Prefix.pch +++ b/Installer/Installer-Prefix.pch @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ // diff --git a/Installer/vChewingInstaller-Bridging-Header.h b/Installer/vChewingInstaller-Bridging-Header.h index 0e096ecb..a0c76f63 100644 --- a/Installer/vChewingInstaller-Bridging-Header.h +++ b/Installer/vChewingInstaller-Bridging-Header.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ // diff --git a/Source/3rdParty/OVMandarin/Mandarin.cpp b/Source/3rdParty/OVMandarin/Mandarin.cpp index 53cbd299..4d4cb9a9 100644 --- a/Source/3rdParty/OVMandarin/Mandarin.cpp +++ b/Source/3rdParty/OVMandarin/Mandarin.cpp @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "Mandarin.h" @@ -22,570 +29,744 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include #include -namespace Mandarin { +namespace Mandarin +{ -class PinyinParseHelper { -public: - static const bool ConsumePrefix(std::string& target, - const std::string& prefix) { - if (target.length() < prefix.length()) { +class PinyinParseHelper +{ + public: + static const bool ConsumePrefix(std::string &target, const std::string &prefix) + { + if (target.length() < prefix.length()) + { return false; } - - if (target.substr(0, prefix.length()) == prefix) { - target = - target.substr(prefix.length(), target.length() - prefix.length()); + + if (target.substr(0, prefix.length()) == prefix) + { + target = target.substr(prefix.length(), target.length() - prefix.length()); return true; } - + return false; } }; -class BopomofoCharacterMap { -public: - static const BopomofoCharacterMap& SharedInstance(); - +class BopomofoCharacterMap +{ + public: + static const BopomofoCharacterMap &SharedInstance(); + std::map componentToCharacter; std::map characterToComponent; - -protected: + + protected: BopomofoCharacterMap(); }; -const BPMF BPMF::FromHanyuPinyin(const std::string& str) { - if (!str.length()) { +const BPMF BPMF::FromHanyuPinyin(const std::string &str) +{ + if (!str.length()) + { return BPMF(); } - + std::string pinyin = str; transform(pinyin.begin(), pinyin.end(), pinyin.begin(), ::tolower); - + BPMF::Component firstComponent = 0; BPMF::Component secondComponent = 0; BPMF::Component thirdComponent = 0; BPMF::Component toneComponent = 0; - + // lookup consonants and consume them bool independentConsonant = false; - + // the y exceptions fist - if (0) { - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yuan")) { + if (0) + { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "yuan")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::AN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ying")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ying")) + { secondComponent = BPMF::I; thirdComponent = BPMF::ENG; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yung")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "yung")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::ENG; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yong")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "yong")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::ENG; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yue")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "yue")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::E; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yun")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "yun")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::EN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "you")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "you")) + { secondComponent = BPMF::I; thirdComponent = BPMF::OU; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "yu")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "yu")) + { secondComponent = BPMF::UE; } - + // try the first character char c = pinyin.length() ? pinyin[0] : 0; - switch (c) { - case 'b': - firstComponent = BPMF::B; - pinyin = pinyin.substr(1); - break; - case 'p': - firstComponent = BPMF::P; - pinyin = pinyin.substr(1); - break; - case 'm': - firstComponent = BPMF::M; - pinyin = pinyin.substr(1); - break; - case 'f': - firstComponent = BPMF::F; - pinyin = pinyin.substr(1); - break; - case 'd': - firstComponent = BPMF::D; - pinyin = pinyin.substr(1); - break; - case 't': - firstComponent = BPMF::T; - pinyin = pinyin.substr(1); - break; - case 'n': - firstComponent = BPMF::N; - pinyin = pinyin.substr(1); - break; - case 'l': - firstComponent = BPMF::L; - pinyin = pinyin.substr(1); - break; - case 'g': - firstComponent = BPMF::G; - pinyin = pinyin.substr(1); - break; - case 'k': - firstComponent = BPMF::K; - pinyin = pinyin.substr(1); - break; - case 'h': - firstComponent = BPMF::H; - pinyin = pinyin.substr(1); - break; - case 'j': - firstComponent = BPMF::J; - pinyin = pinyin.substr(1); - break; - case 'q': - firstComponent = BPMF::Q; - pinyin = pinyin.substr(1); - break; - case 'x': - firstComponent = BPMF::X; - pinyin = pinyin.substr(1); - break; - - // special hanlding for w and y - case 'w': - secondComponent = BPMF::U; - pinyin = pinyin.substr(1); - break; - case 'y': - if (!secondComponent && !thirdComponent) { - secondComponent = BPMF::I; - } - pinyin = pinyin.substr(1); - break; + switch (c) + { + case 'b': + firstComponent = BPMF::B; + pinyin = pinyin.substr(1); + break; + case 'p': + firstComponent = BPMF::P; + pinyin = pinyin.substr(1); + break; + case 'm': + firstComponent = BPMF::M; + pinyin = pinyin.substr(1); + break; + case 'f': + firstComponent = BPMF::F; + pinyin = pinyin.substr(1); + break; + case 'd': + firstComponent = BPMF::D; + pinyin = pinyin.substr(1); + break; + case 't': + firstComponent = BPMF::T; + pinyin = pinyin.substr(1); + break; + case 'n': + firstComponent = BPMF::N; + pinyin = pinyin.substr(1); + break; + case 'l': + firstComponent = BPMF::L; + pinyin = pinyin.substr(1); + break; + case 'g': + firstComponent = BPMF::G; + pinyin = pinyin.substr(1); + break; + case 'k': + firstComponent = BPMF::K; + pinyin = pinyin.substr(1); + break; + case 'h': + firstComponent = BPMF::H; + pinyin = pinyin.substr(1); + break; + case 'j': + firstComponent = BPMF::J; + pinyin = pinyin.substr(1); + break; + case 'q': + firstComponent = BPMF::Q; + pinyin = pinyin.substr(1); + break; + case 'x': + firstComponent = BPMF::X; + pinyin = pinyin.substr(1); + break; + + // special hanlding for w and y + case 'w': + secondComponent = BPMF::U; + pinyin = pinyin.substr(1); + break; + case 'y': + if (!secondComponent && !thirdComponent) + { + secondComponent = BPMF::I; + } + pinyin = pinyin.substr(1); + break; } - + // then we try ZH, CH, SH, R, Z, C, S (in that order) - if (0) { - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "zh")) { + if (0) + { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "zh")) + { firstComponent = BPMF::ZH; independentConsonant = true; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ch")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ch")) + { firstComponent = BPMF::CH; independentConsonant = true; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "sh")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "sh")) + { firstComponent = BPMF::SH; independentConsonant = true; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "r")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "r")) + { firstComponent = BPMF::R; independentConsonant = true; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "z")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "z")) + { firstComponent = BPMF::Z; independentConsonant = true; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "c")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "c")) + { firstComponent = BPMF::C; independentConsonant = true; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "s")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "s")) + { firstComponent = BPMF::S; independentConsonant = true; } - + // consume exceptions first: (ien, in), (iou, iu), (uen, un), (veng, iong), // (ven, vn), (uei, ui), ung but longer sequence takes precedence - if (0) { - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "veng")) { + if (0) + { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "veng")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::ENG; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "iong")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "iong")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::ENG; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ing")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ing")) + { secondComponent = BPMF::I; thirdComponent = BPMF::ENG; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ien")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ien")) + { secondComponent = BPMF::I; thirdComponent = BPMF::EN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "iou")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "iou")) + { secondComponent = BPMF::I; thirdComponent = BPMF::OU; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "uen")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "uen")) + { secondComponent = BPMF::U; thirdComponent = BPMF::EN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ven")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ven")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::EN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "uei")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "uei")) + { secondComponent = BPMF::U; thirdComponent = BPMF::EI; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ung")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ung")) + { // f exception - if (firstComponent == BPMF::F) { + if (firstComponent == BPMF::F) + { thirdComponent = BPMF::ENG; - } else { + } + else + { secondComponent = BPMF::U; thirdComponent = BPMF::ENG; } - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ong")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ong")) + { // f exception - if (firstComponent == BPMF::F) { + if (firstComponent == BPMF::F) + { thirdComponent = BPMF::ENG; - } else { + } + else + { secondComponent = BPMF::U; thirdComponent = BPMF::ENG; } - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "un")) { - if (firstComponent == BPMF::J || firstComponent == BPMF::Q || - firstComponent == BPMF::X) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "un")) + { + if (firstComponent == BPMF::J || firstComponent == BPMF::Q || firstComponent == BPMF::X) + { secondComponent = BPMF::UE; - } else { + } + else + { secondComponent = BPMF::U; } thirdComponent = BPMF::EN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "iu")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "iu")) + { secondComponent = BPMF::I; thirdComponent = BPMF::OU; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "in")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "in")) + { secondComponent = BPMF::I; thirdComponent = BPMF::EN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "vn")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "vn")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::EN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ui")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ui")) + { secondComponent = BPMF::U; thirdComponent = BPMF::EI; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ue")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ue")) + { secondComponent = BPMF::UE; thirdComponent = BPMF::E; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, u8"ü")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, u8"ü")) + { secondComponent = BPMF::UE; } - + // then consume the middle component... - if (0) { - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "i")) { + if (0) + { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "i")) + { secondComponent = independentConsonant ? 0 : BPMF::I; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "u")) { - if (firstComponent == BPMF::J || firstComponent == BPMF::Q || - firstComponent == BPMF::X) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "u")) + { + if (firstComponent == BPMF::J || firstComponent == BPMF::Q || firstComponent == BPMF::X) + { secondComponent = BPMF::UE; - } else { + } + else + { secondComponent = BPMF::U; } - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "v")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "v")) + { secondComponent = BPMF::UE; } - + // the vowels, longer sequence takes precedence - if (0) { - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ang")) { + if (0) + { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ang")) + { thirdComponent = BPMF::ANG; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "eng")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "eng")) + { thirdComponent = BPMF::ENG; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "err")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "err")) + { thirdComponent = BPMF::ERR; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ai")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ai")) + { thirdComponent = BPMF::AI; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ei")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ei")) + { thirdComponent = BPMF::EI; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ao")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ao")) + { thirdComponent = BPMF::AO; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "ou")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "ou")) + { thirdComponent = BPMF::OU; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "an")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "an")) + { thirdComponent = BPMF::AN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "en")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "en")) + { thirdComponent = BPMF::EN; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "er")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "er")) + { thirdComponent = BPMF::ERR; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "a")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "a")) + { thirdComponent = BPMF::A; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "o")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "o")) + { thirdComponent = BPMF::O; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "e")) { - if (secondComponent) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "e")) + { + if (secondComponent) + { thirdComponent = BPMF::E; - } else { + } + else + { thirdComponent = BPMF::ER; } } - + // at last! - if (0) { - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "1")) { + if (0) + { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "1")) + { toneComponent = BPMF::Tone1; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "2")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "2")) + { toneComponent = BPMF::Tone2; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "3")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "3")) + { toneComponent = BPMF::Tone3; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "4")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "4")) + { toneComponent = BPMF::Tone4; - } else if (PinyinParseHelper::ConsumePrefix(pinyin, "5")) { + } + else if (PinyinParseHelper::ConsumePrefix(pinyin, "5")) + { toneComponent = BPMF::Tone5; } - - return BPMF(firstComponent | secondComponent | thirdComponent | - toneComponent); + + return BPMF(firstComponent | secondComponent | thirdComponent | toneComponent); } -const std::string BPMF::HanyuPinyinString(bool includesTone, - bool useVForUUmlaut) const { +const std::string BPMF::HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const +{ std::string consonant, middle, vowel, tone; - - Component cc = consonantComponent(), mvc = middleVowelComponent(), - vc = vowelComponent(); + + Component cc = consonantComponent(), mvc = middleVowelComponent(), vc = vowelComponent(); bool hasNoMVCOrVC = !(mvc || vc); - - switch (cc) { - case B: - consonant = "b"; - break; - case P: - consonant = "p"; - break; - case M: - consonant = "m"; - break; - case F: - consonant = "f"; - break; - case D: - consonant = "d"; - break; - case T: - consonant = "t"; - break; - case N: - consonant = "n"; - break; - case L: - consonant = "l"; - break; - case G: - consonant = "g"; - break; - case K: - consonant = "k"; - break; - case H: - consonant = "h"; - break; - case J: - consonant = "j"; - if (hasNoMVCOrVC) middle = "i"; - break; - case Q: - consonant = "q"; - if (hasNoMVCOrVC) middle = "i"; - break; - case X: - consonant = "x"; - if (hasNoMVCOrVC) middle = "i"; - break; - case ZH: - consonant = "zh"; - if (hasNoMVCOrVC) middle = "i"; - break; - case CH: - consonant = "ch"; - if (hasNoMVCOrVC) middle = "i"; - break; - case SH: - consonant = "sh"; - if (hasNoMVCOrVC) middle = "i"; - break; - case R: - consonant = "r"; - if (hasNoMVCOrVC) middle = "i"; - break; - case Z: - consonant = "z"; - if (hasNoMVCOrVC) middle = "i"; - break; - case C: - consonant = "c"; - if (hasNoMVCOrVC) middle = "i"; - break; - case S: - consonant = "s"; - if (hasNoMVCOrVC) middle = "i"; - break; + + switch (cc) + { + case B: + consonant = "b"; + break; + case P: + consonant = "p"; + break; + case M: + consonant = "m"; + break; + case F: + consonant = "f"; + break; + case D: + consonant = "d"; + break; + case T: + consonant = "t"; + break; + case N: + consonant = "n"; + break; + case L: + consonant = "l"; + break; + case G: + consonant = "g"; + break; + case K: + consonant = "k"; + break; + case H: + consonant = "h"; + break; + case J: + consonant = "j"; + if (hasNoMVCOrVC) + middle = "i"; + break; + case Q: + consonant = "q"; + if (hasNoMVCOrVC) + middle = "i"; + break; + case X: + consonant = "x"; + if (hasNoMVCOrVC) + middle = "i"; + break; + case ZH: + consonant = "zh"; + if (hasNoMVCOrVC) + middle = "i"; + break; + case CH: + consonant = "ch"; + if (hasNoMVCOrVC) + middle = "i"; + break; + case SH: + consonant = "sh"; + if (hasNoMVCOrVC) + middle = "i"; + break; + case R: + consonant = "r"; + if (hasNoMVCOrVC) + middle = "i"; + break; + case Z: + consonant = "z"; + if (hasNoMVCOrVC) + middle = "i"; + break; + case C: + consonant = "c"; + if (hasNoMVCOrVC) + middle = "i"; + break; + case S: + consonant = "s"; + if (hasNoMVCOrVC) + middle = "i"; + break; } - - switch (mvc) { - case I: - if (!cc) { - consonant = "y"; - } - - middle = (!vc || cc) ? "i" : ""; - break; - case U: - if (!cc) { - consonant = "w"; - } - middle = (!vc || cc) ? "u" : ""; - break; - case UE: - if (!cc) { - consonant = "y"; - } - - if ((cc == N || cc == L) && vc != E) { - middle = useVForUUmlaut ? "v" : "ü"; - } else { - middle = "u"; - } - - break; + + switch (mvc) + { + case I: + if (!cc) + { + consonant = "y"; + } + + middle = (!vc || cc) ? "i" : ""; + break; + case U: + if (!cc) + { + consonant = "w"; + } + middle = (!vc || cc) ? "u" : ""; + break; + case UE: + if (!cc) + { + consonant = "y"; + } + + if ((cc == N || cc == L) && vc != E) + { + middle = useVForUUmlaut ? "v" : "ü"; + } + else + { + middle = "u"; + } + + break; } - - switch (vc) { - case A: - vowel = "a"; - break; - case O: - vowel = "o"; - break; - case ER: - vowel = "e"; - break; - case E: - vowel = "e"; - break; - case AI: - vowel = "ai"; - break; - case EI: - vowel = "ei"; - break; - case AO: - vowel = "ao"; - break; - case OU: - vowel = "ou"; - break; - case AN: - vowel = "an"; - break; - case EN: - vowel = "en"; - break; - case ANG: - vowel = "ang"; - break; - case ENG: - vowel = "eng"; - break; - case ERR: - vowel = "er"; - break; + + switch (vc) + { + case A: + vowel = "a"; + break; + case O: + vowel = "o"; + break; + case ER: + vowel = "e"; + break; + case E: + vowel = "e"; + break; + case AI: + vowel = "ai"; + break; + case EI: + vowel = "ei"; + break; + case AO: + vowel = "ao"; + break; + case OU: + vowel = "ou"; + break; + case AN: + vowel = "an"; + break; + case EN: + vowel = "en"; + break; + case ANG: + vowel = "ang"; + break; + case ENG: + vowel = "eng"; + break; + case ERR: + vowel = "er"; + break; } - + // combination rules - + // ueng -> ong, but note "weng" - if ((mvc == U || mvc == UE) && vc == ENG) { + if ((mvc == U || mvc == UE) && vc == ENG) + { middle = ""; - vowel = (cc == J || cc == Q || cc == X) - ? "iong" - : ((!cc && mvc == U) ? "eng" : "ong"); + vowel = (cc == J || cc == Q || cc == X) ? "iong" : ((!cc && mvc == U) ? "eng" : "ong"); } - + // ien, uen, üen -> in, un, ün ; but note "wen", "yin" and "yun" - if (mvc && vc == EN) { - if (cc) { + if (mvc && vc == EN) + { + if (cc) + { vowel = "n"; - } else { - if (mvc == UE) { - vowel = "n"; // yun - } else if (mvc == U) { - vowel = "en"; // wen - } else { - vowel = "in"; // yin + } + else + { + if (mvc == UE) + { + vowel = "n"; // yun + } + else if (mvc == U) + { + vowel = "en"; // wen + } + else + { + vowel = "in"; // yin } } } - + // iou -> iu - if (cc && mvc == I && vc == OU) { + if (cc && mvc == I && vc == OU) + { middle = ""; vowel = "iu"; } - + // ieng -> ing - if (mvc == I && vc == ENG) { + if (mvc == I && vc == ENG) + { middle = ""; vowel = "ing"; } - + // uei -> ui - if (cc && mvc == U && vc == EI) { + if (cc && mvc == U && vc == EI) + { middle = ""; vowel = "ui"; } - - if (includesTone) { - switch (toneMarkerComponent()) { - case Tone2: - tone = "2"; - break; - case Tone3: - tone = "3"; - break; - case Tone4: - tone = "4"; - break; - case Tone5: - tone = "5"; - break; + + if (includesTone) + { + switch (toneMarkerComponent()) + { + case Tone2: + tone = "2"; + break; + case Tone3: + tone = "3"; + break; + case Tone4: + tone = "4"; + break; + case Tone5: + tone = "5"; + break; } } - + return consonant + middle + vowel + tone; } -const BPMF BPMF::FromComposedString(const std::string& str) { +const BPMF BPMF::FromComposedString(const std::string &str) +{ BPMF syllable; auto iter = str.begin(); - while (iter != str.end()) { + while (iter != str.end()) + { // This is a naive implementation and we bail early at anything we don't // recognize. A sound implementation would require to either use a trie for // the Bopomofo character map or to split the input by codepoints. This // suffices for now. - + // Illegal. - if (!(*iter & 0x80)) { + if (!(*iter & 0x80)) + { break; } - + size_t utf8_length = -1; - + // These are the code points for the tone markers. - if ((*iter & (0x80 | 0x40)) && !(*iter & 0x20)) { + if ((*iter & (0x80 | 0x40)) && !(*iter & 0x20)) + { utf8_length = 2; - } else if ((*iter & (0x80 | 0x40 | 0x20)) && !(*iter & 0x10)) { + } + else if ((*iter & (0x80 | 0x40 | 0x20)) && !(*iter & 0x10)) + { utf8_length = 3; - } else { + } + else + { // Illegal. break; } - - if (iter + (utf8_length - 1) == str.end()) { + + if (iter + (utf8_length - 1) == str.end()) + { break; } - + std::string component = std::string(iter, iter + utf8_length); - const std::map& charToComp = - BopomofoCharacterMap::SharedInstance().characterToComponent; - std::map::const_iterator result = - charToComp.find(component); - if (result == charToComp.end()) { + const std::map &charToComp = + BopomofoCharacterMap::SharedInstance().characterToComponent; + std::map::const_iterator result = charToComp.find(component); + if (result == charToComp.end()) + { break; - } else { + } + else + { syllable += BPMF((*result).second); } iter += utf8_length; @@ -593,14 +774,12 @@ const BPMF BPMF::FromComposedString(const std::string& str) { return syllable; } -const std::string BPMF::composedString() const { +const std::string BPMF::composedString() const +{ std::string result; -#define APPEND(c) \ -if (syllable_ & c) \ -result += \ -(*BopomofoCharacterMap::SharedInstance().componentToCharacter.find( \ -syllable_ & c)) \ -.second +#define APPEND(c) \ + if (syllable_ & c) \ + result += (*BopomofoCharacterMap::SharedInstance().componentToCharacter.find(syllable_ & c)).second APPEND(ConsonantMask); APPEND(MiddleVowelMask); APPEND(VowelMask); @@ -609,14 +788,14 @@ syllable_ & c)) \ return result; } - - -const BopomofoCharacterMap& BopomofoCharacterMap::SharedInstance() { - static BopomofoCharacterMap* map = new BopomofoCharacterMap(); +const BopomofoCharacterMap &BopomofoCharacterMap::SharedInstance() +{ + static BopomofoCharacterMap *map = new BopomofoCharacterMap(); return *map; } -BopomofoCharacterMap::BopomofoCharacterMap() { +BopomofoCharacterMap::BopomofoCharacterMap() +{ characterToComponent[u8"ㄅ"] = BPMF::B; characterToComponent[u8"ㄆ"] = BPMF::P; characterToComponent[u8"ㄇ"] = BPMF::M; @@ -658,27 +837,24 @@ BopomofoCharacterMap::BopomofoCharacterMap() { characterToComponent[u8"ˇ"] = BPMF::Tone3; characterToComponent[u8"ˋ"] = BPMF::Tone4; characterToComponent[u8"˙"] = BPMF::Tone5; - - for (std::map::iterator iter = - characterToComponent.begin(); + + for (std::map::iterator iter = characterToComponent.begin(); iter != characterToComponent.end(); ++iter) componentToCharacter[(*iter).second] = (*iter).first; } -#define ASSIGNKEY1(m, vec, k, val) \ -m[k] = (vec.clear(), vec.push_back((BPMF::Component)val), vec) -#define ASSIGNKEY2(m, vec, k, val1, val2) \ -m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), \ -vec.push_back((BPMF::Component)val2), vec) -#define ASSIGNKEY3(m, vec, k, val1, val2, val3) \ -m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), \ -vec.push_back((BPMF::Component)val2), \ -vec.push_back((BPMF::Component)val3), vec) +#define ASSIGNKEY1(m, vec, k, val) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val), vec) +#define ASSIGNKEY2(m, vec, k, val1, val2) \ + m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), vec) +#define ASSIGNKEY3(m, vec, k, val1, val2, val3) \ + m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), \ + vec.push_back((BPMF::Component)val3), vec) -static BopomofoKeyboardLayout* CreateStandardLayout() { +static BopomofoKeyboardLayout *CreateStandardLayout() +{ std::vector vec; BopomofoKeyToComponentMap ktcm; - + ASSIGNKEY1(ktcm, vec, '1', BPMF::B); ASSIGNKEY1(ktcm, vec, 'q', BPMF::P); ASSIGNKEY1(ktcm, vec, 'a', BPMF::M); @@ -720,14 +896,15 @@ static BopomofoKeyboardLayout* CreateStandardLayout() { ASSIGNKEY1(ktcm, vec, '4', BPMF::Tone4); ASSIGNKEY1(ktcm, vec, '6', BPMF::Tone2); ASSIGNKEY1(ktcm, vec, '7', BPMF::Tone5); - + return new BopomofoKeyboardLayout(ktcm, "Standard"); } -static BopomofoKeyboardLayout* CreateIBMLayout() { +static BopomofoKeyboardLayout *CreateIBMLayout() +{ std::vector vec; BopomofoKeyToComponentMap ktcm; - + ASSIGNKEY1(ktcm, vec, '1', BPMF::B); ASSIGNKEY1(ktcm, vec, '2', BPMF::P); ASSIGNKEY1(ktcm, vec, '3', BPMF::M); @@ -769,14 +946,15 @@ static BopomofoKeyboardLayout* CreateIBMLayout() { ASSIGNKEY1(ktcm, vec, ',', BPMF::Tone3); ASSIGNKEY1(ktcm, vec, '.', BPMF::Tone4); ASSIGNKEY1(ktcm, vec, '/', BPMF::Tone5); - + return new BopomofoKeyboardLayout(ktcm, "IBM"); } -static BopomofoKeyboardLayout* CreateMiTACLayout() { +static BopomofoKeyboardLayout *CreateMiTACLayout() +{ std::vector vec; BopomofoKeyToComponentMap ktcm; - + ASSIGNKEY1(ktcm, vec, '1', BPMF::Tone5); ASSIGNKEY1(ktcm, vec, '2', BPMF::Tone2); ASSIGNKEY1(ktcm, vec, '3', BPMF::Tone3); @@ -818,14 +996,15 @@ static BopomofoKeyboardLayout* CreateMiTACLayout() { ASSIGNKEY1(ktcm, vec, 'x', BPMF::X); ASSIGNKEY1(ktcm, vec, 'y', BPMF::I); ASSIGNKEY1(ktcm, vec, 'z', BPMF::Z); - + return new BopomofoKeyboardLayout(ktcm, "MiTAC"); } -static BopomofoKeyboardLayout* CreateETenLayout() { +static BopomofoKeyboardLayout *CreateETenLayout() +{ std::vector vec; BopomofoKeyToComponentMap ktcm; - + ASSIGNKEY1(ktcm, vec, 'b', BPMF::B); ASSIGNKEY1(ktcm, vec, 'p', BPMF::P); ASSIGNKEY1(ktcm, vec, 'm', BPMF::M); @@ -867,14 +1046,15 @@ static BopomofoKeyboardLayout* CreateETenLayout() { ASSIGNKEY1(ktcm, vec, '3', BPMF::Tone3); ASSIGNKEY1(ktcm, vec, '4', BPMF::Tone4); ASSIGNKEY1(ktcm, vec, '1', BPMF::Tone5); - + return new BopomofoKeyboardLayout(ktcm, "ETen"); } -static BopomofoKeyboardLayout* CreateHsuLayout() { +static BopomofoKeyboardLayout *CreateHsuLayout() +{ std::vector vec; BopomofoKeyToComponentMap ktcm; - + ASSIGNKEY1(ktcm, vec, 'b', BPMF::B); ASSIGNKEY1(ktcm, vec, 'p', BPMF::P); ASSIGNKEY2(ktcm, vec, 'm', BPMF::M, BPMF::AN); @@ -900,14 +1080,15 @@ static BopomofoKeyboardLayout* CreateHsuLayout() { ASSIGNKEY1(ktcm, vec, 'i', BPMF::AI); ASSIGNKEY1(ktcm, vec, 'w', BPMF::AO); ASSIGNKEY1(ktcm, vec, 'o', BPMF::OU); - + return new BopomofoKeyboardLayout(ktcm, "Hsu"); } -static BopomofoKeyboardLayout* CreateETen26Layout() { +static BopomofoKeyboardLayout *CreateETen26Layout() +{ std::vector vec; BopomofoKeyToComponentMap ktcm; - + ASSIGNKEY1(ktcm, vec, 'b', BPMF::B); ASSIGNKEY2(ktcm, vec, 'p', BPMF::P, BPMF::OU); ASSIGNKEY2(ktcm, vec, 'm', BPMF::M, BPMF::AN); @@ -934,14 +1115,15 @@ static BopomofoKeyboardLayout* CreateETen26Layout() { ASSIGNKEY1(ktcm, vec, 'r', BPMF::ER); ASSIGNKEY1(ktcm, vec, 'i', BPMF::AI); ASSIGNKEY1(ktcm, vec, 'z', BPMF::AO); - + return new BopomofoKeyboardLayout(ktcm, "ETen26"); } -static BopomofoKeyboardLayout* CreateFakeSeigyouLayout() { +static BopomofoKeyboardLayout *CreateFakeSeigyouLayout() +{ std::vector vec; BopomofoKeyToComponentMap ktcm; - + ASSIGNKEY1(ktcm, vec, '1', BPMF::Tone5); ASSIGNKEY1(ktcm, vec, '2', BPMF::B); ASSIGNKEY1(ktcm, vec, '3', BPMF::D); @@ -983,55 +1165,62 @@ static BopomofoKeyboardLayout* CreateFakeSeigyouLayout() { ASSIGNKEY1(ktcm, vec, 'x', BPMF::F); ASSIGNKEY1(ktcm, vec, 'y', BPMF::CH); ASSIGNKEY1(ktcm, vec, 'z', BPMF::Tone4); - + return new BopomofoKeyboardLayout(ktcm, "FakeSeigyou"); } -static BopomofoKeyboardLayout* CreateHanyuPinyinLayout() { +static BopomofoKeyboardLayout *CreateHanyuPinyinLayout() +{ BopomofoKeyToComponentMap ktcm; return new BopomofoKeyboardLayout(ktcm, "HanyuPinyin"); } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::StandardLayout() { - static BopomofoKeyboardLayout* layout = CreateStandardLayout(); +const BopomofoKeyboardLayout *BopomofoKeyboardLayout::StandardLayout() +{ + static BopomofoKeyboardLayout *layout = CreateStandardLayout(); return layout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::ETenLayout() { - static BopomofoKeyboardLayout* layout = CreateETenLayout(); +const BopomofoKeyboardLayout *BopomofoKeyboardLayout::ETenLayout() +{ + static BopomofoKeyboardLayout *layout = CreateETenLayout(); return layout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::HsuLayout() { - static BopomofoKeyboardLayout* layout = CreateHsuLayout(); +const BopomofoKeyboardLayout *BopomofoKeyboardLayout::HsuLayout() +{ + static BopomofoKeyboardLayout *layout = CreateHsuLayout(); return layout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::ETen26Layout() { - static BopomofoKeyboardLayout* layout = CreateETen26Layout(); +const BopomofoKeyboardLayout *BopomofoKeyboardLayout::ETen26Layout() +{ + static BopomofoKeyboardLayout *layout = CreateETen26Layout(); return layout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::IBMLayout() { - static BopomofoKeyboardLayout* layout = CreateIBMLayout(); +const BopomofoKeyboardLayout *BopomofoKeyboardLayout::IBMLayout() +{ + static BopomofoKeyboardLayout *layout = CreateIBMLayout(); return layout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::MiTACLayout() { - static BopomofoKeyboardLayout* layout = CreateMiTACLayout(); +const BopomofoKeyboardLayout *BopomofoKeyboardLayout::MiTACLayout() +{ + static BopomofoKeyboardLayout *layout = CreateMiTACLayout(); return layout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::FakeSeigyouLayout() { - static BopomofoKeyboardLayout* layout = CreateFakeSeigyouLayout(); +const BopomofoKeyboardLayout *BopomofoKeyboardLayout::FakeSeigyouLayout() +{ + static BopomofoKeyboardLayout *layout = CreateFakeSeigyouLayout(); return layout; } -const BopomofoKeyboardLayout* BopomofoKeyboardLayout::HanyuPinyinLayout() { - static BopomofoKeyboardLayout* layout = CreateHanyuPinyinLayout(); +const BopomofoKeyboardLayout *BopomofoKeyboardLayout::HanyuPinyinLayout() +{ + static BopomofoKeyboardLayout *layout = CreateHanyuPinyinLayout(); return layout; } -} // namespace Mandarin - - +} // namespace Mandarin diff --git a/Source/3rdParty/OVMandarin/Mandarin.h b/Source/3rdParty/OVMandarin/Mandarin.h index e8a24708..61697c1c 100644 --- a/Source/3rdParty/OVMandarin/Mandarin.h +++ b/Source/3rdParty/OVMandarin/Mandarin.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef MANDARIN_H_ @@ -25,79 +32,115 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include #include -namespace Mandarin { +namespace Mandarin +{ -class BopomofoSyllable { -public: +class BopomofoSyllable +{ + public: typedef uint16_t Component; - - explicit BopomofoSyllable(Component syllable = 0) : syllable_(syllable) {} - - BopomofoSyllable(const BopomofoSyllable&) = default; - BopomofoSyllable(BopomofoSyllable&& another) = default; - BopomofoSyllable& operator=(const BopomofoSyllable&) = default; - BopomofoSyllable& operator=(BopomofoSyllable&&) = default; - + + explicit BopomofoSyllable(Component syllable = 0) : syllable_(syllable) + { + } + + BopomofoSyllable(const BopomofoSyllable &) = default; + BopomofoSyllable(BopomofoSyllable &&another) = default; + BopomofoSyllable &operator=(const BopomofoSyllable &) = default; + BopomofoSyllable &operator=(BopomofoSyllable &&) = default; + // takes the ASCII-form, "v"-tolerant, TW-style Hanyu Pinyin (fong, pong, bong // acceptable) - static const BopomofoSyllable FromHanyuPinyin(const std::string& str); - + static const BopomofoSyllable FromHanyuPinyin(const std::string &str); + // TO DO: Support accented vowels - const std::string HanyuPinyinString(bool includesTone, - bool useVForUUmlaut) const; - - static const BopomofoSyllable FromComposedString(const std::string& str); + const std::string HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const; + + static const BopomofoSyllable FromComposedString(const std::string &str); const std::string composedString() const; - - void clear() { syllable_ = 0; } - - bool isEmpty() const { return !syllable_; } - - bool hasConsonant() const { return !!(syllable_ & ConsonantMask); } - - bool hasMiddleVowel() const { return !!(syllable_ & MiddleVowelMask); } - bool hasVowel() const { return !!(syllable_ & VowelMask); } - - bool hasToneMarker() const { return !!(syllable_ & ToneMarkerMask); } - - Component consonantComponent() const { return syllable_ & ConsonantMask; } - - Component middleVowelComponent() const { + + void clear() + { + syllable_ = 0; + } + + bool isEmpty() const + { + return !syllable_; + } + + bool hasConsonant() const + { + return !!(syllable_ & ConsonantMask); + } + + bool hasMiddleVowel() const + { + return !!(syllable_ & MiddleVowelMask); + } + bool hasVowel() const + { + return !!(syllable_ & VowelMask); + } + + bool hasToneMarker() const + { + return !!(syllable_ & ToneMarkerMask); + } + + Component consonantComponent() const + { + return syllable_ & ConsonantMask; + } + + Component middleVowelComponent() const + { return syllable_ & MiddleVowelMask; } - - Component vowelComponent() const { return syllable_ & VowelMask; } - - Component toneMarkerComponent() const { return syllable_ & ToneMarkerMask; } - - bool operator==(const BopomofoSyllable& another) const { + + Component vowelComponent() const + { + return syllable_ & VowelMask; + } + + Component toneMarkerComponent() const + { + return syllable_ & ToneMarkerMask; + } + + bool operator==(const BopomofoSyllable &another) const + { return syllable_ == another.syllable_; } - - bool operator!=(const BopomofoSyllable& another) const { + + bool operator!=(const BopomofoSyllable &another) const + { return syllable_ != another.syllable_; } - - bool isOverlappingWith(const BopomofoSyllable& another) const { + + bool isOverlappingWith(const BopomofoSyllable &another) const + { #define IOW_SAND(mask) ((syllable_ & mask) && (another.syllable_ & mask)) - return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) || - IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask); + return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) || IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask); #undef IOW_SAND } - + // consonants J, Q, X all require the existence of vowel I or UE - bool belongsToJQXClass() const { + bool belongsToJQXClass() const + { Component consonant = syllable_ & ConsonantMask; return (consonant == J || consonant == Q || consonant == X); } - + // zi, ci, si, chi, chi, shi, ri - bool belongsToZCSRClass() const { + bool belongsToZCSRClass() const + { Component consonant = syllable_ & ConsonantMask; return (consonant >= ZH && consonant <= S); } - - Component maskType() const { + + Component maskType() const + { Component mask = 0; mask |= (syllable_ & ConsonantMask) ? ConsonantMask : 0; mask |= (syllable_ & MiddleVowelMask) ? MiddleVowelMask : 0; @@ -105,13 +148,15 @@ public: mask |= (syllable_ & ToneMarkerMask) ? ToneMarkerMask : 0; return mask; } - - const BopomofoSyllable operator+(const BopomofoSyllable& another) const { + + const BopomofoSyllable operator+(const BopomofoSyllable &another) const + { Component newSyllable = syllable_; -#define OP_SOVER(mask) \ -if (another.syllable_ & mask) { \ -newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \ -} +#define OP_SOVER(mask) \ + if (another.syllable_ & mask) \ + { \ + newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \ + } OP_SOVER(ConsonantMask); OP_SOVER(MiddleVowelMask); OP_SOVER(VowelMask); @@ -119,12 +164,14 @@ newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \ #undef OP_SOVER return BopomofoSyllable(newSyllable); } - - BopomofoSyllable& operator+=(const BopomofoSyllable& another) { -#define OPE_SOVER(mask) \ -if (another.syllable_ & mask) { \ -syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \ -} + + BopomofoSyllable &operator+=(const BopomofoSyllable &another) + { +#define OPE_SOVER(mask) \ + if (another.syllable_ & mask) \ + { \ + syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \ + } OPE_SOVER(ConsonantMask); OPE_SOVER(MiddleVowelMask); OPE_SOVER(VowelMask); @@ -132,87 +179,88 @@ syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \ #undef OPE_SOVER return *this; } - - friend std::ostream& operator<<(std::ostream& stream, - const BopomofoSyllable& syllable); - - static constexpr Component - ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants - MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels - VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels - ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00) - B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, D = 0x0005, T = 0x0006, - N = 0x0007, L = 0x0008, G = 0x0009, K = 0x000a, H = 0x000b, J = 0x000c, - Q = 0x000d, X = 0x000e, ZH = 0x000f, CH = 0x0010, SH = 0x0011, R = 0x0012, - Z = 0x0013, C = 0x0014, S = 0x0015, I = 0x0020, U = 0x0040, - UE = 0x0060, // ue = u umlaut (we use the German convention here as an - // ersatz to the /ju:/ sound) - A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, AI = 0x0280, EI = 0x0300, - AO = 0x0380, OU = 0x0400, AN = 0x0480, EN = 0x0500, ANG = 0x0580, - ENG = 0x0600, ERR = 0x0680, Tone1 = 0x0000, Tone2 = 0x0800, - Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000; - -protected: + + friend std::ostream &operator<<(std::ostream &stream, const BopomofoSyllable &syllable); + + static constexpr Component ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants + MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels + VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels + ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00) + B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, D = 0x0005, T = 0x0006, N = 0x0007, L = 0x0008, G = 0x0009, + K = 0x000a, H = 0x000b, J = 0x000c, Q = 0x000d, X = 0x000e, ZH = 0x000f, CH = 0x0010, + SH = 0x0011, R = 0x0012, Z = 0x0013, C = 0x0014, S = 0x0015, I = 0x0020, U = 0x0040, + UE = 0x0060, // ue = u umlaut (we use the German convention here as an + // ersatz to the /ju:/ sound) + A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, AI = 0x0280, EI = 0x0300, AO = 0x0380, OU = 0x0400, + AN = 0x0480, EN = 0x0500, ANG = 0x0580, ENG = 0x0600, ERR = 0x0680, Tone1 = 0x0000, + Tone2 = 0x0800, Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000; + + protected: Component syllable_; }; -inline std::ostream& operator<<(std::ostream& stream, - const BopomofoSyllable& syllable) { +inline std::ostream &operator<<(std::ostream &stream, const BopomofoSyllable &syllable) +{ stream << syllable.composedString(); return stream; } typedef BopomofoSyllable BPMF; -typedef std::map > BopomofoKeyToComponentMap; +typedef std::map> BopomofoKeyToComponentMap; typedef std::map BopomofoComponentToKeyMap; -class BopomofoKeyboardLayout { -public: - static const BopomofoKeyboardLayout* StandardLayout(); - static const BopomofoKeyboardLayout* ETenLayout(); - static const BopomofoKeyboardLayout* HsuLayout(); - static const BopomofoKeyboardLayout* ETen26Layout(); - static const BopomofoKeyboardLayout* IBMLayout(); - static const BopomofoKeyboardLayout* MiTACLayout(); - static const BopomofoKeyboardLayout* FakeSeigyouLayout(); - static const BopomofoKeyboardLayout* HanyuPinyinLayout(); - - BopomofoKeyboardLayout(const BopomofoKeyToComponentMap& ktcm, - const std::string& name) - : m_keyToComponent(ktcm), m_name(name) { - for (BopomofoKeyToComponentMap::const_iterator miter = - m_keyToComponent.begin(); +class BopomofoKeyboardLayout +{ + public: + static const BopomofoKeyboardLayout *StandardLayout(); + static const BopomofoKeyboardLayout *ETenLayout(); + static const BopomofoKeyboardLayout *HsuLayout(); + static const BopomofoKeyboardLayout *ETen26Layout(); + static const BopomofoKeyboardLayout *IBMLayout(); + static const BopomofoKeyboardLayout *MiTACLayout(); + static const BopomofoKeyboardLayout *FakeSeigyouLayout(); + static const BopomofoKeyboardLayout *HanyuPinyinLayout(); + + BopomofoKeyboardLayout(const BopomofoKeyToComponentMap &ktcm, const std::string &name) + : m_keyToComponent(ktcm), m_name(name) + { + for (BopomofoKeyToComponentMap::const_iterator miter = m_keyToComponent.begin(); miter != m_keyToComponent.end(); ++miter) - for (std::vector::const_iterator viter = - (*miter).second.begin(); + for (std::vector::const_iterator viter = (*miter).second.begin(); viter != (*miter).second.end(); ++viter) m_componentToKey[*viter] = (*miter).first; } - - const std::string name() const { return m_name; } - - char componentToKey(BPMF::Component component) const { - BopomofoComponentToKeyMap::const_iterator iter = - m_componentToKey.find(component); + + const std::string name() const + { + return m_name; + } + + char componentToKey(BPMF::Component component) const + { + BopomofoComponentToKeyMap::const_iterator iter = m_componentToKey.find(component); return (iter == m_componentToKey.end()) ? 0 : (*iter).second; } - - const std::vector keyToComponents(char key) const { + + const std::vector keyToComponents(char key) const + { BopomofoKeyToComponentMap::const_iterator iter = m_keyToComponent.find(key); - return (iter == m_keyToComponent.end()) ? std::vector() - : (*iter).second; + return (iter == m_keyToComponent.end()) ? std::vector() : (*iter).second; } - - const std::string keySequenceFromSyllable(BPMF syllable) const { + + const std::string keySequenceFromSyllable(BPMF syllable) const + { std::string sequence; - + BPMF::Component c; char k; -#define STKS_COMBINE(component) \ -if ((c = component)) { \ -if ((k = componentToKey(c))) sequence += std::string(1, k); \ -} +#define STKS_COMBINE(component) \ + if ((c = component)) \ + { \ + if ((k = componentToKey(c))) \ + sequence += std::string(1, k); \ + } STKS_COMBINE(syllable.consonantComponent()); STKS_COMBINE(syllable.middleVowelComponent()); STKS_COMBINE(syllable.vowelComponent()); @@ -220,256 +268,314 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \ #undef STKS_COMBINE return sequence; } - - const BPMF syllableFromKeySequence(const std::string& sequence) const { + + const BPMF syllableFromKeySequence(const std::string &sequence) const + { BPMF syllable; - - for (std::string::const_iterator iter = sequence.begin(); - iter != sequence.end(); ++iter) { + + for (std::string::const_iterator iter = sequence.begin(); iter != sequence.end(); ++iter) + { bool beforeSeqHasIorUE = sequenceContainsIorUE(sequence.begin(), iter); bool aheadSeqHasIorUE = sequenceContainsIorUE(iter + 1, sequence.end()); - + std::vector components = keyToComponents(*iter); - - if (!components.size()) continue; - - if (components.size() == 1) { + + if (!components.size()) + continue; + + if (components.size() == 1) + { syllable += BPMF(components[0]); continue; } - + BPMF head = BPMF(components[0]); BPMF follow = BPMF(components[1]); BPMF ending = components.size() > 2 ? BPMF(components[2]) : follow; - + // apply the I/UE + E rule - if (head.vowelComponent() == BPMF::E && - follow.vowelComponent() != BPMF::E) { + if (head.vowelComponent() == BPMF::E && follow.vowelComponent() != BPMF::E) + { syllable += beforeSeqHasIorUE ? head : follow; continue; } - - if (head.vowelComponent() != BPMF::E && - follow.vowelComponent() == BPMF::E) { + + if (head.vowelComponent() != BPMF::E && follow.vowelComponent() == BPMF::E) + { syllable += beforeSeqHasIorUE ? follow : head; continue; } - + // apply the J/Q/X + I/UE rule, only two components are allowed in the // components vector here - if (head.belongsToJQXClass() && !follow.belongsToJQXClass()) { - if (!syllable.isEmpty()) { - if (ending != follow) syllable += ending; - } else { + if (head.belongsToJQXClass() && !follow.belongsToJQXClass()) + { + if (!syllable.isEmpty()) + { + if (ending != follow) + syllable += ending; + } + else + { syllable += aheadSeqHasIorUE ? head : follow; } - + continue; } - - if (!head.belongsToJQXClass() && follow.belongsToJQXClass()) { - if (!syllable.isEmpty()) { - if (ending != follow) syllable += ending; - } else { + + if (!head.belongsToJQXClass() && follow.belongsToJQXClass()) + { + if (!syllable.isEmpty()) + { + if (ending != follow) + syllable += ending; + } + else + { syllable += aheadSeqHasIorUE ? follow : head; } - + continue; } - + // the nasty issue of only one char in the buffer - if (iter == sequence.begin() && iter + 1 == sequence.end()) { - if (head.hasVowel() || follow.hasToneMarker() || - head.belongsToZCSRClass()) { + if (iter == sequence.begin() && iter + 1 == sequence.end()) + { + if (head.hasVowel() || follow.hasToneMarker() || head.belongsToZCSRClass()) + { syllable += head; - } else { - if (follow.hasVowel() || ending.hasToneMarker()) { + } + else + { + if (follow.hasVowel() || ending.hasToneMarker()) + { syllable += follow; - } else { + } + else + { syllable += ending; } } - + continue; } - - if (!(syllable.maskType() & head.maskType()) && - !endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end())) { + + if (!(syllable.maskType() & head.maskType()) && !endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end())) + { syllable += head; - } else { - if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) && - head.belongsToZCSRClass() && syllable.isEmpty()) { + } + else + { + if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) && head.belongsToZCSRClass() && + syllable.isEmpty()) + { syllable += head; - } else if (syllable.maskType() < follow.maskType()) { + } + else if (syllable.maskType() < follow.maskType()) + { syllable += follow; - } else { + } + else + { syllable += ending; } } } - + // heuristics for Hsu keyboard layout - if (this == HsuLayout()) { + if (this == HsuLayout()) + { // fix the left out L to ERR when it has sound, and GI, GUE -> JI, JUE - if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() && - !syllable.hasMiddleVowel()) { + if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() && !syllable.hasMiddleVowel()) + { syllable += BPMF(BPMF::ERR); - } else if (syllable.consonantComponent() == BPMF::G && - (syllable.middleVowelComponent() == BPMF::I || - syllable.middleVowelComponent() == BPMF::UE)) { + } + else if (syllable.consonantComponent() == BPMF::G && + (syllable.middleVowelComponent() == BPMF::I || syllable.middleVowelComponent() == BPMF::UE)) + { syllable += BPMF(BPMF::J); } } - + return syllable; } - -protected: - bool endAheadOrAheadHasToneMarkKey(std::string::const_iterator ahead, - std::string::const_iterator end) const { - if (ahead == end) return true; - + + protected: + bool endAheadOrAheadHasToneMarkKey(std::string::const_iterator ahead, std::string::const_iterator end) const + { + if (ahead == end) + return true; + char tone1 = componentToKey(BPMF::Tone1); char tone2 = componentToKey(BPMF::Tone2); char tone3 = componentToKey(BPMF::Tone3); char tone4 = componentToKey(BPMF::Tone4); char tone5 = componentToKey(BPMF::Tone5); - + if (tone1) - if (*ahead == tone1) return true; - - if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 || - *ahead == tone5) + if (*ahead == tone1) + return true; + + if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 || *ahead == tone5) return true; - + return false; } - - bool sequenceContainsIorUE(std::string::const_iterator start, - std::string::const_iterator end) const { + + bool sequenceContainsIorUE(std::string::const_iterator start, std::string::const_iterator end) const + { char iChar = componentToKey(BPMF::I); char ueChar = componentToKey(BPMF::UE); - + for (; start != end; ++start) - if (*start == iChar || *start == ueChar) return true; + if (*start == iChar || *start == ueChar) + return true; return false; } - + std::string m_name; BopomofoKeyToComponentMap m_keyToComponent; BopomofoComponentToKeyMap m_componentToKey; }; -class BopomofoReadingBuffer { -public: - explicit BopomofoReadingBuffer(const BopomofoKeyboardLayout* layout) - : layout_(layout), pinyin_mode_(false) { - if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) { +class BopomofoReadingBuffer +{ + public: + explicit BopomofoReadingBuffer(const BopomofoKeyboardLayout *layout) : layout_(layout), pinyin_mode_(false) + { + if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) + { pinyin_mode_ = true; pinyin_sequence_ = ""; } } - - void setKeyboardLayout(const BopomofoKeyboardLayout* layout) { + + void setKeyboardLayout(const BopomofoKeyboardLayout *layout) + { layout_ = layout; - - if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) { + + if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) + { pinyin_mode_ = true; pinyin_sequence_ = ""; } } - - bool isValidKey(char k) const { - if (!pinyin_mode_) { + + bool isValidKey(char k) const + { + if (!pinyin_mode_) + { return layout_ ? (layout_->keyToComponents(k)).size() > 0 : false; } - + char lk = tolower(k); - if (lk >= 'a' && lk <= 'z') { + if (lk >= 'a' && lk <= 'z') + { // if a tone marker is already in place - if (pinyin_sequence_.length()) { + if (pinyin_sequence_.length()) + { char lastc = pinyin_sequence_[pinyin_sequence_.length() - 1]; - if (lastc >= '2' && lastc <= '5') { + if (lastc >= '2' && lastc <= '5') + { return false; } return true; } return true; } - - if (pinyin_sequence_.length() && (lk >= '2' && lk <= '5')) { + + if (pinyin_sequence_.length() && (lk >= '2' && lk <= '5')) + { return true; } - + return false; } - - bool combineKey(char k) { - if (!isValidKey(k)) return false; - - if (pinyin_mode_) { + + bool combineKey(char k) + { + if (!isValidKey(k)) + return false; + + if (pinyin_mode_) + { pinyin_sequence_ += std::string(1, tolower(k)); syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_); return true; } - - std::string sequence = - layout_->keySequenceFromSyllable(syllable_) + std::string(1, k); + + std::string sequence = layout_->keySequenceFromSyllable(syllable_) + std::string(1, k); syllable_ = layout_->syllableFromKeySequence(sequence); return true; } - - void clear() { + + void clear() + { pinyin_sequence_.clear(); syllable_.clear(); } - - void backspace() { - if (!layout_) return; - - if (pinyin_mode_) { - if (pinyin_sequence_.length()) { - pinyin_sequence_ = - pinyin_sequence_.substr(0, pinyin_sequence_.length() - 1); + + void backspace() + { + if (!layout_) + return; + + if (pinyin_mode_) + { + if (pinyin_sequence_.length()) + { + pinyin_sequence_ = pinyin_sequence_.substr(0, pinyin_sequence_.length() - 1); } - + syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_); return; } - + std::string sequence = layout_->keySequenceFromSyllable(syllable_); - if (sequence.length()) { + if (sequence.length()) + { sequence = sequence.substr(0, sequence.length() - 1); syllable_ = layout_->syllableFromKeySequence(sequence); } } - - bool isEmpty() const { return syllable_.isEmpty(); } - - const std::string composedString() const { - if (pinyin_mode_) { + + bool isEmpty() const + { + return syllable_.isEmpty(); + } + + const std::string composedString() const + { + if (pinyin_mode_) + { return pinyin_sequence_; } - + return syllable_.composedString(); } - - const BPMF syllable() const { return syllable_; } - - const std::string standardLayoutQueryString() const { + + const BPMF syllable() const + { + return syllable_; + } + + const std::string standardLayoutQueryString() const + { return BopomofoKeyboardLayout::StandardLayout()->keySequenceFromSyllable(syllable_); } - - bool hasToneMarker() const { return syllable_.hasToneMarker(); } - -protected: - const BopomofoKeyboardLayout* layout_; + + bool hasToneMarker() const + { + return syllable_.hasToneMarker(); + } + + protected: + const BopomofoKeyboardLayout *layout_; BPMF syllable_; - + bool pinyin_mode_; std::string pinyin_sequence_; }; -} // namespace Mandarin +} // namespace Mandarin - -#endif // MANDARIN_H_ +#endif // MANDARIN_H_ diff --git a/Source/Headers/vChewing-Bridging-Header.h b/Source/Headers/vChewing-Bridging-Header.h index d78ca431..3f79ac94 100644 --- a/Source/Headers/vChewing-Bridging-Header.h +++ b/Source/Headers/vChewing-Bridging-Header.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ // diff --git a/Source/Headers/vChewing-Prefix.pch b/Source/Headers/vChewing-Prefix.pch index 19be19bb..292aa8dd 100644 --- a/Source/Headers/vChewing-Prefix.pch +++ b/Source/Headers/vChewing-Prefix.pch @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ // diff --git a/Source/Modules/ControllerModules/KeyHandler.h b/Source/Modules/ControllerModules/KeyHandler.h index 0b3c4cef..6b6116d0 100644 --- a/Source/Modules/ControllerModules/KeyHandler.h +++ b/Source/Modules/ControllerModules/KeyHandler.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #import @@ -33,7 +40,9 @@ extern InputMode imeModeNULL; @protocol KeyHandlerDelegate - (id)candidateControllerForKeyHandler:(KeyHandler *)keyHandler; -- (void)keyHandler:(KeyHandler *)keyHandler didSelectCandidateAtIndex:(NSInteger)index candidateController:(id)controller; +- (void)keyHandler:(KeyHandler *)keyHandler + didSelectCandidateAtIndex:(NSInteger)index + candidateController:(id)controller; - (BOOL)keyHandler:(KeyHandler *)keyHandler didRequestWriteUserPhraseWithState:(InputState *)state; @end @@ -43,7 +52,8 @@ extern InputMode imeModeNULL; - (BOOL)handleInput:(keyParser *)input state:(InputState *)state stateCallback:(void (^)(InputState *))stateCallback - errorCallback:(void (^)(void))errorCallback NS_SWIFT_NAME(handle(input:state:stateCallback:errorCallback:)); + errorCallback:(void (^)(void))errorCallback + NS_SWIFT_NAME(handle(input:state:stateCallback:errorCallback:)); - (void)syncWithPreferences; - (void)fixNodeWithValue:(NSString *)value NS_SWIFT_NAME(fixNode(value:)); @@ -52,8 +62,8 @@ extern InputMode imeModeNULL; - (InputState *)buildInputtingState; - (nullable InputState *)buildAssociatePhraseStateWithKey:(NSString *)key useVerticalMode:(BOOL)useVerticalMode; -@property (strong, nonatomic) InputMode inputMode; -@property (weak, nonatomic) id delegate; +@property(strong, nonatomic) InputMode inputMode; +@property(weak, nonatomic) id delegate; @end NS_ASSUME_NONNULL_END diff --git a/Source/Modules/ControllerModules/KeyHandler.mm b/Source/Modules/ControllerModules/KeyHandler.mm index 0509a9cb..c2eb3b12 100644 --- a/Source/Modules/ControllerModules/KeyHandler.mm +++ b/Source/Modules/ControllerModules/KeyHandler.mm @@ -1,29 +1,35 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#import "Gramambular.h" #import "KeyHandler.h" +#import "Gramambular.h" #import "LMInstantiator.h" #import "Mandarin.h" -#import "mgrLangModel_Privates.h" #import "UserOverrideModel.h" +#import "mgrLangModel_Privates.h" #import "vChewing-Swift.h" #import @@ -33,32 +39,35 @@ InputMode imeModeNULL = ctlInputMethod.kIMEModeNULL; static const double kEpsilon = 0.000001; -static double FindHighestScore(const std::vector &nodes, double epsilon) { +static double FindHighestScore(const std::vector &nodes, double epsilon) +{ double highestScore = 0.0; - for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) + { double score = ni->node->highestUnigramScore(); - if (score > highestScore) { + if (score > highestScore) + { highestScore = score; } } return highestScore + epsilon; } -class NodeAnchorDescendingSorter { -public: +class NodeAnchorDescendingSorter +{ + public: bool operator()(const Gramambular::NodeAnchor &a, const Gramambular::NodeAnchor &b) const { return a.node->key().length() > b.node->key().length(); } }; -;// if DEBUG is defined, a DOT file (GraphViz format) will be written to the +// if DEBUG is defined, a DOT file (GraphViz format) will be written to the // specified path every time the grid is walked #if DEBUG static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; #endif - @implementation KeyHandler { // the reading buffer that takes user input @@ -87,10 +96,14 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return _inputMode; } -- (BOOL)isBuilderEmpty { - if (_builder->grid().width() == 0) { +- (BOOL)isBuilderEmpty +{ + if (_builder->grid().width() == 0) + { return YES; - } else { + } + else + { return NO; } } @@ -101,11 +114,14 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; vChewing::LMInstantiator *newLanguageModel; vChewing::UserOverrideModel *newUserOverrideModel; - if ([value isKindOfClass:[NSString class]] && [value isEqual:imeModeCHS]) { + if ([value isKindOfClass:[NSString class]] && [value isEqual:imeModeCHS]) + { newInputMode = imeModeCHS; newLanguageModel = [mgrLangModel lmCHS]; newUserOverrideModel = [mgrLangModel userOverrideModelCHS]; - } else { + } + else + { newInputMode = imeModeCHT; newLanguageModel = [mgrLangModel lmCHT]; newUserOverrideModel = [mgrLangModel userOverrideModelCHT]; @@ -121,18 +137,21 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; newLanguageModel->setCNSEnabled(mgrPrefs.cns11643Enabled); // Only apply the changes if the value is changed - if (![_inputMode isEqualToString:newInputMode]) { + if (![_inputMode isEqualToString:newInputMode]) + { _inputMode = newInputMode; _languageModel = newLanguageModel; _userOverrideModel = newUserOverrideModel; - if (_builder) { + if (_builder) + { delete _builder; _builder = new Gramambular::BlockReadingBuilder(_languageModel); _builder->setJoinSeparator("-"); } - if (!_bpmfReadingBuffer->isEmpty()) { + if (!_bpmfReadingBuffer->isEmpty()) + { _bpmfReadingBuffer->clear(); } } @@ -141,11 +160,13 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; - (void)dealloc { // clean up everything - if (_bpmfReadingBuffer) { + if (_bpmfReadingBuffer) + { delete _bpmfReadingBuffer; } - if (_builder) { + if (_builder) + { delete _builder; } } @@ -153,7 +174,8 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; - (instancetype)init { self = [super init]; - if (self) { + if (self) + { _bpmfReadingBuffer = new Mandarin::BopomofoReadingBuffer(Mandarin::BopomofoKeyboardLayout::StandardLayout()); // create the lattice builder @@ -164,7 +186,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; _userOverrideModel = [mgrLangModel userOverrideModelCHT]; _builder = new Gramambular::BlockReadingBuilder(_languageModel); - + // each Mandarin syllable is separated by a hyphen _builder->setJoinSeparator("-"); _inputMode = imeModeCHT; @@ -175,34 +197,35 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; - (void)syncWithPreferences { NSInteger layout = mgrPrefs.keyboardLayout; - switch (layout) { - case KeyboardLayoutOfStandard: - _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::StandardLayout()); - break; - case KeyboardLayoutOfEten: - _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::ETenLayout()); - break; - case KeyboardLayoutOfHsu: - _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::HsuLayout()); - break; - case KeyboardLayoutOfEen26: - _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::ETen26Layout()); - break; - case KeyboardLayoutOfIBM: - _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::IBMLayout()); - break; - case KeyboardLayoutOfMiTAC: - _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::MiTACLayout()); - break; - case KeyboardLayoutOfFakeSeigyou: - _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::FakeSeigyouLayout()); - break; - case KeyboardLayoutOfHanyuPinyin: - _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::HanyuPinyinLayout()); - break; - default: - _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::StandardLayout()); - mgrPrefs.keyboardLayout = KeyboardLayoutOfStandard; + switch (layout) + { + case KeyboardLayoutOfStandard: + _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::StandardLayout()); + break; + case KeyboardLayoutOfEten: + _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::ETenLayout()); + break; + case KeyboardLayoutOfHsu: + _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::HsuLayout()); + break; + case KeyboardLayoutOfEen26: + _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::ETen26Layout()); + break; + case KeyboardLayoutOfIBM: + _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::IBMLayout()); + break; + case KeyboardLayoutOfMiTAC: + _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::MiTACLayout()); + break; + case KeyboardLayoutOfFakeSeigyou: + _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::FakeSeigyouLayout()); + break; + case KeyboardLayoutOfHanyuPinyin: + _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::HanyuPinyinLayout()); + break; + default: + _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::StandardLayout()); + mgrPrefs.keyboardLayout = KeyboardLayoutOfStandard; } } @@ -211,35 +234,44 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; size_t cursorIndex = [self _actualCandidateCursorIndex]; std::string stringValue(value.UTF8String); Gramambular::NodeAnchor selectedNode = _builder->grid().fixNodeSelectedCandidate(cursorIndex, stringValue); - if (!mgrPrefs.useSCPCTypingMode) { // 不要針對逐字選字模式啟用臨時半衰記憶模型。 + if (!mgrPrefs.useSCPCTypingMode) + { // 不要針對逐字選字模式啟用臨時半衰記憶模型。 // If the length of the readings and the characters do not match, // it often means it is a special symbol and it should not be stored // in the user override model. BOOL addToOverrideModel = YES; - if (selectedNode.spanningLength != [value count]) { + if (selectedNode.spanningLength != [value count]) + { addToOverrideModel = NO; } - if (addToOverrideModel) { + if (addToOverrideModel) + { double score = selectedNode.node->scoreForCandidate(stringValue); - if (score <= -12) { // 威注音的 SymbolLM 的 Score 是 -12。 + if (score <= -12) + { // 威注音的 SymbolLM 的 Score 是 -12。 addToOverrideModel = NO; } } - if (addToOverrideModel) { + if (addToOverrideModel) + { _userOverrideModel->observe(_walkedNodes, cursorIndex, stringValue, [[NSDate date] timeIntervalSince1970]); } } [self _walk]; - if (mgrPrefs.moveCursorAfterSelectingCandidate) { + if (mgrPrefs.moveCursorAfterSelectingCandidate) + { size_t nextPosition = 0; - for (auto node: _walkedNodes) { - if (nextPosition >= cursorIndex) { + for (auto node : _walkedNodes) + { + if (nextPosition >= cursorIndex) + { break; } nextPosition += node.spanningLength; } - if (nextPosition <= _builder->length()) { + if (nextPosition <= _builder->length()) + { _builder->setCursorIndex(nextPosition); } } @@ -259,59 +291,76 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return layout; } -- (BOOL)handleInput:(keyParser *)input state:(InputState *)inState stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)handleInput:(keyParser *)input + state:(InputState *)inState + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { InputState *state = inState; UniChar charCode = input.charCode; vChewingEmacsKey emacsKey = input.emacsKey; // if the inputText is empty, it's a function key combination, we ignore it - if (!input.inputText.length) { + if (!input.inputText.length) + { return NO; } // if the composing buffer is empty and there's no reading, and there is some function key combination, we ignore it - BOOL isFunctionKey = ([input isCommandHold] || [input isOptionHotKey] || [input isNumericPad]) || [input isControlHotKey]; + BOOL isFunctionKey = + ([input isCommandHold] || [input isOptionHotKey] || [input isNumericPad]) || [input isControlHotKey]; if (![state isKindOfClass:[InputStateNotEmpty class]] && - ![state isKindOfClass:[InputStateAssociatedPhrases class]] && - isFunctionKey) { + ![state isKindOfClass:[InputStateAssociatedPhrases class]] && isFunctionKey) + { return NO; } // Caps Lock processing: if Caps Lock is ON, temporarily disable bopomofo. // Note: Alphanumerical mode processing. - if ([input isBackSpace] || [input isEnter] || [input isAbsorbedArrowKey] || [input isExtraChooseCandidateKey] || [input isExtraChooseCandidateKeyReverse] || [input isCursorForward] || [input isCursorBackward]) { + if ([input isBackSpace] || [input isEnter] || [input isAbsorbedArrowKey] || [input isExtraChooseCandidateKey] || + [input isExtraChooseCandidateKeyReverse] || [input isCursorForward] || [input isCursorBackward]) + { // do nothing if backspace is pressed -- we ignore the key - } else if ([input isCapsLockOn]) { + } + else if ([input isCapsLockOn]) + { // process all possible combination, we hope. [self clear]; InputStateEmpty *emptyState = [[InputStateEmpty alloc] init]; stateCallback(emptyState); // When shift is pressed, don't do further processing, since it outputs capital letter anyway. - if ([input isShiftHold]) { + if ([input isShiftHold]) + { return NO; } - // if ASCII but not printable, don't use insertText:replacementRange: as many apps don't handle non-ASCII char insertions. - if (charCode < 0x80 && !isprint(charCode)) { + // if ASCII but not printable, don't use insertText:replacementRange: as many apps don't handle non-ASCII char + // insertions. + if (charCode < 0x80 && !isprint(charCode)) + { return NO; } // commit everything in the buffer. - InputStateCommitting *committingState = [[InputStateCommitting alloc] initWithPoppedText:[input.inputText lowercaseString]]; + InputStateCommitting *committingState = + [[InputStateCommitting alloc] initWithPoppedText:[input.inputText lowercaseString]]; stateCallback(committingState); stateCallback(emptyState); return YES; } - if ([input isNumericPad]) { - if (![input isLeft] && ![input isRight] && ![input isDown] && ![input isUp] && ![input isSpace] && isprint(charCode)) { + if ([input isNumericPad]) + { + if (![input isLeft] && ![input isRight] && ![input isDown] && ![input isUp] && ![input isSpace] && + isprint(charCode)) + { [self clear]; InputStateEmpty *emptyState = [[InputStateEmpty alloc] init]; stateCallback(emptyState); - InputStateCommitting *committing = [[InputStateCommitting alloc] initWithPoppedText:[input.inputText lowercaseString]]; + InputStateCommitting *committing = + [[InputStateCommitting alloc] initWithPoppedText:[input.inputText lowercaseString]]; stateCallback(committing); stateCallback(emptyState); return YES; @@ -319,14 +368,20 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; } // MARK: Handle Candidates - if ([state isKindOfClass:[InputStateChoosingCandidate class]]) { + if ([state isKindOfClass:[InputStateChoosingCandidate class]]) + { return [self _handleCandidateState:state input:input stateCallback:stateCallback errorCallback:errorCallback]; } // MARK: Handle Associated Phrases - if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) { - BOOL result = [self _handleCandidateState:state input:input stateCallback:stateCallback errorCallback:errorCallback]; - if (result) { + if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) + { + BOOL result = [self _handleCandidateState:state + input:input + stateCallback:stateCallback + errorCallback:errorCallback]; + if (result) + { return YES; } state = [[InputStateEmpty alloc] init]; @@ -334,9 +389,14 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; } // MARK: Handle Marking - if ([state isKindOfClass:[InputStateMarking class]]) { - InputStateMarking *marking = (InputStateMarking *) state; - if ([self _handleMarkingState:(InputStateMarking *) state input:input stateCallback:stateCallback errorCallback:errorCallback]) { + if ([state isKindOfClass:[InputStateMarking class]]) + { + InputStateMarking *marking = (InputStateMarking *)state; + if ([self _handleMarkingState:(InputStateMarking *)state + input:input + stateCallback:stateCallback + errorCallback:errorCallback]) + { return YES; } state = [marking convertToInputting]; @@ -349,14 +409,16 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; // MARK: Handle BPMF Keys // see if it's valid BPMF reading - if (!skipBpmfHandling && _bpmfReadingBuffer->isValidKey((char) charCode)) { - _bpmfReadingBuffer->combineKey((char) charCode); + if (!skipBpmfHandling && _bpmfReadingBuffer->isValidKey((char)charCode)) + { + _bpmfReadingBuffer->combineKey((char)charCode); // if we have a tone marker, we have to insert the reading to the // builder in other words, if we don't have a tone marker, we just // update the composing buffer composeReading = _bpmfReadingBuffer->hasToneMarker(); - if (!composeReading) { + if (!composeReading) + { InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; stateCallback(inputting); return YES; @@ -366,12 +428,14 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; // see if we have composition if Enter/Space is hit and buffer is not empty // this is bit-OR'ed so that the tone marker key is also taken into account composeReading |= (!_bpmfReadingBuffer->isEmpty() && ([input isSpace] || [input isEnter])); - if (composeReading) { + if (composeReading) + { // combine the reading std::string reading = _bpmfReadingBuffer->syllable().composedString(); // see if we have a unigram for this - if (!_languageModel->hasUnigramsForKey(reading)) { + if (!_languageModel->hasUnigramsForKey(reading)) + { [IME prtDebugIntel:@"B49C0979"]; errorCallback(); InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; @@ -386,14 +450,18 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; NSString *poppedText = [self _popOverflowComposingTextAndWalk]; // get user override model suggestion - std::string overrideValue = (mgrPrefs.useSCPCTypingMode) ? "" : - _userOverrideModel->suggest(_walkedNodes, _builder->cursorIndex(), [[NSDate date] timeIntervalSince1970]); + std::string overrideValue = (mgrPrefs.useSCPCTypingMode) + ? "" + : _userOverrideModel->suggest(_walkedNodes, _builder->cursorIndex(), + [[NSDate date] timeIntervalSince1970]); - if (!overrideValue.empty()) { + if (!overrideValue.empty()) + { size_t cursorIndex = [self _actualCandidateCursorIndex]; std::vector nodes = _builder->grid().nodesCrossingOrEndingAt(cursorIndex); double highestScore = FindHighestScore(nodes, kEpsilon); - _builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, static_cast(highestScore)); + _builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, + static_cast(highestScore)); } // then update the text @@ -403,27 +471,40 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; inputting.poppedText = poppedText; stateCallback(inputting); - if (mgrPrefs.useSCPCTypingMode) { - InputStateChoosingCandidate *choosingCandidates = [self _buildCandidateState:inputting useVerticalMode:input.useVerticalMode]; - if (choosingCandidates.candidates.count == 1) { + if (mgrPrefs.useSCPCTypingMode) + { + InputStateChoosingCandidate *choosingCandidates = [self _buildCandidateState:inputting + useVerticalMode:input.useVerticalMode]; + if (choosingCandidates.candidates.count == 1) + { [self clear]; NSString *text = choosingCandidates.candidates.firstObject; InputStateCommitting *committing = [[InputStateCommitting alloc] initWithPoppedText:text]; stateCallback(committing); - if (!mgrPrefs.associatedPhrasesEnabled) { + if (!mgrPrefs.associatedPhrasesEnabled) + { InputStateEmpty *empty = [[InputStateEmpty alloc] init]; stateCallback(empty); - } else { - InputStateAssociatedPhrases *associatedPhrases = (InputStateAssociatedPhrases *)[self buildAssociatePhraseStateWithKey:text useVerticalMode:input.useVerticalMode]; - if (associatedPhrases) { + } + else + { + InputStateAssociatedPhrases *associatedPhrases = + (InputStateAssociatedPhrases *)[self buildAssociatePhraseStateWithKey:text + useVerticalMode:input.useVerticalMode]; + if (associatedPhrases) + { stateCallback(associatedPhrases); - } else { + } + else + { InputStateEmpty *empty = [[InputStateEmpty alloc] init]; stateCallback(empty); } } - } else { + } + else + { stateCallback(choosingCandidates); } } @@ -433,26 +514,33 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; } // MARK: Calling candidate window using Space or Down or PageUp / PageDn. - if (_bpmfReadingBuffer->isEmpty() && - [state isKindOfClass:[InputStateNotEmpty class]] && - ([input isExtraChooseCandidateKey] || [input isExtraChooseCandidateKeyReverse] - || [input isSpace] || [input isPageDown] || [input isPageUp] || [input isTab] - || (input.useVerticalMode && ([input isVerticalModeOnlyChooseCandidateKey])))) { - if ([input isSpace]) { + if (_bpmfReadingBuffer->isEmpty() && [state isKindOfClass:[InputStateNotEmpty class]] && + ([input isExtraChooseCandidateKey] || [input isExtraChooseCandidateKeyReverse] || [input isSpace] || + [input isPageDown] || [input isPageUp] || [input isTab] || + (input.useVerticalMode && ([input isVerticalModeOnlyChooseCandidateKey])))) + { + if ([input isSpace]) + { // if the spacebar is NOT set to be a selection key - if ([input isShiftHold] || !mgrPrefs.chooseCandidateUsingSpace) { - if (_builder->cursorIndex() >= _builder->length()) { - NSString *composingBuffer = [(InputStateNotEmpty*) state composingBuffer]; - if (composingBuffer.length) { - InputStateCommitting *committing = [[InputStateCommitting alloc] initWithPoppedText:composingBuffer]; - stateCallback (committing); + if ([input isShiftHold] || !mgrPrefs.chooseCandidateUsingSpace) + { + if (_builder->cursorIndex() >= _builder->length()) + { + NSString *composingBuffer = [(InputStateNotEmpty *)state composingBuffer]; + if (composingBuffer.length) + { + InputStateCommitting *committing = + [[InputStateCommitting alloc] initWithPoppedText:composingBuffer]; + stateCallback(committing); } [self clear]; InputStateCommitting *committing = [[InputStateCommitting alloc] initWithPoppedText:@" "]; stateCallback(committing); InputStateEmpty *empty = [[InputStateEmpty alloc] init]; stateCallback(empty); - } else if (_languageModel->hasUnigramsForKey(" ")) { + } + else if (_languageModel->hasUnigramsForKey(" ")) + { _builder->insertReadingAtCursor(" "); NSString *poppedText = [self _popOverflowComposingTextAndWalk]; InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; @@ -460,141 +548,187 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; stateCallback(inputting); } return YES; - } } - InputStateChoosingCandidate *choosingCandidates = [self _buildCandidateState:(InputStateNotEmpty *) state useVerticalMode:input.useVerticalMode]; + InputStateChoosingCandidate *choosingCandidates = [self _buildCandidateState:(InputStateNotEmpty *)state + useVerticalMode:input.useVerticalMode]; stateCallback(choosingCandidates); return YES; } // MARK: Esc - if ([input isESC]) { + if ([input isESC]) + { return [self _handleEscWithState:state stateCallback:stateCallback errorCallback:errorCallback]; } // MARK: Cursor backward - if ([input isCursorBackward] || emacsKey == vChewingEmacsKeyBackward) { - return [self _handleBackwardWithState:state input:input stateCallback:stateCallback errorCallback:errorCallback]; + if ([input isCursorBackward] || emacsKey == vChewingEmacsKeyBackward) + { + return [self _handleBackwardWithState:state + input:input + stateCallback:stateCallback + errorCallback:errorCallback]; } // MARK: Cursor forward - if ([input isCursorForward] || emacsKey == vChewingEmacsKeyForward) { + if ([input isCursorForward] || emacsKey == vChewingEmacsKeyForward) + { return [self _handleForwardWithState:state input:input stateCallback:stateCallback errorCallback:errorCallback]; } // MARK: Home - if ([input isHome] || emacsKey == vChewingEmacsKeyHome) { + if ([input isHome] || emacsKey == vChewingEmacsKeyHome) + { return [self _handleHomeWithState:state stateCallback:stateCallback errorCallback:errorCallback]; } // MARK: End - if ([input isEnd] || emacsKey == vChewingEmacsKeyEnd) { + if ([input isEnd] || emacsKey == vChewingEmacsKeyEnd) + { return [self _handleEndWithState:state stateCallback:stateCallback errorCallback:errorCallback]; } // MARK: Ctrl+PgLf or Shift+PgLf - if ([input isControlHold] || [input isShiftHold]) { - if ([input isOptionHold] && [input isLeft]) { + if ([input isControlHold] || [input isShiftHold]) + { + if ([input isOptionHold] && [input isLeft]) + { return [self _handleHomeWithState:state stateCallback:stateCallback errorCallback:errorCallback]; } } // MARK: Ctrl+PgRt or Shift+PgRt - if ([input isControlHold] || [input isShiftHold]) { - if ([input isOptionHold] && [input isRight]) { + if ([input isControlHold] || [input isShiftHold]) + { + if ([input isOptionHold] && [input isRight]) + { return [self _handleEndWithState:state stateCallback:stateCallback errorCallback:errorCallback]; } } // MARK: AbsorbedArrowKey - if ([input isAbsorbedArrowKey] || [input isExtraChooseCandidateKey] || [input isExtraChooseCandidateKeyReverse]) { + if ([input isAbsorbedArrowKey] || [input isExtraChooseCandidateKey] || [input isExtraChooseCandidateKeyReverse]) + { return [self _handleAbsorbedArrowKeyWithState:state stateCallback:stateCallback errorCallback:errorCallback]; } // MARK: Backspace - if ([input isBackSpace]) { + if ([input isBackSpace]) + { return [self _handleBackspaceWithState:state stateCallback:stateCallback errorCallback:errorCallback]; } // MARK: Delete - if ([input isDelete] || emacsKey == vChewingEmacsKeyDelete) { + if ([input isDelete] || emacsKey == vChewingEmacsKeyDelete) + { return [self _handleDeleteWithState:state stateCallback:stateCallback errorCallback:errorCallback]; } // MARK: Enter - if ([input isEnter]) { - return ([input isControlHold] && [input isCommandHold]) ? - [self _handleCtrlCommandEnterWithState:state stateCallback:stateCallback errorCallback:errorCallback] - : - [self _handleEnterWithState:state stateCallback:stateCallback errorCallback:errorCallback]; + if ([input isEnter]) + { + return ([input isControlHold] && [input isCommandHold]) + ? [self _handleCtrlCommandEnterWithState:state + stateCallback:stateCallback + errorCallback:errorCallback] + : [self _handleEnterWithState:state stateCallback:stateCallback errorCallback:errorCallback]; } // MARK: Punctuation list - if ([input isSymbolMenuPhysicalKey] && ![input isShiftHold]) { + if ([input isSymbolMenuPhysicalKey] && ![input isShiftHold]) + { // 得在這裡先 commit buffer,不然會導致「在摁 ESC 離開符號選單時會重複輸入上一次的組字區的內容」的不當行為。 // 於是這裡用「模擬一次 Enter 鍵的操作」使其代為執行這個 commit buffer 的動作。 [self _handleEnterWithState:state stateCallback:stateCallback errorCallback:errorCallback]; SymbolNode *root = [SymbolNode root]; - InputStateSymbolTable *symbolState = [[InputStateSymbolTable alloc] initWithNode:root useVerticalMode:input.useVerticalMode]; + InputStateSymbolTable *symbolState = [[InputStateSymbolTable alloc] initWithNode:root + useVerticalMode:input.useVerticalMode]; stateCallback(symbolState); return YES; -// if (_languageModel->hasUnigramsForKey("_punctuation_list"))) { -// if (_bpmfReadingBuffer->isEmpty()) { -// _builder->insertReadingAtCursor(string("_punctuation_list")); -// NSString *poppedText = [self _popOverflowComposingTextAndWalk]; -// InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; -// inputting.poppedText = poppedText; -// stateCallback(inputting); -// InputStateChoosingCandidate *choosingCandidate = [self _buildCandidateState:inputting useVerticalMode:input.useVerticalMode]; -// stateCallback(choosingCandidate); -// } else { // If there is still unfinished bpmf reading, ignore the punctuation -// errorCallback(); -// } -// return YES; -// } + // if (_languageModel->hasUnigramsForKey("_punctuation_list"))) { + // if (_bpmfReadingBuffer->isEmpty()) { + // _builder->insertReadingAtCursor(string("_punctuation_list")); + // NSString *poppedText = [self _popOverflowComposingTextAndWalk]; + // InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; + // inputting.poppedText = poppedText; + // stateCallback(inputting); + // InputStateChoosingCandidate *choosingCandidate = [self _buildCandidateState:inputting + // useVerticalMode:input.useVerticalMode]; stateCallback(choosingCandidate); + // } else { // If there is still unfinished bpmf reading, ignore the punctuation + // errorCallback(); + // } + // return YES; + // } } // MARK: Punctuation // if nothing is matched, see if it's a punctuation key for current layout. std::string punctuationNamePrefix; - if ([input isOptionHold]) { + if ([input isOptionHold]) + { punctuationNamePrefix = std::string("_alt_punctuation_"); - } else if ([input isControlHold]) { + } + else if ([input isControlHold]) + { punctuationNamePrefix = std::string("_ctrl_punctuation_"); - } else if (mgrPrefs.halfWidthPunctuationEnabled) { + } + else if (mgrPrefs.halfWidthPunctuationEnabled) + { punctuationNamePrefix = std::string("_half_punctuation_"); - } else { + } + else + { punctuationNamePrefix = std::string("_punctuation_"); } std::string layout = [self _currentLayout]; - std::string customPunctuation = punctuationNamePrefix + layout + std::string(1, (char) charCode); - if ([self _handlePunctuation:customPunctuation state:state usingVerticalMode:input.useVerticalMode stateCallback:stateCallback errorCallback:errorCallback]) { + std::string customPunctuation = punctuationNamePrefix + layout + std::string(1, (char)charCode); + if ([self _handlePunctuation:customPunctuation + state:state + usingVerticalMode:input.useVerticalMode + stateCallback:stateCallback + errorCallback:errorCallback]) + { return YES; } // if nothing is matched, see if it's a punctuation key. - std::string punctuation = punctuationNamePrefix + std::string(1, (char) charCode); - if ([self _handlePunctuation:punctuation state:state usingVerticalMode:input.useVerticalMode stateCallback:stateCallback errorCallback:errorCallback]) { + std::string punctuation = punctuationNamePrefix + std::string(1, (char)charCode); + if ([self _handlePunctuation:punctuation + state:state + usingVerticalMode:input.useVerticalMode + stateCallback:stateCallback + errorCallback:errorCallback]) + { return YES; } // Lukhnos 這裡的處理反而會使得 Apple 倚天注音動態鍵盤佈局「敲不了半形大寫英文」的缺點曝露無疑,所以注釋掉。 - // 至於他試圖用這種處理來解決的上游 UPR293 的問題,其實針對詞庫檔案的排序做點手腳就可以解決。威注音本來也就是這麼做的。 - if (/*[state isKindOfClass:[InputStateNotEmpty class]] && */[input isUpperCaseASCIILetterKey]) { - std::string letter = std::string("_letter_") + std::string(1, (char) charCode); - if ([self _handlePunctuation:letter state:state usingVerticalMode:input.useVerticalMode stateCallback:stateCallback errorCallback:errorCallback]) { + // 至於他試圖用這種處理來解決的上游 UPR293 + // 的問題,其實針對詞庫檔案的排序做點手腳就可以解決。威注音本來也就是這麼做的。 + if (/*[state isKindOfClass:[InputStateNotEmpty class]] && */ [input isUpperCaseASCIILetterKey]) + { + std::string letter = std::string("_letter_") + std::string(1, (char)charCode); + if ([self _handlePunctuation:letter + state:state + usingVerticalMode:input.useVerticalMode + stateCallback:stateCallback + errorCallback:errorCallback]) + { return YES; } } - // still nothing, then we update the composing buffer (some app has strange behavior if we don't do this, "thinking" the key is not actually consumed) - // 砍掉這一段會導致「F1-F12 按鍵干擾組字區」的問題。暫時只能先恢復這段,且補上偵錯彙報機制,方便今後排查故障。 - if ([state isKindOfClass:[InputStateNotEmpty class]] || !_bpmfReadingBuffer->isEmpty()) { - [IME prtDebugIntel:[NSString stringWithFormat:@"Blocked data: charCode: %c, keyCode: %c", charCode, input.keyCode]]; + // still nothing, then we update the composing buffer (some app has strange behavior if we don't do this, "thinking" + // the key is not actually consumed) 砍掉這一段會導致「F1-F12 + // 按鍵干擾組字區」的問題。暫時只能先恢復這段,且補上偵錯彙報機制,方便今後排查故障。 + if ([state isKindOfClass:[InputStateNotEmpty class]] || !_bpmfReadingBuffer->isEmpty()) + { + [IME prtDebugIntel:[NSString + stringWithFormat:@"Blocked data: charCode: %c, keyCode: %c", charCode, input.keyCode]]; [IME prtDebugIntel:@"A9BFF20E"]; errorCallback(); stateCallback(state); @@ -604,33 +738,43 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return NO; } -- (BOOL)_handleEscWithState:(InputState *)state stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleEscWithState:(InputState *)state + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) { + if (![state isKindOfClass:[InputStateInputting class]]) + { return NO; } BOOL escToClearInputBufferEnabled = mgrPrefs.escToCleanInputBuffer; - if (escToClearInputBufferEnabled) { + if (escToClearInputBufferEnabled) + { // if the option is enabled, we clear everything including the composing // buffer, walked nodes and the reading. [self clear]; InputStateEmptyIgnoringPreviousState *empty = [[InputStateEmptyIgnoringPreviousState alloc] init]; stateCallback(empty); - } else { + } + else + { // if reading is not empty, we cancel the reading; Apple's built-in // Zhuyin (and the erstwhile Hanin) has a default option that Esc // "cancels" the current composed character and revert it to // Bopomofo reading, in odds with the expectation of users from // other platforms - if (!_bpmfReadingBuffer->isEmpty()) { + if (!_bpmfReadingBuffer->isEmpty()) + { _bpmfReadingBuffer->clear(); - if (!_builder->length()) { + if (!_builder->length()) + { InputStateEmpty *empty = [[InputStateEmpty alloc] init]; stateCallback(empty); - } else { + } + else + { InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; stateCallback(inputting); } @@ -639,39 +783,57 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } -- (BOOL)_handleBackwardWithState:(InputState *)state input:(keyParser *)input stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleBackwardWithState:(InputState *)state + input:(keyParser *)input + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) { + if (![state isKindOfClass:[InputStateInputting class]]) + { return NO; } - if (!_bpmfReadingBuffer->isEmpty()) { + if (!_bpmfReadingBuffer->isEmpty()) + { [IME prtDebugIntel:@"6ED95318"]; errorCallback(); stateCallback(state); return YES; } - InputStateInputting *currentState = (InputStateInputting *) state; + InputStateInputting *currentState = (InputStateInputting *)state; - if ([input isShiftHold]) { + if ([input isShiftHold]) + { // Shift + left - if (currentState.cursorIndex > 0) { - NSInteger previousPosition = [currentState.composingBuffer previousUtf16PositionFor:currentState.cursorIndex]; - InputStateMarking *marking = [[InputStateMarking alloc] initWithComposingBuffer:currentState.composingBuffer cursorIndex:currentState.cursorIndex markerIndex:previousPosition readings:[self _currentReadings]]; + if (currentState.cursorIndex > 0) + { + NSInteger previousPosition = + [currentState.composingBuffer previousUtf16PositionFor:currentState.cursorIndex]; + InputStateMarking *marking = [[InputStateMarking alloc] initWithComposingBuffer:currentState.composingBuffer + cursorIndex:currentState.cursorIndex + markerIndex:previousPosition + readings:[self _currentReadings]]; marking.tooltipForInputting = currentState.tooltip; stateCallback(marking); - } else { + } + else + { [IME prtDebugIntel:@"D326DEA3"]; errorCallback(); stateCallback(state); } - } else { - if (_builder->cursorIndex() > 0) { + } + else + { + if (_builder->cursorIndex() > 0) + { _builder->setCursorIndex(_builder->cursorIndex() - 1); InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; stateCallback(inputting); - } else { + } + else + { [IME prtDebugIntel:@"7045E6F3"]; errorCallback(); stateCallback(state); @@ -680,39 +842,56 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } -- (BOOL)_handleForwardWithState:(InputState *)state input:(keyParser *)input stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleForwardWithState:(InputState *)state + input:(keyParser *)input + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) { + if (![state isKindOfClass:[InputStateInputting class]]) + { return NO; } - if (!_bpmfReadingBuffer->isEmpty()) { + if (!_bpmfReadingBuffer->isEmpty()) + { [IME prtDebugIntel:@"B3BA5257"]; errorCallback(); stateCallback(state); return YES; } - InputStateInputting *currentState = (InputStateInputting *) state; + InputStateInputting *currentState = (InputStateInputting *)state; - if ([input isShiftHold]) { + if ([input isShiftHold]) + { // Shift + Right - if (currentState.cursorIndex < currentState.composingBuffer.length) { + if (currentState.cursorIndex < currentState.composingBuffer.length) + { NSInteger nextPosition = [currentState.composingBuffer nextUtf16PositionFor:currentState.cursorIndex]; - InputStateMarking *marking = [[InputStateMarking alloc] initWithComposingBuffer:currentState.composingBuffer cursorIndex:currentState.cursorIndex markerIndex:nextPosition readings:[self _currentReadings]]; + InputStateMarking *marking = [[InputStateMarking alloc] initWithComposingBuffer:currentState.composingBuffer + cursorIndex:currentState.cursorIndex + markerIndex:nextPosition + readings:[self _currentReadings]]; marking.tooltipForInputting = currentState.tooltip; stateCallback(marking); - } else { + } + else + { [IME prtDebugIntel:@"BB7F6DB9"]; errorCallback(); stateCallback(state); } - } else { - if (_builder->cursorIndex() < _builder->length()) { + } + else + { + if (_builder->cursorIndex() < _builder->length()) + { _builder->setCursorIndex(_builder->cursorIndex() + 1); InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; stateCallback(inputting); - } else { + } + else + { [IME prtDebugIntel:@"A96AAD58"]; errorCallback(); stateCallback(state); @@ -722,24 +901,31 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } -- (BOOL)_handleHomeWithState:(InputState *)state stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleHomeWithState:(InputState *)state + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) { + if (![state isKindOfClass:[InputStateInputting class]]) + { return NO; } - if (!_bpmfReadingBuffer->isEmpty()) { + if (!_bpmfReadingBuffer->isEmpty()) + { [IME prtDebugIntel:@"ABC44080"]; errorCallback(); stateCallback(state); return YES; } - if (_builder->cursorIndex()) { + if (_builder->cursorIndex()) + { _builder->setCursorIndex(0); InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; stateCallback(inputting); - } else { + } + else + { [IME prtDebugIntel:@"66D97F90"]; errorCallback(); stateCallback(state); @@ -748,24 +934,31 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } -- (BOOL)_handleEndWithState:(InputState *)state stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleEndWithState:(InputState *)state + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) { + if (![state isKindOfClass:[InputStateInputting class]]) + { return NO; } - if (!_bpmfReadingBuffer->isEmpty()) { + if (!_bpmfReadingBuffer->isEmpty()) + { [IME prtDebugIntel:@"9B69908D"]; errorCallback(); stateCallback(state); return YES; } - if (_builder->cursorIndex() != _builder->length()) { + if (_builder->cursorIndex() != _builder->length()) + { _builder->setCursorIndex(_builder->length()); InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; stateCallback(inputting); - } else { + } + else + { [IME prtDebugIntel:@"9B69908E"]; errorCallback(); stateCallback(state); @@ -774,13 +967,17 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } -- (BOOL)_handleAbsorbedArrowKeyWithState:(InputState *)state stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleAbsorbedArrowKeyWithState:(InputState *)state + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) { + if (![state isKindOfClass:[InputStateInputting class]]) + { return NO; } - if (!_bpmfReadingBuffer->isEmpty()) { + if (!_bpmfReadingBuffer->isEmpty()) + { [IME prtDebugIntel:@"9B6F908D"]; errorCallback(); } @@ -788,59 +985,83 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } -- (BOOL)_handleBackspaceWithState:(InputState *)state stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleBackspaceWithState:(InputState *)state + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) { + if (![state isKindOfClass:[InputStateInputting class]]) + { return NO; } - if (_bpmfReadingBuffer->isEmpty()) { - if (_builder->cursorIndex()) { + if (_bpmfReadingBuffer->isEmpty()) + { + if (_builder->cursorIndex()) + { _builder->deleteReadingBeforeCursor(); [self _walk]; - } else { + } + else + { [IME prtDebugIntel:@"9D69908D"]; errorCallback(); stateCallback(state); return YES; } - } else { + } + else + { _bpmfReadingBuffer->backspace(); } - if (_bpmfReadingBuffer->isEmpty() && !_builder->length()) { + if (_bpmfReadingBuffer->isEmpty() && !_builder->length()) + { InputStateEmptyIgnoringPreviousState *empty = [[InputStateEmptyIgnoringPreviousState alloc] init]; stateCallback(empty); - } else { + } + else + { InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; stateCallback(inputting); } return YES; } -- (BOOL)_handleDeleteWithState:(InputState *)state stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleDeleteWithState:(InputState *)state + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) { + if (![state isKindOfClass:[InputStateInputting class]]) + { return NO; } - if (_bpmfReadingBuffer->isEmpty()) { - if (_builder->cursorIndex() != _builder->length()) { + if (_bpmfReadingBuffer->isEmpty()) + { + if (_builder->cursorIndex() != _builder->length()) + { _builder->deleteReadingAfterCursor(); [self _walk]; InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; - if (!inputting.composingBuffer.length) { + if (!inputting.composingBuffer.length) + { InputStateEmptyIgnoringPreviousState *empty = [[InputStateEmptyIgnoringPreviousState alloc] init]; stateCallback(empty); - } else { + } + else + { stateCallback(inputting); } - } else { + } + else + { [IME prtDebugIntel:@"9B69938D"]; errorCallback(); stateCallback(state); } - } else { + } + else + { [IME prtDebugIntel:@"9C69908D"]; errorCallback(); stateCallback(state); @@ -849,12 +1070,17 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } -- (BOOL)_handleCtrlCommandEnterWithState:(InputState *)state stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleCtrlCommandEnterWithState:(InputState *)state + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) return NO; + if (![state isKindOfClass:[InputStateInputting class]]) + return NO; NSArray *readings = [self _currentReadings]; - NSString *composingBuffer = (ctlInputMethod.areWeUsingOurOwnPhraseEditor) ? [readings componentsJoinedByString:@"-"] : [readings componentsJoinedByString:@" "] ; + NSString *composingBuffer = (ctlInputMethod.areWeUsingOurOwnPhraseEditor) + ? [readings componentsJoinedByString:@"-"] + : [readings componentsJoinedByString:@" "]; [self clear]; @@ -865,15 +1091,18 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } -- (BOOL)_handleEnterWithState:(InputState *)state stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handleEnterWithState:(InputState *)state + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (![state isKindOfClass:[InputStateInputting class]]) { + if (![state isKindOfClass:[InputStateInputting class]]) + { return NO; } [self clear]; - InputStateInputting *current = (InputStateInputting *) state; + InputStateInputting *current = (InputStateInputting *)state; NSString *composingBuffer = current.composingBuffer; InputStateCommitting *committing = [[InputStateCommitting alloc] initWithPoppedText:composingBuffer]; stateCallback(committing); @@ -882,17 +1111,25 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } -- (BOOL)_handlePunctuation:(std::string)customPunctuation state:(InputState *)state usingVerticalMode:(BOOL)useVerticalMode stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback +- (BOOL)_handlePunctuation:(std::string)customPunctuation + state:(InputState *)state + usingVerticalMode:(BOOL)useVerticalMode + stateCallback:(void (^)(InputState *))stateCallback + errorCallback:(void (^)(void))errorCallback { - if (!_languageModel->hasUnigramsForKey(customPunctuation)) { + if (!_languageModel->hasUnigramsForKey(customPunctuation)) + { return NO; } NSString *poppedText; - if (_bpmfReadingBuffer->isEmpty()) { + if (_bpmfReadingBuffer->isEmpty()) + { _builder->insertReadingAtCursor(customPunctuation); poppedText = [self _popOverflowComposingTextAndWalk]; - } else { // If there is still unfinished bpmf reading, ignore the punctuation + } + else + { // If there is still unfinished bpmf reading, ignore the punctuation [IME prtDebugIntel:@"A9B69908D"]; errorCallback(); stateCallback(state); @@ -903,38 +1140,46 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; inputting.poppedText = poppedText; stateCallback(inputting); - if (mgrPrefs.useSCPCTypingMode && _bpmfReadingBuffer->isEmpty()) { - InputStateChoosingCandidate *candidateState = [self _buildCandidateState:inputting useVerticalMode:useVerticalMode]; + if (mgrPrefs.useSCPCTypingMode && _bpmfReadingBuffer->isEmpty()) + { + InputStateChoosingCandidate *candidateState = [self _buildCandidateState:inputting + useVerticalMode:useVerticalMode]; - if ([candidateState.candidates count] == 1) { + if ([candidateState.candidates count] == 1) + { [self clear]; - InputStateCommitting *committing = [[InputStateCommitting alloc] initWithPoppedText:candidateState.candidates.firstObject]; + InputStateCommitting *committing = + [[InputStateCommitting alloc] initWithPoppedText:candidateState.candidates.firstObject]; stateCallback(committing); InputStateEmpty *empty = [[InputStateEmpty alloc] init]; stateCallback(empty); - } else { + } + else + { stateCallback(candidateState); } } return YES; } - - (BOOL)_handleMarkingState:(InputStateMarking *)state input:(keyParser *)input stateCallback:(void (^)(InputState *))stateCallback errorCallback:(void (^)(void))errorCallback { - if ([input isESC]) { + if ([input isESC]) + { InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; stateCallback(inputting); return YES; } // Enter - if ([input isEnter]) { - if (![self.delegate keyHandler:self didRequestWriteUserPhraseWithState:state]) { + if ([input isEnter]) + { + if (![self.delegate keyHandler:self didRequestWriteUserPhraseWithState:state]) + { [IME prtDebugIntel:@"5B69CC8D"]; errorCallback(); return YES; @@ -945,21 +1190,30 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; } // Shift + left - if (([input isCursorBackward] || input.emacsKey == vChewingEmacsKeyBackward) - && ([input isShiftHold])) { + if (([input isCursorBackward] || input.emacsKey == vChewingEmacsKeyBackward) && ([input isShiftHold])) + { NSUInteger index = state.markerIndex; - if (index > 0) { + if (index > 0) + { index = [state.composingBuffer previousUtf16PositionFor:index]; - InputStateMarking *marking = [[InputStateMarking alloc] initWithComposingBuffer:state.composingBuffer cursorIndex:state.cursorIndex markerIndex:index readings:state.readings]; + InputStateMarking *marking = [[InputStateMarking alloc] initWithComposingBuffer:state.composingBuffer + cursorIndex:state.cursorIndex + markerIndex:index + readings:state.readings]; marking.tooltipForInputting = state.tooltipForInputting; - if (marking.markedRange.length == 0) { + if (marking.markedRange.length == 0) + { InputState *inputting = [marking convertToInputting]; stateCallback(inputting); - } else { + } + else + { stateCallback(marking); } - } else { + } + else + { [IME prtDebugIntel:@"1149908D"]; errorCallback(); stateCallback(state); @@ -968,20 +1222,29 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; } // Shift + Right - if (([input isCursorForward] || input.emacsKey == vChewingEmacsKeyForward) - && ([input isShiftHold])) { + if (([input isCursorForward] || input.emacsKey == vChewingEmacsKeyForward) && ([input isShiftHold])) + { NSUInteger index = state.markerIndex; - if (index < state.composingBuffer.length) { + if (index < state.composingBuffer.length) + { index = [state.composingBuffer nextUtf16PositionFor:index]; - InputStateMarking *marking = [[InputStateMarking alloc] initWithComposingBuffer:state.composingBuffer cursorIndex:state.cursorIndex markerIndex:index readings:state.readings]; + InputStateMarking *marking = [[InputStateMarking alloc] initWithComposingBuffer:state.composingBuffer + cursorIndex:state.cursorIndex + markerIndex:index + readings:state.readings]; marking.tooltipForInputting = state.tooltipForInputting; - if (marking.markedRange.length == 0) { + if (marking.markedRange.length == 0) + { InputState *inputting = [marking convertToInputting]; stateCallback(inputting); - } else { + } + else + { stateCallback(marking); } - } else { + } + else + { [IME prtDebugIntel:@"9B51408D"]; errorCallback(); stateCallback(state); @@ -991,7 +1254,6 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return NO; } - - (BOOL)_handleCandidateState:(InputState *)state input:(keyParser *)input stateCallback:(void (^)(InputState *))stateCallback @@ -1001,99 +1263,122 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; UniChar charCode = input.charCode; VTCandidateController *gCurrentCandidateController = [self.delegate candidateControllerForKeyHandler:self]; - BOOL cancelCandidateKey = [input isBackSpace] || [input isESC] || [input isDelete] - || (([input isCursorBackward] || [input isCursorForward]) && [input isShiftHold]); + BOOL cancelCandidateKey = [input isBackSpace] || [input isESC] || [input isDelete] || + (([input isCursorBackward] || [input isCursorForward]) && [input isShiftHold]); - if (cancelCandidateKey) { - if ([state isKindOfClass: [InputStateAssociatedPhrases class]]) { + if (cancelCandidateKey) + { + if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) + { [self clear]; InputStateEmptyIgnoringPreviousState *empty = [[InputStateEmptyIgnoringPreviousState alloc] init]; stateCallback(empty); } - else if (mgrPrefs.useSCPCTypingMode) { + else if (mgrPrefs.useSCPCTypingMode) + { [self clear]; InputStateEmptyIgnoringPreviousState *empty = [[InputStateEmptyIgnoringPreviousState alloc] init]; stateCallback(empty); - } else if ([self isBuilderEmpty]) { + } + else if ([self isBuilderEmpty]) + { // 如果此時發現當前組字緩衝區為真空的情況的話,就將當前的組字緩衝區析構處理、強制重設輸入狀態。 // 不然的話,一個本不該出現的真空組字緩衝區會使前後方向鍵與 BackSpace 鍵失靈。 [self clear]; InputStateEmptyIgnoringPreviousState *empty = [[InputStateEmptyIgnoringPreviousState alloc] init]; stateCallback(empty); - } else { + } + else + { InputStateInputting *inputting = (InputStateInputting *)[self buildInputtingState]; stateCallback(inputting); } return YES; } - if ([input isEnter]) { - if ([state isKindOfClass: [InputStateAssociatedPhrases class]]) { + if ([input isEnter]) + { + if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) + { [self clear]; InputStateEmptyIgnoringPreviousState *empty = [[InputStateEmptyIgnoringPreviousState alloc] init]; stateCallback(empty); return YES; } - [self.delegate keyHandler:self didSelectCandidateAtIndex:gCurrentCandidateController.selectedCandidateIndex candidateController:gCurrentCandidateController]; + [self.delegate keyHandler:self + didSelectCandidateAtIndex:gCurrentCandidateController.selectedCandidateIndex + candidateController:gCurrentCandidateController]; return YES; } - if ([input isTab]) { - BOOL updated = - mgrPrefs.specifyTabKeyBehavior? - ([input isShiftHold] ? [gCurrentCandidateController showPreviousPage] : [gCurrentCandidateController showNextPage]) - : - ([input isShiftHold] ? [gCurrentCandidateController highlightPreviousCandidate] : [gCurrentCandidateController highlightNextCandidate]) - ; - if (!updated) { + if ([input isTab]) + { + BOOL updated = mgrPrefs.specifyTabKeyBehavior + ? ([input isShiftHold] ? [gCurrentCandidateController showPreviousPage] + : [gCurrentCandidateController showNextPage]) + : ([input isShiftHold] ? [gCurrentCandidateController highlightPreviousCandidate] + : [gCurrentCandidateController highlightNextCandidate]); + if (!updated) + { [IME prtDebugIntel:@"9B691919"]; errorCallback(); } return YES; } - if ([input isSpace]) { - BOOL updated = - mgrPrefs.specifySpaceKeyBehavior? - ([input isShiftHold] ? [gCurrentCandidateController highlightNextCandidate] : [gCurrentCandidateController showNextPage]) - : - ([input isShiftHold] ? [gCurrentCandidateController showNextPage] : [gCurrentCandidateController highlightNextCandidate]) - ; - if (!updated) { + if ([input isSpace]) + { + BOOL updated = mgrPrefs.specifySpaceKeyBehavior + ? ([input isShiftHold] ? [gCurrentCandidateController highlightNextCandidate] + : [gCurrentCandidateController showNextPage]) + : ([input isShiftHold] ? [gCurrentCandidateController showNextPage] + : [gCurrentCandidateController highlightNextCandidate]); + if (!updated) + { [IME prtDebugIntel:@"A11C781F"]; errorCallback(); } return YES; } - if ([input isPageDown] || input.emacsKey == vChewingEmacsKeyNextPage) { + if ([input isPageDown] || input.emacsKey == vChewingEmacsKeyNextPage) + { BOOL updated = [gCurrentCandidateController showNextPage]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"9B691919"]; errorCallback(); } return YES; } - if ([input isPageUp]) { + if ([input isPageUp]) + { BOOL updated = [gCurrentCandidateController showPreviousPage]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"9569955D"]; errorCallback(); } return YES; } - if ([input isLeft]) { - if ([gCurrentCandidateController isKindOfClass:[VTHorizontalCandidateController class]]) { + if ([input isLeft]) + { + if ([gCurrentCandidateController isKindOfClass:[VTHorizontalCandidateController class]]) + { BOOL updated = [gCurrentCandidateController highlightPreviousCandidate]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"1145148D"]; errorCallback(); } - } else { + } + else + { BOOL updated = [gCurrentCandidateController showPreviousPage]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"1919810D"]; errorCallback(); } @@ -1101,25 +1386,33 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } - if (input.emacsKey == vChewingEmacsKeyBackward) { + if (input.emacsKey == vChewingEmacsKeyBackward) + { BOOL updated = [gCurrentCandidateController highlightPreviousCandidate]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"9B89308D"]; errorCallback(); } return YES; } - if ([input isRight]) { - if ([gCurrentCandidateController isKindOfClass:[VTHorizontalCandidateController class]]) { + if ([input isRight]) + { + if ([gCurrentCandidateController isKindOfClass:[VTHorizontalCandidateController class]]) + { BOOL updated = [gCurrentCandidateController highlightNextCandidate]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"9B65138D"]; errorCallback(); } - } else { + } + else + { BOOL updated = [gCurrentCandidateController showNextPage]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"9244908D"]; errorCallback(); } @@ -1127,25 +1420,33 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } - if (input.emacsKey == vChewingEmacsKeyForward) { + if (input.emacsKey == vChewingEmacsKeyForward) + { BOOL updated = [gCurrentCandidateController highlightNextCandidate]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"9B2428D"]; errorCallback(); } return YES; } - if ([input isUp]) { - if ([gCurrentCandidateController isKindOfClass:[VTHorizontalCandidateController class]]) { + if ([input isUp]) + { + if ([gCurrentCandidateController isKindOfClass:[VTHorizontalCandidateController class]]) + { BOOL updated = [gCurrentCandidateController showPreviousPage]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"9B614524"]; errorCallback(); } - } else { + } + else + { BOOL updated = [gCurrentCandidateController highlightPreviousCandidate]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"ASD9908D"]; errorCallback(); } @@ -1153,16 +1454,22 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } - if ([input isDown]) { - if ([gCurrentCandidateController isKindOfClass:[VTHorizontalCandidateController class]]) { + if ([input isDown]) + { + if ([gCurrentCandidateController isKindOfClass:[VTHorizontalCandidateController class]]) + { BOOL updated = [gCurrentCandidateController showNextPage]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"92B990DD"]; errorCallback(); } - } else { + } + else + { BOOL updated = [gCurrentCandidateController highlightNextCandidate]; - if (!updated) { + if (!updated) + { [IME prtDebugIntel:@"6B99908D"]; errorCallback(); } @@ -1170,11 +1477,15 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return YES; } - if ([input isHome] || input.emacsKey == vChewingEmacsKeyHome) { - if (gCurrentCandidateController.selectedCandidateIndex == 0) { + if ([input isHome] || input.emacsKey == vChewingEmacsKeyHome) + { + if (gCurrentCandidateController.selectedCandidateIndex == 0) + { [IME prtDebugIntel:@"9B6EDE8D"]; errorCallback(); - } else { + } + else + { gCurrentCandidateController.selectedCandidateIndex = 0; } @@ -1183,89 +1494,124 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; NSArray *candidates; - if ([state isKindOfClass: [InputStateChoosingCandidate class]]) { + if ([state isKindOfClass:[InputStateChoosingCandidate class]]) + { candidates = [(InputStateChoosingCandidate *)state candidates]; - } else if ([state isKindOfClass: [InputStateAssociatedPhrases class]]) { + } + else if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) + { candidates = [(InputStateAssociatedPhrases *)state candidates]; } - if (!candidates) { + if (!candidates) + { return NO; } - if (([input isEnd] || input.emacsKey == vChewingEmacsKeyEnd) && candidates.count > 0) { - if (gCurrentCandidateController.selectedCandidateIndex == candidates.count - 1) { + if (([input isEnd] || input.emacsKey == vChewingEmacsKeyEnd) && candidates.count > 0) + { + if (gCurrentCandidateController.selectedCandidateIndex == candidates.count - 1) + { [IME prtDebugIntel:@"9B69AAAD"]; errorCallback(); - } else { + } + else + { gCurrentCandidateController.selectedCandidateIndex = candidates.count - 1; } return YES; } - if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) { - if (![input isShiftHold]) { + if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) + { + if (![input isShiftHold]) + { return NO; } } NSInteger index = NSNotFound; NSString *match; - if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) { + if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) + { match = input.inputTextIgnoringModifiers; - } else { + } + else + { match = inputText; } - for (NSUInteger j = 0, c = [gCurrentCandidateController.keyLabels count]; j < c; j++) { + for (NSUInteger j = 0, c = [gCurrentCandidateController.keyLabels count]; j < c; j++) + { VTCandidateKeyLabel *label = gCurrentCandidateController.keyLabels[j]; - if ([match compare:label.key options:NSCaseInsensitiveSearch] == NSOrderedSame) { + if ([match compare:label.key options:NSCaseInsensitiveSearch] == NSOrderedSame) + { index = j; break; } } - if (index != NSNotFound) { + if (index != NSNotFound) + { NSUInteger candidateIndex = [gCurrentCandidateController candidateIndexAtKeyLabelIndex:index]; - if (candidateIndex != NSUIntegerMax) { - [self.delegate keyHandler:self didSelectCandidateAtIndex:candidateIndex candidateController:gCurrentCandidateController]; + if (candidateIndex != NSUIntegerMax) + { + [self.delegate keyHandler:self + didSelectCandidateAtIndex:candidateIndex + candidateController:gCurrentCandidateController]; return YES; } } - if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) { + if ([state isKindOfClass:[InputStateAssociatedPhrases class]]) + { return NO; } - if (mgrPrefs.useSCPCTypingMode) { + if (mgrPrefs.useSCPCTypingMode) + { std::string layout = [self _currentLayout]; std::string punctuationNamePrefix; - if ([input isOptionHold]) { + if ([input isOptionHold]) + { punctuationNamePrefix = std::string("_alt_punctuation_"); - } else if ([input isControlHold]) { + } + else if ([input isControlHold]) + { punctuationNamePrefix = std::string("_ctrl_punctuation_"); - } else if (mgrPrefs.halfWidthPunctuationEnabled) { + } + else if (mgrPrefs.halfWidthPunctuationEnabled) + { punctuationNamePrefix = std::string("_half_punctuation_"); - } else { + } + else + { punctuationNamePrefix = std::string("_punctuation_"); } - std::string customPunctuation = punctuationNamePrefix + layout + std::string(1, (char) charCode); - std::string punctuation = punctuationNamePrefix + std::string(1, (char) charCode); + std::string customPunctuation = punctuationNamePrefix + layout + std::string(1, (char)charCode); + std::string punctuation = punctuationNamePrefix + std::string(1, (char)charCode); - BOOL shouldAutoSelectCandidate = _bpmfReadingBuffer->isValidKey((char) charCode) || _languageModel->hasUnigramsForKey(customPunctuation) || - _languageModel->hasUnigramsForKey(punctuation); + BOOL shouldAutoSelectCandidate = _bpmfReadingBuffer->isValidKey((char)charCode) || + _languageModel->hasUnigramsForKey(customPunctuation) || + _languageModel->hasUnigramsForKey(punctuation); - if (!shouldAutoSelectCandidate && [input isUpperCaseASCIILetterKey]) { - std::string letter = std::string("_letter_") + std::string(1, (char) charCode); - if (_languageModel->hasUnigramsForKey(letter)) { + if (!shouldAutoSelectCandidate && [input isUpperCaseASCIILetterKey]) + { + std::string letter = std::string("_letter_") + std::string(1, (char)charCode); + if (_languageModel->hasUnigramsForKey(letter)) + { shouldAutoSelectCandidate = YES; } } - if (shouldAutoSelectCandidate) { + if (shouldAutoSelectCandidate) + { NSUInteger candidateIndex = [gCurrentCandidateController candidateIndexAtKeyLabelIndex:0]; - if (candidateIndex != NSUIntegerMax) { - [self.delegate keyHandler:self didSelectCandidateAtIndex:candidateIndex candidateController:gCurrentCandidateController]; + if (candidateIndex != NSUIntegerMax) + { + [self.delegate keyHandler:self + didSelectCandidateAtIndex:candidateIndex + candidateController:gCurrentCandidateController]; [self clear]; InputStateEmptyIgnoringPreviousState *empty = [[InputStateEmptyIgnoringPreviousState alloc] init]; stateCallback(empty); @@ -1297,8 +1643,11 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; // we must do some Unicode codepoint counting to find the actual cursor location for the client // i.e. we need to take UTF-16 into consideration, for which a surrogate pair takes 2 UniChars // locations - for (std::vector::iterator wi = _walkedNodes.begin(), we = _walkedNodes.end(); wi != we; ++wi) { - if ((*wi).node) { + for (std::vector::iterator wi = _walkedNodes.begin(), we = _walkedNodes.end(); wi != we; + ++wi) + { + if ((*wi).node) + { std::string nodeStr = (*wi).node->currentKeyValue().value; NSString *valueString = [NSString stringWithUTF8String:nodeStr.c_str()]; [composingBuffer appendString:valueString]; @@ -1313,32 +1662,57 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; // accumulate those lengths to calculate the displayed cursor // index size_t spanningLength = (*wi).spanningLength; - if (readingCursorIndex + spanningLength <= builderCursorIndex) { + if (readingCursorIndex + spanningLength <= builderCursorIndex) + { composedStringCursorIndex += [valueString length]; readingCursorIndex += spanningLength; - } else { - if (codepointCount == spanningLength) { - for (size_t i = 0; i < codepointCount && readingCursorIndex < builderCursorIndex; i++) { + } + else + { + if (codepointCount == spanningLength) + { + for (size_t i = 0; i < codepointCount && readingCursorIndex < builderCursorIndex; i++) + { composedStringCursorIndex += [splited[i] length]; readingCursorIndex++; } - } else { - if (readingCursorIndex < builderCursorIndex) { + } + else + { + if (readingCursorIndex < builderCursorIndex) + { composedStringCursorIndex += [valueString length]; readingCursorIndex += spanningLength; - if (readingCursorIndex > builderCursorIndex) { + if (readingCursorIndex > builderCursorIndex) + { readingCursorIndex = builderCursorIndex; } - if (builderCursorIndex == 0) { - tooltip = [NSString stringWithFormat:NSLocalizedString(@"Cursor is before \"%@\".", @""), - [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex].c_str()]]; - } else if (builderCursorIndex >= _builder->readings().size()) { - tooltip = [NSString stringWithFormat:NSLocalizedString(@"Cursor is after \"%@\".", @""), - [NSString stringWithUTF8String:_builder->readings()[_builder->readings().size() - 1].c_str()]]; - } else { - tooltip = [NSString stringWithFormat:NSLocalizedString(@"Cursor is between \"%@\" and \"%@\".", @""), - [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex - 1].c_str()], - [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex].c_str()]]; + if (builderCursorIndex == 0) + { + tooltip = [NSString + stringWithFormat:NSLocalizedString(@"Cursor is before \"%@\".", @""), + [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex] + .c_str()]]; + } + else if (builderCursorIndex >= _builder->readings().size()) + { + tooltip = [NSString + stringWithFormat:NSLocalizedString(@"Cursor is after \"%@\".", @""), + [NSString + stringWithUTF8String:_builder + ->readings()[_builder->readings().size() - + 1] + .c_str()]]; + } + else + { + tooltip = [NSString + stringWithFormat:NSLocalizedString(@"Cursor is between \"%@\" and \"%@\".", @""), + [NSString + stringWithUTF8String:_builder->readings()[builderCursorIndex - 1] + .c_str()], + [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex] + .c_str()]]; } } } @@ -1355,7 +1729,8 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; NSString *composedText = [head stringByAppendingString:[reading stringByAppendingString:tail]]; NSInteger cursorIndex = composedStringCursorIndex + [reading length]; - InputStateInputting *newState = [[InputStateInputting alloc] initWithComposingBuffer:composedText cursorIndex:cursorIndex]; + InputStateInputting *newState = [[InputStateInputting alloc] initWithComposingBuffer:composedText + cursorIndex:cursorIndex]; newState.tooltip = tooltip; return newState; } @@ -1379,7 +1754,10 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; NSString *dotStr = [NSString stringWithUTF8String:dotDump.c_str()]; NSError *error = nil; - BOOL __unused success = [dotStr writeToFile:kGraphVizOutputfile atomically:YES encoding:NSUTF8StringEncoding error:&error]; + BOOL __unused success = [dotStr writeToFile:kGraphVizOutputfile + atomically:YES + encoding:NSUTF8StringEncoding + error:&error]; #endif } @@ -1396,8 +1774,10 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; NSString *poppedText = @""; NSInteger composingBufferSize = mgrPrefs.composingBufferSize; - if (_builder->grid().width() > (size_t) composingBufferSize) { - if (_walkedNodes.size() > 0) { + if (_builder->grid().width() > (size_t)composingBufferSize) + { + if (_walkedNodes.size() > 0) + { Gramambular::NodeAnchor &anchor = _walkedNodes[0]; poppedText = [NSString stringWithUTF8String:anchor.node->currentKeyValue().value.c_str()]; _builder->removeHeadReadings(anchor.spanningLength); @@ -1408,7 +1788,8 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return poppedText; } -- (InputStateChoosingCandidate *)_buildCandidateState:(InputStateNotEmpty *)currentState useVerticalMode:(BOOL)useVerticalMode +- (InputStateChoosingCandidate *)_buildCandidateState:(InputStateNotEmpty *)currentState + useVerticalMode:(BOOL)useVerticalMode { NSMutableArray *candidatesArray = [[NSMutableArray alloc] init]; @@ -1419,14 +1800,21 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; stable_sort(nodes.begin(), nodes.end(), NodeAnchorDescendingSorter()); // then use the C++ trick to retrieve the candidates for each node at/crossing the cursor - for (std::vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) { + for (std::vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) + { const std::vector &candidates = (*ni).node->candidates(); - for (std::vector::const_iterator ci = candidates.begin(), ce = candidates.end(); ci != ce; ++ci) { + for (std::vector::const_iterator ci = candidates.begin(), ce = candidates.end(); + ci != ce; ++ci) + { [candidatesArray addObject:[NSString stringWithUTF8String:(*ci).value.c_str()]]; } } - InputStateChoosingCandidate *state = [[InputStateChoosingCandidate alloc] initWithComposingBuffer:currentState.composingBuffer cursorIndex:currentState.cursorIndex candidates:candidatesArray useVerticalMode:useVerticalMode]; + InputStateChoosingCandidate *state = + [[InputStateChoosingCandidate alloc] initWithComposingBuffer:currentState.composingBuffer + cursorIndex:currentState.cursorIndex + candidates:candidatesArray + useVerticalMode:useVerticalMode]; return state; } @@ -1434,8 +1822,8 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; { size_t cursorIndex = _builder->cursorIndex(); // MS Phonetics IME style, phrase is *after* the cursor, i.e. cursor is always *before* the phrase - if ((mgrPrefs.selectPhraseAfterCursorAsCandidate && (cursorIndex < _builder->length())) - || !cursorIndex) { + if ((mgrPrefs.selectPhraseAfterCursorAsCandidate && (cursorIndex < _builder->length())) || !cursorIndex) + { ++cursorIndex; } @@ -1446,7 +1834,8 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; { NSMutableArray *readingsArray = [[NSMutableArray alloc] init]; std::vector v = _builder->readings(); - for (std::vector::iterator it_i = v.begin(); it_i != v.end(); ++it_i) { + for (std::vector::iterator it_i = v.begin(); it_i != v.end(); ++it_i) + { [readingsArray addObject:[NSString stringWithUTF8String:it_i->c_str()]]; } return readingsArray; @@ -1455,14 +1844,17 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; - (nullable InputState *)buildAssociatePhraseStateWithKey:(NSString *)key useVerticalMode:(BOOL)useVerticalMode { std::string cppKey = std::string(key.UTF8String); - if (_languageModel->hasAssociatedPhrasesForKey(cppKey)) { + if (_languageModel->hasAssociatedPhrasesForKey(cppKey)) + { std::vector phrases = _languageModel->associatedPhrasesForKey(cppKey); - NSMutableArray *array = [NSMutableArray array]; - for (auto phrase: phrases) { + NSMutableArray *array = [NSMutableArray array]; + for (auto phrase : phrases) + { NSString *item = [[NSString alloc] initWithUTF8String:phrase.c_str()]; [array addObject:item]; } - InputStateAssociatedPhrases *associatedPhrases = [[InputStateAssociatedPhrases alloc] initWithCandidates:array useVerticalMode:useVerticalMode]; + InputStateAssociatedPhrases *associatedPhrases = + [[InputStateAssociatedPhrases alloc] initWithCandidates:array useVerticalMode:useVerticalMode]; return associatedPhrases; } return nil; diff --git a/Source/Modules/ControllerModules/KeyValueBlobReader.cpp b/Source/Modules/ControllerModules/KeyValueBlobReader.cpp index 68d4f13b..eee32bbf 100644 --- a/Source/Modules/ControllerModules/KeyValueBlobReader.cpp +++ b/Source/Modules/ControllerModules/KeyValueBlobReader.cpp @@ -1,55 +1,67 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "KeyValueBlobReader.h" -namespace vChewing { +namespace vChewing +{ -KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out) +KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue *out) { static auto new_line = [](char c) { return c == '\n' || c == '\r'; }; static auto blank = [](char c) { return c == ' ' || c == '\t'; }; - static auto blank_or_newline - = [](char c) { return blank(c) || new_line(c); }; + static auto blank_or_newline = [](char c) { return blank(c) || new_line(c); }; static auto content_char = [](char c) { return !blank(c) && !new_line(c); }; - if (state_ == State::ERROR) { + if (state_ == State::ERROR) + { return state_; } - const char* key_begin = nullptr; + const char *key_begin = nullptr; size_t key_length = 0; - const char* value_begin = nullptr; + const char *value_begin = nullptr; size_t value_length = 0; - while (true) { + while (true) + { state_ = SkipUntilNot(blank_or_newline); - if (state_ != State::CAN_CONTINUE) { + if (state_ != State::CAN_CONTINUE) + { return state_; } // Check if it's a comment line; if so, read until end of line. - if (*current_ != '#') { + if (*current_ != '#') + { break; } state_ = SkipUntil(new_line); - if (state_ != State::CAN_CONTINUE) { + if (state_ != State::CAN_CONTINUE) + { return state_; } } @@ -59,22 +71,26 @@ KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out) key_begin = current_; state_ = SkipUntilNot(content_char); - if (state_ != State::CAN_CONTINUE) { + if (state_ != State::CAN_CONTINUE) + { goto error; } key_length = current_ - key_begin; // There should be at least one blank character after the key string. - if (!blank(*current_)) { + if (!blank(*current_)) + { goto error; } state_ = SkipUntilNot(blank); - if (state_ != State::CAN_CONTINUE) { + if (state_ != State::CAN_CONTINUE) + { goto error; } - if (!content_char(*current_)) { + if (!content_char(*current_)) + { goto error; } @@ -90,9 +106,9 @@ KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out) // like "foo bar baz\n" where baz should not be treated as the Next key. SkipUntil(new_line); - if (out != nullptr) { - *out = KeyValue { std::string_view { key_begin, key_length }, - std::string_view { value_begin, value_length } }; + if (out != nullptr) + { + *out = KeyValue{std::string_view{key_begin, key_length}, std::string_view{value_begin, value_length}}; } state_ = State::HAS_PAIR; return state_; @@ -102,11 +118,12 @@ error: return state_; } -KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot( - const std::function& f) +KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(const std::function &f) { - while (current_ != end_ && *current_) { - if (!f(*current_)) { + while (current_ != end_ && *current_) + { + if (!f(*current_)) + { return State::CAN_CONTINUE; } ++current_; @@ -115,11 +132,12 @@ KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot( return State::END; } -KeyValueBlobReader::State KeyValueBlobReader::SkipUntil( - const std::function& f) +KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(const std::function &f) { - while (current_ != end_ && *current_) { - if (f(*current_)) { + while (current_ != end_ && *current_) + { + if (f(*current_)) + { return State::CAN_CONTINUE; } ++current_; @@ -128,8 +146,7 @@ KeyValueBlobReader::State KeyValueBlobReader::SkipUntil( return State::END; } -std::ostream& operator<<( - std::ostream& os, const KeyValueBlobReader::KeyValue& kv) +std::ostream &operator<<(std::ostream &os, const KeyValueBlobReader::KeyValue &kv) { os << "(key: " << kv.key << ", value: " << kv.value << ")"; return os; diff --git a/Source/Modules/ControllerModules/KeyValueBlobReader.h b/Source/Modules/ControllerModules/KeyValueBlobReader.h index 5973a34e..8ca313be 100644 --- a/Source/Modules/ControllerModules/KeyValueBlobReader.h +++ b/Source/Modules/ControllerModules/KeyValueBlobReader.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_ @@ -39,11 +46,14 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH // std::string_view is used to allow returning results efficiently. As a result, // the blob is a const char* and will never be mutated. This implies, for // example, read-only mmap can be used to parse large files. -namespace vChewing { +namespace vChewing +{ -class KeyValueBlobReader { -public: - enum class State : int { +class KeyValueBlobReader +{ + public: + enum class State : int + { // There are no more key-value pairs in this blob. END = 0, // The reader has produced a new key-value pair. @@ -54,19 +64,16 @@ public: CAN_CONTINUE = 2 }; - struct KeyValue { - constexpr KeyValue() - : key("") - , value("") + struct KeyValue + { + constexpr KeyValue() : key(""), value("") { } - constexpr KeyValue(std::string_view k, std::string_view v) - : key(k) - , value(v) + constexpr KeyValue(std::string_view k, std::string_view v) : key(k), value(v) { } - bool operator==(const KeyValue& another) const + bool operator==(const KeyValue &another) const { return key == another.key && value == another.value; } @@ -75,27 +82,25 @@ public: std::string_view value; }; - KeyValueBlobReader(const char* blob, size_t size) - : current_(blob) - , end_(blob + size) + KeyValueBlobReader(const char *blob, size_t size) : current_(blob), end_(blob + size) { } // Parse the next key-value pair and return the state of the reader. If // `out` is passed, out will be set to the produced key-value pair if there // is one. - State Next(KeyValue* out = nullptr); + State Next(KeyValue *out = nullptr); -private: - State SkipUntil(const std::function& f); - State SkipUntilNot(const std::function& f); + private: + State SkipUntil(const std::function &f); + State SkipUntilNot(const std::function &f); - const char* current_; - const char* end_; + const char *current_; + const char *end_; State state_ = State::CAN_CONTINUE; }; -std::ostream& operator<<(std::ostream&, const KeyValueBlobReader::KeyValue&); +std::ostream &operator<<(std::ostream &, const KeyValueBlobReader::KeyValue &); } // namespace vChewing diff --git a/Source/Modules/FileHandlers/LMConsolidator.h b/Source/Modules/FileHandlers/LMConsolidator.h index 38b25ae8..9bda0d9e 100644 --- a/Source/Modules/FileHandlers/LMConsolidator.h +++ b/Source/Modules/FileHandlers/LMConsolidator.h @@ -1,40 +1,47 @@ // Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef LMConsolidator_hpp #define LMConsolidator_hpp -#include -#include #include -#include #include -#include #include -#include #include +#include +#include +#include +#include +#include using namespace std; -namespace vChewing { +namespace vChewing +{ class LMConsolidator { -public: + public: static bool CheckPragma(const char *path); static bool FixEOF(const char *path); static bool ConsolidateContent(const char *path, bool shouldCheckPragma); diff --git a/Source/Modules/FileHandlers/LMConsolidator.mm b/Source/Modules/FileHandlers/LMConsolidator.mm index 8a9a7b4a..0843e93d 100644 --- a/Source/Modules/FileHandlers/LMConsolidator.mm +++ b/Source/Modules/FileHandlers/LMConsolidator.mm @@ -1,28 +1,35 @@ // Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "LMConsolidator.h" #include "vChewing-Swift.h" -namespace vChewing { +namespace vChewing +{ -constexpr std::string_view FORMATTED_PRAGMA_HEADER - = "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍"; +constexpr std::string_view FORMATTED_PRAGMA_HEADER = + "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍"; // HEADER VERIFIER. CREDIT: Shiki Suen bool LMConsolidator::CheckPragma(const char *path) @@ -32,13 +39,17 @@ bool LMConsolidator::CheckPragma(const char *path) { string firstLine; getline(zfdCheckPragma, firstLine); - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str()); - if (firstLine != FORMATTED_PRAGMA_HEADER) { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS."); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str()); + if (firstLine != FORMATTED_PRAGMA_HEADER) + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS."); return false; } } - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL."); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL."); return true; } @@ -46,58 +57,76 @@ bool LMConsolidator::CheckPragma(const char *path) bool LMConsolidator::FixEOF(const char *path) { std::fstream zfdEOFFixerIncomingStream(path); - zfdEOFFixerIncomingStream.seekg(-1,std::ios_base::end); + zfdEOFFixerIncomingStream.seekg(-1, std::ios_base::end); char z; zfdEOFFixerIncomingStream.get(z); - if(z!='\n'){ - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n"); - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path); - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n"); + if (z != '\n') + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n"); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "// DATA FILE: %s", path); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n"); std::ofstream zfdEOFFixerOutput(path, std::ios_base::app); zfdEOFFixerOutput << std::endl; zfdEOFFixerOutput.close(); - if (zfdEOFFixerOutput.fail()) { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path); + if (zfdEOFFixerOutput.fail()) + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n"); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "// DATA FILE: %s", path); return false; } } zfdEOFFixerIncomingStream.close(); - if (zfdEOFFixerIncomingStream.fail()) { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path); + if (zfdEOFFixerIncomingStream.fail()) + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, + "// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n"); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "// DATA FILE: %s", path); return false; } return true; } // END: EOF FIXER. // CONTENT CONSOLIDATOR. CREDIT: Shiki Suen. -bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma) { +bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma) +{ bool pragmaCheckResult = LMConsolidator::CheckPragma(path); - if (pragmaCheckResult && shouldCheckPragma){ + if (pragmaCheckResult && shouldCheckPragma) + { return true; } ifstream zfdContentConsolidatorIncomingStream(path); - vectorvecEntry; - while(!zfdContentConsolidatorIncomingStream.eof()) + vector vecEntry; + while (!zfdContentConsolidatorIncomingStream.eof()) { // Xcode 13 能用的 ObjCpp 與 Cpp 並無原生支援「\h」這個 Regex 參數的能力,只能逐行處理。 string zfdBuffer; - getline(zfdContentConsolidatorIncomingStream,zfdBuffer); + getline(zfdContentConsolidatorIncomingStream, zfdBuffer); vecEntry.push_back(zfdBuffer); } // 第一遍 for 用來統整每行內的內容。 - // regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp / objCpp 可能對某些 Regex 寫法有相容性問題。 - // regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。 + // regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"), + // sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp / + // objCpp 可能對某些 Regex 寫法有相容性問題。 regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "), + // sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。 regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)"); - for(int i=0;i #include -namespace vChewing { +namespace vChewing +{ using namespace Gramambular; @@ -57,58 +65,59 @@ using namespace Gramambular; /// model while launching and to load the user phrases anytime if the custom /// files are modified. It does not keep the reference of the data pathes but /// you have to pass the paths when you ask it to do loading. -class LMInstantiator : public Gramambular::LanguageModel { -public: +class LMInstantiator : public Gramambular::LanguageModel +{ + public: LMInstantiator(); ~LMInstantiator(); /// Asks to load the primary language model at the given path. /// @param languageModelPath The path of the language model. - void loadLanguageModel(const char* languageModelPath); + void loadLanguageModel(const char *languageModelPath); /// If the data model is already loaded. bool isDataModelLoaded(); /// Asks to load the primary language model at the given path. /// @param miscDataPath The path of the misc data model. - void loadMiscData(const char* miscDataPath); + void loadMiscData(const char *miscDataPath); /// If the data model is already loaded. bool isMiscDataLoaded(); /// Asks to load the primary language model at the given path. /// @param symbolDataPath The path of the symbol data model. - void loadSymbolData(const char* symbolDataPath); + void loadSymbolData(const char *symbolDataPath); /// If the data model is already loaded. bool isSymbolDataLoaded(); /// Asks to load the primary language model at the given path. /// @param cnsDataPath The path of the CNS data model. - void loadCNSData(const char* cnsDataPath); + void loadCNSData(const char *cnsDataPath); /// If the data model is already loaded. bool isCNSDataLoaded(); /// Asks to load the user phrases and excluded phrases at the given path. /// @param userPhrasesPath The path of user phrases. /// @param excludedPhrasesPath The path of excluded phrases. - void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath); + void loadUserPhrases(const char *userPhrasesPath, const char *excludedPhrasesPath); /// Asks to load the user symbol data at the given path. /// @param userSymbolDataPath The path of user symbol data. - void loadUserSymbolData(const char* userPhrasesPath); + void loadUserSymbolData(const char *userPhrasesPath); /// Asks to load the user associated phrases at the given path. /// @param userAssociatedPhrasesPath The path of the user associated phrases. - void loadUserAssociatedPhrases(const char* userAssociatedPhrasesPath); + void loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath); /// Asks to load the phrase replacement table at the given path. /// @param phraseReplacementPath The path of the phrase replacement table. - void loadPhraseReplacementMap(const char* phraseReplacementPath); + void loadPhraseReplacementMap(const char *phraseReplacementPath); /// Not implemented since we do not have data to provide bigram function. - const std::vector bigramsForKeys(const std::string& preceedingKey, const std::string& key); + const std::vector bigramsForKeys(const std::string &preceedingKey, const std::string &key); /// Returns a list of available unigram for the given key. /// @param key A std::string represents the BPMF reading or a symbol key. For /// example, it you pass "ㄇㄚ", it returns "嗎", "媽", and so on. - const std::vector unigramsForKey(const std::string& key); + const std::vector unigramsForKey(const std::string &key); /// If the model has unigrams for the given key. /// @param key The key. - bool hasUnigramsForKey(const std::string& key); + bool hasUnigramsForKey(const std::string &key); /// Enables or disables phrase replacement. void setPhraseReplacementEnabled(bool enabled); @@ -125,21 +134,20 @@ public: /// If CNS11643 input is enabled or not. bool cnsEnabled(); - const std::vector associatedPhrasesForKey(const std::string& key); - bool hasAssociatedPhrasesForKey(const std::string& key); + const std::vector associatedPhrasesForKey(const std::string &key); + bool hasAssociatedPhrasesForKey(const std::string &key); - -protected: + protected: /// Filters and converts the input unigrams and return a new list of unigrams. - /// + /// /// @param unigrams The unigrams to be processed. /// @param excludedValues The values to excluded unigrams. /// @param insertedValues The values for unigrams already in the results. /// It helps to prevent duplicated unigrams. Please note that the method /// has a side effect that it inserts values to `insertedValues`. - const std::vector filterAndTransformUnigrams(const std::vector unigrams, - const std::unordered_set& excludedValues, - std::unordered_set& insertedValues); + const std::vector filterAndTransformUnigrams( + const std::vector unigrams, const std::unordered_set &excludedValues, + std::unordered_set &insertedValues); ParselessLM m_languageModel; CoreLM m_miscModel; @@ -154,6 +162,6 @@ protected: bool m_cnsEnabled; bool m_symbolEnabled; }; -}; +}; // namespace vChewing #endif diff --git a/Source/Modules/LangModelRelated/LMInstantiator.mm b/Source/Modules/LangModelRelated/LMInstantiator.mm index 57f88d7a..2873cbf2 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.mm +++ b/Source/Modules/LangModelRelated/LMInstantiator.mm @@ -1,27 +1,35 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "LMInstantiator.h" #include #include -namespace vChewing { +namespace vChewing +{ LMInstantiator::LMInstantiator() { @@ -39,9 +47,10 @@ LMInstantiator::~LMInstantiator() m_associatedPhrases.close(); } -void LMInstantiator::loadLanguageModel(const char* languageModelDataPath) +void LMInstantiator::loadLanguageModel(const char *languageModelDataPath) { - if (languageModelDataPath) { + if (languageModelDataPath) + { m_languageModel.close(); m_languageModel.open(languageModelDataPath); } @@ -52,9 +61,10 @@ bool LMInstantiator::isDataModelLoaded() return m_languageModel.isLoaded(); } -void LMInstantiator::loadCNSData(const char* cnsDataPath) +void LMInstantiator::loadCNSData(const char *cnsDataPath) { - if (cnsDataPath) { + if (cnsDataPath) + { m_cnsModel.close(); m_cnsModel.open(cnsDataPath); } @@ -65,9 +75,10 @@ bool LMInstantiator::isCNSDataLoaded() return m_cnsModel.isLoaded(); } -void LMInstantiator::loadMiscData(const char* miscDataPath) +void LMInstantiator::loadMiscData(const char *miscDataPath) { - if (miscDataPath) { + if (miscDataPath) + { m_miscModel.close(); m_miscModel.open(miscDataPath); } @@ -78,9 +89,10 @@ bool LMInstantiator::isMiscDataLoaded() return m_miscModel.isLoaded(); } -void LMInstantiator::loadSymbolData(const char* symbolDataPath) +void LMInstantiator::loadSymbolData(const char *symbolDataPath) { - if (symbolDataPath) { + if (symbolDataPath) + { m_symbolModel.close(); m_symbolModel.open(symbolDataPath); } @@ -91,14 +103,15 @@ bool LMInstantiator::isSymbolDataLoaded() return m_symbolModel.isLoaded(); } -void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath, - const char* excludedPhrasesDataPath) +void LMInstantiator::loadUserPhrases(const char *userPhrasesDataPath, const char *excludedPhrasesDataPath) { - if (userPhrasesDataPath) { + if (userPhrasesDataPath) + { m_userPhrases.close(); m_userPhrases.open(userPhrasesDataPath); } - if (excludedPhrasesDataPath) { + if (excludedPhrasesDataPath) + { m_excludedPhrases.close(); m_excludedPhrases.open(excludedPhrasesDataPath); } @@ -106,7 +119,8 @@ void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath, void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath) { - if (userSymbolDataPath) { + if (userSymbolDataPath) + { m_userSymbolModel.close(); m_userSymbolModel.open(userSymbolDataPath); } @@ -114,28 +128,32 @@ void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath) void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath) { - if (userAssociatedPhrasesPath) { + if (userAssociatedPhrasesPath) + { m_associatedPhrases.close(); m_associatedPhrases.open(userAssociatedPhrasesPath); } } -void LMInstantiator::loadPhraseReplacementMap(const char* phraseReplacementPath) +void LMInstantiator::loadPhraseReplacementMap(const char *phraseReplacementPath) { - if (phraseReplacementPath) { + if (phraseReplacementPath) + { m_phraseReplacement.close(); m_phraseReplacement.open(phraseReplacementPath); } } -const std::vector LMInstantiator::bigramsForKeys(const std::string& preceedingKey, const std::string& key) +const std::vector LMInstantiator::bigramsForKeys(const std::string &preceedingKey, + const std::string &key) { return std::vector(); } -const std::vector LMInstantiator::unigramsForKey(const std::string& key) +const std::vector LMInstantiator::unigramsForKey(const std::string &key) { - if (key == " ") { + if (key == " ") + { std::vector spaceUnigrams; Gramambular::Unigram g; g.keyValue.key = " "; @@ -152,17 +170,18 @@ const std::vector LMInstantiator::unigramsForKey(const std std::vector userSymbolUnigrams; std::vector cnsUnigrams; - std::unordered_set excludedValues; - std::unordered_set insertedValues; + std::unordered_set excludedValues; + std::unordered_set insertedValues; - if (m_excludedPhrases.hasUnigramsForKey(key)) { + if (m_excludedPhrases.hasUnigramsForKey(key)) + { std::vector excludedUnigrams = m_excludedPhrases.unigramsForKey(key); - transform(excludedUnigrams.begin(), excludedUnigrams.end(), - inserter(excludedValues, excludedValues.end()), - [](const Gramambular::Unigram& u) { return u.keyValue.value; }); + transform(excludedUnigrams.begin(), excludedUnigrams.end(), inserter(excludedValues, excludedValues.end()), + [](const Gramambular::Unigram &u) { return u.keyValue.value; }); } - if (m_userPhrases.hasUnigramsForKey(key)) { + if (m_userPhrases.hasUnigramsForKey(key)) + { std::vector rawUserUnigrams = m_userPhrases.unigramsForKey(key); // 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。 // 這樣一來就可以在就地新增語彙時徹底複寫優先權。 @@ -170,27 +189,32 @@ const std::vector LMInstantiator::unigramsForKey(const std userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues); } - if (m_languageModel.hasUnigramsForKey(key)) { + if (m_languageModel.hasUnigramsForKey(key)) + { std::vector rawGlobalUnigrams = m_languageModel.unigramsForKey(key); allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues); } - if (m_miscModel.hasUnigramsForKey(key)) { + if (m_miscModel.hasUnigramsForKey(key)) + { std::vector rawMiscUnigrams = m_miscModel.unigramsForKey(key); miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues); } - if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled) { + if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled) + { std::vector rawSymbolUnigrams = m_symbolModel.unigramsForKey(key); symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues); } - if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled) { + if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled) + { std::vector rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key); userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues); } - if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) { + if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) + { std::vector rawCNSUnigrams = m_cnsModel.unigramsForKey(key); cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues); } @@ -203,13 +227,15 @@ const std::vector LMInstantiator::unigramsForKey(const std return allUnigrams; } -bool LMInstantiator::hasUnigramsForKey(const std::string& key) +bool LMInstantiator::hasUnigramsForKey(const std::string &key) { - if (key == " ") { + if (key == " ") + { return true; } - if (!m_excludedPhrases.hasUnigramsForKey(key)) { + if (!m_excludedPhrases.hasUnigramsForKey(key)) + { return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key); } @@ -246,26 +272,33 @@ bool LMInstantiator::symbolEnabled() return m_symbolEnabled; } -const std::vector LMInstantiator::filterAndTransformUnigrams(const std::vector unigrams, const std::unordered_set& excludedValues, std::unordered_set& insertedValues) +const std::vector LMInstantiator::filterAndTransformUnigrams( + const std::vector unigrams, const std::unordered_set &excludedValues, + std::unordered_set &insertedValues) { std::vector results; - for (auto&& unigram : unigrams) { + for (auto &&unigram : unigrams) + { // excludedValues filters out the unigrams with the original value. // insertedValues filters out the ones with the converted value std::string originalValue = unigram.keyValue.value; - if (excludedValues.find(originalValue) != excludedValues.end()) { + if (excludedValues.find(originalValue) != excludedValues.end()) + { continue; } std::string value = originalValue; - if (m_phraseReplacementEnabled) { + if (m_phraseReplacementEnabled) + { std::string replacement = m_phraseReplacement.valueForKey(value); - if (replacement != "") { + if (replacement != "") + { value = replacement; } } - if (insertedValues.find(value) == insertedValues.end()) { + if (insertedValues.find(value) == insertedValues.end()) + { Gramambular::Unigram g; g.keyValue.value = value; g.keyValue.key = unigram.keyValue.key; @@ -277,12 +310,12 @@ const std::vector LMInstantiator::filterAndTransformUnigra return results; } -const std::vector LMInstantiator::associatedPhrasesForKey(const std::string& key) +const std::vector LMInstantiator::associatedPhrasesForKey(const std::string &key) { return m_associatedPhrases.valuesForKey(key); } -bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string& key) +bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string &key) { return m_associatedPhrases.hasValuesForKey(key); } diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h b/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h index 98978b2a..63f6aca1 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h @@ -1,47 +1,58 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef ASSOCIATEDPHRASES_H #define ASSOCIATEDPHRASES_H -#include -#include #include +#include +#include #include -namespace vChewing { +namespace vChewing +{ class AssociatedPhrases { -public: + public: AssociatedPhrases(); ~AssociatedPhrases(); const bool isLoaded(); bool open(const char *path); void close(); - const std::vector valuesForKey(const std::string& key); - const bool hasValuesForKey(const std::string& key); + const std::vector valuesForKey(const std::string &key); + const bool hasValuesForKey(const std::string &key); -protected: - struct Row { - Row(std::string_view& k, std::string_view& v) : key(k), value(v) {} + protected: + struct Row + { + Row(std::string_view &k, std::string_view &v) : key(k), value(v) + { + } std::string_view key; std::string_view value; }; @@ -53,6 +64,6 @@ protected: size_t length; }; -} +} // namespace vChewing #endif /* AssociatedPhrases_hpp */ diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm b/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm index 430df6e7..ac0f223e 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm +++ b/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm @@ -1,52 +1,59 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "AssociatedPhrases.h" #include "vChewing-Swift.h" -#include -#include #include #include +#include +#include #include #include "KeyValueBlobReader.h" #include "LMConsolidator.h" -namespace vChewing { +namespace vChewing +{ -AssociatedPhrases::AssociatedPhrases() -: fd(-1) -, data(0) -, length(0) +AssociatedPhrases::AssociatedPhrases() : fd(-1), data(0), length(0) { } AssociatedPhrases::~AssociatedPhrases() { - if (data) { + if (data) + { close(); } } const bool AssociatedPhrases::isLoaded() { - if (data) { + if (data) + { return true; } return false; @@ -54,7 +61,8 @@ const bool AssociatedPhrases::isLoaded() bool AssociatedPhrases::open(const char *path) { - if (data) { + if (data) + { return false; } @@ -62,13 +70,15 @@ bool AssociatedPhrases::open(const char *path) LMConsolidator::ConsolidateContent(path, true); fd = ::open(path, O_RDONLY); - if (fd == -1) { + if (fd == -1) + { printf("open:: file not exist"); return false; } struct stat sb; - if (fstat(fd, &sb) == -1) { + if (fstat(fd, &sb) == -1) + { printf("open:: cannot open file"); return false; } @@ -76,21 +86,25 @@ bool AssociatedPhrases::open(const char *path) length = (size_t)sb.st_size; data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) { + if (!data) + { ::close(fd); return false; } - KeyValueBlobReader reader(static_cast(data), length); + KeyValueBlobReader reader(static_cast(data), length); KeyValueBlobReader::KeyValue keyValue; KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { + while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) + { keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value); } // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) { + if (state == KeyValueBlobReader::State::ERROR) + { // close(); - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n"); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n"); // return false; } return true; @@ -98,7 +112,8 @@ bool AssociatedPhrases::open(const char *path) void AssociatedPhrases::close() { - if (data) { + if (data) + { munmap(data, length); ::close(fd); data = 0; @@ -107,13 +122,15 @@ void AssociatedPhrases::close() keyRowMap.clear(); } -const std::vector AssociatedPhrases::valuesForKey(const std::string& key) +const std::vector AssociatedPhrases::valuesForKey(const std::string &key) { std::vector v; auto iter = keyRowMap.find(key); - if (iter != keyRowMap.end()) { - const std::vector& rows = iter->second; - for (const auto& row : rows) { + if (iter != keyRowMap.end()) + { + const std::vector &rows = iter->second; + for (const auto &row : rows) + { std::string_view value = row.value; v.push_back({value.data(), value.size()}); } @@ -121,9 +138,9 @@ const std::vector AssociatedPhrases::valuesForKey(const std::string return v; } -const bool AssociatedPhrases::hasValuesForKey(const std::string& key) +const bool AssociatedPhrases::hasValuesForKey(const std::string &key) { return keyRowMap.find(key) != keyRowMap.end(); } -}; // namespace vChewing +}; // namespace vChewing diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h index 0beffc83..46625e74 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h @@ -1,30 +1,37 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef CoreLM_H #define CoreLM_H #include "LanguageModel.h" +#include +#include #include #include -#include -#include // this class relies on the fact that we have a space-separated data // format, and we use mmap and zero-out the separators and line feeds @@ -33,10 +40,12 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH using namespace std; using namespace Gramambular; -namespace vChewing { +namespace vChewing +{ -class CoreLM : public Gramambular::LanguageModel { -public: +class CoreLM : public Gramambular::LanguageModel +{ + public: CoreLM(); ~CoreLM(); @@ -45,20 +54,21 @@ public: void close(); void dump(); - virtual const std::vector bigramsForKeys(const string& preceedingKey, const string& key); - virtual const std::vector unigramsForKey(const string& key); - virtual bool hasUnigramsForKey(const string& key); + virtual const std::vector bigramsForKeys(const string &preceedingKey, const string &key); + virtual const std::vector unigramsForKey(const string &key); + virtual bool hasUnigramsForKey(const string &key); -protected: + protected: struct CStringCmp { - bool operator()(const char* s1, const char* s2) const + bool operator()(const char *s1, const char *s2) const { return strcmp(s1, s2) < 0; } }; - struct Row { + struct Row + { const char *key; const char *value; const char *logProbability; diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm b/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm index b9fae8fd..de24f821 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm +++ b/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm @@ -1,50 +1,56 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "CoreLM.h" -#include -#include +#include "vChewing-Swift.h" #include #include -#include +#include +#include #include -#include "vChewing-Swift.h" +#include using namespace Gramambular; -vChewing::CoreLM::CoreLM() - : fd(-1) - , data(0) - , length(0) +vChewing::CoreLM::CoreLM() : fd(-1), data(0), length(0) { } vChewing::CoreLM::~CoreLM() { - if (data) { + if (data) + { close(); } } bool vChewing::CoreLM::isLoaded() { - if (data) { + if (data) + { return true; } return false; @@ -52,24 +58,28 @@ bool vChewing::CoreLM::isLoaded() bool vChewing::CoreLM::open(const char *path) { - if (data) { + if (data) + { return false; } - + fd = ::open(path, O_RDONLY); - if (fd == -1) { + if (fd == -1) + { return false; } struct stat sb; - if (fstat(fd, &sb) == -1) { + if (fstat(fd, &sb) == -1) + { return false; } length = (size_t)sb.st_size; data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0); - if (!data) { + if (!data) + { ::close(fd); return false; } @@ -117,18 +127,22 @@ bool vChewing::CoreLM::open(const char *path) start: // EOF -> end - if (head == end) { + if (head == end) + { goto end; } c = *head; // \s -> error - if (c == ' ') { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error"); + if (c == ' ') + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error"); goto error; } // \n -> start - else if (c == '\n') { + else if (c == '\n') + { head++; goto start; } @@ -140,19 +154,24 @@ start: state1: // EOF -> error - if (head == end) { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error"); + if (head == end) + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error"); goto error; } c = *head; // \n -> error - if (c == '\n') { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error"); + if (c == '\n') + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error"); goto error; } // \s -> state2 + zero out ending + record column start - else if (c == ' ') { + else if (c == ' ') + { *head = 0; head++; row.key = head; @@ -165,15 +184,19 @@ state1: state2: // eof -> error - if (head == end) { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error"); + if (head == end) + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error"); goto error; } c = *head; // \n, \s -> error - if (c == '\n' || c == ' ') { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error"); + if (c == '\n' || c == ' ') + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error"); goto error; } @@ -184,20 +207,25 @@ state2: state3: // eof -> error - if (head == end) { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error"); + if (head == end) + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error"); goto error; } c = *head; // \n -> error - if (c == '\n') { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error"); + if (c == '\n') + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error"); goto error; } // \s -> state4 + zero out ending + record column start - else if (c == ' ') { + else if (c == ' ') + { *head = 0; head++; row.logProbability = head; @@ -210,15 +238,19 @@ state3: state4: // eof -> error - if (head == end) { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error"); + if (head == end) + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error"); goto error; } c = *head; // \n, \s -> error - if (c == '\n' || c == ' ') { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error"); + if (c == '\n' || c == ' ') + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error"); goto error; } @@ -227,22 +259,26 @@ state4: // fall through to state 5 - state5: // eof -> error - if (head == end) { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error"); + if (head == end) + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error"); goto error; } c = *head; // \s -> error - if (c == ' ') { - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error"); + if (c == ' ') + { + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error"); goto error; } // \n -> start - else if (c == '\n') { + else if (c == '\n') + { *head = 0; head++; keyRowMap[row.key].push_back(row); @@ -265,13 +301,15 @@ end: emptyRow.value = space; emptyRow.logProbability = zero; keyRowMap[space].push_back(emptyRow); - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete."); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete."); return true; } void vChewing::CoreLM::close() { - if (data) { + if (data) + { munmap(data, length); ::close(fd); data = 0; @@ -283,30 +321,34 @@ void vChewing::CoreLM::close() void vChewing::CoreLM::dump() { size_t rows = 0; - for (map >::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i) { - const vector& r = (*i).second; - for (vector::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri) { - const Row& row = *ri; + for (map>::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i) + { + const vector &r = (*i).second; + for (vector::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri) + { + const Row &row = *ri; cerr << row.key << " " << row.value << " " << row.logProbability << "\n"; rows++; } } } -const std::vector vChewing::CoreLM::bigramsForKeys(const string& preceedingKey, const string& key) +const std::vector vChewing::CoreLM::bigramsForKeys(const string &preceedingKey, const string &key) { return std::vector(); } -const std::vector vChewing::CoreLM::unigramsForKey(const string& key) +const std::vector vChewing::CoreLM::unigramsForKey(const string &key) { std::vector v; - map >::const_iterator i = keyRowMap.find(key.c_str()); + map>::const_iterator i = keyRowMap.find(key.c_str()); - if (i != keyRowMap.end()) { - for (vector::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri) { + if (i != keyRowMap.end()) + { + for (vector::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri) + { Unigram g; - const Row& r = *ri; + const Row &r = *ri; g.keyValue.key = r.key; g.keyValue.value = r.value; g.score = atof(r.logProbability); @@ -317,7 +359,7 @@ const std::vector vChewing::CoreLM::unigramsForKey(const s return v; } -bool vChewing::CoreLM::hasUnigramsForKey(const string& key) +bool vChewing::CoreLM::hasUnigramsForKey(const string &key) { return keyRowMap.find(key.c_str()) != keyRowMap.end(); } diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h index 80c47b96..6154330a 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h @@ -1,44 +1,54 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef CNSLM_H #define CNSLM_H -#include -#include -#include #include "LanguageModel.h" #include "UserPhrasesLM.h" +#include +#include +#include -namespace vChewing { - -class CNSLM: public UserPhrasesLM +namespace vChewing { -public: - virtual bool allowConsolidation() override { + +class CNSLM : public UserPhrasesLM +{ + public: + virtual bool allowConsolidation() override + { return false; } - virtual float overridedValue() override { + virtual float overridedValue() override + { return -11.0; } }; -} +} // namespace vChewing #endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h index 48409bc4..78d2a5b7 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h @@ -1,44 +1,54 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SYMBOLLM_H #define SYMBOLLM_H -#include -#include -#include #include "LanguageModel.h" #include "UserPhrasesLM.h" +#include +#include +#include -namespace vChewing { - -class SymbolLM: public UserPhrasesLM +namespace vChewing { -public: - virtual bool allowConsolidation() override { + +class SymbolLM : public UserPhrasesLM +{ + public: + virtual bool allowConsolidation() override + { return false; } - virtual float overridedValue() override { + virtual float overridedValue() override + { return -13.0; } }; -} +} // namespace vChewing #endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/UserSymbolLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/UserSymbolLM.h index 96ee02d5..7d8646cc 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/UserSymbolLM.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/UserSymbolLM.h @@ -1,44 +1,54 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef USERSYMBOLLM_H #define USERSYMBOLLM_H -#include -#include -#include #include "LanguageModel.h" #include "UserPhrasesLM.h" +#include +#include +#include -namespace vChewing { - -class UserSymbolLM: public UserPhrasesLM +namespace vChewing { -public: - virtual bool allowConsolidation() override { + +class UserSymbolLM : public UserPhrasesLM +{ + public: + virtual bool allowConsolidation() override + { return true; } - virtual float overridedValue() override { + virtual float overridedValue() override + { return -12.0; } }; -} +} // namespace vChewing #endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.cpp b/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.cpp index 92104c22..4f40bb8c 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.cpp +++ b/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.cpp @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "ParselessLM.h" @@ -26,29 +33,36 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include -vChewing::ParselessLM::~ParselessLM() { close(); } +vChewing::ParselessLM::~ParselessLM() +{ + close(); +} bool vChewing::ParselessLM::isLoaded() { - if (data_) { + if (data_) + { return true; } return false; } -bool vChewing::ParselessLM::open(const std::string_view& path) +bool vChewing::ParselessLM::open(const std::string_view &path) { - if (data_) { + if (data_) + { return false; } fd_ = ::open(path.data(), O_RDONLY); - if (fd_ == -1) { + if (fd_ == -1) + { return false; } struct stat sb; - if (fstat(fd_, &sb) == -1) { + if (fstat(fd_, &sb) == -1) + { ::close(fd_); fd_ = -1; return false; @@ -57,21 +71,22 @@ bool vChewing::ParselessLM::open(const std::string_view& path) length_ = static_cast(sb.st_size); data_ = mmap(NULL, length_, PROT_READ, MAP_SHARED, fd_, 0); - if (data_ == nullptr) { + if (data_ == nullptr) + { ::close(fd_); fd_ = -1; length_ = 0; return false; } - db_ = std::unique_ptr(new ParselessPhraseDB( - static_cast(data_), length_)); + db_ = std::unique_ptr(new ParselessPhraseDB(static_cast(data_), length_)); return true; } void vChewing::ParselessLM::close() { - if (data_ != nullptr) { + if (data_ != nullptr) + { munmap(data_, length_); ::close(fd_); fd_ = -1; @@ -80,55 +95,61 @@ void vChewing::ParselessLM::close() } } -const std::vector -vChewing::ParselessLM::bigramsForKeys( - const std::string& preceedingKey, const std::string& key) +const std::vector vChewing::ParselessLM::bigramsForKeys(const std::string &preceedingKey, + const std::string &key) { return std::vector(); } -const std::vector -vChewing::ParselessLM::unigramsForKey(const std::string& key) +const std::vector vChewing::ParselessLM::unigramsForKey(const std::string &key) { - if (db_ == nullptr) { + if (db_ == nullptr) + { return std::vector(); } std::vector results; - for (const auto& row : db_->findRows(key + " ")) { + for (const auto &row : db_->findRows(key + " ")) + { Gramambular::Unigram unigram; // Move ahead until we encounter the first space. This is the key. auto it = row.begin(); - while (it != row.end() && *it != ' ') { + while (it != row.end() && *it != ' ') + { ++it; } unigram.keyValue.key = std::string(row.begin(), it); // Read past the space. - if (it != row.end()) { + if (it != row.end()) + { ++it; } - if (it != row.end()) { + if (it != row.end()) + { // Now it is the start of the value portion. auto value_begin = it; // Move ahead until we encounter the second space. This is the // value. - while (it != row.end() && *it != ' ') { + while (it != row.end() && *it != ' ') + { ++it; } unigram.keyValue.value = std::string(value_begin, it); } // Read past the space. The remainder, if it exists, is the score. - if (it != row.end()) { + if (it != row.end()) + { ++it; } - if (it != row.end()) { + if (it != row.end()) + { unigram.score = std::stod(std::string(it, row.end())); } results.push_back(unigram); @@ -136,9 +157,10 @@ vChewing::ParselessLM::unigramsForKey(const std::string& key) return results; } -bool vChewing::ParselessLM::hasUnigramsForKey(const std::string& key) +bool vChewing::ParselessLM::hasUnigramsForKey(const std::string &key) { - if (db_ == nullptr) { + if (db_ == nullptr) + { return false; } diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.h index 984054a2..698bcecc 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/ParselessLM.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SOURCE_ENGINE_PARSELESSLM_H_ @@ -27,25 +34,26 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "LanguageModel.h" #include "ParselessPhraseDB.h" -namespace vChewing { +namespace vChewing +{ -class ParselessLM : public Gramambular::LanguageModel { -public: +class ParselessLM : public Gramambular::LanguageModel +{ + public: ~ParselessLM() override; bool isLoaded(); - bool open(const std::string_view& path); + bool open(const std::string_view &path); void close(); - const std::vector bigramsForKeys( - const std::string& preceedingKey, const std::string& key) override; - const std::vector unigramsForKey( - const std::string& key) override; - bool hasUnigramsForKey(const std::string& key) override; + const std::vector bigramsForKeys(const std::string &preceedingKey, + const std::string &key) override; + const std::vector unigramsForKey(const std::string &key) override; + bool hasUnigramsForKey(const std::string &key) override; -private: + private: int fd_ = -1; - void* data_ = nullptr; + void *data_ = nullptr; size_t length_ = 0; std::unique_ptr db_; }; diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.cpp b/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.cpp index c28f33b4..a0097e69 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.cpp +++ b/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.cpp @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "ParselessPhraseDB.h" @@ -22,35 +29,35 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include #include -namespace vChewing { +namespace vChewing +{ -ParselessPhraseDB::ParselessPhraseDB( - const char* buf, size_t length) - : begin_(buf) - , end_(buf + length) +ParselessPhraseDB::ParselessPhraseDB(const char *buf, size_t length) : begin_(buf), end_(buf + length) { } -std::vector ParselessPhraseDB::findRows( - const std::string_view& key) +std::vector ParselessPhraseDB::findRows(const std::string_view &key) { std::vector rows; - const char* ptr = findFirstMatchingLine(key); - if (ptr == nullptr) { + const char *ptr = findFirstMatchingLine(key); + if (ptr == nullptr) + { return rows; } - while (ptr + key.length() <= end_ - && memcmp(ptr, key.data(), key.length()) == 0) { - const char* eol = ptr; + while (ptr + key.length() <= end_ && memcmp(ptr, key.data(), key.length()) == 0) + { + const char *eol = ptr; - while (eol != end_ && *eol != '\n') { + while (eol != end_ && *eol != '\n') + { ++eol; } rows.emplace_back(ptr, eol - ptr); - if (eol == end_) { + if (eol == end_) + { break; } @@ -66,71 +73,83 @@ std::vector ParselessPhraseDB::findRows( // current line is actually the first matching line: if the previous line is // less to the key and the current line starts exactly with the key, then // the current line is the first matching line. -const char* ParselessPhraseDB::findFirstMatchingLine( - const std::string_view& key) +const char *ParselessPhraseDB::findFirstMatchingLine(const std::string_view &key) { - if (key.empty()) { + if (key.empty()) + { return begin_; } - const char* top = begin_; - const char* bottom = end_; + const char *top = begin_; + const char *bottom = end_; - while (top < bottom) { - const char* mid = top + (bottom - top) / 2; - const char* ptr = mid; + while (top < bottom) + { + const char *mid = top + (bottom - top) / 2; + const char *ptr = mid; - if (ptr != begin_) { + if (ptr != begin_) + { --ptr; } - while (ptr != begin_ && *ptr != '\n') { + while (ptr != begin_ && *ptr != '\n') + { --ptr; } - const char* prev = nullptr; - if (*ptr == '\n') { + const char *prev = nullptr; + if (*ptr == '\n') + { prev = ptr; ++ptr; } // ptr is now in the "current" line we're interested in. - if (ptr + key.length() > end_) { + if (ptr + key.length() > end_) + { // not enough data to compare at this point, bail. break; } int current_cmp = memcmp(ptr, key.data(), key.length()); - if (current_cmp > 0) { + if (current_cmp > 0) + { bottom = mid - 1; continue; } - if (current_cmp < 0) { + if (current_cmp < 0) + { top = mid + 1; continue; } - if (!prev) { + if (!prev) + { return ptr; } // Move the prev so that it reaches the previous line. - if (prev != begin_) { + if (prev != begin_) + { --prev; } - while (prev != begin_ && *prev != '\n') { + while (prev != begin_ && *prev != '\n') + { --prev; } - if (*prev == '\n') { + if (*prev == '\n') + { ++prev; } int prev_cmp = memcmp(prev, key.data(), key.length()); // This is the first occurrence. - if (prev_cmp < 0 && current_cmp == 0) { + if (prev_cmp < 0 && current_cmp == 0) + { return ptr; } diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.h b/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.h index d632e653..3ac28768 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SOURCE_ENGINE_PARSELESSPHRASEDB_H_ @@ -24,28 +31,29 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include #include -namespace vChewing { +namespace vChewing +{ // Defines phrase database that consists of (key, value, score) rows that are // pre-sorted by the byte value of the keys. It is way faster than FastLM // because it does not need to parse anything. Instead, it relies on the fact // that the database is already sorted, and binary search is used to find the // rows. -class ParselessPhraseDB { -public: - ParselessPhraseDB( - const char* buf, size_t length); +class ParselessPhraseDB +{ + public: + ParselessPhraseDB(const char *buf, size_t length); // Find the rows that match the key. Note that prefix match is used. If you // need exact match, the key will need to have a delimiter (usually a space) // at the end. - std::vector findRows(const std::string_view& key); + std::vector findRows(const std::string_view &key); - const char* findFirstMatchingLine(const std::string_view& key); + const char *findFirstMatchingLine(const std::string_view &key); -private: - const char* begin_; - const char* end_; + private: + const char *begin_; + const char *end_; }; }; // namespace vChewing diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h b/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h index e4c61b60..43263923 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h @@ -1,48 +1,56 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef PHRASEREPLACEMENTMAP_H #define PHRASEREPLACEMENTMAP_H -#include -#include #include +#include +#include -namespace vChewing { +namespace vChewing +{ class PhraseReplacementMap { -public: + public: PhraseReplacementMap(); ~PhraseReplacementMap(); bool open(const char *path); void close(); - const std::string valueForKey(const std::string& key); + const std::string valueForKey(const std::string &key); -protected: + protected: std::map keyValueMap; int fd; void *data; size_t length; }; -} +} // namespace vChewing #endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm b/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm index b3a10e49..7fde339b 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm +++ b/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm @@ -1,55 +1,62 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "PhraseReplacementMap.h" #include "vChewing-Swift.h" -#include -#include #include #include -#include +#include +#include #include +#include #include "KeyValueBlobReader.h" #include "LMConsolidator.h" -namespace vChewing { +namespace vChewing +{ using std::string; -PhraseReplacementMap::PhraseReplacementMap() -: fd(-1) -, data(0) -, length(0) +PhraseReplacementMap::PhraseReplacementMap() : fd(-1), data(0), length(0) { } PhraseReplacementMap::~PhraseReplacementMap() { - if (data) { + if (data) + { close(); } } bool PhraseReplacementMap::open(const char *path) { - if (data) { + if (data) + { return false; } @@ -57,13 +64,15 @@ bool PhraseReplacementMap::open(const char *path) LMConsolidator::ConsolidateContent(path, true); fd = ::open(path, O_RDONLY); - if (fd == -1) { + if (fd == -1) + { printf("open:: file not exist"); return false; } struct stat sb; - if (fstat(fd, &sb) == -1) { + if (fstat(fd, &sb) == -1) + { printf("open:: cannot open file"); return false; } @@ -71,21 +80,25 @@ bool PhraseReplacementMap::open(const char *path) length = (size_t)sb.st_size; data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) { + if (!data) + { ::close(fd); return false; } - KeyValueBlobReader reader(static_cast(data), length); + KeyValueBlobReader reader(static_cast(data), length); KeyValueBlobReader::KeyValue keyValue; KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { + while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) + { keyValueMap[keyValue.key] = keyValue.value; } // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) { + if (state == KeyValueBlobReader::State::ERROR) + { // close(); - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n"); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n"); // return false; } return true; @@ -93,7 +106,8 @@ bool PhraseReplacementMap::open(const char *path) void PhraseReplacementMap::close() { - if (data) { + if (data) + { munmap(data, length); ::close(fd); data = 0; @@ -102,15 +116,15 @@ void PhraseReplacementMap::close() keyValueMap.clear(); } -const std::string PhraseReplacementMap::valueForKey(const std::string& key) +const std::string PhraseReplacementMap::valueForKey(const std::string &key) { auto iter = keyValueMap.find(key); - if (iter != keyValueMap.end()) { + if (iter != keyValueMap.end()) + { const std::string_view v = iter->second; return {v.data(), v.size()}; } return string(""); } - } diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.cpp b/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.cpp index 8d30149a..4ae8443f 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.cpp +++ b/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.cpp @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "UserOverrideModel.h" @@ -23,88 +30,84 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include #include -namespace vChewing { +namespace vChewing +{ // About 20 generations. static const double DecayThreshould = 1.0 / 1048576.0; -static double Score(size_t eventCount, - size_t totalCount, - double eventTimestamp, - double timestamp, - double lambda); -static bool IsEndingPunctuation(const std::string& value); -static std::string WalkedNodesToKey(const std::vector& walkedNodes, - size_t cursorIndex); +static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda); +static bool IsEndingPunctuation(const std::string &value); +static std::string WalkedNodesToKey(const std::vector &walkedNodes, size_t cursorIndex); -UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) - : m_capacity(capacity) { +UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) : m_capacity(capacity) +{ assert(m_capacity > 0); m_decayExponent = log(0.5) / decayConstant; } -void UserOverrideModel::observe(const std::vector& walkedNodes, - size_t cursorIndex, - const std::string& candidate, - double timestamp) { +void UserOverrideModel::observe(const std::vector &walkedNodes, size_t cursorIndex, + const std::string &candidate, double timestamp) +{ std::string key = WalkedNodesToKey(walkedNodes, cursorIndex); auto mapIter = m_lruMap.find(key); - if (mapIter == m_lruMap.end()) { + if (mapIter == m_lruMap.end()) + { auto keyValuePair = KeyObservationPair(key, Observation()); - Observation& observation = keyValuePair.second; + Observation &observation = keyValuePair.second; observation.update(candidate, timestamp); m_lruList.push_front(keyValuePair); auto listIter = m_lruList.begin(); - auto lruKeyValue = std::pair::iterator>(key, listIter); + auto lruKeyValue = std::pair::iterator>(key, listIter); m_lruMap.insert(lruKeyValue); - if (m_lruList.size() > m_capacity) { + if (m_lruList.size() > m_capacity) + { auto lastKeyValuePair = m_lruList.end(); --lastKeyValuePair; m_lruMap.erase(lastKeyValuePair->first); m_lruList.pop_back(); } - } else { + } + else + { auto listIter = mapIter->second; m_lruList.splice(m_lruList.begin(), m_lruList, listIter); - auto& keyValuePair = *listIter; - Observation& observation = keyValuePair.second; + auto &keyValuePair = *listIter; + Observation &observation = keyValuePair.second; observation.update(candidate, timestamp); } } -std::string UserOverrideModel::suggest(const std::vector& walkedNodes, - size_t cursorIndex, - double timestamp) { +std::string UserOverrideModel::suggest(const std::vector &walkedNodes, size_t cursorIndex, + double timestamp) +{ std::string key = WalkedNodesToKey(walkedNodes, cursorIndex); auto mapIter = m_lruMap.find(key); - if (mapIter == m_lruMap.end()) { + if (mapIter == m_lruMap.end()) + { return std::string(); } auto listIter = mapIter->second; - auto& keyValuePair = *listIter; - const Observation& observation = keyValuePair.second; + auto &keyValuePair = *listIter; + const Observation &observation = keyValuePair.second; std::string candidate; double score = 0.0; - for (auto i = observation.overrides.begin(); - i != observation.overrides.end(); - ++i) { - const Override& o = i->second; - double overrideScore = Score(o.count, - observation.count, - o.timestamp, - timestamp, - m_decayExponent); - if (overrideScore == 0.0) { + for (auto i = observation.overrides.begin(); i != observation.overrides.end(); ++i) + { + const Override &o = i->second; + double overrideScore = Score(o.count, observation.count, o.timestamp, timestamp, m_decayExponent); + if (overrideScore == 0.0) + { continue; } - if (overrideScore > score) { + if (overrideScore > score) + { candidate = i->first; score = overrideScore; } @@ -112,21 +115,19 @@ std::string UserOverrideModel::suggest(const std::vector& walkedNodes, - size_t cursorIndex) { +static std::string WalkedNodesToKey(const std::vector &walkedNodes, size_t cursorIndex) +{ std::stringstream s; std::vector n; size_t ll = 0; - for (std::vector::const_iterator i = walkedNodes.begin(); - i != walkedNodes.end(); - ++i) { - const auto& nn = *i; + for (std::vector::const_iterator i = walkedNodes.begin(); i != walkedNodes.end(); ++i) + { + const auto &nn = *i; n.push_back(nn); ll += nn.spanningLength; - if (ll >= cursorIndex) { + if (ll >= cursorIndex) + { break; } } std::vector::const_reverse_iterator r = n.rbegin(); - if (r == n.rend()) { + if (r == n.rend()) + { return ""; } @@ -165,40 +168,44 @@ static std::string WalkedNodesToKey(const std::vector& s.clear(); s.str(std::string()); - if (r != n.rend()) { + if (r != n.rend()) + { std::string value = (*r).node->currentKeyValue().value; - if (IsEndingPunctuation(value)) { + if (IsEndingPunctuation(value)) + { s << "()"; r = n.rend(); - } else { - s << "(" - << (*r).node->currentKeyValue().key - << "," - << value - << ")"; + } + else + { + s << "(" << (*r).node->currentKeyValue().key << "," << value << ")"; ++r; } - } else { + } + else + { s << "()"; } std::string prev = s.str(); s.clear(); s.str(std::string()); - if (r != n.rend()) { + if (r != n.rend()) + { std::string value = (*r).node->currentKeyValue().value; - if (IsEndingPunctuation(value)) { + if (IsEndingPunctuation(value)) + { s << "()"; r = n.rend(); - } else { - s << "(" - << (*r).node->currentKeyValue().key - << "," - << value - << ")"; + } + else + { + s << "(" << (*r).node->currentKeyValue().key << "," << value << ")"; ++r; } - } else { + } + else + { s << "()"; } std::string anterior = s.str(); diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.h b/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.h index 10824c4b..6479c2d6 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef USEROVERRIDEMODEL_H @@ -25,37 +32,41 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "Gramambular.h" -namespace vChewing { +namespace vChewing +{ using namespace Gramambular; -class UserOverrideModel { -public: +class UserOverrideModel +{ + public: UserOverrideModel(size_t capacity, double decayConstant); - void observe(const std::vector& walkedNodes, - size_t cursorIndex, - const std::string& candidate, - double timestamp); + void observe(const std::vector &walkedNodes, size_t cursorIndex, + const std::string &candidate, double timestamp); - std::string suggest(const std::vector& walkedNodes, - size_t cursorIndex, - double timestamp); + std::string suggest(const std::vector &walkedNodes, size_t cursorIndex, double timestamp); -private: - struct Override { + private: + struct Override + { size_t count; double timestamp; - Override() : count(0), timestamp(0.0) {} + Override() : count(0), timestamp(0.0) + { + } }; - struct Observation { + struct Observation + { size_t count; std::map overrides; - Observation() : count(0) {} - void update(const std::string& candidate, double timestamp); + Observation() : count(0) + { + } + void update(const std::string &candidate, double timestamp); }; typedef std::pair KeyObservationPair; @@ -69,4 +80,3 @@ private: }; // namespace vChewing #endif - diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h index 05881911..4c27d748 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h +++ b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h @@ -1,35 +1,43 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef USERPHRASESLM_H #define USERPHRASESLM_H -#include -#include -#include #include "LanguageModel.h" +#include +#include +#include -namespace vChewing { +namespace vChewing +{ class UserPhrasesLM : public Gramambular::LanguageModel { -public: + public: UserPhrasesLM(); ~UserPhrasesLM(); @@ -38,31 +46,37 @@ public: void close(); void dump(); - virtual bool allowConsolidation() { + virtual bool allowConsolidation() + { return true; } - virtual float overridedValue() { + virtual float overridedValue() + { return 0.0; } - virtual const std::vector bigramsForKeys(const std::string& preceedingKey, const std::string& key); - virtual const std::vector unigramsForKey(const std::string& key); - virtual bool hasUnigramsForKey(const std::string& key); - -protected: - struct Row { - Row(std::string_view& k, std::string_view& v) : key(k), value(v) {} + virtual const std::vector bigramsForKeys(const std::string &preceedingKey, + const std::string &key); + virtual const std::vector unigramsForKey(const std::string &key); + virtual bool hasUnigramsForKey(const std::string &key); + + protected: + struct Row + { + Row(std::string_view &k, std::string_view &v) : key(k), value(v) + { + } std::string_view key; std::string_view value; }; - + std::map> keyRowMap; int fd; void *data; size_t length; }; -} +} // namespace vChewing #endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm index b62db2db..e3565d0e 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm +++ b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm @@ -1,53 +1,60 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "UserPhrasesLM.h" #include "vChewing-Swift.h" -#include -#include #include #include -#include +#include +#include #include +#include #include "KeyValueBlobReader.h" #include "LMConsolidator.h" -namespace vChewing { +namespace vChewing +{ -UserPhrasesLM::UserPhrasesLM() - : fd(-1) - , data(0) - , length(0) +UserPhrasesLM::UserPhrasesLM() : fd(-1), data(0), length(0) { } UserPhrasesLM::~UserPhrasesLM() { - if (data) { + if (data) + { close(); } } bool UserPhrasesLM::isLoaded() { - if (data) { + if (data) + { return true; } return false; @@ -55,23 +62,27 @@ bool UserPhrasesLM::isLoaded() bool UserPhrasesLM::open(const char *path) { - if (data) { + if (data) + { return false; } - if (allowConsolidation()) { + if (allowConsolidation()) + { LMConsolidator::FixEOF(path); LMConsolidator::ConsolidateContent(path, true); } fd = ::open(path, O_RDONLY); - if (fd == -1) { + if (fd == -1) + { printf("open:: file not exist"); return false; } struct stat sb; - if (fstat(fd, &sb) == -1) { + if (fstat(fd, &sb) == -1) + { printf("open:: cannot open file"); return false; } @@ -79,22 +90,27 @@ bool UserPhrasesLM::open(const char *path) length = (size_t)sb.st_size; data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) { + if (!data) + { ::close(fd); return false; } - KeyValueBlobReader reader(static_cast(data), length); + KeyValueBlobReader reader(static_cast(data), length); KeyValueBlobReader::KeyValue keyValue; KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { - // We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading. + while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) + { + // We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF + // reading. keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key); } // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) { + if (state == KeyValueBlobReader::State::ERROR) + { // close(); - if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n"); + if (mgrPrefs.isDebugModeEnabled) + syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n"); // return false; } return true; @@ -102,7 +118,8 @@ bool UserPhrasesLM::open(const char *path) void UserPhrasesLM::close() { - if (data) { + if (data) + { munmap(data, length); ::close(fd); data = 0; @@ -113,26 +130,31 @@ void UserPhrasesLM::close() void UserPhrasesLM::dump() { - for (const auto& entry : keyRowMap) { - const std::vector& rows = entry.second; - for (const auto& row : rows) { + for (const auto &entry : keyRowMap) + { + const std::vector &rows = entry.second; + for (const auto &row : rows) + { std::cerr << row.key << " " << row.value << "\n"; } } } -const std::vector UserPhrasesLM::bigramsForKeys(const std::string& preceedingKey, const std::string& key) +const std::vector UserPhrasesLM::bigramsForKeys(const std::string &preceedingKey, + const std::string &key) { return std::vector(); } -const std::vector UserPhrasesLM::unigramsForKey(const std::string& key) +const std::vector UserPhrasesLM::unigramsForKey(const std::string &key) { std::vector v; auto iter = keyRowMap.find(key); - if (iter != keyRowMap.end()) { - const std::vector& rows = iter->second; - for (const auto& row : rows) { + if (iter != keyRowMap.end()) + { + const std::vector &rows = iter->second; + for (const auto &row : rows) + { Gramambular::Unigram g; g.keyValue.key = row.key; g.keyValue.value = row.value; @@ -144,9 +166,9 @@ const std::vector UserPhrasesLM::unigramsForKey(const std: return v; } -bool UserPhrasesLM::hasUnigramsForKey(const std::string& key) +bool UserPhrasesLM::hasUnigramsForKey(const std::string &key) { return keyRowMap.find(key) != keyRowMap.end(); } -}; // namespace vChewing +}; // namespace vChewing diff --git a/Source/Modules/LangModelRelated/mgrLangModel.h b/Source/Modules/LangModelRelated/mgrLangModel.h index 0a3d3332..816d44cb 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.h +++ b/Source/Modules/LangModelRelated/mgrLangModel.h @@ -1,24 +1,31 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#import #import "KeyHandler.h" +#import NS_ASSUME_NONNULL_BEGIN @@ -33,8 +40,13 @@ NS_ASSUME_NONNULL_BEGIN + (BOOL)checkIfSpecifiedUserDataFolderValid:(NSString *)folderPath; + (NSString *)dataFolderPath:(bool)isDefaultFolder NS_SWIFT_NAME(dataFolderPath(isDefaultFolder:)); -+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase inputMode:(InputMode)mode key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:)); -+ (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating areWeDeleting:(BOOL)areWeDeleting; ++ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase + inputMode:(InputMode)mode + key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:)); ++ (BOOL)writeUserPhrase:(NSString *)userPhrase + inputMode:(InputMode)mode + areWeDuplicating:(BOOL)areWeDuplicating + areWeDeleting:(BOOL)areWeDeleting; + (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled; + (void)setCNSEnabled:(BOOL)cnsEnabled; + (void)setSymbolEnabled:(BOOL)symbolEnabled; diff --git a/Source/Modules/LangModelRelated/mgrLangModel.mm b/Source/Modules/LangModelRelated/mgrLangModel.mm index 3ca207ba..9b055161 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.mm +++ b/Source/Modules/LangModelRelated/mgrLangModel.mm @@ -1,26 +1,33 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #import "mgrLangModel.h" +#import "LMConsolidator.h" #import "mgrLangModel_Privates.h" #import "vChewing-Swift.h" -#import "LMConsolidator.h" static const int kUserOverrideModelCapacity = 500; static const double kObservedOverrideHalflife = 5400.0; @@ -54,70 +61,90 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing + (void)loadDataModels { - if (!gLangModelCHT.isDataModelLoaded()) { + if (!gLangModelCHT.isDataModelLoaded()) + { LTLoadLanguageModelFile(@"data-cht", gLangModelCHT); } - if (!gLangModelCHT.isMiscDataLoaded()) { - gLangModelCHT.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); + if (!gLangModelCHT.isMiscDataLoaded()) + { + gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]); } - if (!gLangModelCHT.isSymbolDataLoaded()){ - gLangModelCHT.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); + if (!gLangModelCHT.isSymbolDataLoaded()) + { + gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]); } - if (!gLangModelCHT.isCNSDataLoaded()){ - gLangModelCHT.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); + if (!gLangModelCHT.isCNSDataLoaded()) + { + gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]); } // ----------------- - if (!gLangModelCHS.isDataModelLoaded()) { + if (!gLangModelCHS.isDataModelLoaded()) + { LTLoadLanguageModelFile(@"data-chs", gLangModelCHS); } - if (!gLangModelCHS.isMiscDataLoaded()) { - gLangModelCHS.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); + if (!gLangModelCHS.isMiscDataLoaded()) + { + gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]); } - if (!gLangModelCHS.isSymbolDataLoaded()){ - gLangModelCHS.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); + if (!gLangModelCHS.isSymbolDataLoaded()) + { + gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]); } - if (!gLangModelCHS.isCNSDataLoaded()){ - gLangModelCHS.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); + if (!gLangModelCHS.isCNSDataLoaded()) + { + gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]); } } + (void)loadDataModel:(InputMode)mode { - if ([mode isEqualToString:imeModeCHT]) { - if (!gLangModelCHT.isDataModelLoaded()) { + if ([mode isEqualToString:imeModeCHT]) + { + if (!gLangModelCHT.isDataModelLoaded()) + { LTLoadLanguageModelFile(@"data-cht", gLangModelCHT); } - if (!gLangModelCHT.isMiscDataLoaded()) { - gLangModelCHT.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); + if (!gLangModelCHT.isMiscDataLoaded()) + { + gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]); } - if (!gLangModelCHT.isSymbolDataLoaded()){ - gLangModelCHT.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); + if (!gLangModelCHT.isSymbolDataLoaded()) + { + gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]); } - if (!gLangModelCHT.isCNSDataLoaded()){ - gLangModelCHT.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); + if (!gLangModelCHT.isCNSDataLoaded()) + { + gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]); } } - if ([mode isEqualToString:imeModeCHS]) { - if (!gLangModelCHS.isDataModelLoaded()) { + if ([mode isEqualToString:imeModeCHS]) + { + if (!gLangModelCHS.isDataModelLoaded()) + { LTLoadLanguageModelFile(@"data-chs", gLangModelCHS); } - if (!gLangModelCHS.isMiscDataLoaded()) { - gLangModelCHS.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); + if (!gLangModelCHS.isMiscDataLoaded()) + { + gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]); } - if (!gLangModelCHS.isSymbolDataLoaded()){ - gLangModelCHS.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); + if (!gLangModelCHS.isSymbolDataLoaded()) + { + gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]); } - if (!gLangModelCHS.isCNSDataLoaded()){ - gLangModelCHS.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); + if (!gLangModelCHS.isCNSDataLoaded()) + { + gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]); } } } + (void)loadUserPhrases { - gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String], [[self excludedPhrasesDataPath:imeModeCHT] UTF8String]); - gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String], [[self excludedPhrasesDataPath:imeModeCHS] UTF8String]); + gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String], + [[self excludedPhrasesDataPath:imeModeCHT] UTF8String]); + gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String], + [[self excludedPhrasesDataPath:imeModeCHS] UTF8String]); gLangModelCHT.loadUserSymbolData([[self userSymbolDataPath:imeModeCHT] UTF8String]); gLangModelCHS.loadUserSymbolData([[self userSymbolDataPath:imeModeCHS] UTF8String]); } @@ -139,19 +166,26 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing NSString *folderPath = [self dataFolderPath:false]; BOOL isFolder = NO; BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder]; - if (folderExist && !isFolder) { + if (folderExist && !isFolder) + { NSError *error = nil; [[NSFileManager defaultManager] removeItemAtPath:folderPath error:&error]; - if (error) { + if (error) + { NSLog(@"Failed to remove folder %@", error); return NO; } folderExist = NO; } - if (!folderExist) { + if (!folderExist) + { NSError *error = nil; - [[NSFileManager defaultManager] createDirectoryAtPath:folderPath withIntermediateDirectories:YES attributes:nil error:&error]; - if (error) { + [[NSFileManager defaultManager] createDirectoryAtPath:folderPath + withIntermediateDirectories:YES + attributes:nil + error:&error]; + if (error) + { NSLog(@"Failed to create folder %@", error); return NO; } @@ -163,26 +197,34 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing { BOOL isFolder = NO; BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder]; - if ((folderExist && !isFolder) || (!folderExist)) { + if ((folderExist && !isFolder) || (!folderExist)) + { return NO; } return YES; } -+ (BOOL)ensureFileExists:(NSString *)filePath populateWithTemplate:(NSString *)templateBasename extension:(NSString *)ext ++ (BOOL)ensureFileExists:(NSString *)filePath + populateWithTemplate:(NSString *)templateBasename + extension:(NSString *)ext { - if (![[NSFileManager defaultManager] fileExistsAtPath:filePath]) { + if (![[NSFileManager defaultManager] fileExistsAtPath:filePath]) + { NSURL *templateURL = [[NSBundle mainBundle] URLForResource:templateBasename withExtension:ext]; NSData *templateData; - if (templateURL) { + if (templateURL) + { templateData = [NSData dataWithContentsOfURL:templateURL]; - } else { + } + else + { templateData = [@"" dataUsingEncoding:NSUTF8StringEncoding]; } BOOL result = [templateData writeToFile:filePath atomically:YES]; - if (!result) { + if (!result) + { NSLog(@"Failed to write file"); return NO; } @@ -192,36 +234,76 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing + (BOOL)checkIfUserLanguageModelFilesExist { - if (![self checkIfUserDataFolderExists]) return NO; - if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHS] populateWithTemplate:kUserDataTemplateName extension:kTemplateExtension]) return NO; - if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHT] populateWithTemplate:kUserDataTemplateName extension:kTemplateExtension]) return NO; - if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHS] populateWithTemplate:kUserAssDataTemplateName extension:kTemplateExtension]) return NO; - if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHT] populateWithTemplate:kUserAssDataTemplateName extension:kTemplateExtension]) return NO; - if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHS] populateWithTemplate:kExcludedPhrasesvChewingTemplateName extension:kTemplateExtension]) return NO; - if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHT] populateWithTemplate:kExcludedPhrasesvChewingTemplateName extension:kTemplateExtension]) return NO; - if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHS] populateWithTemplate:kPhraseReplacementTemplateName extension:kTemplateExtension]) return NO; - if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHT] populateWithTemplate:kPhraseReplacementTemplateName extension:kTemplateExtension]) return NO; - if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHT] populateWithTemplate:kUserSymbolDataTemplateName extension:kTemplateExtension]) return NO; - if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHS] populateWithTemplate:kUserSymbolDataTemplateName extension:kTemplateExtension]) return NO; + if (![self checkIfUserDataFolderExists]) + return NO; + if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHS] + populateWithTemplate:kUserDataTemplateName + extension:kTemplateExtension]) + return NO; + if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHT] + populateWithTemplate:kUserDataTemplateName + extension:kTemplateExtension]) + return NO; + if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHS] + populateWithTemplate:kUserAssDataTemplateName + extension:kTemplateExtension]) + return NO; + if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHT] + populateWithTemplate:kUserAssDataTemplateName + extension:kTemplateExtension]) + return NO; + if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHS] + populateWithTemplate:kExcludedPhrasesvChewingTemplateName + extension:kTemplateExtension]) + return NO; + if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHT] + populateWithTemplate:kExcludedPhrasesvChewingTemplateName + extension:kTemplateExtension]) + return NO; + if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHS] + populateWithTemplate:kPhraseReplacementTemplateName + extension:kTemplateExtension]) + return NO; + if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHT] + populateWithTemplate:kPhraseReplacementTemplateName + extension:kTemplateExtension]) + return NO; + if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHT] + populateWithTemplate:kUserSymbolDataTemplateName + extension:kTemplateExtension]) + return NO; + if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHS] + populateWithTemplate:kUserSymbolDataTemplateName + extension:kTemplateExtension]) + return NO; return YES; } -+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase inputMode:(InputMode)mode key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:)) ++ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase + inputMode:(InputMode)mode + key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:)) { string unigramKey = string(key.UTF8String); - vector unigrams = [mode isEqualToString:imeModeCHT] ? gLangModelCHT.unigramsForKey(unigramKey): gLangModelCHS.unigramsForKey(unigramKey); + vector unigrams = [mode isEqualToString:imeModeCHT] ? gLangModelCHT.unigramsForKey(unigramKey) + : gLangModelCHS.unigramsForKey(unigramKey); string userPhraseString = string(userPhrase.UTF8String); - for (auto unigram: unigrams) { - if (unigram.keyValue.value == userPhraseString) { + for (auto unigram : unigrams) + { + if (unigram.keyValue.value == userPhraseString) + { return YES; } } return NO; } -+ (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating areWeDeleting:(BOOL)areWeDeleting ++ (BOOL)writeUserPhrase:(NSString *)userPhrase + inputMode:(InputMode)mode + areWeDuplicating:(BOOL)areWeDuplicating + areWeDeleting:(BOOL)areWeDeleting { - if (![self checkIfUserLanguageModelFilesExist]) { + if (![self checkIfUserLanguageModelFilesExist]) + { return NO; } @@ -233,7 +315,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing // [currentMarkedPhrase appendString:@"\n"]; // } [currentMarkedPhrase appendString:userPhrase]; - if (areWeDuplicating && !areWeDeleting) { + if (areWeDuplicating && !areWeDeleting) + { // Do not use ASCII characters to comment here. // Otherwise, it will be scrambled by cnvHYPYtoBPMF module shipped in the vChewing Phrase Editor. [currentMarkedPhrase appendString:@"\t#𝙾𝚟𝚎𝚛𝚛𝚒𝚍𝚎"]; @@ -241,7 +324,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing [currentMarkedPhrase appendString:@"\n"]; NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path]; - if (!writeFile) { + if (!writeFile) + { return NO; } [writeFile seekToEndOfFile]; @@ -249,12 +333,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing [writeFile writeData:data]; [writeFile closeFile]; - // We enforce the format consolidation here, since the pragma header will let the UserPhraseLM bypasses the consolidating process on load. + // We enforce the format consolidation here, since the pragma header will let the UserPhraseLM bypasses the + // consolidating process on load. vChewing::LMConsolidator::ConsolidateContent([path UTF8String], false); // We use FSEventStream to monitor the change of the user phrase folder, // so we don't have to load data here unless FSEventStream is disabled by user. - if (!mgrPrefs.shouldAutoReloadUserDataFiles) { + if (!mgrPrefs.shouldAutoReloadUserDataFiles) + { [self loadUserPhrases]; } return YES; @@ -263,15 +349,21 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing + (NSString *)dataFolderPath:(bool)isDefaultFolder { // 此處不能用「~」來取代當前使用者目錄名稱。不然的話,一旦輸入法被系統的沙箱干預的話,則反而會定位到沙箱目錄內。 - NSString *appSupportPath = [NSFileManager.defaultManager URLsForDirectory:NSApplicationSupportDirectory inDomains:NSUserDomainMask][0].path; + NSString *appSupportPath = [NSFileManager.defaultManager URLsForDirectory:NSApplicationSupportDirectory + inDomains:NSUserDomainMask][0].path; NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"].stringByExpandingTildeInPath; - if (mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath == userDictPath || isDefaultFolder) { + if (mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath == userDictPath || isDefaultFolder) + { return userDictPath; } - if ([mgrPrefs ifSpecifiedUserDataPathExistsInPlist]) { - if ([self checkIfSpecifiedUserDataFolderValid:mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath]) { + if ([mgrPrefs ifSpecifiedUserDataPathExistsInPlist]) + { + if ([self checkIfSpecifiedUserDataFolderValid:mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath]) + { return mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath; - } else { + } + else + { [NSUserDefaults.standardUserDefaults removeObjectForKey:@"UserDataFolderSpecified"]; } } @@ -286,13 +378,15 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing + (NSString *)userSymbolDataPath:(InputMode)mode; { - NSString *fileName = [mode isEqualToString:imeModeCHT] ? @"usersymbolphrases-cht.txt" : @"usersymbolphrases-chs.txt"; + NSString *fileName = + [mode isEqualToString:imeModeCHT] ? @"usersymbolphrases-cht.txt" : @"usersymbolphrases-chs.txt"; return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName]; } + (NSString *)userAssociatedPhrasesDataPath:(InputMode)mode; { - NSString *fileName = [mode isEqualToString:imeModeCHT] ? @"associatedPhrases-cht.txt" : @"associatedPhrases-chs.txt"; + NSString *fileName = + [mode isEqualToString:imeModeCHT] ? @"associatedPhrases-cht.txt" : @"associatedPhrases-chs.txt"; return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName]; } @@ -304,11 +398,12 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing + (NSString *)phraseReplacementDataPath:(InputMode)mode; { - NSString *fileName = [mode isEqualToString:imeModeCHT] ? @"phrases-replacement-cht.txt" : @"phrases-replacement-chs.txt"; + NSString *fileName = + [mode isEqualToString:imeModeCHT] ? @"phrases-replacement-cht.txt" : @"phrases-replacement-chs.txt"; return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName]; } - + (vChewing::LMInstantiator *)lmCHT ++ (vChewing::LMInstantiator *)lmCHT { return &gLangModelCHT; } diff --git a/Source/Modules/LangModelRelated/mgrLangModel_Privates.h b/Source/Modules/LangModelRelated/mgrLangModel_Privates.h index 22077d25..cc42ca2a 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel_Privates.h +++ b/Source/Modules/LangModelRelated/mgrLangModel_Privates.h @@ -1,33 +1,40 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#import "mgrLangModel.h" -#import "UserOverrideModel.h" #import "LMInstantiator.h" +#import "UserOverrideModel.h" +#import "mgrLangModel.h" NS_ASSUME_NONNULL_BEGIN @interface mgrLangModel () -@property (class, readonly, nonatomic) vChewing::LMInstantiator *lmCHT; -@property (class, readonly, nonatomic) vChewing::LMInstantiator *lmCHS; -@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS; -@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT; +@property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHT; +@property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHS; +@property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS; +@property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT; @end NS_ASSUME_NONNULL_END diff --git a/Source/Modules/LanguageParsers/Gramambular/Bigram.h b/Source/Modules/LanguageParsers/Gramambular/Bigram.h index 16576b31..a4b8c8b2 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Bigram.h +++ b/Source/Modules/LanguageParsers/Gramambular/Bigram.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef BIGRAM_H_ @@ -24,69 +31,80 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "KeyValuePair.h" -namespace Gramambular { -class Bigram { -public: +namespace Gramambular +{ +class Bigram +{ + public: Bigram(); - + KeyValuePair preceedingKeyValue; KeyValuePair keyValue; double score; - - bool operator==(const Bigram& another) const; - bool operator<(const Bigram& another) const; + + bool operator==(const Bigram &another) const; + bool operator<(const Bigram &another) const; }; -inline std::ostream& operator<<(std::ostream& stream, const Bigram& gram) { +inline std::ostream &operator<<(std::ostream &stream, const Bigram &gram) +{ std::streamsize p = stream.precision(); stream.precision(6); - stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," - << gram.score << ")"; + stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," << gram.score << ")"; stream.precision(p); return stream; } -inline std::ostream& operator<<(std::ostream& stream, - const std::vector& grams) { +inline std::ostream &operator<<(std::ostream &stream, const std::vector &grams) +{ stream << "[" << grams.size() << "]=>{"; - + size_t index = 0; - - for (std::vector::const_iterator gi = grams.begin(); - gi != grams.end(); ++gi, ++index) { + + for (std::vector::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index) + { stream << index << "=>"; stream << *gi; - if (gi + 1 != grams.end()) { + if (gi + 1 != grams.end()) + { stream << ","; } } - + stream << "}"; return stream; } -inline Bigram::Bigram() : score(0.0) {} - -inline bool Bigram::operator==(const Bigram& another) const { - return preceedingKeyValue == another.preceedingKeyValue && - keyValue == another.keyValue && score == another.score; +inline Bigram::Bigram() : score(0.0) +{ } -inline bool Bigram::operator<(const Bigram& another) const { - if (preceedingKeyValue < another.preceedingKeyValue) { +inline bool Bigram::operator==(const Bigram &another) const +{ + return preceedingKeyValue == another.preceedingKeyValue && keyValue == another.keyValue && score == another.score; +} + +inline bool Bigram::operator<(const Bigram &another) const +{ + if (preceedingKeyValue < another.preceedingKeyValue) + { return true; - } else if (preceedingKeyValue == another.preceedingKeyValue) { - if (keyValue < another.keyValue) { + } + else if (preceedingKeyValue == another.preceedingKeyValue) + { + if (keyValue < another.keyValue) + { return true; - } else if (keyValue == another.keyValue) { + } + else if (keyValue == another.keyValue) + { return score < another.score; } return false; } - + return false; } -} // namespace Gramambular - +} // namespace Gramambular #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h b/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h index 4ece863b..12046b15 100644 --- a/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h +++ b/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef BLOCKREADINGBUILDER_H_ @@ -26,157 +33,186 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "Grid.h" #include "LanguageModel.h" -namespace Gramambular { +namespace Gramambular +{ -class BlockReadingBuilder { -public: - explicit BlockReadingBuilder(LanguageModel* lm); +class BlockReadingBuilder +{ + public: + explicit BlockReadingBuilder(LanguageModel *lm); void clear(); - + size_t length() const; size_t cursorIndex() const; void setCursorIndex(size_t newIndex); - void insertReadingAtCursor(const std::string& reading); - bool deleteReadingBeforeCursor(); // backspace - bool deleteReadingAfterCursor(); // delete - + void insertReadingAtCursor(const std::string &reading); + bool deleteReadingBeforeCursor(); // backspace + bool deleteReadingAfterCursor(); // delete + bool removeHeadReadings(size_t count); - - void setJoinSeparator(const std::string& separator); + + void setJoinSeparator(const std::string &separator); const std::string joinSeparator() const; - + std::vector readings() const; - - Grid& grid(); - -protected: + + Grid &grid(); + + protected: void build(); - + static const std::string Join(std::vector::const_iterator begin, - std::vector::const_iterator end, - const std::string& separator); - + std::vector::const_iterator end, const std::string &separator); + // 規定最多可以組成的詞的字數上限為 10 static const size_t MaximumBuildSpanLength = 10; - + size_t m_cursorIndex; std::vector m_readings; - + Grid m_grid; - LanguageModel* m_LM; + LanguageModel *m_LM; std::string m_joinSeparator; }; -inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* lm) -: m_LM(lm), m_cursorIndex(0) {} +inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *lm) : m_LM(lm), m_cursorIndex(0) +{ +} -inline void BlockReadingBuilder::clear() { +inline void BlockReadingBuilder::clear() +{ m_cursorIndex = 0; m_readings.clear(); m_grid.clear(); } -inline size_t BlockReadingBuilder::length() const { return m_readings.size(); } +inline size_t BlockReadingBuilder::length() const +{ + return m_readings.size(); +} -inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; } +inline size_t BlockReadingBuilder::cursorIndex() const +{ + return m_cursorIndex; +} -inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) { +inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) +{ m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex; } -inline void BlockReadingBuilder::insertReadingAtCursor( - const std::string& reading) { +inline void BlockReadingBuilder::insertReadingAtCursor(const std::string &reading) +{ m_readings.insert(m_readings.begin() + m_cursorIndex, reading); - + m_grid.expandGridByOneAtLocation(m_cursorIndex); build(); m_cursorIndex++; } -inline std::vector BlockReadingBuilder::readings() const { +inline std::vector BlockReadingBuilder::readings() const +{ return m_readings; } -inline bool BlockReadingBuilder::deleteReadingBeforeCursor() { - if (!m_cursorIndex) { +inline bool BlockReadingBuilder::deleteReadingBeforeCursor() +{ + if (!m_cursorIndex) + { return false; } - - m_readings.erase(m_readings.begin() + m_cursorIndex - 1, - m_readings.begin() + m_cursorIndex); + + m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex); m_cursorIndex--; m_grid.shrinkGridByOneAtLocation(m_cursorIndex); build(); return true; } -inline bool BlockReadingBuilder::deleteReadingAfterCursor() { - if (m_cursorIndex == m_readings.size()) { +inline bool BlockReadingBuilder::deleteReadingAfterCursor() +{ + if (m_cursorIndex == m_readings.size()) + { return false; } - - m_readings.erase(m_readings.begin() + m_cursorIndex, - m_readings.begin() + m_cursorIndex + 1); + + m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1); m_grid.shrinkGridByOneAtLocation(m_cursorIndex); build(); return true; } -inline bool BlockReadingBuilder::removeHeadReadings(size_t count) { - if (count > length()) { +inline bool BlockReadingBuilder::removeHeadReadings(size_t count) +{ + if (count > length()) + { return false; } - - for (size_t i = 0; i < count; i++) { - if (m_cursorIndex) { + + for (size_t i = 0; i < count; i++) + { + if (m_cursorIndex) + { m_cursorIndex--; } m_readings.erase(m_readings.begin(), m_readings.begin() + 1); m_grid.shrinkGridByOneAtLocation(0); build(); } - + return true; } -inline void BlockReadingBuilder::setJoinSeparator( - const std::string& separator) { +inline void BlockReadingBuilder::setJoinSeparator(const std::string &separator) +{ m_joinSeparator = separator; } -inline const std::string BlockReadingBuilder::joinSeparator() const { +inline const std::string BlockReadingBuilder::joinSeparator() const +{ return m_joinSeparator; } -inline Grid& BlockReadingBuilder::grid() { return m_grid; } +inline Grid &BlockReadingBuilder::grid() +{ + return m_grid; +} -inline void BlockReadingBuilder::build() { - if (!m_LM) { +inline void BlockReadingBuilder::build() +{ + if (!m_LM) + { return; } - + size_t begin = 0; size_t end = m_cursorIndex + MaximumBuildSpanLength; - - if (m_cursorIndex < MaximumBuildSpanLength) { + + if (m_cursorIndex < MaximumBuildSpanLength) + { begin = 0; - } else { + } + else + { begin = m_cursorIndex - MaximumBuildSpanLength; } - - if (end > m_readings.size()) { + + if (end > m_readings.size()) + { end = m_readings.size(); } - - for (size_t p = begin; p < end; p++) { - for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) { - std::string combinedReading = Join( - m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); - if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, - combinedReading)) { + + for (size_t p = begin; p < end; p++) + { + for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) + { + std::string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); + if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) + { std::vector unigrams = m_LM->unigramsForKey(combinedReading); - - if (unigrams.size() > 0) { + + if (unigrams.size() > 0) + { Node n(combinedReading, unigrams, std::vector()); m_grid.insertNode(n, p, q); } @@ -185,21 +221,22 @@ inline void BlockReadingBuilder::build() { } } -inline const std::string BlockReadingBuilder::Join( - std::vector::const_iterator begin, +inline const std::string BlockReadingBuilder::Join(std::vector::const_iterator begin, std::vector::const_iterator end, - const std::string& separator) { + const std::string &separator) +{ std::string result; - for (std::vector::const_iterator iter = begin; iter != end;) { + for (std::vector::const_iterator iter = begin; iter != end;) + { result += *iter; ++iter; - if (iter != end) { + if (iter != end) + { result += separator; } } return result; } -} // namespace Gramambular - +} // namespace Gramambular #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Gramambular.h b/Source/Modules/LanguageParsers/Gramambular/Gramambular.h index d2601d3f..d33a298b 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Gramambular.h +++ b/Source/Modules/LanguageParsers/Gramambular/Gramambular.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef GRAMAMBULAR_H_ diff --git a/Source/Modules/LanguageParsers/Gramambular/Grid.h b/Source/Modules/LanguageParsers/Gramambular/Grid.h index 6113bb8c..0244d076 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Grid.h +++ b/Source/Modules/LanguageParsers/Gramambular/Grid.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef GRID_H_ @@ -27,207 +34,247 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "NodeAnchor.h" #include "Span.h" -namespace Gramambular { +namespace Gramambular +{ -class Grid { -public: +class Grid +{ + public: void clear(); - void insertNode(const Node& node, size_t location, size_t spanningLength); - bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, - size_t spanningLength, - const std::string& key); - + void insertNode(const Node &node, size_t location, size_t spanningLength); + bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, const std::string &key); + void expandGridByOneAtLocation(size_t location); void shrinkGridByOneAtLocation(size_t location); - + size_t width() const; std::vector nodesEndingAt(size_t location); std::vector nodesCrossingOrEndingAt(size_t location); - + // "Freeze" the node with the unigram that represents the selected candidate // value. After this, the node that contains the unigram will always be // evaluated to that unigram, while all other overlapping nodes will be reset // to their initial state (that is, if any of those nodes were "frozen" or // fixed, they will be unfrozen.) - NodeAnchor fixNodeSelectedCandidate(size_t location, - const std::string& value); - + NodeAnchor fixNodeSelectedCandidate(size_t location, const std::string &value); + // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, // only boost the unigram that represents the value with an overriding score. // This has the same side effect as fixNodeSelectedCandidate, which is that // all other overlapping nodes will be reset to their initial state. - void overrideNodeScoreForSelectedCandidate(size_t location, - const std::string& value, - float overridingScore); - - std::string dumpDOT() { + void overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, float overridingScore); + + std::string dumpDOT() + { std::stringstream sst; sst << "digraph {" << std::endl; sst << "graph [ rankdir=LR ];" << std::endl; sst << "BOS;" << std::endl; - - for (size_t p = 0; p < m_spans.size(); p++) { - Span& span = m_spans[p]; - for (size_t ni = 0; ni <= span.maximumLength(); ni++) { - Node* np = span.nodeOfLength(ni); - if (np) { - if (!p) { + + for (size_t p = 0; p < m_spans.size(); p++) + { + Span &span = m_spans[p]; + for (size_t ni = 0; ni <= span.maximumLength(); ni++) + { + Node *np = span.nodeOfLength(ni); + if (np) + { + if (!p) + { sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl; } - + sst << np->currentKeyValue().value << ";" << std::endl; - - if (p + ni < m_spans.size()) { - Span& dstSpan = m_spans[p + ni]; - for (size_t q = 0; q <= dstSpan.maximumLength(); q++) { - Node* dn = dstSpan.nodeOfLength(q); - if (dn) { - sst << np->currentKeyValue().value << " -> " - << dn->currentKeyValue().value << ";" << std::endl; + + if (p + ni < m_spans.size()) + { + Span &dstSpan = m_spans[p + ni]; + for (size_t q = 0; q <= dstSpan.maximumLength(); q++) + { + Node *dn = dstSpan.nodeOfLength(q); + if (dn) + { + sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" + << std::endl; } } } - - if (p + ni == m_spans.size()) { + + if (p + ni == m_spans.size()) + { sst << np->currentKeyValue().value << " -> " - << "EOS;" << std::endl; + << "EOS;" << std::endl; } } } } - + sst << "EOS;" << std::endl; sst << "}"; return sst.str(); } - -protected: + + protected: std::vector m_spans; }; -inline void Grid::clear() { m_spans.clear(); } +inline void Grid::clear() +{ + m_spans.clear(); +} -inline void Grid::insertNode(const Node& node, size_t location, - size_t spanningLength) { - if (location >= m_spans.size()) { +inline void Grid::insertNode(const Node &node, size_t location, size_t spanningLength) +{ + if (location >= m_spans.size()) + { size_t diff = location - m_spans.size() + 1; - - for (size_t i = 0; i < diff; i++) { + + for (size_t i = 0; i < diff; i++) + { m_spans.push_back(Span()); } } - + m_spans[location].insertNodeOfLength(node, spanningLength); } -inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey( - size_t location, size_t spanningLength, const std::string& key) { - if (location > m_spans.size()) { +inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, + const std::string &key) +{ + if (location > m_spans.size()) + { return false; } - - const Node* n = m_spans[location].nodeOfLength(spanningLength); - if (!n) { + + const Node *n = m_spans[location].nodeOfLength(spanningLength); + if (!n) + { return false; } - + return key == n->key(); } -inline void Grid::expandGridByOneAtLocation(size_t location) { - if (!location || location == m_spans.size()) { +inline void Grid::expandGridByOneAtLocation(size_t location) +{ + if (!location || location == m_spans.size()) + { m_spans.insert(m_spans.begin() + location, Span()); - } else { + } + else + { m_spans.insert(m_spans.begin() + location, Span()); - for (size_t i = 0; i < location; i++) { + for (size_t i = 0; i < location; i++) + { // zaps overlapping spans m_spans[i].removeNodeOfLengthGreaterThan(location - i); } } } -inline void Grid::shrinkGridByOneAtLocation(size_t location) { - if (location >= m_spans.size()) { +inline void Grid::shrinkGridByOneAtLocation(size_t location) +{ + if (location >= m_spans.size()) + { return; } - + m_spans.erase(m_spans.begin() + location); - for (size_t i = 0; i < location; i++) { + for (size_t i = 0; i < location; i++) + { // zaps overlapping spans m_spans[i].removeNodeOfLengthGreaterThan(location - i); } } -inline size_t Grid::width() const { return m_spans.size(); } +inline size_t Grid::width() const +{ + return m_spans.size(); +} -inline std::vector Grid::nodesEndingAt(size_t location) { +inline std::vector Grid::nodesEndingAt(size_t location) +{ std::vector result; - - if (m_spans.size() && location <= m_spans.size()) { - for (size_t i = 0; i < location; i++) { - Span& span = m_spans[i]; - if (i + span.maximumLength() >= location) { - Node* np = span.nodeOfLength(location - i); - if (np) { + + if (m_spans.size() && location <= m_spans.size()) + { + for (size_t i = 0; i < location; i++) + { + Span &span = m_spans[i]; + if (i + span.maximumLength() >= location) + { + Node *np = span.nodeOfLength(location - i); + if (np) + { NodeAnchor na; na.node = np; na.location = i; na.spanningLength = location - i; - + result.push_back(na); } } } } - + return result; } -inline std::vector Grid::nodesCrossingOrEndingAt(size_t location) { +inline std::vector Grid::nodesCrossingOrEndingAt(size_t location) +{ std::vector result; - - if (m_spans.size() && location <= m_spans.size()) { - for (size_t i = 0; i < location; i++) { - Span& span = m_spans[i]; - - if (i + span.maximumLength() >= location) { - for (size_t j = 1, m = span.maximumLength(); j <= m; j++) { - if (i + j < location) { + + if (m_spans.size() && location <= m_spans.size()) + { + for (size_t i = 0; i < location; i++) + { + Span &span = m_spans[i]; + + if (i + span.maximumLength() >= location) + { + for (size_t j = 1, m = span.maximumLength(); j <= m; j++) + { + if (i + j < location) + { continue; } - - Node* np = span.nodeOfLength(j); - if (np) { + + Node *np = span.nodeOfLength(j); + if (np) + { NodeAnchor na; na.node = np; na.location = i; na.spanningLength = location - i; - + result.push_back(na); } } } } } - + return result; } // For nodes found at the location, fix their currently-selected candidate using // the supplied string value. -inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, - const std::string& value) { +inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const std::string &value) +{ std::vector nodes = nodesCrossingOrEndingAt(location); NodeAnchor node; - for (auto nodeAnchor : nodes) { + for (auto nodeAnchor : nodes) + { auto candidates = nodeAnchor.node->candidates(); - + // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == value) { - const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) + { + if (candidates[i].value == value) + { + const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); node = nodeAnchor; break; } @@ -236,26 +283,28 @@ inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, return node; } -inline void Grid::overrideNodeScoreForSelectedCandidate( - size_t location, const std::string& value, float overridingScore) { +inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, + float overridingScore) +{ std::vector nodes = nodesCrossingOrEndingAt(location); - for (auto nodeAnchor : nodes) { + for (auto nodeAnchor : nodes) + { auto candidates = nodeAnchor.node->candidates(); - + // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) { - if (candidates[i].value == value) { - const_cast(nodeAnchor.node) - ->selectFloatingCandidateAtIndex(i, overridingScore); + const_cast(nodeAnchor.node)->resetCandidate(); + + for (size_t i = 0, c = candidates.size(); i < c; ++i) + { + if (candidates[i].value == value) + { + const_cast(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); break; } } } } -} // namespace Gramambular - +} // namespace Gramambular #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h b/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h index 569687ed..231d6342 100644 --- a/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h +++ b/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef KEYVALUEPAIR_H_ @@ -23,36 +30,42 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include #include -namespace Gramambular { +namespace Gramambular +{ -class KeyValuePair { -public: +class KeyValuePair +{ + public: std::string key; std::string value; - - bool operator==(const KeyValuePair& another) const; - bool operator<(const KeyValuePair& another) const; + + bool operator==(const KeyValuePair &another) const; + bool operator<(const KeyValuePair &another) const; }; -inline std::ostream& operator<<(std::ostream& stream, - const KeyValuePair& pair) { +inline std::ostream &operator<<(std::ostream &stream, const KeyValuePair &pair) +{ stream << "(" << pair.key << "," << pair.value << ")"; return stream; } -inline bool KeyValuePair::operator==(const KeyValuePair& another) const { +inline bool KeyValuePair::operator==(const KeyValuePair &another) const +{ return key == another.key && value == another.value; } -inline bool KeyValuePair::operator<(const KeyValuePair& another) const { - if (key < another.key) { +inline bool KeyValuePair::operator<(const KeyValuePair &another) const +{ + if (key < another.key) + { return true; - } else if (key == another.key) { + } + else if (key == another.key) + { return value < another.value; } return false; } -} // namespace Gramambular - +} // namespace Gramambular #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h b/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h index bed61ab5..1049c011 100644 --- a/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h +++ b/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef LANGUAGEMODEL_H_ @@ -26,18 +33,20 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "Bigram.h" #include "Unigram.h" -namespace Gramambular { +namespace Gramambular +{ -class LanguageModel { -public: - virtual ~LanguageModel() {} - - virtual const std::vector bigramsForKeys( - const std::string& preceedingKey, const std::string& key) = 0; - virtual const std::vector unigramsForKey(const std::string& key) = 0; - virtual bool hasUnigramsForKey(const std::string& key) = 0; +class LanguageModel +{ + public: + virtual ~LanguageModel() + { + } + + virtual const std::vector bigramsForKeys(const std::string &preceedingKey, const std::string &key) = 0; + virtual const std::vector unigramsForKey(const std::string &key) = 0; + virtual bool hasUnigramsForKey(const std::string &key) = 0; }; -} // namespace Gramambular - +} // namespace Gramambular #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Node.h b/Source/Modules/LanguageParsers/Gramambular/Node.h index 42de5910..16b69fdf 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Node.h +++ b/Source/Modules/LanguageParsers/Gramambular/Node.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef NODE_H_ @@ -27,105 +34,105 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "LanguageModel.h" -namespace Gramambular { +namespace Gramambular +{ -class Node { -public: +class Node +{ + public: Node(); - Node(const std::string& key, const std::vector& unigrams, - const std::vector& bigrams); - - void primeNodeWithPreceedingKeyValues( - const std::vector& keyValues); - + Node(const std::string &key, const std::vector &unigrams, const std::vector &bigrams); + + void primeNodeWithPreceedingKeyValues(const std::vector &keyValues); + bool isCandidateFixed() const; - const std::vector& candidates() const; + const std::vector &candidates() const; void selectCandidateAtIndex(size_t index = 0, bool fix = true); void resetCandidate(); void selectFloatingCandidateAtIndex(size_t index, double score); - - const std::string& key() const; + + const std::string &key() const; double score() const; - double scoreForCandidate(const std::string& candidate) const; + double scoreForCandidate(const std::string &candidate) const; const KeyValuePair currentKeyValue() const; double highestUnigramScore() const; - -protected: - const LanguageModel* m_LM; - + + protected: + const LanguageModel *m_LM; + std::string m_key; double m_score; - + std::vector m_unigrams; std::vector m_candidates; std::map m_valueUnigramIndexMap; - std::map > m_preceedingGramBigramMap; - + std::map> m_preceedingGramBigramMap; + bool m_candidateFixed; size_t m_selectedUnigramIndex; - - friend std::ostream& operator<<(std::ostream& stream, const Node& node); + + friend std::ostream &operator<<(std::ostream &stream, const Node &node); }; -inline std::ostream& operator<<(std::ostream& stream, const Node& node) { - stream << "(node,key:" << node.m_key - << ",fixed:" << (node.m_candidateFixed ? "true" : "false") - << ",selected:" << node.m_selectedUnigramIndex << "," - << node.m_unigrams << ")"; +inline std::ostream &operator<<(std::ostream &stream, const Node &node) +{ + stream << "(node,key:" << node.m_key << ",fixed:" << (node.m_candidateFixed ? "true" : "false") + << ",selected:" << node.m_selectedUnigramIndex << "," << node.m_unigrams << ")"; return stream; } -inline Node::Node() -: m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {} +inline Node::Node() : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) +{ +} -inline Node::Node(const std::string& key, const std::vector& unigrams, - const std::vector& bigrams) -: m_key(key), -m_unigrams(unigrams), -m_candidateFixed(false), -m_selectedUnigramIndex(0), -m_score(0.0) { +inline Node::Node(const std::string &key, const std::vector &unigrams, const std::vector &bigrams) + : m_key(key), m_unigrams(unigrams), m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) +{ stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); - - if (m_unigrams.size()) { + + if (m_unigrams.size()) + { m_score = m_unigrams[0].score; } - + size_t i = 0; - for (std::vector::const_iterator ui = m_unigrams.begin(); - ui != m_unigrams.end(); ++ui) { + for (std::vector::const_iterator ui = m_unigrams.begin(); ui != m_unigrams.end(); ++ui) + { m_valueUnigramIndexMap[(*ui).keyValue.value] = i; i++; - + m_candidates.push_back((*ui).keyValue); } - - for (std::vector::const_iterator bi = bigrams.begin(); - bi != bigrams.end(); ++bi) { + + for (std::vector::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi) + { m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); } } -inline void Node::primeNodeWithPreceedingKeyValues( - const std::vector& keyValues) { +inline void Node::primeNodeWithPreceedingKeyValues(const std::vector &keyValues) +{ size_t newIndex = m_selectedUnigramIndex; double max = m_score; - - if (!isCandidateFixed()) { - for (std::vector::const_iterator kvi = keyValues.begin(); - kvi != keyValues.end(); ++kvi) { - std::map >::const_iterator f = - m_preceedingGramBigramMap.find(*kvi); - if (f != m_preceedingGramBigramMap.end()) { - const std::vector& bigrams = (*f).second; - - for (std::vector::const_iterator bi = bigrams.begin(); - bi != bigrams.end(); ++bi) { - const Bigram& bigram = *bi; - if (bigram.score > max) { + + if (!isCandidateFixed()) + { + for (std::vector::const_iterator kvi = keyValues.begin(); kvi != keyValues.end(); ++kvi) + { + std::map>::const_iterator f = m_preceedingGramBigramMap.find(*kvi); + if (f != m_preceedingGramBigramMap.end()) + { + const std::vector &bigrams = (*f).second; + + for (std::vector::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi) + { + const Bigram &bigram = *bi; + if (bigram.score > max) + { std::map::const_iterator uf = - m_valueUnigramIndexMap.find((*bi).keyValue.value); - if (uf != m_valueUnigramIndexMap.end()) { + m_valueUnigramIndexMap.find((*bi).keyValue.value); + if (uf != m_valueUnigramIndexMap.end()) + { newIndex = (*uf).second; max = bigram.score; } @@ -134,80 +141,109 @@ inline void Node::primeNodeWithPreceedingKeyValues( } } } - - if (m_score != max) { + + if (m_score != max) + { m_score = max; } - - if (newIndex != m_selectedUnigramIndex) { + + if (newIndex != m_selectedUnigramIndex) + { m_selectedUnigramIndex = newIndex; } } -inline bool Node::isCandidateFixed() const { return m_candidateFixed; } +inline bool Node::isCandidateFixed() const +{ + return m_candidateFixed; +} -inline const std::vector& Node::candidates() const { +inline const std::vector &Node::candidates() const +{ return m_candidates; } -inline void Node::selectCandidateAtIndex(size_t index, bool fix) { - if (index >= m_unigrams.size()) { +inline void Node::selectCandidateAtIndex(size_t index, bool fix) +{ + if (index >= m_unigrams.size()) + { m_selectedUnigramIndex = 0; - } else { + } + else + { m_selectedUnigramIndex = index; } - + m_candidateFixed = fix; m_score = 99; } -inline void Node::resetCandidate() { +inline void Node::resetCandidate() +{ m_selectedUnigramIndex = 0; m_candidateFixed = 0; - if (m_unigrams.size()) { + if (m_unigrams.size()) + { m_score = m_unigrams[0].score; } } -inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { - if (index >= m_unigrams.size()) { +inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) +{ + if (index >= m_unigrams.size()) + { m_selectedUnigramIndex = 0; - } else { + } + else + { m_selectedUnigramIndex = index; } m_candidateFixed = false; m_score = score; } -inline const std::string& Node::key() const { return m_key; } +inline const std::string &Node::key() const +{ + return m_key; +} -inline double Node::score() const { return m_score; } +inline double Node::score() const +{ + return m_score; +} - -inline double Node::scoreForCandidate(const std::string& candidate) const { - for (auto unigram : m_unigrams) { - if (unigram.keyValue.value == candidate) { +inline double Node::scoreForCandidate(const std::string &candidate) const +{ + for (auto unigram : m_unigrams) + { + if (unigram.keyValue.value == candidate) + { return unigram.score; } } return 0.0; } -inline double Node::highestUnigramScore() const { - if (m_unigrams.empty()) { +inline double Node::highestUnigramScore() const +{ + if (m_unigrams.empty()) + { return 0.0; } return m_unigrams[0].score; } -inline const KeyValuePair Node::currentKeyValue() const { - if (m_selectedUnigramIndex >= m_unigrams.size()) { +inline const KeyValuePair Node::currentKeyValue() const +{ + if (m_selectedUnigramIndex >= m_unigrams.size()) + { return KeyValuePair(); - } else { + } + else + { return m_candidates[m_selectedUnigramIndex]; } } -} // namespace Gramambular - +} // namespace Gramambular #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h b/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h index 485bb51f..432566a0 100644 --- a/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h +++ b/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef NODEANCHOR_H_ @@ -24,40 +31,45 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "Node.h" -namespace Gramambular { +namespace Gramambular +{ -struct NodeAnchor { - const Node* node = nullptr; +struct NodeAnchor +{ + const Node *node = nullptr; size_t location = 0; size_t spanningLength = 0; double accumulatedScore = 0.0; }; -inline std::ostream& operator<<(std::ostream& stream, - const NodeAnchor& anchor) { +inline std::ostream &operator<<(std::ostream &stream, const NodeAnchor &anchor) +{ stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),"; - if (anchor.node) { + if (anchor.node) + { stream << *(anchor.node); - } else { + } + else + { stream << "null"; } stream << "}"; return stream; } -inline std::ostream& operator<<(std::ostream& stream, - const std::vector& anchor) { - for (std::vector::const_iterator i = anchor.begin(); - i != anchor.end(); ++i) { +inline std::ostream &operator<<(std::ostream &stream, const std::vector &anchor) +{ + for (std::vector::const_iterator i = anchor.begin(); i != anchor.end(); ++i) + { stream << *i; - if (i + 1 != anchor.end()) { + if (i + 1 != anchor.end()) + { stream << "<-"; } } - + return stream; } -} // namespace Gramambular - +} // namespace Gramambular #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Span.h b/Source/Modules/LanguageParsers/Gramambular/Span.h index 30c12692..57c9a64c 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Span.h +++ b/Source/Modules/LanguageParsers/Gramambular/Span.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef SPAN_H_ @@ -26,67 +33,80 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "Node.h" -namespace Gramambular { -class Span { -public: +namespace Gramambular +{ +class Span +{ + public: void clear(); - void insertNodeOfLength(const Node& node, size_t length); + void insertNodeOfLength(const Node &node, size_t length); void removeNodeOfLengthGreaterThan(size_t length); - - Node* nodeOfLength(size_t length); + + Node *nodeOfLength(size_t length); size_t maximumLength() const; - -protected: + + protected: std::map m_lengthNodeMap; size_t m_maximumLength = 0; }; -inline void Span::clear() { +inline void Span::clear() +{ m_lengthNodeMap.clear(); m_maximumLength = 0; } -inline void Span::insertNodeOfLength(const Node& node, size_t length) { +inline void Span::insertNodeOfLength(const Node &node, size_t length) +{ m_lengthNodeMap[length] = node; - if (length > m_maximumLength) { + if (length > m_maximumLength) + { m_maximumLength = length; } } -inline void Span::removeNodeOfLengthGreaterThan(size_t length) { - if (length > m_maximumLength) { +inline void Span::removeNodeOfLengthGreaterThan(size_t length) +{ + if (length > m_maximumLength) + { return; } - + size_t max = 0; std::set removeSet; - for (std::map::iterator i = m_lengthNodeMap.begin(), - e = m_lengthNodeMap.end(); - i != e; ++i) { - if ((*i).first > length) { + for (std::map::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end(); i != e; ++i) + { + if ((*i).first > length) + { removeSet.insert((*i).first); - } else { - if ((*i).first > max) { + } + else + { + if ((*i).first > max) + { max = (*i).first; } } } - - for (std::set::iterator i = removeSet.begin(), e = removeSet.end(); - i != e; ++i) { + + for (std::set::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) + { m_lengthNodeMap.erase(*i); } - + m_maximumLength = max; } -inline Node* Span::nodeOfLength(size_t length) { +inline Node *Span::nodeOfLength(size_t length) +{ std::map::iterator f = m_lengthNodeMap.find(length); return f == m_lengthNodeMap.end() ? 0 : &(*f).second; } -inline size_t Span::maximumLength() const { return m_maximumLength; } -} // namespace Gramambular - +inline size_t Span::maximumLength() const +{ + return m_maximumLength; +} +} // namespace Gramambular #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Unigram.h b/Source/Modules/LanguageParsers/Gramambular/Unigram.h index 45b461d4..7faac48d 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Unigram.h +++ b/Source/Modules/LanguageParsers/Gramambular/Unigram.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef UNIGRAM_H_ @@ -24,22 +31,25 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "KeyValuePair.h" -namespace Gramambular { +namespace Gramambular +{ -class Unigram { -public: +class Unigram +{ + public: Unigram(); - + KeyValuePair keyValue; double score; - - bool operator==(const Unigram& another) const; - bool operator<(const Unigram& another) const; - - static bool ScoreCompare(const Unigram& a, const Unigram& b); + + bool operator==(const Unigram &another) const; + bool operator<(const Unigram &another) const; + + static bool ScoreCompare(const Unigram &a, const Unigram &b); }; -inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) { +inline std::ostream &operator<<(std::ostream &stream, const Unigram &gram) +{ std::streamsize p = stream.precision(); stream.precision(6); stream << "(" << gram.keyValue << "," << gram.score << ")"; @@ -47,44 +57,52 @@ inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) { return stream; } -inline std::ostream& operator<<(std::ostream& stream, - const std::vector& grams) { +inline std::ostream &operator<<(std::ostream &stream, const std::vector &grams) +{ stream << "[" << grams.size() << "]=>{"; - + size_t index = 0; - - for (std::vector::const_iterator gi = grams.begin(); - gi != grams.end(); ++gi, ++index) { + + for (std::vector::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index) + { stream << index << "=>"; stream << *gi; - if (gi + 1 != grams.end()) { + if (gi + 1 != grams.end()) + { stream << ","; } } - + stream << "}"; return stream; } -inline Unigram::Unigram() : score(0.0) {} +inline Unigram::Unigram() : score(0.0) +{ +} -inline bool Unigram::operator==(const Unigram& another) const { +inline bool Unigram::operator==(const Unigram &another) const +{ return keyValue == another.keyValue && score == another.score; } -inline bool Unigram::operator<(const Unigram& another) const { - if (keyValue < another.keyValue) { +inline bool Unigram::operator<(const Unigram &another) const +{ + if (keyValue < another.keyValue) + { return true; - } else if (keyValue == another.keyValue) { + } + else if (keyValue == another.keyValue) + { return score < another.score; } return false; } -inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) { +inline bool Unigram::ScoreCompare(const Unigram &a, const Unigram &b) +{ return a.score > b.score; } -} // namespace Gramambular - +} // namespace Gramambular #endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Walker.h b/Source/Modules/LanguageParsers/Gramambular/Walker.h index b694c7e7..c5ef2e3d 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Walker.h +++ b/Source/Modules/LanguageParsers/Gramambular/Walker.h @@ -1,20 +1,27 @@ // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// All possible vChewing-specific modifications are of: +// (c) 2021 and onwards The vChewing Project (MIT-NTL License). /* -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: -1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. -2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, - except as required to fulfill notice requirements above. +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef WALKER_H_ @@ -25,60 +32,65 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH #include "Grid.h" -namespace Gramambular { +namespace Gramambular +{ -class Walker { -public: - explicit Walker(Grid* inGrid); - const std::vector reverseWalk(size_t location, - double accumulatedScore = 0.0); - -protected: - Grid* m_grid; +class Walker +{ + public: + explicit Walker(Grid *inGrid); + const std::vector reverseWalk(size_t location, double accumulatedScore = 0.0); + + protected: + Grid *m_grid; }; -inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {} +inline Walker::Walker(Grid *inGrid) : m_grid(inGrid) +{ +} -inline const std::vector Walker::reverseWalk( - size_t location, double accumulatedScore) { - if (!location || location > m_grid->width()) { +inline const std::vector Walker::reverseWalk(size_t location, double accumulatedScore) +{ + if (!location || location > m_grid->width()) + { return std::vector(); } - - std::vector > paths; - + + std::vector> paths; + std::vector nodes = m_grid->nodesEndingAt(location); - - for (std::vector::iterator ni = nodes.begin(); ni != nodes.end(); - ++ni) { - if (!(*ni).node) { + + for (std::vector::iterator ni = nodes.begin(); ni != nodes.end(); ++ni) + { + if (!(*ni).node) + { continue; } - + (*ni).accumulatedScore = accumulatedScore + (*ni).node->score(); - - std::vector path = - reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore); + + std::vector path = reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore); path.insert(path.begin(), *ni); - + paths.push_back(path); } - - if (!paths.size()) { + + if (!paths.size()) + { return std::vector(); } - - std::vector* result = &*(paths.begin()); - for (std::vector >::iterator pi = paths.begin(); - pi != paths.end(); ++pi) { - if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { + + std::vector *result = &*(paths.begin()); + for (std::vector>::iterator pi = paths.begin(); pi != paths.end(); ++pi) + { + if ((*pi).back().accumulatedScore > result->back().accumulatedScore) + { result = &*pi; } } - + return *result; } -} // namespace Gramambular - +} // namespace Gramambular #endif