(Obj)C(pp) // Clang-Format.

This commit is contained in:
ShikiSuen 2022-04-03 14:05:04 +08:00
parent eedf95f307
commit 3842dc5013
48 changed files with 4318 additions and 2707 deletions

171
.clang-format Normal file
View File

@ -0,0 +1,171 @@
---
Language: Cpp
# BasedOnStyle: Microsoft
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: false
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<ext/.*\.h>'
    Priority: 2
    SortPriority: 0
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentCaseLabels: true
IndentGotoLabels: true
IndentPPDirectives: None
IndentWidth: 4
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - h
      - m
      - hh
      - mm
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
    BasedOnStyle: Microsoft
  - Language: TextProto
    Delimiters:
      - pb
      - PB
      - proto
      - PROTO
    EnclosingFunctions:
      - EqualsProto
      - EquivToProto
      - PARSE_PARTIAL_TEXT_PROTO
      - PARSE_TEST_PROTO
      - PARSE_TEXT_PROTO
      - ParseTextOrDie
      - ParseTextProtoOrDie
    CanonicalDelimiter: ''
    BasedOnStyle: Microsoft
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: Auto
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 4
UseCRLF: false
UseTab: Always
...

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
@import Cocoa; @import Cocoa;

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#import "Chronosphere.h" #import "Chronosphere.h"
@ -27,15 +34,18 @@ BOOL appBundleChronoshiftedToARandomizedPath(NSString *bundle)
int entrySize = sizeof(struct statfs); int entrySize = sizeof(struct statfs);
struct statfs *bufs = (struct statfs *)calloc(entryCount, entrySize); struct statfs *bufs = (struct statfs *)calloc(entryCount, entrySize);
entryCount = getfsstat(bufs, entryCount * entrySize, MNT_NOWAIT); entryCount = getfsstat(bufs, entryCount * entrySize, MNT_NOWAIT);
for (int i = 0; i < entryCount; i++) { for (int i = 0; i < entryCount; i++)
if (!strcmp(bundleAbsPath, bufs[i].f_mntfromname)) { {
if (!strcmp(bundleAbsPath, bufs[i].f_mntfromname))
{
free(bufs); free(bufs);
// getfsstat() may return us a cached result, and so we need to get the stat of the mounted fs. // getfsstat() may return us a cached result, and so we need to get the stat of the mounted fs.
// If statfs() returns an error, the mounted fs is already gone. // If statfs() returns an error, the mounted fs is already gone.
struct statfs stat; struct statfs stat;
int checkResult = statfs(bundleAbsPath, &stat); int checkResult = statfs(bundleAbsPath, &stat);
if (checkResult != 0) { if (checkResult != 0)
{
// Meaning the app's bundle is not mounted; that is, it's not translocated. // Meaning the app's bundle is not mounted; that is, it's not translocated.
// It also means that the app is not loaded. // It also means that the app is not loaded.
return NO; return NO;

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
// //

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
// //

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef MANDARIN_H_ #ifndef MANDARIN_H_
@ -25,79 +32,115 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <string> #include <string>
#include <vector> #include <vector>
namespace Mandarin { namespace Mandarin
{
class BopomofoSyllable { class BopomofoSyllable
public: {
public:
typedef uint16_t Component; typedef uint16_t Component;
explicit BopomofoSyllable(Component syllable = 0) : syllable_(syllable) {} explicit BopomofoSyllable(Component syllable = 0) : syllable_(syllable)
{
}
BopomofoSyllable(const BopomofoSyllable&) = default; BopomofoSyllable(const BopomofoSyllable &) = default;
BopomofoSyllable(BopomofoSyllable&& another) = default; BopomofoSyllable(BopomofoSyllable &&another) = default;
BopomofoSyllable& operator=(const BopomofoSyllable&) = default; BopomofoSyllable &operator=(const BopomofoSyllable &) = default;
BopomofoSyllable& operator=(BopomofoSyllable&&) = default; BopomofoSyllable &operator=(BopomofoSyllable &&) = default;
// takes the ASCII-form, "v"-tolerant, TW-style Hanyu Pinyin (fong, pong, bong // takes the ASCII-form, "v"-tolerant, TW-style Hanyu Pinyin (fong, pong, bong
// acceptable) // acceptable)
static const BopomofoSyllable FromHanyuPinyin(const std::string& str); static const BopomofoSyllable FromHanyuPinyin(const std::string &str);
// TODO: Support accented vowels // TODO: Support accented vowels
const std::string HanyuPinyinString(bool includesTone, const std::string HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const;
bool useVForUUmlaut) const;
static const BopomofoSyllable FromComposedString(const std::string& str); static const BopomofoSyllable FromComposedString(const std::string &str);
const std::string composedString() const; const std::string composedString() const;
void clear() { syllable_ = 0; } void clear()
{
syllable_ = 0;
}
bool isEmpty() const { return !syllable_; } bool isEmpty() const
{
return !syllable_;
}
bool hasConsonant() const { return !!(syllable_ & ConsonantMask); } bool hasConsonant() const
{
return !!(syllable_ & ConsonantMask);
}
bool hasMiddleVowel() const { return !!(syllable_ & MiddleVowelMask); } bool hasMiddleVowel() const
bool hasVowel() const { return !!(syllable_ & VowelMask); } {
return !!(syllable_ & MiddleVowelMask);
}
bool hasVowel() const
{
return !!(syllable_ & VowelMask);
}
bool hasToneMarker() const { return !!(syllable_ & ToneMarkerMask); } bool hasToneMarker() const
{
return !!(syllable_ & ToneMarkerMask);
}
Component consonantComponent() const { return syllable_ & ConsonantMask; } Component consonantComponent() const
{
return syllable_ & ConsonantMask;
}
Component middleVowelComponent() const { Component middleVowelComponent() const
{
return syllable_ & MiddleVowelMask; return syllable_ & MiddleVowelMask;
} }
Component vowelComponent() const { return syllable_ & VowelMask; } Component vowelComponent() const
{
return syllable_ & VowelMask;
}
Component toneMarkerComponent() const { return syllable_ & ToneMarkerMask; } Component toneMarkerComponent() const
{
return syllable_ & ToneMarkerMask;
}
bool operator==(const BopomofoSyllable& another) const { bool operator==(const BopomofoSyllable &another) const
{
return syllable_ == another.syllable_; return syllable_ == another.syllable_;
} }
bool operator!=(const BopomofoSyllable& another) const { bool operator!=(const BopomofoSyllable &another) const
{
return syllable_ != another.syllable_; return syllable_ != another.syllable_;
} }
bool isOverlappingWith(const BopomofoSyllable& another) const { bool isOverlappingWith(const BopomofoSyllable &another) const
{
#define IOW_SAND(mask) ((syllable_ & mask) && (another.syllable_ & mask)) #define IOW_SAND(mask) ((syllable_ & mask) && (another.syllable_ & mask))
return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) || return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) || IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask);
IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask);
#undef IOW_SAND #undef IOW_SAND
} }
// consonants J, Q, X all require the existence of vowel I or UE // consonants J, Q, X all require the existence of vowel I or UE
bool belongsToJQXClass() const { bool belongsToJQXClass() const
{
Component consonant = syllable_ & ConsonantMask; Component consonant = syllable_ & ConsonantMask;
return (consonant == J || consonant == Q || consonant == X); return (consonant == J || consonant == Q || consonant == X);
} }
// zi, ci, si, zhi, chi, shi, ri // zi, ci, si, zhi, chi, shi, ri
bool belongsToZCSRClass() const { bool belongsToZCSRClass() const
{
Component consonant = syllable_ & ConsonantMask; Component consonant = syllable_ & ConsonantMask;
return (consonant >= ZH && consonant <= S); return (consonant >= ZH && consonant <= S);
} }
Component maskType() const { Component maskType() const
{
Component mask = 0; Component mask = 0;
mask |= (syllable_ & ConsonantMask) ? ConsonantMask : 0; mask |= (syllable_ & ConsonantMask) ? ConsonantMask : 0;
mask |= (syllable_ & MiddleVowelMask) ? MiddleVowelMask : 0; mask |= (syllable_ & MiddleVowelMask) ? MiddleVowelMask : 0;
@ -106,12 +149,14 @@ public:
return mask; return mask;
} }
const BopomofoSyllable operator+(const BopomofoSyllable& another) const { const BopomofoSyllable operator+(const BopomofoSyllable &another) const
{
Component newSyllable = syllable_; Component newSyllable = syllable_;
#define OP_SOVER(mask) \ #define OP_SOVER(mask) \
if (another.syllable_ & mask) { \ if (another.syllable_ & mask) \
newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \ { \
} newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \
}
OP_SOVER(ConsonantMask); OP_SOVER(ConsonantMask);
OP_SOVER(MiddleVowelMask); OP_SOVER(MiddleVowelMask);
OP_SOVER(VowelMask); OP_SOVER(VowelMask);
@ -120,11 +165,13 @@ newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \
return BopomofoSyllable(newSyllable); return BopomofoSyllable(newSyllable);
} }
BopomofoSyllable& operator+=(const BopomofoSyllable& another) { BopomofoSyllable &operator+=(const BopomofoSyllable &another)
#define OPE_SOVER(mask) \ {
if (another.syllable_ & mask) { \ #define OPE_SOVER(mask) \
syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \ if (another.syllable_ & mask) \
} { \
syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \
}
OPE_SOVER(ConsonantMask); OPE_SOVER(ConsonantMask);
OPE_SOVER(MiddleVowelMask); OPE_SOVER(MiddleVowelMask);
OPE_SOVER(VowelMask); OPE_SOVER(VowelMask);
@ -133,86 +180,87 @@ syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \
return *this; return *this;
} }
friend std::ostream& operator<<(std::ostream& stream, friend std::ostream &operator<<(std::ostream &stream, const BopomofoSyllable &syllable);
const BopomofoSyllable& syllable);
static constexpr Component static constexpr Component ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants
ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels
MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels
VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00)
ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00) B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, D = 0x0005, T = 0x0006, N = 0x0007, L = 0x0008, G = 0x0009,
B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, D = 0x0005, T = 0x0006, K = 0x000a, H = 0x000b, J = 0x000c, Q = 0x000d, X = 0x000e, ZH = 0x000f, CH = 0x0010,
N = 0x0007, L = 0x0008, G = 0x0009, K = 0x000a, H = 0x000b, J = 0x000c, SH = 0x0011, R = 0x0012, Z = 0x0013, C = 0x0014, S = 0x0015, I = 0x0020, U = 0x0040,
Q = 0x000d, X = 0x000e, ZH = 0x000f, CH = 0x0010, SH = 0x0011, R = 0x0012, UE = 0x0060, // ue = u umlaut (we use the German convention here as an
Z = 0x0013, C = 0x0014, S = 0x0015, I = 0x0020, U = 0x0040, // ersatz to the /ju:/ sound)
UE = 0x0060, // ue = u umlaut (we use the German convention here as an A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, AI = 0x0280, EI = 0x0300, AO = 0x0380, OU = 0x0400,
// ersatz to the /ju:/ sound) AN = 0x0480, EN = 0x0500, ANG = 0x0580, ENG = 0x0600, ERR = 0x0680, Tone1 = 0x0000,
A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, AI = 0x0280, EI = 0x0300, Tone2 = 0x0800, Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000;
AO = 0x0380, OU = 0x0400, AN = 0x0480, EN = 0x0500, ANG = 0x0580,
ENG = 0x0600, ERR = 0x0680, Tone1 = 0x0000, Tone2 = 0x0800,
Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000;
protected: protected:
Component syllable_; Component syllable_;
}; };
inline std::ostream& operator<<(std::ostream& stream, inline std::ostream &operator<<(std::ostream &stream, const BopomofoSyllable &syllable)
const BopomofoSyllable& syllable) { {
stream << syllable.composedString(); stream << syllable.composedString();
return stream; return stream;
} }
typedef BopomofoSyllable BPMF; typedef BopomofoSyllable BPMF;
typedef std::map<char, std::vector<BPMF::Component> > BopomofoKeyToComponentMap; typedef std::map<char, std::vector<BPMF::Component>> BopomofoKeyToComponentMap;
typedef std::map<BPMF::Component, char> BopomofoComponentToKeyMap; typedef std::map<BPMF::Component, char> BopomofoComponentToKeyMap;
class BopomofoKeyboardLayout { class BopomofoKeyboardLayout
public: {
static const BopomofoKeyboardLayout* StandardLayout(); public:
static const BopomofoKeyboardLayout* ETenLayout(); static const BopomofoKeyboardLayout *StandardLayout();
static const BopomofoKeyboardLayout* HsuLayout(); static const BopomofoKeyboardLayout *ETenLayout();
static const BopomofoKeyboardLayout* ETen26Layout(); static const BopomofoKeyboardLayout *HsuLayout();
static const BopomofoKeyboardLayout* IBMLayout(); static const BopomofoKeyboardLayout *ETen26Layout();
static const BopomofoKeyboardLayout* MiTACLayout(); static const BopomofoKeyboardLayout *IBMLayout();
static const BopomofoKeyboardLayout* FakeSeigyouLayout(); static const BopomofoKeyboardLayout *MiTACLayout();
static const BopomofoKeyboardLayout* HanyuPinyinLayout(); static const BopomofoKeyboardLayout *FakeSeigyouLayout();
static const BopomofoKeyboardLayout *HanyuPinyinLayout();
BopomofoKeyboardLayout(const BopomofoKeyToComponentMap& ktcm, BopomofoKeyboardLayout(const BopomofoKeyToComponentMap &ktcm, const std::string &name)
const std::string& name) : m_keyToComponent(ktcm), m_name(name)
: m_keyToComponent(ktcm), m_name(name) { {
for (BopomofoKeyToComponentMap::const_iterator miter = for (BopomofoKeyToComponentMap::const_iterator miter = m_keyToComponent.begin();
m_keyToComponent.begin();
miter != m_keyToComponent.end(); ++miter) miter != m_keyToComponent.end(); ++miter)
for (std::vector<BPMF::Component>::const_iterator viter = for (std::vector<BPMF::Component>::const_iterator viter = (*miter).second.begin();
(*miter).second.begin();
viter != (*miter).second.end(); ++viter) viter != (*miter).second.end(); ++viter)
m_componentToKey[*viter] = (*miter).first; m_componentToKey[*viter] = (*miter).first;
} }
const std::string name() const { return m_name; } const std::string name() const
{
return m_name;
}
char componentToKey(BPMF::Component component) const { char componentToKey(BPMF::Component component) const
BopomofoComponentToKeyMap::const_iterator iter = {
m_componentToKey.find(component); BopomofoComponentToKeyMap::const_iterator iter = m_componentToKey.find(component);
return (iter == m_componentToKey.end()) ? 0 : (*iter).second; return (iter == m_componentToKey.end()) ? 0 : (*iter).second;
} }
const std::vector<BPMF::Component> keyToComponents(char key) const { const std::vector<BPMF::Component> keyToComponents(char key) const
{
BopomofoKeyToComponentMap::const_iterator iter = m_keyToComponent.find(key); BopomofoKeyToComponentMap::const_iterator iter = m_keyToComponent.find(key);
return (iter == m_keyToComponent.end()) ? std::vector<BPMF::Component>() return (iter == m_keyToComponent.end()) ? std::vector<BPMF::Component>() : (*iter).second;
: (*iter).second;
} }
const std::string keySequenceFromSyllable(BPMF syllable) const { const std::string keySequenceFromSyllable(BPMF syllable) const
{
std::string sequence; std::string sequence;
BPMF::Component c; BPMF::Component c;
char k; char k;
#define STKS_COMBINE(component) \ #define STKS_COMBINE(component) \
if ((c = component)) { \ if ((c = component)) \
if ((k = componentToKey(c))) sequence += std::string(1, k); \ { \
} if ((k = componentToKey(c))) \
sequence += std::string(1, k); \
}
STKS_COMBINE(syllable.consonantComponent()); STKS_COMBINE(syllable.consonantComponent());
STKS_COMBINE(syllable.middleVowelComponent()); STKS_COMBINE(syllable.middleVowelComponent());
STKS_COMBINE(syllable.vowelComponent()); STKS_COMBINE(syllable.vowelComponent());
@ -221,19 +269,22 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
return sequence; return sequence;
} }
const BPMF syllableFromKeySequence(const std::string& sequence) const { const BPMF syllableFromKeySequence(const std::string &sequence) const
{
BPMF syllable; BPMF syllable;
for (std::string::const_iterator iter = sequence.begin(); for (std::string::const_iterator iter = sequence.begin(); iter != sequence.end(); ++iter)
iter != sequence.end(); ++iter) { {
bool beforeSeqHasIorUE = sequenceContainsIorUE(sequence.begin(), iter); bool beforeSeqHasIorUE = sequenceContainsIorUE(sequence.begin(), iter);
bool aheadSeqHasIorUE = sequenceContainsIorUE(iter + 1, sequence.end()); bool aheadSeqHasIorUE = sequenceContainsIorUE(iter + 1, sequence.end());
std::vector<BPMF::Component> components = keyToComponents(*iter); std::vector<BPMF::Component> components = keyToComponents(*iter);
if (!components.size()) continue; if (!components.size())
continue;
if (components.size() == 1) { if (components.size() == 1)
{
syllable += BPMF(components[0]); syllable += BPMF(components[0]);
continue; continue;
} }
@ -243,34 +294,44 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
BPMF ending = components.size() > 2 ? BPMF(components[2]) : follow; BPMF ending = components.size() > 2 ? BPMF(components[2]) : follow;
// apply the I/UE + E rule // apply the I/UE + E rule
if (head.vowelComponent() == BPMF::E && if (head.vowelComponent() == BPMF::E && follow.vowelComponent() != BPMF::E)
follow.vowelComponent() != BPMF::E) { {
syllable += beforeSeqHasIorUE ? head : follow; syllable += beforeSeqHasIorUE ? head : follow;
continue; continue;
} }
if (head.vowelComponent() != BPMF::E && if (head.vowelComponent() != BPMF::E && follow.vowelComponent() == BPMF::E)
follow.vowelComponent() == BPMF::E) { {
syllable += beforeSeqHasIorUE ? follow : head; syllable += beforeSeqHasIorUE ? follow : head;
continue; continue;
} }
// apply the J/Q/X + I/UE rule, only two components are allowed in the // apply the J/Q/X + I/UE rule, only two components are allowed in the
// components vector here // components vector here
if (head.belongsToJQXClass() && !follow.belongsToJQXClass()) { if (head.belongsToJQXClass() && !follow.belongsToJQXClass())
if (!syllable.isEmpty()) { {
if (ending != follow) syllable += ending; if (!syllable.isEmpty())
} else { {
if (ending != follow)
syllable += ending;
}
else
{
syllable += aheadSeqHasIorUE ? head : follow; syllable += aheadSeqHasIorUE ? head : follow;
} }
continue; continue;
} }
if (!head.belongsToJQXClass() && follow.belongsToJQXClass()) { if (!head.belongsToJQXClass() && follow.belongsToJQXClass())
if (!syllable.isEmpty()) { {
if (ending != follow) syllable += ending; if (!syllable.isEmpty())
} else { {
if (ending != follow)
syllable += ending;
}
else
{
syllable += aheadSeqHasIorUE ? follow : head; syllable += aheadSeqHasIorUE ? follow : head;
} }
@ -278,14 +339,20 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
} }
// the nasty issue of only one char in the buffer // the nasty issue of only one char in the buffer
if (iter == sequence.begin() && iter + 1 == sequence.end()) { if (iter == sequence.begin() && iter + 1 == sequence.end())
if (head.hasVowel() || follow.hasToneMarker() || {
head.belongsToZCSRClass()) { if (head.hasVowel() || follow.hasToneMarker() || head.belongsToZCSRClass())
{
syllable += head; syllable += head;
} else { }
if (follow.hasVowel() || ending.hasToneMarker()) { else
{
if (follow.hasVowel() || ending.hasToneMarker())
{
syllable += follow; syllable += follow;
} else { }
else
{
syllable += ending; syllable += ending;
} }
} }
@ -293,30 +360,39 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
continue; continue;
} }
if (!(syllable.maskType() & head.maskType()) && if (!(syllable.maskType() & head.maskType()) && !endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()))
!endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end())) { {
syllable += head; syllable += head;
} else { }
if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) && else
head.belongsToZCSRClass() && syllable.isEmpty()) { {
if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) && head.belongsToZCSRClass() &&
syllable.isEmpty())
{
syllable += head; syllable += head;
} else if (syllable.maskType() < follow.maskType()) { }
else if (syllable.maskType() < follow.maskType())
{
syllable += follow; syllable += follow;
} else { }
else
{
syllable += ending; syllable += ending;
} }
} }
} }
// heuristics for Hsu keyboard layout // heuristics for Hsu keyboard layout
if (this == HsuLayout()) { if (this == HsuLayout())
{
// fix the left out L to ERR when it has sound, and GI, GUE -> JI, JUE // fix the left out L to ERR when it has sound, and GI, GUE -> JI, JUE
if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() && if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() && !syllable.hasMiddleVowel())
!syllable.hasMiddleVowel()) { {
syllable += BPMF(BPMF::ERR); syllable += BPMF(BPMF::ERR);
} else if (syllable.consonantComponent() == BPMF::G && }
(syllable.middleVowelComponent() == BPMF::I || else if (syllable.consonantComponent() == BPMF::G &&
syllable.middleVowelComponent() == BPMF::UE)) { (syllable.middleVowelComponent() == BPMF::I || syllable.middleVowelComponent() == BPMF::UE))
{
syllable += BPMF(BPMF::J); syllable += BPMF(BPMF::J);
} }
} }
@ -324,10 +400,11 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
return syllable; return syllable;
} }
protected: protected:
bool endAheadOrAheadHasToneMarkKey(std::string::const_iterator ahead, bool endAheadOrAheadHasToneMarkKey(std::string::const_iterator ahead, std::string::const_iterator end) const
std::string::const_iterator end) const { {
if (ahead == end) return true; if (ahead == end)
return true;
char tone1 = componentToKey(BPMF::Tone1); char tone1 = componentToKey(BPMF::Tone1);
char tone2 = componentToKey(BPMF::Tone2); char tone2 = componentToKey(BPMF::Tone2);
@ -336,22 +413,23 @@ protected:
char tone5 = componentToKey(BPMF::Tone5); char tone5 = componentToKey(BPMF::Tone5);
if (tone1) if (tone1)
if (*ahead == tone1) return true; if (*ahead == tone1)
return true;
if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 || if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 || *ahead == tone5)
*ahead == tone5)
return true; return true;
return false; return false;
} }
bool sequenceContainsIorUE(std::string::const_iterator start, bool sequenceContainsIorUE(std::string::const_iterator start, std::string::const_iterator end) const
std::string::const_iterator end) const { {
char iChar = componentToKey(BPMF::I); char iChar = componentToKey(BPMF::I);
char ueChar = componentToKey(BPMF::UE); char ueChar = componentToKey(BPMF::UE);
for (; start != end; ++start) for (; start != end; ++start)
if (*start == iChar || *start == ueChar) return true; if (*start == iChar || *start == ueChar)
return true;
return false; return false;
} }
@ -360,36 +438,45 @@ protected:
BopomofoComponentToKeyMap m_componentToKey; BopomofoComponentToKeyMap m_componentToKey;
}; };
class BopomofoReadingBuffer { class BopomofoReadingBuffer
public: {
explicit BopomofoReadingBuffer(const BopomofoKeyboardLayout* layout) public:
: layout_(layout), pinyin_mode_(false) { explicit BopomofoReadingBuffer(const BopomofoKeyboardLayout *layout) : layout_(layout), pinyin_mode_(false)
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) { {
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout())
{
pinyin_mode_ = true; pinyin_mode_ = true;
pinyin_sequence_ = ""; pinyin_sequence_ = "";
} }
} }
void setKeyboardLayout(const BopomofoKeyboardLayout* layout) { void setKeyboardLayout(const BopomofoKeyboardLayout *layout)
{
layout_ = layout; layout_ = layout;
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) { if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout())
{
pinyin_mode_ = true; pinyin_mode_ = true;
pinyin_sequence_ = ""; pinyin_sequence_ = "";
} }
} }
bool isValidKey(char k) const { bool isValidKey(char k) const
if (!pinyin_mode_) { {
if (!pinyin_mode_)
{
return layout_ ? (layout_->keyToComponents(k)).size() > 0 : false; return layout_ ? (layout_->keyToComponents(k)).size() > 0 : false;
} }
char lk = tolower(k); char lk = tolower(k);
if (lk >= 'a' && lk <= 'z') { if (lk >= 'a' && lk <= 'z')
{
// if a tone marker is already in place // if a tone marker is already in place
if (pinyin_sequence_.length()) { if (pinyin_sequence_.length())
{
char lastc = pinyin_sequence_[pinyin_sequence_.length() - 1]; char lastc = pinyin_sequence_[pinyin_sequence_.length() - 1];
if (lastc >= '2' && lastc <= '5') { if (lastc >= '2' && lastc <= '5')
{
return false; return false;
} }
return true; return true;
@ -397,40 +484,47 @@ public:
return true; return true;
} }
if (pinyin_sequence_.length() && (lk >= '2' && lk <= '5')) { if (pinyin_sequence_.length() && (lk >= '2' && lk <= '5'))
{
return true; return true;
} }
return false; return false;
} }
bool combineKey(char k) { bool combineKey(char k)
if (!isValidKey(k)) return false; {
if (!isValidKey(k))
return false;
if (pinyin_mode_) { if (pinyin_mode_)
{
pinyin_sequence_ += std::string(1, tolower(k)); pinyin_sequence_ += std::string(1, tolower(k));
syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_); syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_);
return true; return true;
} }
std::string sequence = std::string sequence = layout_->keySequenceFromSyllable(syllable_) + std::string(1, k);
layout_->keySequenceFromSyllable(syllable_) + std::string(1, k);
syllable_ = layout_->syllableFromKeySequence(sequence); syllable_ = layout_->syllableFromKeySequence(sequence);
return true; return true;
} }
void clear() { void clear()
{
pinyin_sequence_.clear(); pinyin_sequence_.clear();
syllable_.clear(); syllable_.clear();
} }
void backspace() { void backspace()
if (!layout_) return; {
if (!layout_)
return;
if (pinyin_mode_) { if (pinyin_mode_)
if (pinyin_sequence_.length()) { {
pinyin_sequence_ = if (pinyin_sequence_.length())
pinyin_sequence_.substr(0, pinyin_sequence_.length() - 1); {
pinyin_sequence_ = pinyin_sequence_.substr(0, pinyin_sequence_.length() - 1);
} }
syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_); syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_);
@ -438,38 +532,50 @@ public:
} }
std::string sequence = layout_->keySequenceFromSyllable(syllable_); std::string sequence = layout_->keySequenceFromSyllable(syllable_);
if (sequence.length()) { if (sequence.length())
{
sequence = sequence.substr(0, sequence.length() - 1); sequence = sequence.substr(0, sequence.length() - 1);
syllable_ = layout_->syllableFromKeySequence(sequence); syllable_ = layout_->syllableFromKeySequence(sequence);
} }
} }
bool isEmpty() const { return syllable_.isEmpty(); } bool isEmpty() const
{
return syllable_.isEmpty();
}
const std::string composedString() const { const std::string composedString() const
if (pinyin_mode_) { {
if (pinyin_mode_)
{
return pinyin_sequence_; return pinyin_sequence_;
} }
return syllable_.composedString(); return syllable_.composedString();
} }
const BPMF syllable() const { return syllable_; } const BPMF syllable() const
{
return syllable_;
}
const std::string standardLayoutQueryString() const { const std::string standardLayoutQueryString() const
{
return BopomofoKeyboardLayout::StandardLayout()->keySequenceFromSyllable(syllable_); return BopomofoKeyboardLayout::StandardLayout()->keySequenceFromSyllable(syllable_);
} }
bool hasToneMarker() const { return syllable_.hasToneMarker(); } bool hasToneMarker() const
{
return syllable_.hasToneMarker();
}
protected: protected:
const BopomofoKeyboardLayout* layout_; const BopomofoKeyboardLayout *layout_;
BPMF syllable_; BPMF syllable_;
bool pinyin_mode_; bool pinyin_mode_;
std::string pinyin_sequence_; std::string pinyin_sequence_;
}; };
} // namespace Mandarin } // namespace Mandarin
#endif // MANDARIN_H_
#endif // MANDARIN_H_

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
// //

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
// //

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>
@ -33,7 +40,9 @@ extern InputMode imeModeNULL;
@protocol KeyHandlerDelegate <NSObject> @protocol KeyHandlerDelegate <NSObject>
- (id)candidateControllerForKeyHandler:(KeyHandler *)keyHandler; - (id)candidateControllerForKeyHandler:(KeyHandler *)keyHandler;
- (void)keyHandler:(KeyHandler *)keyHandler didSelectCandidateAtIndex:(NSInteger)index candidateController:(id)controller; - (void)keyHandler:(KeyHandler *)keyHandler
didSelectCandidateAtIndex:(NSInteger)index
candidateController:(id)controller;
- (BOOL)keyHandler:(KeyHandler *)keyHandler didRequestWriteUserPhraseWithState:(InputState *)state; - (BOOL)keyHandler:(KeyHandler *)keyHandler didRequestWriteUserPhraseWithState:(InputState *)state;
@end @end
@ -43,7 +52,8 @@ extern InputMode imeModeNULL;
- (BOOL)handleInput:(keyParser *)input - (BOOL)handleInput:(keyParser *)input
state:(InputState *)state state:(InputState *)state
stateCallback:(void (^)(InputState *))stateCallback stateCallback:(void (^)(InputState *))stateCallback
errorCallback:(void (^)(void))errorCallback NS_SWIFT_NAME(handle(input:state:stateCallback:errorCallback:)); errorCallback:(void (^)(void))errorCallback
NS_SWIFT_NAME(handle(input:state:stateCallback:errorCallback:));
- (void)syncWithPreferences; - (void)syncWithPreferences;
- (void)fixNodeWithValue:(NSString *)value NS_SWIFT_NAME(fixNode(value:)); - (void)fixNodeWithValue:(NSString *)value NS_SWIFT_NAME(fixNode(value:));
@ -52,8 +62,8 @@ extern InputMode imeModeNULL;
- (InputState *)buildInputtingState; - (InputState *)buildInputtingState;
- (nullable InputState *)buildAssociatePhraseStateWithKey:(NSString *)key useVerticalMode:(BOOL)useVerticalMode; - (nullable InputState *)buildAssociatePhraseStateWithKey:(NSString *)key useVerticalMode:(BOOL)useVerticalMode;
@property (strong, nonatomic) InputMode inputMode; @property(strong, nonatomic) InputMode inputMode;
@property (weak, nonatomic) id <KeyHandlerDelegate> delegate; @property(weak, nonatomic) id<KeyHandlerDelegate> delegate;
@end @end
NS_ASSUME_NONNULL_END NS_ASSUME_NONNULL_END

File diff suppressed because it is too large Load Diff

View File

@ -1,55 +1,67 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "KeyValueBlobReader.h" #include "KeyValueBlobReader.h"
namespace vChewing { namespace vChewing
{
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out) KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue *out)
{ {
static auto new_line = [](char c) { return c == '\n' || c == '\r'; }; static auto new_line = [](char c) { return c == '\n' || c == '\r'; };
static auto blank = [](char c) { return c == ' ' || c == '\t'; }; static auto blank = [](char c) { return c == ' ' || c == '\t'; };
static auto blank_or_newline static auto blank_or_newline = [](char c) { return blank(c) || new_line(c); };
= [](char c) { return blank(c) || new_line(c); };
static auto content_char = [](char c) { return !blank(c) && !new_line(c); }; static auto content_char = [](char c) { return !blank(c) && !new_line(c); };
if (state_ == State::ERROR) { if (state_ == State::ERROR)
{
return state_; return state_;
} }
const char* key_begin = nullptr; const char *key_begin = nullptr;
size_t key_length = 0; size_t key_length = 0;
const char* value_begin = nullptr; const char *value_begin = nullptr;
size_t value_length = 0; size_t value_length = 0;
while (true) { while (true)
{
state_ = SkipUntilNot(blank_or_newline); state_ = SkipUntilNot(blank_or_newline);
if (state_ != State::CAN_CONTINUE) { if (state_ != State::CAN_CONTINUE)
{
return state_; return state_;
} }
// Check if it's a comment line; if so, read until end of line. // Check if it's a comment line; if so, read until end of line.
if (*current_ != '#') { if (*current_ != '#')
{
break; break;
} }
state_ = SkipUntil(new_line); state_ = SkipUntil(new_line);
if (state_ != State::CAN_CONTINUE) { if (state_ != State::CAN_CONTINUE)
{
return state_; return state_;
} }
} }
@ -59,22 +71,26 @@ KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
key_begin = current_; key_begin = current_;
state_ = SkipUntilNot(content_char); state_ = SkipUntilNot(content_char);
if (state_ != State::CAN_CONTINUE) { if (state_ != State::CAN_CONTINUE)
{
goto error; goto error;
} }
key_length = current_ - key_begin; key_length = current_ - key_begin;
// There should be at least one blank character after the key string. // There should be at least one blank character after the key string.
if (!blank(*current_)) { if (!blank(*current_))
{
goto error; goto error;
} }
state_ = SkipUntilNot(blank); state_ = SkipUntilNot(blank);
if (state_ != State::CAN_CONTINUE) { if (state_ != State::CAN_CONTINUE)
{
goto error; goto error;
} }
if (!content_char(*current_)) { if (!content_char(*current_))
{
goto error; goto error;
} }
@ -90,9 +106,9 @@ KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
// like "foo bar baz\n" where baz should not be treated as the Next key. // like "foo bar baz\n" where baz should not be treated as the Next key.
SkipUntil(new_line); SkipUntil(new_line);
if (out != nullptr) { if (out != nullptr)
*out = KeyValue { std::string_view { key_begin, key_length }, {
std::string_view { value_begin, value_length } }; *out = KeyValue{std::string_view{key_begin, key_length}, std::string_view{value_begin, value_length}};
} }
state_ = State::HAS_PAIR; state_ = State::HAS_PAIR;
return state_; return state_;
@ -102,11 +118,12 @@ error:
return state_; return state_;
} }
KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot( KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(const std::function<bool(char)> &f)
const std::function<bool(char)>& f)
{ {
while (current_ != end_ && *current_) { while (current_ != end_ && *current_)
if (!f(*current_)) { {
if (!f(*current_))
{
return State::CAN_CONTINUE; return State::CAN_CONTINUE;
} }
++current_; ++current_;
@ -115,11 +132,12 @@ KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(
return State::END; return State::END;
} }
KeyValueBlobReader::State KeyValueBlobReader::SkipUntil( KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(const std::function<bool(char)> &f)
const std::function<bool(char)>& f)
{ {
while (current_ != end_ && *current_) { while (current_ != end_ && *current_)
if (f(*current_)) { {
if (f(*current_))
{
return State::CAN_CONTINUE; return State::CAN_CONTINUE;
} }
++current_; ++current_;
@ -128,8 +146,7 @@ KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(
return State::END; return State::END;
} }
std::ostream& operator<<( std::ostream &operator<<(std::ostream &os, const KeyValueBlobReader::KeyValue &kv)
std::ostream& os, const KeyValueBlobReader::KeyValue& kv)
{ {
os << "(key: " << kv.key << ", value: " << kv.value << ")"; os << "(key: " << kv.key << ", value: " << kv.value << ")";
return os; return os;

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_ #ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
@ -39,11 +46,14 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
// std::string_view is used to allow returning results efficiently. As a result, // std::string_view is used to allow returning results efficiently. As a result,
// the blob is a const char* and will never be mutated. This implies, for // the blob is a const char* and will never be mutated. This implies, for
// example, read-only mmap can be used to parse large files. // example, read-only mmap can be used to parse large files.
namespace vChewing { namespace vChewing
{
class KeyValueBlobReader { class KeyValueBlobReader
public: {
enum class State : int { public:
enum class State : int
{
// There are no more key-value pairs in this blob. // There are no more key-value pairs in this blob.
END = 0, END = 0,
// The reader has produced a new key-value pair. // The reader has produced a new key-value pair.
@ -54,19 +64,16 @@ public:
CAN_CONTINUE = 2 CAN_CONTINUE = 2
}; };
struct KeyValue { struct KeyValue
constexpr KeyValue() {
: key("") constexpr KeyValue() : key(""), value("")
, value("")
{ {
} }
constexpr KeyValue(std::string_view k, std::string_view v) constexpr KeyValue(std::string_view k, std::string_view v) : key(k), value(v)
: key(k)
, value(v)
{ {
} }
bool operator==(const KeyValue& another) const bool operator==(const KeyValue &another) const
{ {
return key == another.key && value == another.value; return key == another.key && value == another.value;
} }
@ -75,27 +82,25 @@ public:
std::string_view value; std::string_view value;
}; };
KeyValueBlobReader(const char* blob, size_t size) KeyValueBlobReader(const char *blob, size_t size) : current_(blob), end_(blob + size)
: current_(blob)
, end_(blob + size)
{ {
} }
// Parse the next key-value pair and return the state of the reader. If // Parse the next key-value pair and return the state of the reader. If
// `out` is passed, out will be set to the produced key-value pair if there // `out` is passed, out will be set to the produced key-value pair if there
// is one. // is one.
State Next(KeyValue* out = nullptr); State Next(KeyValue *out = nullptr);
private: private:
State SkipUntil(const std::function<bool(char)>& f); State SkipUntil(const std::function<bool(char)> &f);
State SkipUntilNot(const std::function<bool(char)>& f); State SkipUntilNot(const std::function<bool(char)> &f);
const char* current_; const char *current_;
const char* end_; const char *end_;
State state_ = State::CAN_CONTINUE; State state_ = State::CAN_CONTINUE;
}; };
std::ostream& operator<<(std::ostream&, const KeyValueBlobReader::KeyValue&); std::ostream &operator<<(std::ostream &, const KeyValueBlobReader::KeyValue &);
} // namespace vChewing } // namespace vChewing

View File

@ -1,40 +1,47 @@
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). // Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef LMConsolidator_hpp #ifndef LMConsolidator_hpp
#define LMConsolidator_hpp #define LMConsolidator_hpp
#include <syslog.h>
#include <stdio.h>
#include <fstream> #include <fstream>
#include <sstream>
#include <iostream> #include <iostream>
#include <string>
#include <map> #include <map>
#include <set>
#include <regex> #include <regex>
#include <set>
#include <sstream>
#include <stdio.h>
#include <string>
#include <syslog.h>
using namespace std; using namespace std;
namespace vChewing { namespace vChewing
{
class LMConsolidator class LMConsolidator
{ {
public: public:
static bool CheckPragma(const char *path); static bool CheckPragma(const char *path);
static bool FixEOF(const char *path); static bool FixEOF(const char *path);
static bool ConsolidateContent(const char *path, bool shouldCheckPragma); static bool ConsolidateContent(const char *path, bool shouldCheckPragma);

View File

@ -1,28 +1,35 @@
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). // Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "LMConsolidator.h" #include "LMConsolidator.h"
#include "vChewing-Swift.h" #include "vChewing-Swift.h"
namespace vChewing { namespace vChewing
{
constexpr std::string_view FORMATTED_PRAGMA_HEADER constexpr std::string_view FORMATTED_PRAGMA_HEADER =
= "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍"; "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍";
// HEADER VERIFIER. CREDIT: Shiki Suen // HEADER VERIFIER. CREDIT: Shiki Suen
bool LMConsolidator::CheckPragma(const char *path) bool LMConsolidator::CheckPragma(const char *path)
@ -32,13 +39,17 @@ bool LMConsolidator::CheckPragma(const char *path)
{ {
string firstLine; string firstLine;
getline(zfdCheckPragma, firstLine); getline(zfdCheckPragma, firstLine);
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str()); if (mgrPrefs.isDebugModeEnabled)
if (firstLine != FORMATTED_PRAGMA_HEADER) { syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str());
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS."); if (firstLine != FORMATTED_PRAGMA_HEADER)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS.");
return false; return false;
} }
} }
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL."); if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL.");
return true; return true;
} }
@ -46,58 +57,76 @@ bool LMConsolidator::CheckPragma(const char *path)
bool LMConsolidator::FixEOF(const char *path) bool LMConsolidator::FixEOF(const char *path)
{ {
std::fstream zfdEOFFixerIncomingStream(path); std::fstream zfdEOFFixerIncomingStream(path);
zfdEOFFixerIncomingStream.seekg(-1,std::ios_base::end); zfdEOFFixerIncomingStream.seekg(-1, std::ios_base::end);
char z; char z;
zfdEOFFixerIncomingStream.get(z); zfdEOFFixerIncomingStream.get(z);
if(z!='\n'){ if (z != '\n')
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n"); {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path); if (mgrPrefs.isDebugModeEnabled)
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n"); syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n");
std::ofstream zfdEOFFixerOutput(path, std::ios_base::app); std::ofstream zfdEOFFixerOutput(path, std::ios_base::app);
zfdEOFFixerOutput << std::endl; zfdEOFFixerOutput << std::endl;
zfdEOFFixerOutput.close(); zfdEOFFixerOutput.close();
if (zfdEOFFixerOutput.fail()) { if (zfdEOFFixerOutput.fail())
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n"); {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path); if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false; return false;
} }
} }
zfdEOFFixerIncomingStream.close(); zfdEOFFixerIncomingStream.close();
if (zfdEOFFixerIncomingStream.fail()) { if (zfdEOFFixerIncomingStream.fail())
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n"); {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path); if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS,
"// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false; return false;
} }
return true; return true;
} // END: EOF FIXER. } // END: EOF FIXER.
// CONTENT CONSOLIDATOR. CREDIT: Shiki Suen. // CONTENT CONSOLIDATOR. CREDIT: Shiki Suen.
bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma) { bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma)
{
bool pragmaCheckResult = LMConsolidator::CheckPragma(path); bool pragmaCheckResult = LMConsolidator::CheckPragma(path);
if (pragmaCheckResult && shouldCheckPragma){ if (pragmaCheckResult && shouldCheckPragma)
{
return true; return true;
} }
ifstream zfdContentConsolidatorIncomingStream(path); ifstream zfdContentConsolidatorIncomingStream(path);
vector<string>vecEntry; vector<string> vecEntry;
while(!zfdContentConsolidatorIncomingStream.eof()) while (!zfdContentConsolidatorIncomingStream.eof())
{ // Xcode 13 能用的 ObjCpp 與 Cpp 並無原生支援「\h」這個 Regex 參數的能力,只能逐行處理。 { // Xcode 13 能用的 ObjCpp 與 Cpp 並無原生支援「\h」這個 Regex 參數的能力,只能逐行處理。
string zfdBuffer; string zfdBuffer;
getline(zfdContentConsolidatorIncomingStream,zfdBuffer); getline(zfdContentConsolidatorIncomingStream, zfdBuffer);
vecEntry.push_back(zfdBuffer); vecEntry.push_back(zfdBuffer);
} }
// 第一遍 for 用來統整每行內的內容。 // 第一遍 for 用來統整每行內的內容。
// regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp / objCpp 可能對某些 Regex 寫法有相容性問題。 // regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"),
// regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。 // sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp /
// objCpp 可能對某些 Regex 寫法有相容性問題。 regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "),
// sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。
regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)"); regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)");
for(int i=0;i<vecEntry.size();i++) { // 第一遍 for 用來統整每行內的內容。 for (int i = 0; i < vecEntry.size(); i++)
if (vecEntry[i].size() != 0) { // 不要理會空行,否則給空行加上 endl 等於再加空行。 { // 第一遍 for 用來統整每行內的內容。
// RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab 等),最後去除每行首尾空格。 if (vecEntry[i].size() != 0)
// vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); // 中日韓全形空格轉為 ASCII 空格。 { // 不要理會空行,否則給空行加上 endl 等於再加空行。
// vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, " ").c_str(); // Non-Break 型空格轉為 ASCII 空格。 // RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab
// vecEntry[i] = regex_replace(vecEntry[i], sedWhiteSpace, " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。 // 等),最後去除每行首尾空格。 vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); //
// vecEntry[i] = regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。 // 中日韓全形空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, "
// vecEntry[i] = regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。 // ").c_str(); // Non-Break 型空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedWhiteSpace,
// " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。 vecEntry[i] =
// regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。 vecEntry[i] =
// regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。
// 上述命令分步驟執行容易產生效能問題,故濃縮為下述兩句。 // 上述命令分步驟執行容易產生效能問題,故濃縮為下述兩句。
vecEntry[i] = regex_replace(vecEntry[i], sedToConsolidate, " ").c_str(); vecEntry[i] = regex_replace(vecEntry[i], sedToConsolidate, " ").c_str();
vecEntry[i] = regex_replace(vecEntry[i], sedToTrim, "").c_str(); vecEntry[i] = regex_replace(vecEntry[i], sedToTrim, "").c_str();
@ -106,27 +135,39 @@ bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma
// 在第二遍 for 運算之前,針對 vecEntry 去除重複條目。 // 在第二遍 for 運算之前,針對 vecEntry 去除重複條目。
std::reverse(vecEntry.begin(), vecEntry.end()); // 先首尾顛倒,免得破壞最新的 override 資訊。 std::reverse(vecEntry.begin(), vecEntry.end()); // 先首尾顛倒,免得破壞最新的 override 資訊。
vecEntry.erase(unique(vecEntry.begin(), vecEntry.end()), vecEntry.end()); // 去重複。 vecEntry.erase(unique(vecEntry.begin(), vecEntry.end()), vecEntry.end()); // 去重複。
std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。 std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。
// 統整完畢。開始將統整過的內容寫入檔案。 // 統整完畢。開始將統整過的內容寫入檔案。
ofstream zfdContentConsolidatorOutput(path); // 這裡是要從頭開始重寫檔案內容,所以不需要「 ios_base::app 」。 ofstream zfdContentConsolidatorOutput(path); // 這裡是要從頭開始重寫檔案內容,所以不需要「 ios_base::app 」。
if (!pragmaCheckResult){ if (!pragmaCheckResult)
zfdContentConsolidatorOutput<<FORMATTED_PRAGMA_HEADER<<endl; // 寫入經過整理處理的 HEADER。 {
zfdContentConsolidatorOutput << FORMATTED_PRAGMA_HEADER << endl; // 寫入經過整理處理的 HEADER。
} }
for(int i=0;i<vecEntry.size();i++) { // 第二遍 for 用來寫入統整過的內容。 for (int i = 0; i < vecEntry.size(); i++)
if (vecEntry[i].size() != 0) { // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。 { // 第二遍 for 用來寫入統整過的內容。
zfdContentConsolidatorOutput<<vecEntry[i]<<endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。 if (vecEntry[i].size() != 0)
{ // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。
zfdContentConsolidatorOutput << vecEntry[i]
<< endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。
} }
} }
zfdContentConsolidatorOutput.close(); zfdContentConsolidatorOutput.close();
if (zfdContentConsolidatorOutput.fail()) { if (zfdContentConsolidatorOutput.fail())
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n"); {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path); if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS,
"// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false; return false;
} }
zfdContentConsolidatorIncomingStream.close(); zfdContentConsolidatorIncomingStream.close();
if (zfdContentConsolidatorIncomingStream.fail()) { if (zfdContentConsolidatorIncomingStream.fail())
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. Insufficient Privileges?\n"); {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path); if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. "
"Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false; return false;
} }
return true; return true;

View File

@ -1,28 +1,35 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef LMInstantiator_H #ifndef LMInstantiator_H
#define LMInstantiator_H #define LMInstantiator_H
#include "AssociatedPhrases.h" #include "AssociatedPhrases.h"
#include "CoreLM.h"
#include "CNSLM.h" #include "CNSLM.h"
#include "CoreLM.h"
#include "ParselessLM.h" #include "ParselessLM.h"
#include "PhraseReplacementMap.h" #include "PhraseReplacementMap.h"
#include "SymbolLM.h" #include "SymbolLM.h"
@ -31,7 +38,8 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <stdio.h> #include <stdio.h>
#include <unordered_set> #include <unordered_set>
namespace vChewing { namespace vChewing
{
using namespace Gramambular; using namespace Gramambular;
@ -57,58 +65,59 @@ using namespace Gramambular;
/// model while launching and to load the user phrases anytime if the custom /// model while launching and to load the user phrases anytime if the custom
/// files are modified. It does not keep the reference of the data paths but /// files are modified. It does not keep the reference of the data paths but
/// you have to pass the paths when you ask it to do loading. /// you have to pass the paths when you ask it to do loading.
class LMInstantiator : public Gramambular::LanguageModel { class LMInstantiator : public Gramambular::LanguageModel
public: {
public:
LMInstantiator(); LMInstantiator();
~LMInstantiator(); ~LMInstantiator();
/// Asks to load the primary language model at the given path. /// Asks to load the primary language model at the given path.
/// @param languageModelPath The path of the language model. /// @param languageModelPath The path of the language model.
void loadLanguageModel(const char* languageModelPath); void loadLanguageModel(const char *languageModelPath);
/// If the data model is already loaded. /// If the data model is already loaded.
bool isDataModelLoaded(); bool isDataModelLoaded();
/// Asks to load the misc data model at the given path. /// Asks to load the misc data model at the given path.
/// @param miscDataPath The path of the misc data model. /// @param miscDataPath The path of the misc data model.
void loadMiscData(const char* miscDataPath); void loadMiscData(const char *miscDataPath);
/// If the misc data model is already loaded. /// If the misc data model is already loaded.
bool isMiscDataLoaded(); bool isMiscDataLoaded();
/// Asks to load the symbol data model at the given path. /// Asks to load the symbol data model at the given path.
/// @param symbolDataPath The path of the symbol data model. /// @param symbolDataPath The path of the symbol data model.
void loadSymbolData(const char* symbolDataPath); void loadSymbolData(const char *symbolDataPath);
/// If the symbol data model is already loaded. /// If the symbol data model is already loaded.
bool isSymbolDataLoaded(); bool isSymbolDataLoaded();
/// Asks to load the CNS data model at the given path. /// Asks to load the CNS data model at the given path.
/// @param cnsDataPath The path of the CNS data model. /// @param cnsDataPath The path of the CNS data model.
void loadCNSData(const char* cnsDataPath); void loadCNSData(const char *cnsDataPath);
/// If the CNS data model is already loaded. /// If the CNS data model is already loaded.
bool isCNSDataLoaded(); bool isCNSDataLoaded();
/// Asks to load the user phrases and excluded phrases at the given path. /// Asks to load the user phrases and excluded phrases at the given path.
/// @param userPhrasesPath The path of user phrases. /// @param userPhrasesPath The path of user phrases.
/// @param excludedPhrasesPath The path of excluded phrases. /// @param excludedPhrasesPath The path of excluded phrases.
void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath); void loadUserPhrases(const char *userPhrasesPath, const char *excludedPhrasesPath);
/// Asks to load the user symbol data at the given path. /// Asks to load the user symbol data at the given path.
/// @param userPhrasesPath The path of user symbol data. /// @param userPhrasesPath The path of user symbol data.
void loadUserSymbolData(const char* userPhrasesPath); void loadUserSymbolData(const char *userPhrasesPath);
/// Asks to load the user associated phrases at the given path. /// Asks to load the user associated phrases at the given path.
/// @param userAssociatedPhrasesPath The path of the user associated phrases. /// @param userAssociatedPhrasesPath The path of the user associated phrases.
void loadUserAssociatedPhrases(const char* userAssociatedPhrasesPath); void loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath);
/// Asks to load the phrase replacement table at the given path. /// Asks to load the phrase replacement table at the given path.
/// @param phraseReplacementPath The path of the phrase replacement table. /// @param phraseReplacementPath The path of the phrase replacement table.
void loadPhraseReplacementMap(const char* phraseReplacementPath); void loadPhraseReplacementMap(const char *phraseReplacementPath);
/// Not implemented since we do not have data to provide bigram function. /// Not implemented since we do not have data to provide bigram function.
const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key); const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey, const std::string &key);
/// Returns a list of available unigram for the given key. /// Returns a list of available unigram for the given key.
/// @param key A std::string that represents the BPMF reading or a symbol key. For /// @param key A std::string that represents the BPMF reading or a symbol key. For
/// example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on. /// example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on.
const std::vector<Gramambular::Unigram> unigramsForKey(const std::string& key); const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
/// If the model has unigrams for the given key. /// If the model has unigrams for the given key.
/// @param key The key. /// @param key The key.
bool hasUnigramsForKey(const std::string& key); bool hasUnigramsForKey(const std::string &key);
/// Enables or disables phrase replacement. /// Enables or disables phrase replacement.
void setPhraseReplacementEnabled(bool enabled); void setPhraseReplacementEnabled(bool enabled);
@ -125,11 +134,10 @@ public:
/// If CNS11643 input is enabled or not. /// If CNS11643 input is enabled or not.
bool cnsEnabled(); bool cnsEnabled();
const std::vector<std::string> associatedPhrasesForKey(const std::string& key); const std::vector<std::string> associatedPhrasesForKey(const std::string &key);
bool hasAssociatedPhrasesForKey(const std::string& key); bool hasAssociatedPhrasesForKey(const std::string &key);
protected:
protected:
/// Filters and converts the input unigrams and returns a new list of unigrams. /// Filters and converts the input unigrams and returns a new list of unigrams.
/// ///
/// @param unigrams The unigrams to be processed. /// @param unigrams The unigrams to be processed.
@ -137,9 +145,9 @@ protected:
/// @param insertedValues The values for unigrams already in the results. /// @param insertedValues The values for unigrams already in the results.
/// It helps to prevent duplicated unigrams. Please note that the method /// It helps to prevent duplicated unigrams. Please note that the method
/// has a side effect that it inserts values to `insertedValues`. /// has a side effect that it inserts values to `insertedValues`.
const std::vector<Gramambular::Unigram> filterAndTransformUnigrams(const std::vector<Gramambular::Unigram> unigrams, const std::vector<Gramambular::Unigram> filterAndTransformUnigrams(
const std::unordered_set<std::string>& excludedValues, const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
std::unordered_set<std::string>& insertedValues); std::unordered_set<std::string> &insertedValues);
ParselessLM m_languageModel; ParselessLM m_languageModel;
CoreLM m_miscModel; CoreLM m_miscModel;
@ -154,6 +162,6 @@ protected:
bool m_cnsEnabled; bool m_cnsEnabled;
bool m_symbolEnabled; bool m_symbolEnabled;
}; };
}; }; // namespace vChewing
#endif #endif

View File

@ -1,27 +1,35 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "LMInstantiator.h" #include "LMInstantiator.h"
#include <algorithm> #include <algorithm>
#include <iterator> #include <iterator>
namespace vChewing { namespace vChewing
{
LMInstantiator::LMInstantiator() LMInstantiator::LMInstantiator()
{ {
@ -39,9 +47,10 @@ LMInstantiator::~LMInstantiator()
m_associatedPhrases.close(); m_associatedPhrases.close();
} }
void LMInstantiator::loadLanguageModel(const char* languageModelDataPath) void LMInstantiator::loadLanguageModel(const char *languageModelDataPath)
{ {
if (languageModelDataPath) { if (languageModelDataPath)
{
m_languageModel.close(); m_languageModel.close();
m_languageModel.open(languageModelDataPath); m_languageModel.open(languageModelDataPath);
} }
@ -52,9 +61,10 @@ bool LMInstantiator::isDataModelLoaded()
return m_languageModel.isLoaded(); return m_languageModel.isLoaded();
} }
void LMInstantiator::loadCNSData(const char* cnsDataPath) void LMInstantiator::loadCNSData(const char *cnsDataPath)
{ {
if (cnsDataPath) { if (cnsDataPath)
{
m_cnsModel.close(); m_cnsModel.close();
m_cnsModel.open(cnsDataPath); m_cnsModel.open(cnsDataPath);
} }
@ -65,9 +75,10 @@ bool LMInstantiator::isCNSDataLoaded()
return m_cnsModel.isLoaded(); return m_cnsModel.isLoaded();
} }
void LMInstantiator::loadMiscData(const char* miscDataPath) void LMInstantiator::loadMiscData(const char *miscDataPath)
{ {
if (miscDataPath) { if (miscDataPath)
{
m_miscModel.close(); m_miscModel.close();
m_miscModel.open(miscDataPath); m_miscModel.open(miscDataPath);
} }
@ -78,9 +89,10 @@ bool LMInstantiator::isMiscDataLoaded()
return m_miscModel.isLoaded(); return m_miscModel.isLoaded();
} }
void LMInstantiator::loadSymbolData(const char* symbolDataPath) void LMInstantiator::loadSymbolData(const char *symbolDataPath)
{ {
if (symbolDataPath) { if (symbolDataPath)
{
m_symbolModel.close(); m_symbolModel.close();
m_symbolModel.open(symbolDataPath); m_symbolModel.open(symbolDataPath);
} }
@ -91,14 +103,15 @@ bool LMInstantiator::isSymbolDataLoaded()
return m_symbolModel.isLoaded(); return m_symbolModel.isLoaded();
} }
void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath, void LMInstantiator::loadUserPhrases(const char *userPhrasesDataPath, const char *excludedPhrasesDataPath)
const char* excludedPhrasesDataPath)
{ {
if (userPhrasesDataPath) { if (userPhrasesDataPath)
{
m_userPhrases.close(); m_userPhrases.close();
m_userPhrases.open(userPhrasesDataPath); m_userPhrases.open(userPhrasesDataPath);
} }
if (excludedPhrasesDataPath) { if (excludedPhrasesDataPath)
{
m_excludedPhrases.close(); m_excludedPhrases.close();
m_excludedPhrases.open(excludedPhrasesDataPath); m_excludedPhrases.open(excludedPhrasesDataPath);
} }
@ -106,7 +119,8 @@ void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath,
void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath) void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath)
{ {
if (userSymbolDataPath) { if (userSymbolDataPath)
{
m_userSymbolModel.close(); m_userSymbolModel.close();
m_userSymbolModel.open(userSymbolDataPath); m_userSymbolModel.open(userSymbolDataPath);
} }
@ -114,28 +128,32 @@ void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath)
void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath) void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath)
{ {
if (userAssociatedPhrasesPath) { if (userAssociatedPhrasesPath)
{
m_associatedPhrases.close(); m_associatedPhrases.close();
m_associatedPhrases.open(userAssociatedPhrasesPath); m_associatedPhrases.open(userAssociatedPhrasesPath);
} }
} }
void LMInstantiator::loadPhraseReplacementMap(const char* phraseReplacementPath) void LMInstantiator::loadPhraseReplacementMap(const char *phraseReplacementPath)
{ {
if (phraseReplacementPath) { if (phraseReplacementPath)
{
m_phraseReplacement.close(); m_phraseReplacement.close();
m_phraseReplacement.open(phraseReplacementPath); m_phraseReplacement.open(phraseReplacementPath);
} }
} }
/// Bigram lookup; this implementation always yields an empty list regardless
/// of the keys supplied.
/// @param preceedingKey Ignored.
/// @param key Ignored.
/// @return An empty vector of bigrams.
const std::vector<Gramambular::Bigram> LMInstantiator::bigramsForKeys(const std::string &preceedingKey,
                                                                      const std::string &key)
{
    std::vector<Gramambular::Bigram> noBigrams;
    return noBigrams;
}
const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string& key) const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string &key)
{ {
if (key == " ") { if (key == " ")
{
std::vector<Gramambular::Unigram> spaceUnigrams; std::vector<Gramambular::Unigram> spaceUnigrams;
Gramambular::Unigram g; Gramambular::Unigram g;
g.keyValue.key = " "; g.keyValue.key = " ";
@ -152,17 +170,18 @@ const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std
std::vector<Gramambular::Unigram> userSymbolUnigrams; std::vector<Gramambular::Unigram> userSymbolUnigrams;
std::vector<Gramambular::Unigram> cnsUnigrams; std::vector<Gramambular::Unigram> cnsUnigrams;
std::unordered_set<std::string> excludedValues; std::unordered_set<std::string> excludedValues;
std::unordered_set<std::string> insertedValues; std::unordered_set<std::string> insertedValues;
if (m_excludedPhrases.hasUnigramsForKey(key)) { if (m_excludedPhrases.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key); std::vector<Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
transform(excludedUnigrams.begin(), excludedUnigrams.end(), transform(excludedUnigrams.begin(), excludedUnigrams.end(), inserter(excludedValues, excludedValues.end()),
inserter(excludedValues, excludedValues.end()), [](const Gramambular::Unigram &u) { return u.keyValue.value; });
[](const Gramambular::Unigram& u) { return u.keyValue.value; });
} }
if (m_userPhrases.hasUnigramsForKey(key)) { if (m_userPhrases.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key); std::vector<Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
// 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。 // 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。
// 這樣一來就可以在就地新增語彙時徹底複寫優先權。 // 這樣一來就可以在就地新增語彙時徹底複寫優先權。
@ -170,27 +189,32 @@ const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues); userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
} }
if (m_languageModel.hasUnigramsForKey(key)) { if (m_languageModel.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key); std::vector<Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues); allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
} }
if (m_miscModel.hasUnigramsForKey(key)) { if (m_miscModel.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> rawMiscUnigrams = m_miscModel.unigramsForKey(key); std::vector<Gramambular::Unigram> rawMiscUnigrams = m_miscModel.unigramsForKey(key);
miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues); miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues);
} }
if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled) { if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
{
std::vector<Gramambular::Unigram> rawSymbolUnigrams = m_symbolModel.unigramsForKey(key); std::vector<Gramambular::Unigram> rawSymbolUnigrams = m_symbolModel.unigramsForKey(key);
symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues); symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues);
} }
if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled) { if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
{
std::vector<Gramambular::Unigram> rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key); std::vector<Gramambular::Unigram> rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key);
userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues); userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues);
} }
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) { if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled)
{
std::vector<Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key); std::vector<Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues); cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
} }
@ -203,13 +227,15 @@ const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std
return allUnigrams; return allUnigrams;
} }
bool LMInstantiator::hasUnigramsForKey(const std::string& key) bool LMInstantiator::hasUnigramsForKey(const std::string &key)
{ {
if (key == " ") { if (key == " ")
{
return true; return true;
} }
if (!m_excludedPhrases.hasUnigramsForKey(key)) { if (!m_excludedPhrases.hasUnigramsForKey(key))
{
return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key); return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key);
} }
@ -246,26 +272,33 @@ bool LMInstantiator::symbolEnabled()
return m_symbolEnabled; return m_symbolEnabled;
} }
const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string>& excludedValues, std::unordered_set<std::string>& insertedValues) const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(
const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
std::unordered_set<std::string> &insertedValues)
{ {
std::vector<Gramambular::Unigram> results; std::vector<Gramambular::Unigram> results;
for (auto&& unigram : unigrams) { for (auto &&unigram : unigrams)
{
// excludedValues filters out the unigrams with the original value. // excludedValues filters out the unigrams with the original value.
// insertedValues filters out the ones with the converted value // insertedValues filters out the ones with the converted value
std::string originalValue = unigram.keyValue.value; std::string originalValue = unigram.keyValue.value;
if (excludedValues.find(originalValue) != excludedValues.end()) { if (excludedValues.find(originalValue) != excludedValues.end())
{
continue; continue;
} }
std::string value = originalValue; std::string value = originalValue;
if (m_phraseReplacementEnabled) { if (m_phraseReplacementEnabled)
{
std::string replacement = m_phraseReplacement.valueForKey(value); std::string replacement = m_phraseReplacement.valueForKey(value);
if (replacement != "") { if (replacement != "")
{
value = replacement; value = replacement;
} }
} }
if (insertedValues.find(value) == insertedValues.end()) { if (insertedValues.find(value) == insertedValues.end())
{
Gramambular::Unigram g; Gramambular::Unigram g;
g.keyValue.value = value; g.keyValue.value = value;
g.keyValue.key = unigram.keyValue.key; g.keyValue.key = unigram.keyValue.key;
@ -277,12 +310,12 @@ const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigra
return results; return results;
} }
const std::vector<std::string> LMInstantiator::associatedPhrasesForKey(const std::string& key) const std::vector<std::string> LMInstantiator::associatedPhrasesForKey(const std::string &key)
{ {
return m_associatedPhrases.valuesForKey(key); return m_associatedPhrases.valuesForKey(key);
} }
bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string& key) bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string &key)
{ {
return m_associatedPhrases.hasValuesForKey(key); return m_associatedPhrases.hasValuesForKey(key);
} }

View File

@ -1,47 +1,58 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef ASSOCIATEDPHRASES_H #ifndef ASSOCIATEDPHRASES_H
#define ASSOCIATEDPHRASES_H #define ASSOCIATEDPHRASES_H
#include <string>
#include <map>
#include <iostream> #include <iostream>
#include <map>
#include <string>
#include <vector> #include <vector>
namespace vChewing { namespace vChewing
{
class AssociatedPhrases class AssociatedPhrases
{ {
public: public:
AssociatedPhrases(); AssociatedPhrases();
~AssociatedPhrases(); ~AssociatedPhrases();
const bool isLoaded(); const bool isLoaded();
bool open(const char *path); bool open(const char *path);
void close(); void close();
const std::vector<std::string> valuesForKey(const std::string& key); const std::vector<std::string> valuesForKey(const std::string &key);
const bool hasValuesForKey(const std::string& key); const bool hasValuesForKey(const std::string &key);
protected: protected:
/// One key/value entry, held as a pair of non-owning string views.
///
/// The constructor takes its arguments by value: std::string_view is a cheap
/// handle to copy, and by-value parameters accept mutable lvalues, const
/// views and temporaries alike.
///
/// NOTE(review): the views do not own their characters; presumably they point
/// into the buffer mapped by open() — confirm that buffer outlives every Row.
struct Row
{
    /// @param k View of the entry's key.
    /// @param v View of the entry's value.
    Row(std::string_view k, std::string_view v) : key(k), value(v)
    {
    }
    std::string_view key;
    std::string_view value;
};
@ -53,6 +64,6 @@ protected:
size_t length; size_t length;
}; };
} } // namespace vChewing
#endif /* AssociatedPhrases_hpp */ #endif /* AssociatedPhrases_hpp */

View File

@ -1,52 +1,59 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "AssociatedPhrases.h" #include "AssociatedPhrases.h"
#include "vChewing-Swift.h" #include "vChewing-Swift.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h> #include <fcntl.h>
#include <fstream> #include <fstream>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h> #include <unistd.h>
#include "KeyValueBlobReader.h" #include "KeyValueBlobReader.h"
#include "LMConsolidator.h" #include "LMConsolidator.h"
namespace vChewing { namespace vChewing
{
/// Creates an unloaded instance: file descriptor -1, null data pointer and a
/// length of zero.
AssociatedPhrases::AssociatedPhrases()
    : fd(-1),
      data(nullptr),
      length(0)
{
}
/// Releases the loaded data, if any, by delegating to close().
AssociatedPhrases::~AssociatedPhrases()
{
    if (!data)
    {
        // Nothing is loaded, so there is nothing to release.
        return;
    }
    close();
}
const bool AssociatedPhrases::isLoaded() const bool AssociatedPhrases::isLoaded()
{ {
if (data) { if (data)
{
return true; return true;
} }
return false; return false;
@ -54,7 +61,8 @@ const bool AssociatedPhrases::isLoaded()
bool AssociatedPhrases::open(const char *path) bool AssociatedPhrases::open(const char *path)
{ {
if (data) { if (data)
{
return false; return false;
} }
@ -62,13 +70,15 @@ bool AssociatedPhrases::open(const char *path)
LMConsolidator::ConsolidateContent(path, true); LMConsolidator::ConsolidateContent(path, true);
fd = ::open(path, O_RDONLY); fd = ::open(path, O_RDONLY);
if (fd == -1) { if (fd == -1)
{
printf("open:: file not exist"); printf("open:: file not exist");
return false; return false;
} }
struct stat sb; struct stat sb;
if (fstat(fd, &sb) == -1) { if (fstat(fd, &sb) == -1)
{
printf("open:: cannot open file"); printf("open:: cannot open file");
return false; return false;
} }
@ -76,21 +86,25 @@ bool AssociatedPhrases::open(const char *path)
length = (size_t)sb.st_size; length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (!data) { if (!data)
{
::close(fd); ::close(fd);
return false; return false;
} }
KeyValueBlobReader reader(static_cast<char*>(data), length); KeyValueBlobReader reader(static_cast<char *>(data), length);
KeyValueBlobReader::KeyValue keyValue; KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state; KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
{
keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value); keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value);
} }
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
if (state == KeyValueBlobReader::State::ERROR) { if (state == KeyValueBlobReader::State::ERROR)
{
// close(); // close();
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n"); if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n");
// return false; // return false;
} }
return true; return true;
@ -98,7 +112,8 @@ bool AssociatedPhrases::open(const char *path)
void AssociatedPhrases::close() void AssociatedPhrases::close()
{ {
if (data) { if (data)
{
munmap(data, length); munmap(data, length);
::close(fd); ::close(fd);
data = 0; data = 0;
@ -107,13 +122,15 @@ void AssociatedPhrases::close()
keyRowMap.clear(); keyRowMap.clear();
} }
const std::vector<std::string> AssociatedPhrases::valuesForKey(const std::string& key) const std::vector<std::string> AssociatedPhrases::valuesForKey(const std::string &key)
{ {
std::vector<std::string> v; std::vector<std::string> v;
auto iter = keyRowMap.find(key); auto iter = keyRowMap.find(key);
if (iter != keyRowMap.end()) { if (iter != keyRowMap.end())
const std::vector<Row>& rows = iter->second; {
for (const auto& row : rows) { const std::vector<Row> &rows = iter->second;
for (const auto &row : rows)
{
std::string_view value = row.value; std::string_view value = row.value;
v.push_back({value.data(), value.size()}); v.push_back({value.data(), value.size()});
} }
@ -121,9 +138,9 @@ const std::vector<std::string> AssociatedPhrases::valuesForKey(const std::string
return v; return v;
} }
const bool AssociatedPhrases::hasValuesForKey(const std::string& key) const bool AssociatedPhrases::hasValuesForKey(const std::string &key)
{ {
return keyRowMap.find(key) != keyRowMap.end(); return keyRowMap.find(key) != keyRowMap.end();
} }
}; // namespace vChewing }; // namespace vChewing

View File

@ -1,30 +1,37 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef CoreLM_H #ifndef CoreLM_H
#define CoreLM_H #define CoreLM_H
#include "LanguageModel.h" #include "LanguageModel.h"
#include <iostream>
#include <map>
#include <string> #include <string>
#include <vector> #include <vector>
#include <map>
#include <iostream>
// this class relies on the fact that we have a space-separated data // this class relies on the fact that we have a space-separated data
// format, and we use mmap and zero-out the separators and line feeds // format, and we use mmap and zero-out the separators and line feeds
@ -33,10 +40,12 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
using namespace std; using namespace std;
using namespace Gramambular; using namespace Gramambular;
namespace vChewing { namespace vChewing
{
class CoreLM : public Gramambular::LanguageModel { class CoreLM : public Gramambular::LanguageModel
public: {
public:
CoreLM(); CoreLM();
~CoreLM(); ~CoreLM();
@ -45,20 +54,21 @@ public:
void close(); void close();
void dump(); void dump();
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const string& preceedingKey, const string& key); virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const string &preceedingKey, const string &key);
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const string& key); virtual const std::vector<Gramambular::Unigram> unigramsForKey(const string &key);
virtual bool hasUnigramsForKey(const string& key); virtual bool hasUnigramsForKey(const string &key);
protected: protected:
/// Comparator for C strings: orders by the pointed-to characters (via strcmp)
/// rather than by pointer value.
struct CStringCmp
{
    /// @return true when s1 sorts strictly before s2 according to strcmp.
    bool operator()(const char *s1, const char *s2) const
    {
        const int order = strcmp(s1, s2);
        return order < 0;
    }
};
struct Row { struct Row
{
const char *key; const char *key;
const char *value; const char *value;
const char *logProbability; const char *logProbability;

View File

@ -1,50 +1,56 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "CoreLM.h" #include "CoreLM.h"
#include <sys/mman.h> #include "vChewing-Swift.h"
#include <sys/stat.h>
#include <fcntl.h> #include <fcntl.h>
#include <fstream> #include <fstream>
#include <unistd.h> #include <sys/mman.h>
#include <sys/stat.h>
#include <syslog.h> #include <syslog.h>
#include "vChewing-Swift.h" #include <unistd.h>
using namespace Gramambular; using namespace Gramambular;
vChewing::CoreLM::CoreLM() vChewing::CoreLM::CoreLM() : fd(-1), data(0), length(0)
: fd(-1)
, data(0)
, length(0)
{ {
} }
vChewing::CoreLM::~CoreLM() vChewing::CoreLM::~CoreLM()
{ {
if (data) { if (data)
{
close(); close();
} }
} }
bool vChewing::CoreLM::isLoaded() bool vChewing::CoreLM::isLoaded()
{ {
if (data) { if (data)
{
return true; return true;
} }
return false; return false;
@ -52,24 +58,28 @@ bool vChewing::CoreLM::isLoaded()
bool vChewing::CoreLM::open(const char *path) bool vChewing::CoreLM::open(const char *path)
{ {
if (data) { if (data)
{
return false; return false;
} }
fd = ::open(path, O_RDONLY); fd = ::open(path, O_RDONLY);
if (fd == -1) { if (fd == -1)
{
return false; return false;
} }
struct stat sb; struct stat sb;
if (fstat(fd, &sb) == -1) { if (fstat(fd, &sb) == -1)
{
return false; return false;
} }
length = (size_t)sb.st_size; length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0); data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0);
if (!data) { if (!data)
{
::close(fd); ::close(fd);
return false; return false;
} }
@ -117,18 +127,22 @@ bool vChewing::CoreLM::open(const char *path)
start: start:
// EOF -> end // EOF -> end
if (head == end) { if (head == end)
{
goto end; goto end;
} }
c = *head; c = *head;
// \s -> error // \s -> error
if (c == ' ') { if (c == ' ')
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error");
goto error; goto error;
} }
// \n -> start // \n -> start
else if (c == '\n') { else if (c == '\n')
{
head++; head++;
goto start; goto start;
} }
@ -140,19 +154,24 @@ start:
state1: state1:
// EOF -> error // EOF -> error
if (head == end) { if (head == end)
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error");
goto error; goto error;
} }
c = *head; c = *head;
// \n -> error // \n -> error
if (c == '\n') { if (c == '\n')
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error");
goto error; goto error;
} }
// \s -> state2 + zero out ending + record column start // \s -> state2 + zero out ending + record column start
else if (c == ' ') { else if (c == ' ')
{
*head = 0; *head = 0;
head++; head++;
row.key = head; row.key = head;
@ -165,15 +184,19 @@ state1:
state2: state2:
// eof -> error // eof -> error
if (head == end) { if (head == end)
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error");
goto error; goto error;
} }
c = *head; c = *head;
// \n, \s -> error // \n, \s -> error
if (c == '\n' || c == ' ') { if (c == '\n' || c == ' ')
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error");
goto error; goto error;
} }
@ -184,20 +207,25 @@ state2:
state3: state3:
// eof -> error // eof -> error
if (head == end) { if (head == end)
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error");
goto error; goto error;
} }
c = *head; c = *head;
// \n -> error // \n -> error
if (c == '\n') { if (c == '\n')
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error");
goto error; goto error;
} }
// \s -> state4 + zero out ending + record column start // \s -> state4 + zero out ending + record column start
else if (c == ' ') { else if (c == ' ')
{
*head = 0; *head = 0;
head++; head++;
row.logProbability = head; row.logProbability = head;
@ -210,15 +238,19 @@ state3:
state4: state4:
// eof -> error // eof -> error
if (head == end) { if (head == end)
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error");
goto error; goto error;
} }
c = *head; c = *head;
// \n, \s -> error // \n, \s -> error
if (c == '\n' || c == ' ') { if (c == '\n' || c == ' ')
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error");
goto error; goto error;
} }
@ -227,22 +259,26 @@ state4:
// fall through to state 5 // fall through to state 5
state5: state5:
// eof -> error // eof -> error
if (head == end) { if (head == end)
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error");
goto error; goto error;
} }
c = *head; c = *head;
// \s -> error // \s -> error
if (c == ' ') { if (c == ' ')
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error"); {
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error");
goto error; goto error;
} }
// \n -> start // \n -> start
else if (c == '\n') { else if (c == '\n')
{
*head = 0; *head = 0;
head++; head++;
keyRowMap[row.key].push_back(row); keyRowMap[row.key].push_back(row);
@ -265,13 +301,15 @@ end:
emptyRow.value = space; emptyRow.value = space;
emptyRow.logProbability = zero; emptyRow.logProbability = zero;
keyRowMap[space].push_back(emptyRow); keyRowMap[space].push_back(emptyRow);
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete."); if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete.");
return true; return true;
} }
void vChewing::CoreLM::close() void vChewing::CoreLM::close()
{ {
if (data) { if (data)
{
munmap(data, length); munmap(data, length);
::close(fd); ::close(fd);
data = 0; data = 0;
@ -283,30 +321,34 @@ void vChewing::CoreLM::close()
void vChewing::CoreLM::dump() void vChewing::CoreLM::dump()
{ {
size_t rows = 0; size_t rows = 0;
for (map<const char *, vector<Row> >::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i) { for (map<const char *, vector<Row>>::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i)
const vector<Row>& r = (*i).second; {
for (vector<Row>::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri) { const vector<Row> &r = (*i).second;
const Row& row = *ri; for (vector<Row>::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri)
{
const Row &row = *ri;
cerr << row.key << " " << row.value << " " << row.logProbability << "\n"; cerr << row.key << " " << row.value << " " << row.logProbability << "\n";
rows++; rows++;
} }
} }
} }
const std::vector<Gramambular::Bigram> vChewing::CoreLM::bigramsForKeys(const string& preceedingKey, const string& key) const std::vector<Gramambular::Bigram> vChewing::CoreLM::bigramsForKeys(const string &preceedingKey, const string &key)
{ {
return std::vector<Gramambular::Bigram>(); return std::vector<Gramambular::Bigram>();
} }
const std::vector<Gramambular::Unigram> vChewing::CoreLM::unigramsForKey(const string& key) const std::vector<Gramambular::Unigram> vChewing::CoreLM::unigramsForKey(const string &key)
{ {
std::vector<Gramambular::Unigram> v; std::vector<Gramambular::Unigram> v;
map<const char *, vector<Row> >::const_iterator i = keyRowMap.find(key.c_str()); map<const char *, vector<Row>>::const_iterator i = keyRowMap.find(key.c_str());
if (i != keyRowMap.end()) { if (i != keyRowMap.end())
for (vector<Row>::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri) { {
for (vector<Row>::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri)
{
Unigram g; Unigram g;
const Row& r = *ri; const Row &r = *ri;
g.keyValue.key = r.key; g.keyValue.key = r.key;
g.keyValue.value = r.value; g.keyValue.value = r.value;
g.score = atof(r.logProbability); g.score = atof(r.logProbability);
@ -317,7 +359,7 @@ const std::vector<Gramambular::Unigram> vChewing::CoreLM::unigramsForKey(const s
return v; return v;
} }
bool vChewing::CoreLM::hasUnigramsForKey(const string& key) bool vChewing::CoreLM::hasUnigramsForKey(const string &key)
{ {
return keyRowMap.find(key.c_str()) != keyRowMap.end(); return keyRowMap.find(key.c_str()) != keyRowMap.end();
} }

View File

@ -1,44 +1,54 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef CNSLM_H #ifndef CNSLM_H
#define CNSLM_H #define CNSLM_H
#include <string>
#include <map>
#include <iostream>
#include "LanguageModel.h" #include "LanguageModel.h"
#include "UserPhrasesLM.h" #include "UserPhrasesLM.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing { namespace vChewing
class CNSLM: public UserPhrasesLM
{ {
public:
virtual bool allowConsolidation() override { class CNSLM : public UserPhrasesLM
{
public:
virtual bool allowConsolidation() override
{
return false; return false;
} }
virtual float overridedValue() override { virtual float overridedValue() override
{
return -11.0; return -11.0;
} }
}; };
} } // namespace vChewing
#endif #endif

View File

@ -1,44 +1,54 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef SYMBOLLM_H #ifndef SYMBOLLM_H
#define SYMBOLLM_H #define SYMBOLLM_H
#include <string>
#include <map>
#include <iostream>
#include "LanguageModel.h" #include "LanguageModel.h"
#include "UserPhrasesLM.h" #include "UserPhrasesLM.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing { namespace vChewing
class SymbolLM: public UserPhrasesLM
{ {
public:
virtual bool allowConsolidation() override { class SymbolLM : public UserPhrasesLM
{
public:
virtual bool allowConsolidation() override
{
return false; return false;
} }
virtual float overridedValue() override { virtual float overridedValue() override
{
return -13.0; return -13.0;
} }
}; };
} } // namespace vChewing
#endif #endif

View File

@ -1,44 +1,54 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef USERSYMBOLLM_H #ifndef USERSYMBOLLM_H
#define USERSYMBOLLM_H #define USERSYMBOLLM_H
#include <string>
#include <map>
#include <iostream>
#include "LanguageModel.h" #include "LanguageModel.h"
#include "UserPhrasesLM.h" #include "UserPhrasesLM.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing { namespace vChewing
class UserSymbolLM: public UserPhrasesLM
{ {
public:
virtual bool allowConsolidation() override { class UserSymbolLM : public UserPhrasesLM
{
public:
virtual bool allowConsolidation() override
{
return true; return true;
} }
virtual float overridedValue() override { virtual float overridedValue() override
{
return -12.0; return -12.0;
} }
}; };
} } // namespace vChewing
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "ParselessLM.h" #include "ParselessLM.h"
@ -26,29 +33,36 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <memory> #include <memory>
vChewing::ParselessLM::~ParselessLM() { close(); } vChewing::ParselessLM::~ParselessLM()
{
close();
}
bool vChewing::ParselessLM::isLoaded() bool vChewing::ParselessLM::isLoaded()
{ {
if (data_) { if (data_)
{
return true; return true;
} }
return false; return false;
} }
bool vChewing::ParselessLM::open(const std::string_view& path) bool vChewing::ParselessLM::open(const std::string_view &path)
{ {
if (data_) { if (data_)
{
return false; return false;
} }
fd_ = ::open(path.data(), O_RDONLY); fd_ = ::open(path.data(), O_RDONLY);
if (fd_ == -1) { if (fd_ == -1)
{
return false; return false;
} }
struct stat sb; struct stat sb;
if (fstat(fd_, &sb) == -1) { if (fstat(fd_, &sb) == -1)
{
::close(fd_); ::close(fd_);
fd_ = -1; fd_ = -1;
return false; return false;
@ -57,21 +71,22 @@ bool vChewing::ParselessLM::open(const std::string_view& path)
length_ = static_cast<size_t>(sb.st_size); length_ = static_cast<size_t>(sb.st_size);
data_ = mmap(NULL, length_, PROT_READ, MAP_SHARED, fd_, 0); data_ = mmap(NULL, length_, PROT_READ, MAP_SHARED, fd_, 0);
if (data_ == nullptr) { if (data_ == nullptr)
{
::close(fd_); ::close(fd_);
fd_ = -1; fd_ = -1;
length_ = 0; length_ = 0;
return false; return false;
} }
db_ = std::unique_ptr<ParselessPhraseDB>(new ParselessPhraseDB( db_ = std::unique_ptr<ParselessPhraseDB>(new ParselessPhraseDB(static_cast<char *>(data_), length_));
static_cast<char*>(data_), length_));
return true; return true;
} }
void vChewing::ParselessLM::close() void vChewing::ParselessLM::close()
{ {
if (data_ != nullptr) { if (data_ != nullptr)
{
munmap(data_, length_); munmap(data_, length_);
::close(fd_); ::close(fd_);
fd_ = -1; fd_ = -1;
@ -80,55 +95,61 @@ void vChewing::ParselessLM::close()
} }
} }
const std::vector<Gramambular::Bigram> const std::vector<Gramambular::Bigram> vChewing::ParselessLM::bigramsForKeys(const std::string &preceedingKey,
vChewing::ParselessLM::bigramsForKeys( const std::string &key)
const std::string& preceedingKey, const std::string& key)
{ {
return std::vector<Gramambular::Bigram>(); return std::vector<Gramambular::Bigram>();
} }
const std::vector<Gramambular::Unigram> const std::vector<Gramambular::Unigram> vChewing::ParselessLM::unigramsForKey(const std::string &key)
vChewing::ParselessLM::unigramsForKey(const std::string& key)
{ {
if (db_ == nullptr) { if (db_ == nullptr)
{
return std::vector<Gramambular::Unigram>(); return std::vector<Gramambular::Unigram>();
} }
std::vector<Gramambular::Unigram> results; std::vector<Gramambular::Unigram> results;
for (const auto& row : db_->findRows(key + " ")) { for (const auto &row : db_->findRows(key + " "))
{
Gramambular::Unigram unigram; Gramambular::Unigram unigram;
// Move ahead until we encounter the first space. This is the key. // Move ahead until we encounter the first space. This is the key.
auto it = row.begin(); auto it = row.begin();
while (it != row.end() && *it != ' ') { while (it != row.end() && *it != ' ')
{
++it; ++it;
} }
unigram.keyValue.key = std::string(row.begin(), it); unigram.keyValue.key = std::string(row.begin(), it);
// Read past the space. // Read past the space.
if (it != row.end()) { if (it != row.end())
{
++it; ++it;
} }
if (it != row.end()) { if (it != row.end())
{
// Now it is the start of the value portion. // Now it is the start of the value portion.
auto value_begin = it; auto value_begin = it;
// Move ahead until we encounter the second space. This is the // Move ahead until we encounter the second space. This is the
// value. // value.
while (it != row.end() && *it != ' ') { while (it != row.end() && *it != ' ')
{
++it; ++it;
} }
unigram.keyValue.value = std::string(value_begin, it); unigram.keyValue.value = std::string(value_begin, it);
} }
// Read past the space. The remainder, if it exists, is the score. // Read past the space. The remainder, if it exists, is the score.
if (it != row.end()) { if (it != row.end())
{
++it; ++it;
} }
if (it != row.end()) { if (it != row.end())
{
unigram.score = std::stod(std::string(it, row.end())); unigram.score = std::stod(std::string(it, row.end()));
} }
results.push_back(unigram); results.push_back(unigram);
@ -136,9 +157,10 @@ vChewing::ParselessLM::unigramsForKey(const std::string& key)
return results; return results;
} }
bool vChewing::ParselessLM::hasUnigramsForKey(const std::string& key) bool vChewing::ParselessLM::hasUnigramsForKey(const std::string &key)
{ {
if (db_ == nullptr) { if (db_ == nullptr)
{
return false; return false;
} }

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef SOURCE_ENGINE_PARSELESSLM_H_ #ifndef SOURCE_ENGINE_PARSELESSLM_H_
@ -27,25 +34,26 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "LanguageModel.h" #include "LanguageModel.h"
#include "ParselessPhraseDB.h" #include "ParselessPhraseDB.h"
namespace vChewing { namespace vChewing
{
class ParselessLM : public Gramambular::LanguageModel { class ParselessLM : public Gramambular::LanguageModel
public: {
public:
~ParselessLM() override; ~ParselessLM() override;
bool isLoaded(); bool isLoaded();
bool open(const std::string_view& path); bool open(const std::string_view &path);
void close(); void close();
const std::vector<Gramambular::Bigram> bigramsForKeys( const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey,
const std::string& preceedingKey, const std::string& key) override; const std::string &key) override;
const std::vector<Gramambular::Unigram> unigramsForKey( const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key) override;
const std::string& key) override; bool hasUnigramsForKey(const std::string &key) override;
bool hasUnigramsForKey(const std::string& key) override;
private: private:
int fd_ = -1; int fd_ = -1;
void* data_ = nullptr; void *data_ = nullptr;
size_t length_ = 0; size_t length_ = 0;
std::unique_ptr<ParselessPhraseDB> db_; std::unique_ptr<ParselessPhraseDB> db_;
}; };

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "ParselessPhraseDB.h" #include "ParselessPhraseDB.h"
@ -22,35 +29,35 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <cassert> #include <cassert>
#include <cstring> #include <cstring>
namespace vChewing { namespace vChewing
{
ParselessPhraseDB::ParselessPhraseDB( ParselessPhraseDB::ParselessPhraseDB(const char *buf, size_t length) : begin_(buf), end_(buf + length)
const char* buf, size_t length)
: begin_(buf)
, end_(buf + length)
{ {
} }
std::vector<std::string_view> ParselessPhraseDB::findRows( std::vector<std::string_view> ParselessPhraseDB::findRows(const std::string_view &key)
const std::string_view& key)
{ {
std::vector<std::string_view> rows; std::vector<std::string_view> rows;
const char* ptr = findFirstMatchingLine(key); const char *ptr = findFirstMatchingLine(key);
if (ptr == nullptr) { if (ptr == nullptr)
{
return rows; return rows;
} }
while (ptr + key.length() <= end_ while (ptr + key.length() <= end_ && memcmp(ptr, key.data(), key.length()) == 0)
&& memcmp(ptr, key.data(), key.length()) == 0) { {
const char* eol = ptr; const char *eol = ptr;
while (eol != end_ && *eol != '\n') { while (eol != end_ && *eol != '\n')
{
++eol; ++eol;
} }
rows.emplace_back(ptr, eol - ptr); rows.emplace_back(ptr, eol - ptr);
if (eol == end_) { if (eol == end_)
{
break; break;
} }
@ -66,71 +73,83 @@ std::vector<std::string_view> ParselessPhraseDB::findRows(
// current line is actually the first matching line: if the previous line is // current line is actually the first matching line: if the previous line is
// less to the key and the current line starts exactly with the key, then // less to the key and the current line starts exactly with the key, then
// the current line is the first matching line. // the current line is the first matching line.
const char* ParselessPhraseDB::findFirstMatchingLine( const char *ParselessPhraseDB::findFirstMatchingLine(const std::string_view &key)
const std::string_view& key)
{ {
if (key.empty()) { if (key.empty())
{
return begin_; return begin_;
} }
const char* top = begin_; const char *top = begin_;
const char* bottom = end_; const char *bottom = end_;
while (top < bottom) { while (top < bottom)
const char* mid = top + (bottom - top) / 2; {
const char* ptr = mid; const char *mid = top + (bottom - top) / 2;
const char *ptr = mid;
if (ptr != begin_) { if (ptr != begin_)
{
--ptr; --ptr;
} }
while (ptr != begin_ && *ptr != '\n') { while (ptr != begin_ && *ptr != '\n')
{
--ptr; --ptr;
} }
const char* prev = nullptr; const char *prev = nullptr;
if (*ptr == '\n') { if (*ptr == '\n')
{
prev = ptr; prev = ptr;
++ptr; ++ptr;
} }
// ptr is now in the "current" line we're interested in. // ptr is now in the "current" line we're interested in.
if (ptr + key.length() > end_) { if (ptr + key.length() > end_)
{
// not enough data to compare at this point, bail. // not enough data to compare at this point, bail.
break; break;
} }
int current_cmp = memcmp(ptr, key.data(), key.length()); int current_cmp = memcmp(ptr, key.data(), key.length());
if (current_cmp > 0) { if (current_cmp > 0)
{
bottom = mid - 1; bottom = mid - 1;
continue; continue;
} }
if (current_cmp < 0) { if (current_cmp < 0)
{
top = mid + 1; top = mid + 1;
continue; continue;
} }
if (!prev) { if (!prev)
{
return ptr; return ptr;
} }
// Move the prev so that it reaches the previous line. // Move the prev so that it reaches the previous line.
if (prev != begin_) { if (prev != begin_)
{
--prev; --prev;
} }
while (prev != begin_ && *prev != '\n') { while (prev != begin_ && *prev != '\n')
{
--prev; --prev;
} }
if (*prev == '\n') { if (*prev == '\n')
{
++prev; ++prev;
} }
int prev_cmp = memcmp(prev, key.data(), key.length()); int prev_cmp = memcmp(prev, key.data(), key.length());
// This is the first occurrence. // This is the first occurrence.
if (prev_cmp < 0 && current_cmp == 0) { if (prev_cmp < 0 && current_cmp == 0)
{
return ptr; return ptr;
} }

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef SOURCE_ENGINE_PARSELESSPHRASEDB_H_ #ifndef SOURCE_ENGINE_PARSELESSPHRASEDB_H_
@ -24,28 +31,29 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <string> #include <string>
#include <vector> #include <vector>
namespace vChewing { namespace vChewing
{
// Defines phrase database that consists of (key, value, score) rows that are // Defines phrase database that consists of (key, value, score) rows that are
// pre-sorted by the byte value of the keys. It is way faster than FastLM // pre-sorted by the byte value of the keys. It is way faster than FastLM
// because it does not need to parse anything. Instead, it relies on the fact // because it does not need to parse anything. Instead, it relies on the fact
// that the database is already sorted, and binary search is used to find the // that the database is already sorted, and binary search is used to find the
// rows. // rows.
class ParselessPhraseDB { class ParselessPhraseDB
public: {
ParselessPhraseDB( public:
const char* buf, size_t length); ParselessPhraseDB(const char *buf, size_t length);
// Find the rows that match the key. Note that prefix match is used. If you // Find the rows that match the key. Note that prefix match is used. If you
// need exact match, the key will need to have a delimiter (usually a space) // need exact match, the key will need to have a delimiter (usually a space)
// at the end. // at the end.
std::vector<std::string_view> findRows(const std::string_view& key); std::vector<std::string_view> findRows(const std::string_view &key);
const char* findFirstMatchingLine(const std::string_view& key); const char *findFirstMatchingLine(const std::string_view &key);
private: private:
const char* begin_; const char *begin_;
const char* end_; const char *end_;
}; };
}; // namespace vChewing }; // namespace vChewing

View File

@ -1,48 +1,56 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef PHRASEREPLACEMENTMAP_H #ifndef PHRASEREPLACEMENTMAP_H
#define PHRASEREPLACEMENTMAP_H #define PHRASEREPLACEMENTMAP_H
#include <string>
#include <map>
#include <iostream> #include <iostream>
#include <map>
#include <string>
namespace vChewing { namespace vChewing
{
class PhraseReplacementMap class PhraseReplacementMap
{ {
public: public:
PhraseReplacementMap(); PhraseReplacementMap();
~PhraseReplacementMap(); ~PhraseReplacementMap();
bool open(const char *path); bool open(const char *path);
void close(); void close();
const std::string valueForKey(const std::string& key); const std::string valueForKey(const std::string &key);
protected: protected:
std::map<std::string_view, std::string_view> keyValueMap; std::map<std::string_view, std::string_view> keyValueMap;
int fd; int fd;
void *data; void *data;
size_t length; size_t length;
}; };
} } // namespace vChewing
#endif #endif

View File

@ -1,55 +1,62 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "PhraseReplacementMap.h" #include "PhraseReplacementMap.h"
#include "vChewing-Swift.h" #include "vChewing-Swift.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h> #include <fcntl.h>
#include <fstream> #include <fstream>
#include <unistd.h> #include <sys/mman.h>
#include <sys/stat.h>
#include <syslog.h> #include <syslog.h>
#include <unistd.h>
#include "KeyValueBlobReader.h" #include "KeyValueBlobReader.h"
#include "LMConsolidator.h" #include "LMConsolidator.h"
namespace vChewing { namespace vChewing
{
using std::string; using std::string;
PhraseReplacementMap::PhraseReplacementMap() PhraseReplacementMap::PhraseReplacementMap() : fd(-1), data(0), length(0)
: fd(-1)
, data(0)
, length(0)
{ {
} }
PhraseReplacementMap::~PhraseReplacementMap() PhraseReplacementMap::~PhraseReplacementMap()
{ {
if (data) { if (data)
{
close(); close();
} }
} }
bool PhraseReplacementMap::open(const char *path) bool PhraseReplacementMap::open(const char *path)
{ {
if (data) { if (data)
{
return false; return false;
} }
@ -57,13 +64,15 @@ bool PhraseReplacementMap::open(const char *path)
LMConsolidator::ConsolidateContent(path, true); LMConsolidator::ConsolidateContent(path, true);
fd = ::open(path, O_RDONLY); fd = ::open(path, O_RDONLY);
if (fd == -1) { if (fd == -1)
{
printf("open:: file not exist"); printf("open:: file not exist");
return false; return false;
} }
struct stat sb; struct stat sb;
if (fstat(fd, &sb) == -1) { if (fstat(fd, &sb) == -1)
{
printf("open:: cannot open file"); printf("open:: cannot open file");
return false; return false;
} }
@ -71,21 +80,25 @@ bool PhraseReplacementMap::open(const char *path)
length = (size_t)sb.st_size; length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (!data) { if (!data)
{
::close(fd); ::close(fd);
return false; return false;
} }
KeyValueBlobReader reader(static_cast<char*>(data), length); KeyValueBlobReader reader(static_cast<char *>(data), length);
KeyValueBlobReader::KeyValue keyValue; KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state; KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
{
keyValueMap[keyValue.key] = keyValue.value; keyValueMap[keyValue.key] = keyValue.value;
} }
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
if (state == KeyValueBlobReader::State::ERROR) { if (state == KeyValueBlobReader::State::ERROR)
{
// close(); // close();
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n"); if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n");
// return false; // return false;
} }
return true; return true;
@ -93,7 +106,8 @@ bool PhraseReplacementMap::open(const char *path)
void PhraseReplacementMap::close() void PhraseReplacementMap::close()
{ {
if (data) { if (data)
{
munmap(data, length); munmap(data, length);
::close(fd); ::close(fd);
data = 0; data = 0;
@ -102,15 +116,15 @@ void PhraseReplacementMap::close()
keyValueMap.clear(); keyValueMap.clear();
} }
const std::string PhraseReplacementMap::valueForKey(const std::string& key) const std::string PhraseReplacementMap::valueForKey(const std::string &key)
{ {
auto iter = keyValueMap.find(key); auto iter = keyValueMap.find(key);
if (iter != keyValueMap.end()) { if (iter != keyValueMap.end())
{
const std::string_view v = iter->second; const std::string_view v = iter->second;
return {v.data(), v.size()}; return {v.data(), v.size()};
} }
return string(""); return string("");
} }
} }

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "UserOverrideModel.h" #include "UserOverrideModel.h"
@ -23,88 +30,84 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <cmath> #include <cmath>
#include <sstream> #include <sstream>
namespace vChewing { namespace vChewing
{
// About 20 generations. // About 20 generations.
static const double DecayThreshould = 1.0 / 1048576.0; static const double DecayThreshould = 1.0 / 1048576.0;
static double Score(size_t eventCount, static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda);
size_t totalCount, static bool IsEndingPunctuation(const std::string &value);
double eventTimestamp, static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex);
double timestamp,
double lambda);
static bool IsEndingPunctuation(const std::string& value);
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex);
UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) : m_capacity(capacity)
: m_capacity(capacity) { {
assert(m_capacity > 0); assert(m_capacity > 0);
m_decayExponent = log(0.5) / decayConstant; m_decayExponent = log(0.5) / decayConstant;
} }
void UserOverrideModel::observe(const std::vector<Gramambular::NodeAnchor>& walkedNodes, void UserOverrideModel::observe(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex,
size_t cursorIndex, const std::string &candidate, double timestamp)
const std::string& candidate, {
double timestamp) {
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex); std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
auto mapIter = m_lruMap.find(key); auto mapIter = m_lruMap.find(key);
if (mapIter == m_lruMap.end()) { if (mapIter == m_lruMap.end())
{
auto keyValuePair = KeyObservationPair(key, Observation()); auto keyValuePair = KeyObservationPair(key, Observation());
Observation& observation = keyValuePair.second; Observation &observation = keyValuePair.second;
observation.update(candidate, timestamp); observation.update(candidate, timestamp);
m_lruList.push_front(keyValuePair); m_lruList.push_front(keyValuePair);
auto listIter = m_lruList.begin(); auto listIter = m_lruList.begin();
auto lruKeyValue = std::pair<std::string, auto lruKeyValue = std::pair<std::string, std::list<KeyObservationPair>::iterator>(key, listIter);
std::list<KeyObservationPair>::iterator>(key, listIter);
m_lruMap.insert(lruKeyValue); m_lruMap.insert(lruKeyValue);
if (m_lruList.size() > m_capacity) { if (m_lruList.size() > m_capacity)
{
auto lastKeyValuePair = m_lruList.end(); auto lastKeyValuePair = m_lruList.end();
--lastKeyValuePair; --lastKeyValuePair;
m_lruMap.erase(lastKeyValuePair->first); m_lruMap.erase(lastKeyValuePair->first);
m_lruList.pop_back(); m_lruList.pop_back();
} }
} else { }
else
{
auto listIter = mapIter->second; auto listIter = mapIter->second;
m_lruList.splice(m_lruList.begin(), m_lruList, listIter); m_lruList.splice(m_lruList.begin(), m_lruList, listIter);
auto& keyValuePair = *listIter; auto &keyValuePair = *listIter;
Observation& observation = keyValuePair.second; Observation &observation = keyValuePair.second;
observation.update(candidate, timestamp); observation.update(candidate, timestamp);
} }
} }
std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor>& walkedNodes, std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex,
size_t cursorIndex, double timestamp)
double timestamp) { {
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex); std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
auto mapIter = m_lruMap.find(key); auto mapIter = m_lruMap.find(key);
if (mapIter == m_lruMap.end()) { if (mapIter == m_lruMap.end())
{
return std::string(); return std::string();
} }
auto listIter = mapIter->second; auto listIter = mapIter->second;
auto& keyValuePair = *listIter; auto &keyValuePair = *listIter;
const Observation& observation = keyValuePair.second; const Observation &observation = keyValuePair.second;
std::string candidate; std::string candidate;
double score = 0.0; double score = 0.0;
for (auto i = observation.overrides.begin(); for (auto i = observation.overrides.begin(); i != observation.overrides.end(); ++i)
i != observation.overrides.end(); {
++i) { const Override &o = i->second;
const Override& o = i->second; double overrideScore = Score(o.count, observation.count, o.timestamp, timestamp, m_decayExponent);
double overrideScore = Score(o.count, if (overrideScore == 0.0)
observation.count, {
o.timestamp,
timestamp,
m_decayExponent);
if (overrideScore == 0.0) {
continue; continue;
} }
if (overrideScore > score) { if (overrideScore > score)
{
candidate = i->first; candidate = i->first;
score = overrideScore; score = overrideScore;
} }
@ -112,21 +115,19 @@ std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor
return candidate; return candidate;
} }
void UserOverrideModel::Observation::update(const std::string& candidate, void UserOverrideModel::Observation::update(const std::string &candidate, double timestamp)
double timestamp) { {
count++; count++;
auto& o = overrides[candidate]; auto &o = overrides[candidate];
o.timestamp = timestamp; o.timestamp = timestamp;
o.count++; o.count++;
} }
static double Score(size_t eventCount, static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda)
size_t totalCount, {
double eventTimestamp,
double timestamp,
double lambda) {
double decay = exp((timestamp - eventTimestamp) * lambda); double decay = exp((timestamp - eventTimestamp) * lambda);
if (decay < DecayThreshould) { if (decay < DecayThreshould)
{
return 0.0; return 0.0;
} }
@ -134,29 +135,31 @@ static double Score(size_t eventCount,
return prob * decay; return prob * decay;
} }
static bool IsEndingPunctuation(const std::string& value) { static bool IsEndingPunctuation(const std::string &value)
return value == "" || value == "" || value== "" || value == "" || {
value == "" || value == "" || value== "" || value == ""; return value == "" || value == "" || value == "" || value == "" || value == "" || value == "" ||
value == "" || value == "";
} }
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor>& walkedNodes, static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex)
size_t cursorIndex) { {
std::stringstream s; std::stringstream s;
std::vector<Gramambular::NodeAnchor> n; std::vector<Gramambular::NodeAnchor> n;
size_t ll = 0; size_t ll = 0;
for (std::vector<Gramambular::NodeAnchor>::const_iterator i = walkedNodes.begin(); for (std::vector<Gramambular::NodeAnchor>::const_iterator i = walkedNodes.begin(); i != walkedNodes.end(); ++i)
i != walkedNodes.end(); {
++i) { const auto &nn = *i;
const auto& nn = *i;
n.push_back(nn); n.push_back(nn);
ll += nn.spanningLength; ll += nn.spanningLength;
if (ll >= cursorIndex) { if (ll >= cursorIndex)
{
break; break;
} }
} }
std::vector<Gramambular::NodeAnchor>::const_reverse_iterator r = n.rbegin(); std::vector<Gramambular::NodeAnchor>::const_reverse_iterator r = n.rbegin();
if (r == n.rend()) { if (r == n.rend())
{
return ""; return "";
} }
@ -165,40 +168,44 @@ static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor>&
s.clear(); s.clear();
s.str(std::string()); s.str(std::string());
if (r != n.rend()) { if (r != n.rend())
{
std::string value = (*r).node->currentKeyValue().value; std::string value = (*r).node->currentKeyValue().value;
if (IsEndingPunctuation(value)) { if (IsEndingPunctuation(value))
{
s << "()"; s << "()";
r = n.rend(); r = n.rend();
} else { }
s << "(" else
<< (*r).node->currentKeyValue().key {
<< "," s << "(" << (*r).node->currentKeyValue().key << "," << value << ")";
<< value
<< ")";
++r; ++r;
} }
} else { }
else
{
s << "()"; s << "()";
} }
std::string prev = s.str(); std::string prev = s.str();
s.clear(); s.clear();
s.str(std::string()); s.str(std::string());
if (r != n.rend()) { if (r != n.rend())
{
std::string value = (*r).node->currentKeyValue().value; std::string value = (*r).node->currentKeyValue().value;
if (IsEndingPunctuation(value)) { if (IsEndingPunctuation(value))
{
s << "()"; s << "()";
r = n.rend(); r = n.rend();
} else { }
s << "(" else
<< (*r).node->currentKeyValue().key {
<< "," s << "(" << (*r).node->currentKeyValue().key << "," << value << ")";
<< value
<< ")";
++r; ++r;
} }
} else { }
else
{
s << "()"; s << "()";
} }
std::string anterior = s.str(); std::string anterior = s.str();

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef USEROVERRIDEMODEL_H #ifndef USEROVERRIDEMODEL_H
@ -25,37 +32,41 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Gramambular.h" #include "Gramambular.h"
namespace vChewing { namespace vChewing
{
using namespace Gramambular; using namespace Gramambular;
class UserOverrideModel { class UserOverrideModel
public: {
public:
UserOverrideModel(size_t capacity, double decayConstant); UserOverrideModel(size_t capacity, double decayConstant);
void observe(const std::vector<Gramambular::NodeAnchor>& walkedNodes, void observe(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex,
size_t cursorIndex, const std::string &candidate, double timestamp);
const std::string& candidate,
double timestamp);
std::string suggest(const std::vector<Gramambular::NodeAnchor>& walkedNodes, std::string suggest(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex, double timestamp);
size_t cursorIndex,
double timestamp);
private: private:
struct Override { struct Override
{
size_t count; size_t count;
double timestamp; double timestamp;
Override() : count(0), timestamp(0.0) {} Override() : count(0), timestamp(0.0)
{
}
}; };
struct Observation { struct Observation
{
size_t count; size_t count;
std::map<std::string, Override> overrides; std::map<std::string, Override> overrides;
Observation() : count(0) {} Observation() : count(0)
void update(const std::string& candidate, double timestamp); {
}
void update(const std::string &candidate, double timestamp);
}; };
typedef std::pair<std::string, Observation> KeyObservationPair; typedef std::pair<std::string, Observation> KeyObservationPair;
@ -69,4 +80,3 @@ private:
}; // namespace vChewing }; // namespace vChewing
#endif #endif

View File

@ -1,35 +1,43 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef USERPHRASESLM_H #ifndef USERPHRASESLM_H
#define USERPHRASESLM_H #define USERPHRASESLM_H
#include <string>
#include <map>
#include <iostream>
#include "LanguageModel.h" #include "LanguageModel.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing { namespace vChewing
{
class UserPhrasesLM : public Gramambular::LanguageModel class UserPhrasesLM : public Gramambular::LanguageModel
{ {
public: public:
UserPhrasesLM(); UserPhrasesLM();
~UserPhrasesLM(); ~UserPhrasesLM();
@ -38,21 +46,27 @@ public:
void close(); void close();
void dump(); void dump();
virtual bool allowConsolidation() { virtual bool allowConsolidation()
{
return true; return true;
} }
virtual float overridedValue() { virtual float overridedValue()
{
return 0.0; return 0.0;
} }
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key); virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey,
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const std::string& key); const std::string &key);
virtual bool hasUnigramsForKey(const std::string& key); virtual const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
virtual bool hasUnigramsForKey(const std::string &key);
protected: protected:
struct Row { struct Row
Row(std::string_view& k, std::string_view& v) : key(k), value(v) {} {
Row(std::string_view &k, std::string_view &v) : key(k), value(v)
{
}
std::string_view key; std::string_view key;
std::string_view value; std::string_view value;
}; };
@ -63,6 +77,6 @@ protected:
size_t length; size_t length;
}; };
} } // namespace vChewing
#endif #endif

View File

@ -1,53 +1,60 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include "UserPhrasesLM.h" #include "UserPhrasesLM.h"
#include "vChewing-Swift.h" #include "vChewing-Swift.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h> #include <fcntl.h>
#include <fstream> #include <fstream>
#include <unistd.h> #include <sys/mman.h>
#include <sys/stat.h>
#include <syslog.h> #include <syslog.h>
#include <unistd.h>
#include "KeyValueBlobReader.h" #include "KeyValueBlobReader.h"
#include "LMConsolidator.h" #include "LMConsolidator.h"
namespace vChewing { namespace vChewing
{
UserPhrasesLM::UserPhrasesLM() UserPhrasesLM::UserPhrasesLM() : fd(-1), data(0), length(0)
: fd(-1)
, data(0)
, length(0)
{ {
} }
UserPhrasesLM::~UserPhrasesLM() UserPhrasesLM::~UserPhrasesLM()
{ {
if (data) { if (data)
{
close(); close();
} }
} }
bool UserPhrasesLM::isLoaded() bool UserPhrasesLM::isLoaded()
{ {
if (data) { if (data)
{
return true; return true;
} }
return false; return false;
@ -55,23 +62,27 @@ bool UserPhrasesLM::isLoaded()
bool UserPhrasesLM::open(const char *path) bool UserPhrasesLM::open(const char *path)
{ {
if (data) { if (data)
{
return false; return false;
} }
if (allowConsolidation()) { if (allowConsolidation())
{
LMConsolidator::FixEOF(path); LMConsolidator::FixEOF(path);
LMConsolidator::ConsolidateContent(path, true); LMConsolidator::ConsolidateContent(path, true);
} }
fd = ::open(path, O_RDONLY); fd = ::open(path, O_RDONLY);
if (fd == -1) { if (fd == -1)
{
printf("open:: file not exist"); printf("open:: file not exist");
return false; return false;
} }
struct stat sb; struct stat sb;
if (fstat(fd, &sb) == -1) { if (fstat(fd, &sb) == -1)
{
printf("open:: cannot open file"); printf("open:: cannot open file");
return false; return false;
} }
@ -79,22 +90,27 @@ bool UserPhrasesLM::open(const char *path)
length = (size_t)sb.st_size; length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (!data) { if (!data)
{
::close(fd); ::close(fd);
return false; return false;
} }
KeyValueBlobReader reader(static_cast<char*>(data), length); KeyValueBlobReader reader(static_cast<char *>(data), length);
KeyValueBlobReader::KeyValue keyValue; KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state; KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) { while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading. {
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF
// reading.
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key); keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
} }
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
if (state == KeyValueBlobReader::State::ERROR) { if (state == KeyValueBlobReader::State::ERROR)
{
// close(); // close();
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n"); if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n");
// return false; // return false;
} }
return true; return true;
@ -102,7 +118,8 @@ bool UserPhrasesLM::open(const char *path)
void UserPhrasesLM::close() void UserPhrasesLM::close()
{ {
if (data) { if (data)
{
munmap(data, length); munmap(data, length);
::close(fd); ::close(fd);
data = 0; data = 0;
@ -113,26 +130,31 @@ void UserPhrasesLM::close()
void UserPhrasesLM::dump() void UserPhrasesLM::dump()
{ {
for (const auto& entry : keyRowMap) { for (const auto &entry : keyRowMap)
const std::vector<Row>& rows = entry.second; {
for (const auto& row : rows) { const std::vector<Row> &rows = entry.second;
for (const auto &row : rows)
{
std::cerr << row.key << " " << row.value << "\n"; std::cerr << row.key << " " << row.value << "\n";
} }
} }
} }
const std::vector<Gramambular::Bigram> UserPhrasesLM::bigramsForKeys(const std::string& preceedingKey, const std::string& key) const std::vector<Gramambular::Bigram> UserPhrasesLM::bigramsForKeys(const std::string &preceedingKey,
const std::string &key)
{ {
return std::vector<Gramambular::Bigram>(); return std::vector<Gramambular::Bigram>();
} }
const std::vector<Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std::string& key) const std::vector<Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std::string &key)
{ {
std::vector<Gramambular::Unigram> v; std::vector<Gramambular::Unigram> v;
auto iter = keyRowMap.find(key); auto iter = keyRowMap.find(key);
if (iter != keyRowMap.end()) { if (iter != keyRowMap.end())
const std::vector<Row>& rows = iter->second; {
for (const auto& row : rows) { const std::vector<Row> &rows = iter->second;
for (const auto &row : rows)
{
Gramambular::Unigram g; Gramambular::Unigram g;
g.keyValue.key = row.key; g.keyValue.key = row.key;
g.keyValue.value = row.value; g.keyValue.value = row.value;
@ -144,9 +166,9 @@ const std::vector<Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std:
return v; return v;
} }
bool UserPhrasesLM::hasUnigramsForKey(const std::string& key) bool UserPhrasesLM::hasUnigramsForKey(const std::string &key)
{ {
return keyRowMap.find(key) != keyRowMap.end(); return keyRowMap.find(key) != keyRowMap.end();
} }
}; // namespace vChewing }; // namespace vChewing

View File

@ -1,24 +1,31 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#import <Foundation/Foundation.h>
#import "KeyHandler.h" #import "KeyHandler.h"
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN NS_ASSUME_NONNULL_BEGIN
@ -33,8 +40,13 @@ NS_ASSUME_NONNULL_BEGIN
+ (BOOL)checkIfSpecifiedUserDataFolderValid:(NSString *)folderPath; + (BOOL)checkIfSpecifiedUserDataFolderValid:(NSString *)folderPath;
+ (NSString *)dataFolderPath:(bool)isDefaultFolder NS_SWIFT_NAME(dataFolderPath(isDefaultFolder:)); + (NSString *)dataFolderPath:(bool)isDefaultFolder NS_SWIFT_NAME(dataFolderPath(isDefaultFolder:));
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase inputMode:(InputMode)mode key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:)); + (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase
+ (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating areWeDeleting:(BOOL)areWeDeleting; inputMode:(InputMode)mode
key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:));
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
inputMode:(InputMode)mode
areWeDuplicating:(BOOL)areWeDuplicating
areWeDeleting:(BOOL)areWeDeleting;
+ (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled; + (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled;
+ (void)setCNSEnabled:(BOOL)cnsEnabled; + (void)setCNSEnabled:(BOOL)cnsEnabled;
+ (void)setSymbolEnabled:(BOOL)symbolEnabled; + (void)setSymbolEnabled:(BOOL)symbolEnabled;

View File

@ -1,26 +1,33 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#import "mgrLangModel.h" #import "mgrLangModel.h"
#import "LMConsolidator.h"
#import "mgrLangModel_Privates.h" #import "mgrLangModel_Privates.h"
#import "vChewing-Swift.h" #import "vChewing-Swift.h"
#import "LMConsolidator.h"
static const int kUserOverrideModelCapacity = 500; static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0; static const double kObservedOverrideHalflife = 5400.0;
@ -54,70 +61,90 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
+ (void)loadDataModels + (void)loadDataModels
{ {
if (!gLangModelCHT.isDataModelLoaded()) { if (!gLangModelCHT.isDataModelLoaded())
{
LTLoadLanguageModelFile(@"data-cht", gLangModelCHT); LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
} }
if (!gLangModelCHT.isMiscDataLoaded()) { if (!gLangModelCHT.isMiscDataLoaded())
gLangModelCHT.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); {
gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
} }
if (!gLangModelCHT.isSymbolDataLoaded()){ if (!gLangModelCHT.isSymbolDataLoaded())
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); {
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
} }
if (!gLangModelCHT.isCNSDataLoaded()){ if (!gLangModelCHT.isCNSDataLoaded())
gLangModelCHT.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); {
gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
} }
// ----------------- // -----------------
if (!gLangModelCHS.isDataModelLoaded()) { if (!gLangModelCHS.isDataModelLoaded())
{
LTLoadLanguageModelFile(@"data-chs", gLangModelCHS); LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
} }
if (!gLangModelCHS.isMiscDataLoaded()) { if (!gLangModelCHS.isMiscDataLoaded())
gLangModelCHS.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); {
gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
} }
if (!gLangModelCHS.isSymbolDataLoaded()){ if (!gLangModelCHS.isSymbolDataLoaded())
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); {
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
} }
if (!gLangModelCHS.isCNSDataLoaded()){ if (!gLangModelCHS.isCNSDataLoaded())
gLangModelCHS.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); {
gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
} }
} }
+ (void)loadDataModel:(InputMode)mode + (void)loadDataModel:(InputMode)mode
{ {
if ([mode isEqualToString:imeModeCHT]) { if ([mode isEqualToString:imeModeCHT])
if (!gLangModelCHT.isDataModelLoaded()) { {
if (!gLangModelCHT.isDataModelLoaded())
{
LTLoadLanguageModelFile(@"data-cht", gLangModelCHT); LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
} }
if (!gLangModelCHT.isMiscDataLoaded()) { if (!gLangModelCHT.isMiscDataLoaded())
gLangModelCHT.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); {
gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
} }
if (!gLangModelCHT.isSymbolDataLoaded()){ if (!gLangModelCHT.isSymbolDataLoaded())
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); {
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
} }
if (!gLangModelCHT.isCNSDataLoaded()){ if (!gLangModelCHT.isCNSDataLoaded())
gLangModelCHT.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); {
gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
} }
} }
if ([mode isEqualToString:imeModeCHS]) { if ([mode isEqualToString:imeModeCHS])
if (!gLangModelCHS.isDataModelLoaded()) { {
if (!gLangModelCHS.isDataModelLoaded())
{
LTLoadLanguageModelFile(@"data-chs", gLangModelCHS); LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
} }
if (!gLangModelCHS.isMiscDataLoaded()) { if (!gLangModelCHS.isMiscDataLoaded())
gLangModelCHS.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]); {
gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
} }
if (!gLangModelCHS.isSymbolDataLoaded()){ if (!gLangModelCHS.isSymbolDataLoaded())
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]); {
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
} }
if (!gLangModelCHS.isCNSDataLoaded()){ if (!gLangModelCHS.isCNSDataLoaded())
gLangModelCHS.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]); {
gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
} }
} }
} }
+ (void)loadUserPhrases + (void)loadUserPhrases
{ {
gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String], [[self excludedPhrasesDataPath:imeModeCHT] UTF8String]); gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String],
gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String], [[self excludedPhrasesDataPath:imeModeCHS] UTF8String]); [[self excludedPhrasesDataPath:imeModeCHT] UTF8String]);
gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String],
[[self excludedPhrasesDataPath:imeModeCHS] UTF8String]);
gLangModelCHT.loadUserSymbolData([[self userSymbolDataPath:imeModeCHT] UTF8String]); gLangModelCHT.loadUserSymbolData([[self userSymbolDataPath:imeModeCHT] UTF8String]);
gLangModelCHS.loadUserSymbolData([[self userSymbolDataPath:imeModeCHS] UTF8String]); gLangModelCHS.loadUserSymbolData([[self userSymbolDataPath:imeModeCHS] UTF8String]);
} }
@ -139,19 +166,26 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
NSString *folderPath = [self dataFolderPath:false]; NSString *folderPath = [self dataFolderPath:false];
BOOL isFolder = NO; BOOL isFolder = NO;
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder]; BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
if (folderExist && !isFolder) { if (folderExist && !isFolder)
{
NSError *error = nil; NSError *error = nil;
[[NSFileManager defaultManager] removeItemAtPath:folderPath error:&error]; [[NSFileManager defaultManager] removeItemAtPath:folderPath error:&error];
if (error) { if (error)
{
NSLog(@"Failed to remove folder %@", error); NSLog(@"Failed to remove folder %@", error);
return NO; return NO;
} }
folderExist = NO; folderExist = NO;
} }
if (!folderExist) { if (!folderExist)
{
NSError *error = nil; NSError *error = nil;
[[NSFileManager defaultManager] createDirectoryAtPath:folderPath withIntermediateDirectories:YES attributes:nil error:&error]; [[NSFileManager defaultManager] createDirectoryAtPath:folderPath
if (error) { withIntermediateDirectories:YES
attributes:nil
error:&error];
if (error)
{
NSLog(@"Failed to create folder %@", error); NSLog(@"Failed to create folder %@", error);
return NO; return NO;
} }
@ -163,26 +197,34 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
{ {
BOOL isFolder = NO; BOOL isFolder = NO;
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder]; BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
if ((folderExist && !isFolder) || (!folderExist)) { if ((folderExist && !isFolder) || (!folderExist))
{
return NO; return NO;
} }
return YES; return YES;
} }
+ (BOOL)ensureFileExists:(NSString *)filePath populateWithTemplate:(NSString *)templateBasename extension:(NSString *)ext + (BOOL)ensureFileExists:(NSString *)filePath
populateWithTemplate:(NSString *)templateBasename
extension:(NSString *)ext
{ {
if (![[NSFileManager defaultManager] fileExistsAtPath:filePath]) { if (![[NSFileManager defaultManager] fileExistsAtPath:filePath])
{
NSURL *templateURL = [[NSBundle mainBundle] URLForResource:templateBasename withExtension:ext]; NSURL *templateURL = [[NSBundle mainBundle] URLForResource:templateBasename withExtension:ext];
NSData *templateData; NSData *templateData;
if (templateURL) { if (templateURL)
{
templateData = [NSData dataWithContentsOfURL:templateURL]; templateData = [NSData dataWithContentsOfURL:templateURL];
} else { }
else
{
templateData = [@"" dataUsingEncoding:NSUTF8StringEncoding]; templateData = [@"" dataUsingEncoding:NSUTF8StringEncoding];
} }
BOOL result = [templateData writeToFile:filePath atomically:YES]; BOOL result = [templateData writeToFile:filePath atomically:YES];
if (!result) { if (!result)
{
NSLog(@"Failed to write file"); NSLog(@"Failed to write file");
return NO; return NO;
} }
@ -192,36 +234,76 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
+ (BOOL)checkIfUserLanguageModelFilesExist + (BOOL)checkIfUserLanguageModelFilesExist
{ {
if (![self checkIfUserDataFolderExists]) return NO; if (![self checkIfUserDataFolderExists])
if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHS] populateWithTemplate:kUserDataTemplateName extension:kTemplateExtension]) return NO; return NO;
if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHT] populateWithTemplate:kUserDataTemplateName extension:kTemplateExtension]) return NO; if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHS]
if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHS] populateWithTemplate:kUserAssDataTemplateName extension:kTemplateExtension]) return NO; populateWithTemplate:kUserDataTemplateName
if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHT] populateWithTemplate:kUserAssDataTemplateName extension:kTemplateExtension]) return NO; extension:kTemplateExtension])
if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHS] populateWithTemplate:kExcludedPhrasesvChewingTemplateName extension:kTemplateExtension]) return NO; return NO;
if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHT] populateWithTemplate:kExcludedPhrasesvChewingTemplateName extension:kTemplateExtension]) return NO; if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHT]
if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHS] populateWithTemplate:kPhraseReplacementTemplateName extension:kTemplateExtension]) return NO; populateWithTemplate:kUserDataTemplateName
if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHT] populateWithTemplate:kPhraseReplacementTemplateName extension:kTemplateExtension]) return NO; extension:kTemplateExtension])
if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHT] populateWithTemplate:kUserSymbolDataTemplateName extension:kTemplateExtension]) return NO; return NO;
if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHS] populateWithTemplate:kUserSymbolDataTemplateName extension:kTemplateExtension]) return NO; if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHS]
populateWithTemplate:kUserAssDataTemplateName
extension:kTemplateExtension])
return NO;
if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHT]
populateWithTemplate:kUserAssDataTemplateName
extension:kTemplateExtension])
return NO;
if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHS]
populateWithTemplate:kExcludedPhrasesvChewingTemplateName
extension:kTemplateExtension])
return NO;
if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHT]
populateWithTemplate:kExcludedPhrasesvChewingTemplateName
extension:kTemplateExtension])
return NO;
if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHS]
populateWithTemplate:kPhraseReplacementTemplateName
extension:kTemplateExtension])
return NO;
if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHT]
populateWithTemplate:kPhraseReplacementTemplateName
extension:kTemplateExtension])
return NO;
if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHT]
populateWithTemplate:kUserSymbolDataTemplateName
extension:kTemplateExtension])
return NO;
if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHS]
populateWithTemplate:kUserSymbolDataTemplateName
extension:kTemplateExtension])
return NO;
return YES; return YES;
} }
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase inputMode:(InputMode)mode key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:)) + (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase
inputMode:(InputMode)mode
key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:))
{ {
string unigramKey = string(key.UTF8String); string unigramKey = string(key.UTF8String);
vector<vChewing::Unigram> unigrams = [mode isEqualToString:imeModeCHT] ? gLangModelCHT.unigramsForKey(unigramKey): gLangModelCHS.unigramsForKey(unigramKey); vector<vChewing::Unigram> unigrams = [mode isEqualToString:imeModeCHT] ? gLangModelCHT.unigramsForKey(unigramKey)
: gLangModelCHS.unigramsForKey(unigramKey);
string userPhraseString = string(userPhrase.UTF8String); string userPhraseString = string(userPhrase.UTF8String);
for (auto unigram: unigrams) { for (auto unigram : unigrams)
if (unigram.keyValue.value == userPhraseString) { {
if (unigram.keyValue.value == userPhraseString)
{
return YES; return YES;
} }
} }
return NO; return NO;
} }
+ (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating areWeDeleting:(BOOL)areWeDeleting + (BOOL)writeUserPhrase:(NSString *)userPhrase
inputMode:(InputMode)mode
areWeDuplicating:(BOOL)areWeDuplicating
areWeDeleting:(BOOL)areWeDeleting
{ {
if (![self checkIfUserLanguageModelFilesExist]) { if (![self checkIfUserLanguageModelFilesExist])
{
return NO; return NO;
} }
@ -233,7 +315,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
// [currentMarkedPhrase appendString:@"\n"]; // [currentMarkedPhrase appendString:@"\n"];
// } // }
[currentMarkedPhrase appendString:userPhrase]; [currentMarkedPhrase appendString:userPhrase];
if (areWeDuplicating && !areWeDeleting) { if (areWeDuplicating && !areWeDeleting)
{
// Do not use ASCII characters to comment here. // Do not use ASCII characters to comment here.
// Otherwise, it will be scrambled by cnvHYPYtoBPMF module shipped in the vChewing Phrase Editor. // Otherwise, it will be scrambled by cnvHYPYtoBPMF module shipped in the vChewing Phrase Editor.
[currentMarkedPhrase appendString:@"\t#𝙾𝚟𝚎𝚛𝚛𝚒𝚍𝚎"]; [currentMarkedPhrase appendString:@"\t#𝙾𝚟𝚎𝚛𝚛𝚒𝚍𝚎"];
@ -241,7 +324,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
[currentMarkedPhrase appendString:@"\n"]; [currentMarkedPhrase appendString:@"\n"];
NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path]; NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path];
if (!writeFile) { if (!writeFile)
{
return NO; return NO;
} }
[writeFile seekToEndOfFile]; [writeFile seekToEndOfFile];
@ -249,12 +333,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
[writeFile writeData:data]; [writeFile writeData:data];
[writeFile closeFile]; [writeFile closeFile];
// We enforce the format consolidation here, since the pragma header will let the UserPhraseLM bypasses the consolidating process on load. // We enforce the format consolidation here, since the pragma header will let the UserPhraseLM bypasses the
// consolidating process on load.
vChewing::LMConsolidator::ConsolidateContent([path UTF8String], false); vChewing::LMConsolidator::ConsolidateContent([path UTF8String], false);
// We use FSEventStream to monitor the change of the user phrase folder, // We use FSEventStream to monitor the change of the user phrase folder,
// so we don't have to load data here unless FSEventStream is disabled by user. // so we don't have to load data here unless FSEventStream is disabled by user.
if (!mgrPrefs.shouldAutoReloadUserDataFiles) { if (!mgrPrefs.shouldAutoReloadUserDataFiles)
{
[self loadUserPhrases]; [self loadUserPhrases];
} }
return YES; return YES;
@ -263,15 +349,21 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
+ (NSString *)dataFolderPath:(bool)isDefaultFolder + (NSString *)dataFolderPath:(bool)isDefaultFolder
{ {
// 此處不能用「~」來取代當前使用者目錄名稱。不然的話,一旦輸入法被系統的沙箱干預的話,則反而會定位到沙箱目錄內。 // 此處不能用「~」來取代當前使用者目錄名稱。不然的話,一旦輸入法被系統的沙箱干預的話,則反而會定位到沙箱目錄內。
NSString *appSupportPath = [NSFileManager.defaultManager URLsForDirectory:NSApplicationSupportDirectory inDomains:NSUserDomainMask][0].path; NSString *appSupportPath = [NSFileManager.defaultManager URLsForDirectory:NSApplicationSupportDirectory
inDomains:NSUserDomainMask][0].path;
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"].stringByExpandingTildeInPath; NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"].stringByExpandingTildeInPath;
if (mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath == userDictPath || isDefaultFolder) { if (mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath == userDictPath || isDefaultFolder)
{
return userDictPath; return userDictPath;
} }
if ([mgrPrefs ifSpecifiedUserDataPathExistsInPlist]) { if ([mgrPrefs ifSpecifiedUserDataPathExistsInPlist])
if ([self checkIfSpecifiedUserDataFolderValid:mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath]) { {
if ([self checkIfSpecifiedUserDataFolderValid:mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath])
{
return mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath; return mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath;
} else { }
else
{
[NSUserDefaults.standardUserDefaults removeObjectForKey:@"UserDataFolderSpecified"]; [NSUserDefaults.standardUserDefaults removeObjectForKey:@"UserDataFolderSpecified"];
} }
} }
@ -286,13 +378,15 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
+ (NSString *)userSymbolDataPath:(InputMode)mode; + (NSString *)userSymbolDataPath:(InputMode)mode;
{ {
NSString *fileName = [mode isEqualToString:imeModeCHT] ? @"usersymbolphrases-cht.txt" : @"usersymbolphrases-chs.txt"; NSString *fileName =
[mode isEqualToString:imeModeCHT] ? @"usersymbolphrases-cht.txt" : @"usersymbolphrases-chs.txt";
return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName]; return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
} }
+ (NSString *)userAssociatedPhrasesDataPath:(InputMode)mode; + (NSString *)userAssociatedPhrasesDataPath:(InputMode)mode;
{ {
NSString *fileName = [mode isEqualToString:imeModeCHT] ? @"associatedPhrases-cht.txt" : @"associatedPhrases-chs.txt"; NSString *fileName =
[mode isEqualToString:imeModeCHT] ? @"associatedPhrases-cht.txt" : @"associatedPhrases-chs.txt";
return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName]; return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
} }
@ -304,11 +398,12 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
+ (NSString *)phraseReplacementDataPath:(InputMode)mode; + (NSString *)phraseReplacementDataPath:(InputMode)mode;
{ {
NSString *fileName = [mode isEqualToString:imeModeCHT] ? @"phrases-replacement-cht.txt" : @"phrases-replacement-chs.txt"; NSString *fileName =
[mode isEqualToString:imeModeCHT] ? @"phrases-replacement-cht.txt" : @"phrases-replacement-chs.txt";
return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName]; return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
} }
+ (vChewing::LMInstantiator *)lmCHT + (vChewing::LMInstantiator *)lmCHT
{ {
return &gLangModelCHT; return &gLangModelCHT;
} }

View File

@ -1,33 +1,40 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#import "mgrLangModel.h"
#import "UserOverrideModel.h"
#import "LMInstantiator.h" #import "LMInstantiator.h"
#import "UserOverrideModel.h"
#import "mgrLangModel.h"
NS_ASSUME_NONNULL_BEGIN NS_ASSUME_NONNULL_BEGIN
@interface mgrLangModel () @interface mgrLangModel ()
@property (class, readonly, nonatomic) vChewing::LMInstantiator *lmCHT; @property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHT;
@property (class, readonly, nonatomic) vChewing::LMInstantiator *lmCHS; @property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHS;
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS; @property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS;
@property (class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT; @property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT;
@end @end
NS_ASSUME_NONNULL_END NS_ASSUME_NONNULL_END

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef BIGRAM_H_ #ifndef BIGRAM_H_
@ -24,39 +31,42 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Gramambular { namespace Gramambular
class Bigram { {
public: class Bigram
{
public:
Bigram(); Bigram();
KeyValuePair preceedingKeyValue; KeyValuePair preceedingKeyValue;
KeyValuePair keyValue; KeyValuePair keyValue;
double score; double score;
bool operator==(const Bigram& another) const; bool operator==(const Bigram &another) const;
bool operator<(const Bigram& another) const; bool operator<(const Bigram &another) const;
}; };
inline std::ostream& operator<<(std::ostream& stream, const Bigram& gram) { inline std::ostream &operator<<(std::ostream &stream, const Bigram &gram)
{
std::streamsize p = stream.precision(); std::streamsize p = stream.precision();
stream.precision(6); stream.precision(6);
stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," << gram.score << ")";
<< gram.score << ")";
stream.precision(p); stream.precision(p);
return stream; return stream;
} }
inline std::ostream& operator<<(std::ostream& stream, inline std::ostream &operator<<(std::ostream &stream, const std::vector<Bigram> &grams)
const std::vector<Bigram>& grams) { {
stream << "[" << grams.size() << "]=>{"; stream << "[" << grams.size() << "]=>{";
size_t index = 0; size_t index = 0;
for (std::vector<Bigram>::const_iterator gi = grams.begin(); for (std::vector<Bigram>::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index)
gi != grams.end(); ++gi, ++index) { {
stream << index << "=>"; stream << index << "=>";
stream << *gi; stream << *gi;
if (gi + 1 != grams.end()) { if (gi + 1 != grams.end())
{
stream << ","; stream << ",";
} }
} }
@ -65,20 +75,29 @@ inline std::ostream& operator<<(std::ostream& stream,
return stream; return stream;
} }
inline Bigram::Bigram() : score(0.0) {} inline Bigram::Bigram() : score(0.0)
{
inline bool Bigram::operator==(const Bigram& another) const {
return preceedingKeyValue == another.preceedingKeyValue &&
keyValue == another.keyValue && score == another.score;
} }
inline bool Bigram::operator<(const Bigram& another) const { inline bool Bigram::operator==(const Bigram &another) const
if (preceedingKeyValue < another.preceedingKeyValue) { {
return preceedingKeyValue == another.preceedingKeyValue && keyValue == another.keyValue && score == another.score;
}
inline bool Bigram::operator<(const Bigram &another) const
{
if (preceedingKeyValue < another.preceedingKeyValue)
{
return true; return true;
} else if (preceedingKeyValue == another.preceedingKeyValue) { }
if (keyValue < another.keyValue) { else if (preceedingKeyValue == another.preceedingKeyValue)
{
if (keyValue < another.keyValue)
{
return true; return true;
} else if (keyValue == another.keyValue) { }
else if (keyValue == another.keyValue)
{
return score < another.score; return score < another.score;
} }
return false; return false;
@ -86,7 +105,6 @@ inline bool Bigram::operator<(const Bigram& another) const {
return false; return false;
} }
} // namespace Gramambular } // namespace Gramambular
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef BLOCKREADINGBUILDER_H_ #ifndef BLOCKREADINGBUILDER_H_
@ -26,35 +33,36 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Grid.h" #include "Grid.h"
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Gramambular { namespace Gramambular
{
class BlockReadingBuilder { class BlockReadingBuilder
public: {
explicit BlockReadingBuilder(LanguageModel* lm); public:
explicit BlockReadingBuilder(LanguageModel *lm);
void clear(); void clear();
size_t length() const; size_t length() const;
size_t cursorIndex() const; size_t cursorIndex() const;
void setCursorIndex(size_t newIndex); void setCursorIndex(size_t newIndex);
void insertReadingAtCursor(const std::string& reading); void insertReadingAtCursor(const std::string &reading);
bool deleteReadingBeforeCursor(); // backspace bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete bool deleteReadingAfterCursor(); // delete
bool removeHeadReadings(size_t count); bool removeHeadReadings(size_t count);
void setJoinSeparator(const std::string& separator); void setJoinSeparator(const std::string &separator);
const std::string joinSeparator() const; const std::string joinSeparator() const;
std::vector<std::string> readings() const; std::vector<std::string> readings() const;
Grid& grid(); Grid &grid();
protected: protected:
void build(); void build();
static const std::string Join(std::vector<std::string>::const_iterator begin, static const std::string Join(std::vector<std::string>::const_iterator begin,
std::vector<std::string>::const_iterator end, std::vector<std::string>::const_iterator end, const std::string &separator);
const std::string& separator);
// 規定最多可以組成的詞的字數上限為 10 // 規定最多可以組成的詞的字數上限為 10
static const size_t MaximumBuildSpanLength = 10; static const size_t MaximumBuildSpanLength = 10;
@ -63,29 +71,38 @@ protected:
std::vector<std::string> m_readings; std::vector<std::string> m_readings;
Grid m_grid; Grid m_grid;
LanguageModel* m_LM; LanguageModel *m_LM;
std::string m_joinSeparator; std::string m_joinSeparator;
}; };
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* lm) inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *lm) : m_LM(lm), m_cursorIndex(0)
: m_LM(lm), m_cursorIndex(0) {} {
}
inline void BlockReadingBuilder::clear() { inline void BlockReadingBuilder::clear()
{
m_cursorIndex = 0; m_cursorIndex = 0;
m_readings.clear(); m_readings.clear();
m_grid.clear(); m_grid.clear();
} }
inline size_t BlockReadingBuilder::length() const { return m_readings.size(); } inline size_t BlockReadingBuilder::length() const
{
return m_readings.size();
}
inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; } inline size_t BlockReadingBuilder::cursorIndex() const
{
return m_cursorIndex;
}
inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) { inline void BlockReadingBuilder::setCursorIndex(size_t newIndex)
{
m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex; m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex;
} }
inline void BlockReadingBuilder::insertReadingAtCursor( inline void BlockReadingBuilder::insertReadingAtCursor(const std::string &reading)
const std::string& reading) { {
m_readings.insert(m_readings.begin() + m_cursorIndex, reading); m_readings.insert(m_readings.begin() + m_cursorIndex, reading);
m_grid.expandGridByOneAtLocation(m_cursorIndex); m_grid.expandGridByOneAtLocation(m_cursorIndex);
@ -93,42 +110,49 @@ inline void BlockReadingBuilder::insertReadingAtCursor(
m_cursorIndex++; m_cursorIndex++;
} }
inline std::vector<std::string> BlockReadingBuilder::readings() const { inline std::vector<std::string> BlockReadingBuilder::readings() const
{
return m_readings; return m_readings;
} }
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() { inline bool BlockReadingBuilder::deleteReadingBeforeCursor()
if (!m_cursorIndex) { {
if (!m_cursorIndex)
{
return false; return false;
} }
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex);
m_readings.begin() + m_cursorIndex);
m_cursorIndex--; m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex); m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build(); build();
return true; return true;
} }
inline bool BlockReadingBuilder::deleteReadingAfterCursor() { inline bool BlockReadingBuilder::deleteReadingAfterCursor()
if (m_cursorIndex == m_readings.size()) { {
if (m_cursorIndex == m_readings.size())
{
return false; return false;
} }
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1);
m_readings.begin() + m_cursorIndex + 1);
m_grid.shrinkGridByOneAtLocation(m_cursorIndex); m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build(); build();
return true; return true;
} }
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) { inline bool BlockReadingBuilder::removeHeadReadings(size_t count)
if (count > length()) { {
if (count > length())
{
return false; return false;
} }
for (size_t i = 0; i < count; i++) { for (size_t i = 0; i < count; i++)
if (m_cursorIndex) { {
if (m_cursorIndex)
{
m_cursorIndex--; m_cursorIndex--;
} }
m_readings.erase(m_readings.begin(), m_readings.begin() + 1); m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
@ -139,44 +163,56 @@ inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
return true; return true;
} }
// Stores the separator string that build() passes to Join() when combining
// several readings into one key.
inline void BlockReadingBuilder::setJoinSeparator(const std::string &separator)
{
    m_joinSeparator.assign(separator);
}
inline const std::string BlockReadingBuilder::joinSeparator() const { inline const std::string BlockReadingBuilder::joinSeparator() const
{
return m_joinSeparator; return m_joinSeparator;
} }
// Gives mutable access to the builder's grid. The reference refers to the
// builder's own member, not to a copy.
inline Grid &BlockReadingBuilder::grid()
{
    Grid &ownGrid = m_grid;
    return ownGrid;
}
inline void BlockReadingBuilder::build() { inline void BlockReadingBuilder::build()
if (!m_LM) { {
if (!m_LM)
{
return; return;
} }
size_t begin = 0; size_t begin = 0;
size_t end = m_cursorIndex + MaximumBuildSpanLength; size_t end = m_cursorIndex + MaximumBuildSpanLength;
if (m_cursorIndex < MaximumBuildSpanLength) { if (m_cursorIndex < MaximumBuildSpanLength)
{
begin = 0; begin = 0;
} else { }
else
{
begin = m_cursorIndex - MaximumBuildSpanLength; begin = m_cursorIndex - MaximumBuildSpanLength;
} }
if (end > m_readings.size()) { if (end > m_readings.size())
{
end = m_readings.size(); end = m_readings.size();
} }
for (size_t p = begin; p < end; p++) { for (size_t p = begin; p < end; p++)
for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) { {
std::string combinedReading = Join( for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++)
m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); {
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, std::string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
combinedReading)) { if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading))
{
std::vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading); std::vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) { if (unigrams.size() > 0)
{
Node n(combinedReading, unigrams, std::vector<Bigram>()); Node n(combinedReading, unigrams, std::vector<Bigram>());
m_grid.insertNode(n, p, q); m_grid.insertNode(n, p, q);
} }
@ -185,21 +221,22 @@ inline void BlockReadingBuilder::build() {
} }
} }
// Concatenates the strings in [begin, end), placing `separator` between
// consecutive elements (never before the first or after the last).
// An empty range yields an empty string.
inline const std::string BlockReadingBuilder::Join(std::vector<std::string>::const_iterator begin,
                                                   std::vector<std::string>::const_iterator end,
                                                   const std::string &separator)
{
    std::string joined;
    for (std::vector<std::string>::const_iterator it = begin; it != end; ++it)
    {
        // Every element except the first is preceded by the separator.
        if (it != begin)
        {
            joined += separator;
        }
        joined += *it;
    }
    return joined;
}
} // namespace Gramambular } // namespace Gramambular
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef GRAMAMBULAR_H_ #ifndef GRAMAMBULAR_H_

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef GRID_H_ #ifndef GRID_H_
@ -27,15 +34,15 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "NodeAnchor.h" #include "NodeAnchor.h"
#include "Span.h" #include "Span.h"
namespace Gramambular { namespace Gramambular
{
class Grid { class Grid
public: {
public:
void clear(); void clear();
void insertNode(const Node& node, size_t location, size_t spanningLength); void insertNode(const Node &node, size_t location, size_t spanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, const std::string &key);
size_t spanningLength,
const std::string& key);
void expandGridByOneAtLocation(size_t location); void expandGridByOneAtLocation(size_t location);
void shrinkGridByOneAtLocation(size_t location); void shrinkGridByOneAtLocation(size_t location);
@ -49,48 +56,54 @@ public:
// evaluated to that unigram, while all other overlapping nodes will be reset // evaluated to that unigram, while all other overlapping nodes will be reset
// to their initial state (that is, if any of those nodes were "frozen" or // to their initial state (that is, if any of those nodes were "frozen" or
// fixed, they will be unfrozen.) // fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location, NodeAnchor fixNodeSelectedCandidate(size_t location, const std::string &value);
const std::string& value);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
// only boost the unigram that represents the value with an overriding score. // only boost the unigram that represents the value with an overriding score.
// This has the same side effect as fixNodeSelectedCandidate, which is that // This has the same side effect as fixNodeSelectedCandidate, which is that
// all other overlapping nodes will be reset to their initial state. // all other overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location, void overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, float overridingScore);
const std::string& value,
float overridingScore);
std::string dumpDOT() { std::string dumpDOT()
{
std::stringstream sst; std::stringstream sst;
sst << "digraph {" << std::endl; sst << "digraph {" << std::endl;
sst << "graph [ rankdir=LR ];" << std::endl; sst << "graph [ rankdir=LR ];" << std::endl;
sst << "BOS;" << std::endl; sst << "BOS;" << std::endl;
for (size_t p = 0; p < m_spans.size(); p++) { for (size_t p = 0; p < m_spans.size(); p++)
Span& span = m_spans[p]; {
for (size_t ni = 0; ni <= span.maximumLength(); ni++) { Span &span = m_spans[p];
Node* np = span.nodeOfLength(ni); for (size_t ni = 0; ni <= span.maximumLength(); ni++)
if (np) { {
if (!p) { Node *np = span.nodeOfLength(ni);
if (np)
{
if (!p)
{
sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl; sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl;
} }
sst << np->currentKeyValue().value << ";" << std::endl; sst << np->currentKeyValue().value << ";" << std::endl;
if (p + ni < m_spans.size()) { if (p + ni < m_spans.size())
Span& dstSpan = m_spans[p + ni]; {
for (size_t q = 0; q <= dstSpan.maximumLength(); q++) { Span &dstSpan = m_spans[p + ni];
Node* dn = dstSpan.nodeOfLength(q); for (size_t q = 0; q <= dstSpan.maximumLength(); q++)
if (dn) { {
sst << np->currentKeyValue().value << " -> " Node *dn = dstSpan.nodeOfLength(q);
<< dn->currentKeyValue().value << ";" << std::endl; if (dn)
{
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";"
<< std::endl;
} }
} }
} }
if (p + ni == m_spans.size()) { if (p + ni == m_spans.size())
{
sst << np->currentKeyValue().value << " -> " sst << np->currentKeyValue().value << " -> "
<< "EOS;" << std::endl; << "EOS;" << std::endl;
} }
} }
} }
@ -101,18 +114,23 @@ public:
return sst.str(); return sst.str();
} }
protected: protected:
std::vector<Span> m_spans; std::vector<Span> m_spans;
}; };
// Removes every span from the grid, leaving it with a width of zero.
inline void Grid::clear()
{
    m_spans.clear();
}
inline void Grid::insertNode(const Node& node, size_t location, inline void Grid::insertNode(const Node &node, size_t location, size_t spanningLength)
size_t spanningLength) { {
if (location >= m_spans.size()) { if (location >= m_spans.size())
{
size_t diff = location - m_spans.size() + 1; size_t diff = location - m_spans.size() + 1;
for (size_t i = 0; i < diff; i++) { for (size_t i = 0; i < diff; i++)
{
m_spans.push_back(Span()); m_spans.push_back(Span());
} }
} }
@ -120,55 +138,74 @@ inline void Grid::insertNode(const Node& node, size_t location,
m_spans[location].insertNodeOfLength(node, spanningLength); m_spans[location].insertNodeOfLength(node, spanningLength);
} }
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey( inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength,
size_t location, size_t spanningLength, const std::string& key) { const std::string &key)
if (location > m_spans.size()) { {
if (location > m_spans.size())
{
return false; return false;
} }
const Node* n = m_spans[location].nodeOfLength(spanningLength); const Node *n = m_spans[location].nodeOfLength(spanningLength);
if (!n) { if (!n)
{
return false; return false;
} }
return key == n->key(); return key == n->key();
} }
inline void Grid::expandGridByOneAtLocation(size_t location) { inline void Grid::expandGridByOneAtLocation(size_t location)
if (!location || location == m_spans.size()) { {
if (!location || location == m_spans.size())
{
m_spans.insert(m_spans.begin() + location, Span()); m_spans.insert(m_spans.begin() + location, Span());
} else { }
else
{
m_spans.insert(m_spans.begin() + location, Span()); m_spans.insert(m_spans.begin() + location, Span());
for (size_t i = 0; i < location; i++) { for (size_t i = 0; i < location; i++)
{
// zaps overlapping spans // zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(location - i); m_spans[i].removeNodeOfLengthGreaterThan(location - i);
} }
} }
} }
inline void Grid::shrinkGridByOneAtLocation(size_t location) { inline void Grid::shrinkGridByOneAtLocation(size_t location)
if (location >= m_spans.size()) { {
if (location >= m_spans.size())
{
return; return;
} }
m_spans.erase(m_spans.begin() + location); m_spans.erase(m_spans.begin() + location);
for (size_t i = 0; i < location; i++) { for (size_t i = 0; i < location; i++)
{
// zaps overlapping spans // zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(location - i); m_spans[i].removeNodeOfLengthGreaterThan(location - i);
} }
} }
// Returns the number of spans currently held by the grid.
inline size_t Grid::width() const
{
    const size_t spanCount = m_spans.size();
    return spanCount;
}
inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location) { inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location)
{
std::vector<NodeAnchor> result; std::vector<NodeAnchor> result;
if (m_spans.size() && location <= m_spans.size()) { if (m_spans.size() && location <= m_spans.size())
for (size_t i = 0; i < location; i++) { {
Span& span = m_spans[i]; for (size_t i = 0; i < location; i++)
if (i + span.maximumLength() >= location) { {
Node* np = span.nodeOfLength(location - i); Span &span = m_spans[i];
if (np) { if (i + span.maximumLength() >= location)
{
Node *np = span.nodeOfLength(location - i);
if (np)
{
NodeAnchor na; NodeAnchor na;
na.node = np; na.node = np;
na.location = i; na.location = i;
@ -183,21 +220,28 @@ inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location) {
return result; return result;
} }
inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location) { inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location)
{
std::vector<NodeAnchor> result; std::vector<NodeAnchor> result;
if (m_spans.size() && location <= m_spans.size()) { if (m_spans.size() && location <= m_spans.size())
for (size_t i = 0; i < location; i++) { {
Span& span = m_spans[i]; for (size_t i = 0; i < location; i++)
{
Span &span = m_spans[i];
if (i + span.maximumLength() >= location) { if (i + span.maximumLength() >= location)
for (size_t j = 1, m = span.maximumLength(); j <= m; j++) { {
if (i + j < location) { for (size_t j = 1, m = span.maximumLength(); j <= m; j++)
{
if (i + j < location)
{
continue; continue;
} }
Node* np = span.nodeOfLength(j); Node *np = span.nodeOfLength(j);
if (np) { if (np)
{
NodeAnchor na; NodeAnchor na;
na.node = np; na.node = np;
na.location = i; na.location = i;
@ -215,19 +259,22 @@ inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location) {
// For nodes found at the location, fix their currently-selected candidate using // For nodes found at the location, fix their currently-selected candidate using
// the supplied string value. // the supplied string value.
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const std::string &value)
const std::string& value) { {
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location); std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
NodeAnchor node; NodeAnchor node;
for (auto nodeAnchor : nodes) { for (auto nodeAnchor : nodes)
{
auto candidates = nodeAnchor.node->candidates(); auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location. // Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate(); const_cast<Node *>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) { for (size_t i = 0, c = candidates.size(); i < c; ++i)
if (candidates[i].value == value) { {
const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i); if (candidates[i].value == value)
{
const_cast<Node *>(nodeAnchor.node)->selectCandidateAtIndex(i);
node = nodeAnchor; node = nodeAnchor;
break; break;
} }
@ -236,26 +283,28 @@ inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
return node; return node;
} }
inline void Grid::overrideNodeScoreForSelectedCandidate( inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value,
size_t location, const std::string& value, float overridingScore) { float overridingScore)
{
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location); std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) { for (auto nodeAnchor : nodes)
{
auto candidates = nodeAnchor.node->candidates(); auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location. // Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate(); const_cast<Node *>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) { for (size_t i = 0, c = candidates.size(); i < c; ++i)
if (candidates[i].value == value) { {
const_cast<Node*>(nodeAnchor.node) if (candidates[i].value == value)
->selectFloatingCandidateAtIndex(i, overridingScore); {
const_cast<Node *>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
break; break;
} }
} }
} }
} }
} // namespace Gramambular } // namespace Gramambular
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef KEYVALUEPAIR_H_ #ifndef KEYVALUEPAIR_H_
@ -23,36 +30,42 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <ostream> #include <ostream>
#include <string> #include <string>
namespace Gramambular { namespace Gramambular
{
// A plain pair of strings: a key and the value associated with it.
class KeyValuePair
{
public:
    std::string key;
    std::string value;

    // Ordering: compares keys first, then values when the keys are equal.
    bool operator<(const KeyValuePair &another) const;

    // Equality: true when both the keys and the values are equal.
    bool operator==(const KeyValuePair &another) const;
};
inline std::ostream& operator<<(std::ostream& stream, inline std::ostream &operator<<(std::ostream &stream, const KeyValuePair &pair)
const KeyValuePair& pair) { {
stream << "(" << pair.key << "," << pair.value << ")"; stream << "(" << pair.key << "," << pair.value << ")";
return stream; return stream;
} }
// Two pairs are equal when both their keys and their values are equal.
inline bool KeyValuePair::operator==(const KeyValuePair &another) const
{
    if (key != another.key)
    {
        return false;
    }
    return value == another.value;
}
// Orders pairs by key; pairs with equal keys are ordered by value.
inline bool KeyValuePair::operator<(const KeyValuePair &another) const
{
    // Differing keys decide the order on their own.
    if (key != another.key)
    {
        return key < another.key;
    }
    // Equal keys: the values break the tie.
    return value < another.value;
}
} // namespace Gramambular } // namespace Gramambular
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef LANGUAGEMODEL_H_ #ifndef LANGUAGEMODEL_H_
@ -26,18 +33,20 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Bigram.h" #include "Bigram.h"
#include "Unigram.h" #include "Unigram.h"
namespace Gramambular { namespace Gramambular
{
class LanguageModel { class LanguageModel
public: {
virtual ~LanguageModel() {} public:
virtual ~LanguageModel()
{
}
virtual const std::vector<Bigram> bigramsForKeys( virtual const std::vector<Bigram> bigramsForKeys(const std::string &preceedingKey, const std::string &key) = 0;
const std::string& preceedingKey, const std::string& key) = 0; virtual const std::vector<Unigram> unigramsForKey(const std::string &key) = 0;
virtual const std::vector<Unigram> unigramsForKey(const std::string& key) = 0; virtual bool hasUnigramsForKey(const std::string &key) = 0;
virtual bool hasUnigramsForKey(const std::string& key) = 0;
}; };
} // namespace Gramambular } // namespace Gramambular
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef NODE_H_ #ifndef NODE_H_
@ -27,31 +34,31 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "LanguageModel.h" #include "LanguageModel.h"
namespace Gramambular { namespace Gramambular
{
class Node { class Node
public: {
public:
Node(); Node();
Node(const std::string& key, const std::vector<Unigram>& unigrams, Node(const std::string &key, const std::vector<Unigram> &unigrams, const std::vector<Bigram> &bigrams);
const std::vector<Bigram>& bigrams);
void primeNodeWithPreceedingKeyValues( void primeNodeWithPreceedingKeyValues(const std::vector<KeyValuePair> &keyValues);
const std::vector<KeyValuePair>& keyValues);
bool isCandidateFixed() const; bool isCandidateFixed() const;
const std::vector<KeyValuePair>& candidates() const; const std::vector<KeyValuePair> &candidates() const;
void selectCandidateAtIndex(size_t index = 0, bool fix = true); void selectCandidateAtIndex(size_t index = 0, bool fix = true);
void resetCandidate(); void resetCandidate();
void selectFloatingCandidateAtIndex(size_t index, double score); void selectFloatingCandidateAtIndex(size_t index, double score);
const std::string& key() const; const std::string &key() const;
double score() const; double score() const;
double scoreForCandidate(const std::string& candidate) const; double scoreForCandidate(const std::string &candidate) const;
const KeyValuePair currentKeyValue() const; const KeyValuePair currentKeyValue() const;
double highestUnigramScore() const; double highestUnigramScore() const;
protected: protected:
const LanguageModel* m_LM; const LanguageModel *m_LM;
std::string m_key; std::string m_key;
double m_score; double m_score;
@ -59,73 +66,73 @@ protected:
std::vector<Unigram> m_unigrams; std::vector<Unigram> m_unigrams;
std::vector<KeyValuePair> m_candidates; std::vector<KeyValuePair> m_candidates;
std::map<std::string, size_t> m_valueUnigramIndexMap; std::map<std::string, size_t> m_valueUnigramIndexMap;
std::map<KeyValuePair, std::vector<Bigram> > m_preceedingGramBigramMap; std::map<KeyValuePair, std::vector<Bigram>> m_preceedingGramBigramMap;
bool m_candidateFixed; bool m_candidateFixed;
size_t m_selectedUnigramIndex; size_t m_selectedUnigramIndex;
friend std::ostream& operator<<(std::ostream& stream, const Node& node); friend std::ostream &operator<<(std::ostream &stream, const Node &node);
}; };
inline std::ostream& operator<<(std::ostream& stream, const Node& node) { inline std::ostream &operator<<(std::ostream &stream, const Node &node)
stream << "(node,key:" << node.m_key {
<< ",fixed:" << (node.m_candidateFixed ? "true" : "false") stream << "(node,key:" << node.m_key << ",fixed:" << (node.m_candidateFixed ? "true" : "false")
<< ",selected:" << node.m_selectedUnigramIndex << "," << ",selected:" << node.m_selectedUnigramIndex << "," << node.m_unigrams << ")";
<< node.m_unigrams << ")";
return stream; return stream;
} }
inline Node::Node() inline Node::Node() : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0)
: m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {} {
}
inline Node::Node(const std::string& key, const std::vector<Unigram>& unigrams, inline Node::Node(const std::string &key, const std::vector<Unigram> &unigrams, const std::vector<Bigram> &bigrams)
const std::vector<Bigram>& bigrams) : m_key(key), m_unigrams(unigrams), m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0)
: m_key(key), {
m_unigrams(unigrams),
m_candidateFixed(false),
m_selectedUnigramIndex(0),
m_score(0.0) {
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
if (m_unigrams.size()) { if (m_unigrams.size())
{
m_score = m_unigrams[0].score; m_score = m_unigrams[0].score;
} }
size_t i = 0; size_t i = 0;
for (std::vector<Unigram>::const_iterator ui = m_unigrams.begin(); for (std::vector<Unigram>::const_iterator ui = m_unigrams.begin(); ui != m_unigrams.end(); ++ui)
ui != m_unigrams.end(); ++ui) { {
m_valueUnigramIndexMap[(*ui).keyValue.value] = i; m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
i++; i++;
m_candidates.push_back((*ui).keyValue); m_candidates.push_back((*ui).keyValue);
} }
for (std::vector<Bigram>::const_iterator bi = bigrams.begin(); for (std::vector<Bigram>::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi)
bi != bigrams.end(); ++bi) { {
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
} }
} }
inline void Node::primeNodeWithPreceedingKeyValues( inline void Node::primeNodeWithPreceedingKeyValues(const std::vector<KeyValuePair> &keyValues)
const std::vector<KeyValuePair>& keyValues) { {
size_t newIndex = m_selectedUnigramIndex; size_t newIndex = m_selectedUnigramIndex;
double max = m_score; double max = m_score;
if (!isCandidateFixed()) { if (!isCandidateFixed())
for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin(); {
kvi != keyValues.end(); ++kvi) { for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin(); kvi != keyValues.end(); ++kvi)
std::map<KeyValuePair, std::vector<Bigram> >::const_iterator f = {
m_preceedingGramBigramMap.find(*kvi); std::map<KeyValuePair, std::vector<Bigram>>::const_iterator f = m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) { if (f != m_preceedingGramBigramMap.end())
const std::vector<Bigram>& bigrams = (*f).second; {
const std::vector<Bigram> &bigrams = (*f).second;
for (std::vector<Bigram>::const_iterator bi = bigrams.begin(); for (std::vector<Bigram>::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi)
bi != bigrams.end(); ++bi) { {
const Bigram& bigram = *bi; const Bigram &bigram = *bi;
if (bigram.score > max) { if (bigram.score > max)
{
std::map<std::string, size_t>::const_iterator uf = std::map<std::string, size_t>::const_iterator uf =
m_valueUnigramIndexMap.find((*bi).keyValue.value); m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) { if (uf != m_valueUnigramIndexMap.end())
{
newIndex = (*uf).second; newIndex = (*uf).second;
max = bigram.score; max = bigram.score;
} }
@ -135,25 +142,35 @@ inline void Node::primeNodeWithPreceedingKeyValues(
} }
} }
if (m_score != max) { if (m_score != max)
{
m_score = max; m_score = max;
} }
if (newIndex != m_selectedUnigramIndex) { if (newIndex != m_selectedUnigramIndex)
{
m_selectedUnigramIndex = newIndex; m_selectedUnigramIndex = newIndex;
} }
} }
inline bool Node::isCandidateFixed() const { return m_candidateFixed; } inline bool Node::isCandidateFixed() const
{
return m_candidateFixed;
}
inline const std::vector<KeyValuePair>& Node::candidates() const { inline const std::vector<KeyValuePair> &Node::candidates() const
{
return m_candidates; return m_candidates;
} }
inline void Node::selectCandidateAtIndex(size_t index, bool fix) { inline void Node::selectCandidateAtIndex(size_t index, bool fix)
if (index >= m_unigrams.size()) { {
if (index >= m_unigrams.size())
{
m_selectedUnigramIndex = 0; m_selectedUnigramIndex = 0;
} else { }
else
{
m_selectedUnigramIndex = index; m_selectedUnigramIndex = index;
} }
@ -161,53 +178,72 @@ inline void Node::selectCandidateAtIndex(size_t index, bool fix) {
m_score = 99; m_score = 99;
} }
inline void Node::resetCandidate() { inline void Node::resetCandidate()
{
m_selectedUnigramIndex = 0; m_selectedUnigramIndex = 0;
m_candidateFixed = 0; m_candidateFixed = 0;
if (m_unigrams.size()) { if (m_unigrams.size())
{
m_score = m_unigrams[0].score; m_score = m_unigrams[0].score;
} }
} }
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) { inline void Node::selectFloatingCandidateAtIndex(size_t index, double score)
if (index >= m_unigrams.size()) { {
if (index >= m_unigrams.size())
{
m_selectedUnigramIndex = 0; m_selectedUnigramIndex = 0;
} else { }
else
{
m_selectedUnigramIndex = index; m_selectedUnigramIndex = index;
} }
m_candidateFixed = false; m_candidateFixed = false;
m_score = score; m_score = score;
} }
inline const std::string& Node::key() const { return m_key; } inline const std::string &Node::key() const
{
return m_key;
}
inline double Node::score() const { return m_score; } inline double Node::score() const
{
return m_score;
}
inline double Node::scoreForCandidate(const std::string &candidate) const
inline double Node::scoreForCandidate(const std::string& candidate) const { {
for (auto unigram : m_unigrams) { for (auto unigram : m_unigrams)
if (unigram.keyValue.value == candidate) { {
if (unigram.keyValue.value == candidate)
{
return unigram.score; return unigram.score;
} }
} }
return 0.0; return 0.0;
} }
inline double Node::highestUnigramScore() const { inline double Node::highestUnigramScore() const
if (m_unigrams.empty()) { {
if (m_unigrams.empty())
{
return 0.0; return 0.0;
} }
return m_unigrams[0].score; return m_unigrams[0].score;
} }
inline const KeyValuePair Node::currentKeyValue() const { inline const KeyValuePair Node::currentKeyValue() const
if (m_selectedUnigramIndex >= m_unigrams.size()) { {
if (m_selectedUnigramIndex >= m_unigrams.size())
{
return KeyValuePair(); return KeyValuePair();
} else { }
else
{
return m_candidates[m_selectedUnigramIndex]; return m_candidates[m_selectedUnigramIndex];
} }
} }
} // namespace Gramambular } // namespace Gramambular
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef NODEANCHOR_H_ #ifndef NODEANCHOR_H_
@ -24,40 +31,45 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Node.h" #include "Node.h"
namespace Gramambular { namespace Gramambular
{
struct NodeAnchor { struct NodeAnchor
const Node* node = nullptr; {
const Node *node = nullptr;
size_t location = 0; size_t location = 0;
size_t spanningLength = 0; size_t spanningLength = 0;
double accumulatedScore = 0.0; double accumulatedScore = 0.0;
}; };
inline std::ostream& operator<<(std::ostream& stream, inline std::ostream &operator<<(std::ostream &stream, const NodeAnchor &anchor)
const NodeAnchor& anchor) { {
stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),"; stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),";
if (anchor.node) { if (anchor.node)
{
stream << *(anchor.node); stream << *(anchor.node);
} else { }
else
{
stream << "null"; stream << "null";
} }
stream << "}"; stream << "}";
return stream; return stream;
} }
inline std::ostream& operator<<(std::ostream& stream, inline std::ostream &operator<<(std::ostream &stream, const std::vector<NodeAnchor> &anchor)
const std::vector<NodeAnchor>& anchor) { {
for (std::vector<NodeAnchor>::const_iterator i = anchor.begin(); for (std::vector<NodeAnchor>::const_iterator i = anchor.begin(); i != anchor.end(); ++i)
i != anchor.end(); ++i) { {
stream << *i; stream << *i;
if (i + 1 != anchor.end()) { if (i + 1 != anchor.end())
{
stream << "<-"; stream << "<-";
} }
} }
return stream; return stream;
} }
} // namespace Gramambular } // namespace Gramambular
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef SPAN_H_ #ifndef SPAN_H_
@ -26,67 +33,80 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Node.h" #include "Node.h"
namespace Gramambular { namespace Gramambular
class Span { {
public: class Span
{
public:
void clear(); void clear();
void insertNodeOfLength(const Node& node, size_t length); void insertNodeOfLength(const Node &node, size_t length);
void removeNodeOfLengthGreaterThan(size_t length); void removeNodeOfLengthGreaterThan(size_t length);
Node* nodeOfLength(size_t length); Node *nodeOfLength(size_t length);
size_t maximumLength() const; size_t maximumLength() const;
protected: protected:
std::map<size_t, Node> m_lengthNodeMap; std::map<size_t, Node> m_lengthNodeMap;
size_t m_maximumLength = 0; size_t m_maximumLength = 0;
}; };
inline void Span::clear() { inline void Span::clear()
{
m_lengthNodeMap.clear(); m_lengthNodeMap.clear();
m_maximumLength = 0; m_maximumLength = 0;
} }
inline void Span::insertNodeOfLength(const Node& node, size_t length) { inline void Span::insertNodeOfLength(const Node &node, size_t length)
{
m_lengthNodeMap[length] = node; m_lengthNodeMap[length] = node;
if (length > m_maximumLength) { if (length > m_maximumLength)
{
m_maximumLength = length; m_maximumLength = length;
} }
} }
inline void Span::removeNodeOfLengthGreaterThan(size_t length) { inline void Span::removeNodeOfLengthGreaterThan(size_t length)
if (length > m_maximumLength) { {
if (length > m_maximumLength)
{
return; return;
} }
size_t max = 0; size_t max = 0;
std::set<size_t> removeSet; std::set<size_t> removeSet;
for (std::map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), for (std::map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end(); i != e; ++i)
e = m_lengthNodeMap.end(); {
i != e; ++i) { if ((*i).first > length)
if ((*i).first > length) { {
removeSet.insert((*i).first); removeSet.insert((*i).first);
} else { }
if ((*i).first > max) { else
{
if ((*i).first > max)
{
max = (*i).first; max = (*i).first;
} }
} }
} }
for (std::set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); for (std::set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i)
i != e; ++i) { {
m_lengthNodeMap.erase(*i); m_lengthNodeMap.erase(*i);
} }
m_maximumLength = max; m_maximumLength = max;
} }
inline Node* Span::nodeOfLength(size_t length) { inline Node *Span::nodeOfLength(size_t length)
{
std::map<size_t, Node>::iterator f = m_lengthNodeMap.find(length); std::map<size_t, Node>::iterator f = m_lengthNodeMap.find(length);
return f == m_lengthNodeMap.end() ? 0 : &(*f).second; return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
} }
inline size_t Span::maximumLength() const { return m_maximumLength; } inline size_t Span::maximumLength() const
} // namespace Gramambular {
return m_maximumLength;
}
} // namespace Gramambular
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef UNIGRAM_H_ #ifndef UNIGRAM_H_
@ -24,22 +31,25 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "KeyValuePair.h" #include "KeyValuePair.h"
namespace Gramambular { namespace Gramambular
{
class Unigram { class Unigram
public: {
public:
Unigram(); Unigram();
KeyValuePair keyValue; KeyValuePair keyValue;
double score; double score;
bool operator==(const Unigram& another) const; bool operator==(const Unigram &another) const;
bool operator<(const Unigram& another) const; bool operator<(const Unigram &another) const;
static bool ScoreCompare(const Unigram& a, const Unigram& b); static bool ScoreCompare(const Unigram &a, const Unigram &b);
}; };
inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) { inline std::ostream &operator<<(std::ostream &stream, const Unigram &gram)
{
std::streamsize p = stream.precision(); std::streamsize p = stream.precision();
stream.precision(6); stream.precision(6);
stream << "(" << gram.keyValue << "," << gram.score << ")"; stream << "(" << gram.keyValue << "," << gram.score << ")";
@ -47,17 +57,18 @@ inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) {
return stream; return stream;
} }
inline std::ostream& operator<<(std::ostream& stream, inline std::ostream &operator<<(std::ostream &stream, const std::vector<Unigram> &grams)
const std::vector<Unigram>& grams) { {
stream << "[" << grams.size() << "]=>{"; stream << "[" << grams.size() << "]=>{";
size_t index = 0; size_t index = 0;
for (std::vector<Unigram>::const_iterator gi = grams.begin(); for (std::vector<Unigram>::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index)
gi != grams.end(); ++gi, ++index) { {
stream << index << "=>"; stream << index << "=>";
stream << *gi; stream << *gi;
if (gi + 1 != grams.end()) { if (gi + 1 != grams.end())
{
stream << ","; stream << ",";
} }
} }
@ -66,25 +77,32 @@ inline std::ostream& operator<<(std::ostream& stream,
return stream; return stream;
} }
inline Unigram::Unigram() : score(0.0) {} inline Unigram::Unigram() : score(0.0)
{
}
inline bool Unigram::operator==(const Unigram& another) const { inline bool Unigram::operator==(const Unigram &another) const
{
return keyValue == another.keyValue && score == another.score; return keyValue == another.keyValue && score == another.score;
} }
inline bool Unigram::operator<(const Unigram& another) const { inline bool Unigram::operator<(const Unigram &another) const
if (keyValue < another.keyValue) { {
if (keyValue < another.keyValue)
{
return true; return true;
} else if (keyValue == another.keyValue) { }
else if (keyValue == another.keyValue)
{
return score < another.score; return score < another.score;
} }
return false; return false;
} }
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) { inline bool Unigram::ScoreCompare(const Unigram &a, const Unigram &b)
{
return a.score > b.score; return a.score > b.score;
} }
} // namespace Gramambular } // namespace Gramambular
#endif #endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). // Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License). // All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/* /*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated Permission is hereby granted, free of charge, to any person obtaining a copy of
documentation files (the "Software"), to deal in the Software without restriction, including without limitation this software and associated documentation files (the "Software"), to deal in
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and the Software without restriction, including without limitation the rights to
to permit persons to whom the Software is furnished to do so, subject to the following conditions: use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor, 2. No trademark license is granted to use the trade names, trademarks, service
except as required to fulfill notice requirements above. marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#ifndef WALKER_H_ #ifndef WALKER_H_
@ -25,60 +32,65 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Grid.h" #include "Grid.h"
namespace Gramambular { namespace Gramambular
{
class Walker { class Walker
public: {
explicit Walker(Grid* inGrid); public:
const std::vector<NodeAnchor> reverseWalk(size_t location, explicit Walker(Grid *inGrid);
double accumulatedScore = 0.0); const std::vector<NodeAnchor> reverseWalk(size_t location, double accumulatedScore = 0.0);
protected: protected:
Grid* m_grid; Grid *m_grid;
}; };
inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {} inline Walker::Walker(Grid *inGrid) : m_grid(inGrid)
{
}
inline const std::vector<NodeAnchor> Walker::reverseWalk( inline const std::vector<NodeAnchor> Walker::reverseWalk(size_t location, double accumulatedScore)
size_t location, double accumulatedScore) { {
if (!location || location > m_grid->width()) { if (!location || location > m_grid->width())
{
return std::vector<NodeAnchor>(); return std::vector<NodeAnchor>();
} }
std::vector<std::vector<NodeAnchor> > paths; std::vector<std::vector<NodeAnchor>> paths;
std::vector<NodeAnchor> nodes = m_grid->nodesEndingAt(location); std::vector<NodeAnchor> nodes = m_grid->nodesEndingAt(location);
for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end(); for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end(); ++ni)
++ni) { {
if (!(*ni).node) { if (!(*ni).node)
{
continue; continue;
} }
(*ni).accumulatedScore = accumulatedScore + (*ni).node->score(); (*ni).accumulatedScore = accumulatedScore + (*ni).node->score();
std::vector<NodeAnchor> path = std::vector<NodeAnchor> path = reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
path.insert(path.begin(), *ni); path.insert(path.begin(), *ni);
paths.push_back(path); paths.push_back(path);
} }
if (!paths.size()) { if (!paths.size())
{
return std::vector<NodeAnchor>(); return std::vector<NodeAnchor>();
} }
std::vector<NodeAnchor>* result = &*(paths.begin()); std::vector<NodeAnchor> *result = &*(paths.begin());
for (std::vector<std::vector<NodeAnchor> >::iterator pi = paths.begin(); for (std::vector<std::vector<NodeAnchor>>::iterator pi = paths.begin(); pi != paths.end(); ++pi)
pi != paths.end(); ++pi) { {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) { if ((*pi).back().accumulatedScore > result->back().accumulatedScore)
{
result = &*pi; result = &*pi;
} }
} }
return *result; return *result;
} }
} // namespace Gramambular } // namespace Gramambular
#endif #endif