(Obj)C(pp) // Clang-Format.

This commit is contained in:
ShikiSuen 2022-04-03 14:05:04 +08:00
parent 4cbe68e052
commit e8bd8f82cb
48 changed files with 4318 additions and 2707 deletions

171
.clang-format Normal file
View File

@ -0,0 +1,171 @@
---
Language: Cpp
# BasedOnStyle: Microsoft
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: false
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
SortPriority: 0
- Regex: '^<.*\.h>'
Priority: 1
SortPriority: 0
- Regex: '^<.*'
Priority: 2
SortPriority: 0
- Regex: '.*'
Priority: 3
SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentCaseLabels: true
IndentGotoLabels: true
IndentPPDirectives: None
IndentWidth: 4
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- h
- m
- hh
- mm
- cc
- CC
- cpp
- Cpp
- CPP
- 'c++'
- 'C++'
CanonicalDelimiter: ''
BasedOnStyle: Microsoft
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
CanonicalDelimiter: ''
BasedOnStyle: Microsoft
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: Auto
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 4
UseCRLF: false
UseTab: Always
...

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
@import Cocoa;

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#import "Chronosphere.h"
@ -27,15 +34,18 @@ BOOL appBundleChronoshiftedToARandomizedPath(NSString *bundle)
int entrySize = sizeof(struct statfs);
struct statfs *bufs = (struct statfs *)calloc(entryCount, entrySize);
entryCount = getfsstat(bufs, entryCount * entrySize, MNT_NOWAIT);
for (int i = 0; i < entryCount; i++) {
if (!strcmp(bundleAbsPath, bufs[i].f_mntfromname)) {
for (int i = 0; i < entryCount; i++)
{
if (!strcmp(bundleAbsPath, bufs[i].f_mntfromname))
{
free(bufs);
// getfsstat() may return us a cached result, and so we need to get the stat of the mounted fs.
// If statfs() returns an error, the mounted fs is already gone.
struct statfs stat;
int checkResult = statfs(bundleAbsPath, &stat);
if (checkResult != 0) {
if (checkResult != 0)
{
// Meaning the app's bundle is not mounted; that is, it is not translocated.
// It also means that the app is not loaded.
return NO;

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
//

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
//

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef MANDARIN_H_
@ -25,79 +32,115 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <string>
#include <vector>
namespace Mandarin {
namespace Mandarin
{
class BopomofoSyllable {
public:
class BopomofoSyllable
{
public:
typedef uint16_t Component;
explicit BopomofoSyllable(Component syllable = 0) : syllable_(syllable) {}
BopomofoSyllable(const BopomofoSyllable&) = default;
BopomofoSyllable(BopomofoSyllable&& another) = default;
BopomofoSyllable& operator=(const BopomofoSyllable&) = default;
BopomofoSyllable& operator=(BopomofoSyllable&&) = default;
explicit BopomofoSyllable(Component syllable = 0) : syllable_(syllable)
{
}
BopomofoSyllable(const BopomofoSyllable &) = default;
BopomofoSyllable(BopomofoSyllable &&another) = default;
BopomofoSyllable &operator=(const BopomofoSyllable &) = default;
BopomofoSyllable &operator=(BopomofoSyllable &&) = default;
// takes the ASCII-form, "v"-tolerant, TW-style Hanyu Pinyin (fong, pong, bong
// acceptable)
static const BopomofoSyllable FromHanyuPinyin(const std::string& str);
static const BopomofoSyllable FromHanyuPinyin(const std::string &str);
// TODO: Support accented vowels
const std::string HanyuPinyinString(bool includesTone,
bool useVForUUmlaut) const;
static const BopomofoSyllable FromComposedString(const std::string& str);
const std::string HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const;
static const BopomofoSyllable FromComposedString(const std::string &str);
const std::string composedString() const;
void clear() { syllable_ = 0; }
bool isEmpty() const { return !syllable_; }
bool hasConsonant() const { return !!(syllable_ & ConsonantMask); }
bool hasMiddleVowel() const { return !!(syllable_ & MiddleVowelMask); }
bool hasVowel() const { return !!(syllable_ & VowelMask); }
bool hasToneMarker() const { return !!(syllable_ & ToneMarkerMask); }
Component consonantComponent() const { return syllable_ & ConsonantMask; }
Component middleVowelComponent() const {
void clear()
{
syllable_ = 0;
}
bool isEmpty() const
{
return !syllable_;
}
bool hasConsonant() const
{
return !!(syllable_ & ConsonantMask);
}
bool hasMiddleVowel() const
{
return !!(syllable_ & MiddleVowelMask);
}
bool hasVowel() const
{
return !!(syllable_ & VowelMask);
}
bool hasToneMarker() const
{
return !!(syllable_ & ToneMarkerMask);
}
Component consonantComponent() const
{
return syllable_ & ConsonantMask;
}
Component middleVowelComponent() const
{
return syllable_ & MiddleVowelMask;
}
Component vowelComponent() const { return syllable_ & VowelMask; }
Component toneMarkerComponent() const { return syllable_ & ToneMarkerMask; }
bool operator==(const BopomofoSyllable& another) const {
Component vowelComponent() const
{
return syllable_ & VowelMask;
}
Component toneMarkerComponent() const
{
return syllable_ & ToneMarkerMask;
}
bool operator==(const BopomofoSyllable &another) const
{
return syllable_ == another.syllable_;
}
bool operator!=(const BopomofoSyllable& another) const {
bool operator!=(const BopomofoSyllable &another) const
{
return syllable_ != another.syllable_;
}
bool isOverlappingWith(const BopomofoSyllable& another) const {
bool isOverlappingWith(const BopomofoSyllable &another) const
{
#define IOW_SAND(mask) ((syllable_ & mask) && (another.syllable_ & mask))
return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) ||
IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask);
return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) || IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask);
#undef IOW_SAND
}
// consonants J, Q, X all require the existence of vowel I or UE
bool belongsToJQXClass() const {
bool belongsToJQXClass() const
{
Component consonant = syllable_ & ConsonantMask;
return (consonant == J || consonant == Q || consonant == X);
}
// zi, ci, si, zhi, chi, shi, ri
bool belongsToZCSRClass() const {
bool belongsToZCSRClass() const
{
Component consonant = syllable_ & ConsonantMask;
return (consonant >= ZH && consonant <= S);
}
Component maskType() const {
Component maskType() const
{
Component mask = 0;
mask |= (syllable_ & ConsonantMask) ? ConsonantMask : 0;
mask |= (syllable_ & MiddleVowelMask) ? MiddleVowelMask : 0;
@ -105,13 +148,15 @@ public:
mask |= (syllable_ & ToneMarkerMask) ? ToneMarkerMask : 0;
return mask;
}
const BopomofoSyllable operator+(const BopomofoSyllable& another) const {
const BopomofoSyllable operator+(const BopomofoSyllable &another) const
{
Component newSyllable = syllable_;
#define OP_SOVER(mask) \
if (another.syllable_ & mask) { \
newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \
}
#define OP_SOVER(mask) \
if (another.syllable_ & mask) \
{ \
newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \
}
OP_SOVER(ConsonantMask);
OP_SOVER(MiddleVowelMask);
OP_SOVER(VowelMask);
@ -119,12 +164,14 @@ newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \
#undef OP_SOVER
return BopomofoSyllable(newSyllable);
}
BopomofoSyllable& operator+=(const BopomofoSyllable& another) {
#define OPE_SOVER(mask) \
if (another.syllable_ & mask) { \
syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \
}
BopomofoSyllable &operator+=(const BopomofoSyllable &another)
{
#define OPE_SOVER(mask) \
if (another.syllable_ & mask) \
{ \
syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \
}
OPE_SOVER(ConsonantMask);
OPE_SOVER(MiddleVowelMask);
OPE_SOVER(VowelMask);
@ -132,87 +179,88 @@ syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \
#undef OPE_SOVER
return *this;
}
friend std::ostream& operator<<(std::ostream& stream,
const BopomofoSyllable& syllable);
static constexpr Component
ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants
MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels
VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels
ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00)
B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, D = 0x0005, T = 0x0006,
N = 0x0007, L = 0x0008, G = 0x0009, K = 0x000a, H = 0x000b, J = 0x000c,
Q = 0x000d, X = 0x000e, ZH = 0x000f, CH = 0x0010, SH = 0x0011, R = 0x0012,
Z = 0x0013, C = 0x0014, S = 0x0015, I = 0x0020, U = 0x0040,
UE = 0x0060, // ue = u umlaut (we use the German convention here as an
// ersatz to the /ju:/ sound)
A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, AI = 0x0280, EI = 0x0300,
AO = 0x0380, OU = 0x0400, AN = 0x0480, EN = 0x0500, ANG = 0x0580,
ENG = 0x0600, ERR = 0x0680, Tone1 = 0x0000, Tone2 = 0x0800,
Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000;
protected:
friend std::ostream &operator<<(std::ostream &stream, const BopomofoSyllable &syllable);
static constexpr Component ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants
MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels
VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels
ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00)
B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, D = 0x0005, T = 0x0006, N = 0x0007, L = 0x0008, G = 0x0009,
K = 0x000a, H = 0x000b, J = 0x000c, Q = 0x000d, X = 0x000e, ZH = 0x000f, CH = 0x0010,
SH = 0x0011, R = 0x0012, Z = 0x0013, C = 0x0014, S = 0x0015, I = 0x0020, U = 0x0040,
UE = 0x0060, // ue = u umlaut (we use the German convention here as an
// ersatz to the /ju:/ sound)
A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, AI = 0x0280, EI = 0x0300, AO = 0x0380, OU = 0x0400,
AN = 0x0480, EN = 0x0500, ANG = 0x0580, ENG = 0x0600, ERR = 0x0680, Tone1 = 0x0000,
Tone2 = 0x0800, Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000;
protected:
Component syllable_;
};
inline std::ostream& operator<<(std::ostream& stream,
const BopomofoSyllable& syllable) {
inline std::ostream &operator<<(std::ostream &stream, const BopomofoSyllable &syllable)
{
stream << syllable.composedString();
return stream;
}
typedef BopomofoSyllable BPMF;
typedef std::map<char, std::vector<BPMF::Component> > BopomofoKeyToComponentMap;
typedef std::map<char, std::vector<BPMF::Component>> BopomofoKeyToComponentMap;
typedef std::map<BPMF::Component, char> BopomofoComponentToKeyMap;
class BopomofoKeyboardLayout {
public:
static const BopomofoKeyboardLayout* StandardLayout();
static const BopomofoKeyboardLayout* ETenLayout();
static const BopomofoKeyboardLayout* HsuLayout();
static const BopomofoKeyboardLayout* ETen26Layout();
static const BopomofoKeyboardLayout* IBMLayout();
static const BopomofoKeyboardLayout* MiTACLayout();
static const BopomofoKeyboardLayout* FakeSeigyouLayout();
static const BopomofoKeyboardLayout* HanyuPinyinLayout();
BopomofoKeyboardLayout(const BopomofoKeyToComponentMap& ktcm,
const std::string& name)
: m_keyToComponent(ktcm), m_name(name) {
for (BopomofoKeyToComponentMap::const_iterator miter =
m_keyToComponent.begin();
class BopomofoKeyboardLayout
{
public:
static const BopomofoKeyboardLayout *StandardLayout();
static const BopomofoKeyboardLayout *ETenLayout();
static const BopomofoKeyboardLayout *HsuLayout();
static const BopomofoKeyboardLayout *ETen26Layout();
static const BopomofoKeyboardLayout *IBMLayout();
static const BopomofoKeyboardLayout *MiTACLayout();
static const BopomofoKeyboardLayout *FakeSeigyouLayout();
static const BopomofoKeyboardLayout *HanyuPinyinLayout();
BopomofoKeyboardLayout(const BopomofoKeyToComponentMap &ktcm, const std::string &name)
: m_keyToComponent(ktcm), m_name(name)
{
for (BopomofoKeyToComponentMap::const_iterator miter = m_keyToComponent.begin();
miter != m_keyToComponent.end(); ++miter)
for (std::vector<BPMF::Component>::const_iterator viter =
(*miter).second.begin();
for (std::vector<BPMF::Component>::const_iterator viter = (*miter).second.begin();
viter != (*miter).second.end(); ++viter)
m_componentToKey[*viter] = (*miter).first;
}
const std::string name() const { return m_name; }
char componentToKey(BPMF::Component component) const {
BopomofoComponentToKeyMap::const_iterator iter =
m_componentToKey.find(component);
const std::string name() const
{
return m_name;
}
char componentToKey(BPMF::Component component) const
{
BopomofoComponentToKeyMap::const_iterator iter = m_componentToKey.find(component);
return (iter == m_componentToKey.end()) ? 0 : (*iter).second;
}
const std::vector<BPMF::Component> keyToComponents(char key) const {
const std::vector<BPMF::Component> keyToComponents(char key) const
{
BopomofoKeyToComponentMap::const_iterator iter = m_keyToComponent.find(key);
return (iter == m_keyToComponent.end()) ? std::vector<BPMF::Component>()
: (*iter).second;
return (iter == m_keyToComponent.end()) ? std::vector<BPMF::Component>() : (*iter).second;
}
const std::string keySequenceFromSyllable(BPMF syllable) const {
const std::string keySequenceFromSyllable(BPMF syllable) const
{
std::string sequence;
BPMF::Component c;
char k;
#define STKS_COMBINE(component) \
if ((c = component)) { \
if ((k = componentToKey(c))) sequence += std::string(1, k); \
}
#define STKS_COMBINE(component) \
if ((c = component)) \
{ \
if ((k = componentToKey(c))) \
sequence += std::string(1, k); \
}
STKS_COMBINE(syllable.consonantComponent());
STKS_COMBINE(syllable.middleVowelComponent());
STKS_COMBINE(syllable.vowelComponent());
@ -220,256 +268,314 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
#undef STKS_COMBINE
return sequence;
}
const BPMF syllableFromKeySequence(const std::string& sequence) const {
const BPMF syllableFromKeySequence(const std::string &sequence) const
{
BPMF syllable;
for (std::string::const_iterator iter = sequence.begin();
iter != sequence.end(); ++iter) {
for (std::string::const_iterator iter = sequence.begin(); iter != sequence.end(); ++iter)
{
bool beforeSeqHasIorUE = sequenceContainsIorUE(sequence.begin(), iter);
bool aheadSeqHasIorUE = sequenceContainsIorUE(iter + 1, sequence.end());
std::vector<BPMF::Component> components = keyToComponents(*iter);
if (!components.size()) continue;
if (components.size() == 1) {
if (!components.size())
continue;
if (components.size() == 1)
{
syllable += BPMF(components[0]);
continue;
}
BPMF head = BPMF(components[0]);
BPMF follow = BPMF(components[1]);
BPMF ending = components.size() > 2 ? BPMF(components[2]) : follow;
// apply the I/UE + E rule
if (head.vowelComponent() == BPMF::E &&
follow.vowelComponent() != BPMF::E) {
if (head.vowelComponent() == BPMF::E && follow.vowelComponent() != BPMF::E)
{
syllable += beforeSeqHasIorUE ? head : follow;
continue;
}
if (head.vowelComponent() != BPMF::E &&
follow.vowelComponent() == BPMF::E) {
if (head.vowelComponent() != BPMF::E && follow.vowelComponent() == BPMF::E)
{
syllable += beforeSeqHasIorUE ? follow : head;
continue;
}
// apply the J/Q/X + I/UE rule, only two components are allowed in the
// components vector here
if (head.belongsToJQXClass() && !follow.belongsToJQXClass()) {
if (!syllable.isEmpty()) {
if (ending != follow) syllable += ending;
} else {
if (head.belongsToJQXClass() && !follow.belongsToJQXClass())
{
if (!syllable.isEmpty())
{
if (ending != follow)
syllable += ending;
}
else
{
syllable += aheadSeqHasIorUE ? head : follow;
}
continue;
}
if (!head.belongsToJQXClass() && follow.belongsToJQXClass()) {
if (!syllable.isEmpty()) {
if (ending != follow) syllable += ending;
} else {
if (!head.belongsToJQXClass() && follow.belongsToJQXClass())
{
if (!syllable.isEmpty())
{
if (ending != follow)
syllable += ending;
}
else
{
syllable += aheadSeqHasIorUE ? follow : head;
}
continue;
}
// the nasty issue of only one char in the buffer
if (iter == sequence.begin() && iter + 1 == sequence.end()) {
if (head.hasVowel() || follow.hasToneMarker() ||
head.belongsToZCSRClass()) {
if (iter == sequence.begin() && iter + 1 == sequence.end())
{
if (head.hasVowel() || follow.hasToneMarker() || head.belongsToZCSRClass())
{
syllable += head;
} else {
if (follow.hasVowel() || ending.hasToneMarker()) {
}
else
{
if (follow.hasVowel() || ending.hasToneMarker())
{
syllable += follow;
} else {
}
else
{
syllable += ending;
}
}
continue;
}
if (!(syllable.maskType() & head.maskType()) &&
!endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end())) {
if (!(syllable.maskType() & head.maskType()) && !endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()))
{
syllable += head;
} else {
if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) &&
head.belongsToZCSRClass() && syllable.isEmpty()) {
}
else
{
if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) && head.belongsToZCSRClass() &&
syllable.isEmpty())
{
syllable += head;
} else if (syllable.maskType() < follow.maskType()) {
}
else if (syllable.maskType() < follow.maskType())
{
syllable += follow;
} else {
}
else
{
syllable += ending;
}
}
}
// heuristics for Hsu keyboard layout
if (this == HsuLayout()) {
if (this == HsuLayout())
{
// fix the left out L to ERR when it has sound, and GI, GUE -> JI, JUE
if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() &&
!syllable.hasMiddleVowel()) {
if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() && !syllable.hasMiddleVowel())
{
syllable += BPMF(BPMF::ERR);
} else if (syllable.consonantComponent() == BPMF::G &&
(syllable.middleVowelComponent() == BPMF::I ||
syllable.middleVowelComponent() == BPMF::UE)) {
}
else if (syllable.consonantComponent() == BPMF::G &&
(syllable.middleVowelComponent() == BPMF::I || syllable.middleVowelComponent() == BPMF::UE))
{
syllable += BPMF(BPMF::J);
}
}
return syllable;
}
protected:
bool endAheadOrAheadHasToneMarkKey(std::string::const_iterator ahead,
std::string::const_iterator end) const {
if (ahead == end) return true;
protected:
bool endAheadOrAheadHasToneMarkKey(std::string::const_iterator ahead, std::string::const_iterator end) const
{
if (ahead == end)
return true;
char tone1 = componentToKey(BPMF::Tone1);
char tone2 = componentToKey(BPMF::Tone2);
char tone3 = componentToKey(BPMF::Tone3);
char tone4 = componentToKey(BPMF::Tone4);
char tone5 = componentToKey(BPMF::Tone5);
if (tone1)
if (*ahead == tone1) return true;
if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 ||
*ahead == tone5)
if (*ahead == tone1)
return true;
if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 || *ahead == tone5)
return true;
return false;
}
bool sequenceContainsIorUE(std::string::const_iterator start,
std::string::const_iterator end) const {
bool sequenceContainsIorUE(std::string::const_iterator start, std::string::const_iterator end) const
{
char iChar = componentToKey(BPMF::I);
char ueChar = componentToKey(BPMF::UE);
for (; start != end; ++start)
if (*start == iChar || *start == ueChar) return true;
if (*start == iChar || *start == ueChar)
return true;
return false;
}
std::string m_name;
BopomofoKeyToComponentMap m_keyToComponent;
BopomofoComponentToKeyMap m_componentToKey;
};
class BopomofoReadingBuffer {
public:
explicit BopomofoReadingBuffer(const BopomofoKeyboardLayout* layout)
: layout_(layout), pinyin_mode_(false) {
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) {
class BopomofoReadingBuffer
{
public:
explicit BopomofoReadingBuffer(const BopomofoKeyboardLayout *layout) : layout_(layout), pinyin_mode_(false)
{
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout())
{
pinyin_mode_ = true;
pinyin_sequence_ = "";
}
}
void setKeyboardLayout(const BopomofoKeyboardLayout* layout) {
void setKeyboardLayout(const BopomofoKeyboardLayout *layout)
{
layout_ = layout;
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) {
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout())
{
pinyin_mode_ = true;
pinyin_sequence_ = "";
}
}
bool isValidKey(char k) const {
if (!pinyin_mode_) {
bool isValidKey(char k) const
{
if (!pinyin_mode_)
{
return layout_ ? (layout_->keyToComponents(k)).size() > 0 : false;
}
char lk = tolower(k);
if (lk >= 'a' && lk <= 'z') {
if (lk >= 'a' && lk <= 'z')
{
// if a tone marker is already in place
if (pinyin_sequence_.length()) {
if (pinyin_sequence_.length())
{
char lastc = pinyin_sequence_[pinyin_sequence_.length() - 1];
if (lastc >= '2' && lastc <= '5') {
if (lastc >= '2' && lastc <= '5')
{
return false;
}
return true;
}
return true;
}
if (pinyin_sequence_.length() && (lk >= '2' && lk <= '5')) {
if (pinyin_sequence_.length() && (lk >= '2' && lk <= '5'))
{
return true;
}
return false;
}
bool combineKey(char k) {
if (!isValidKey(k)) return false;
if (pinyin_mode_) {
bool combineKey(char k)
{
if (!isValidKey(k))
return false;
if (pinyin_mode_)
{
pinyin_sequence_ += std::string(1, tolower(k));
syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_);
return true;
}
std::string sequence =
layout_->keySequenceFromSyllable(syllable_) + std::string(1, k);
std::string sequence = layout_->keySequenceFromSyllable(syllable_) + std::string(1, k);
syllable_ = layout_->syllableFromKeySequence(sequence);
return true;
}
void clear() {
void clear()
{
pinyin_sequence_.clear();
syllable_.clear();
}
void backspace() {
if (!layout_) return;
if (pinyin_mode_) {
if (pinyin_sequence_.length()) {
pinyin_sequence_ =
pinyin_sequence_.substr(0, pinyin_sequence_.length() - 1);
void backspace()
{
if (!layout_)
return;
if (pinyin_mode_)
{
if (pinyin_sequence_.length())
{
pinyin_sequence_ = pinyin_sequence_.substr(0, pinyin_sequence_.length() - 1);
}
syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_);
return;
}
std::string sequence = layout_->keySequenceFromSyllable(syllable_);
if (sequence.length()) {
if (sequence.length())
{
sequence = sequence.substr(0, sequence.length() - 1);
syllable_ = layout_->syllableFromKeySequence(sequence);
}
}
bool isEmpty() const { return syllable_.isEmpty(); }
const std::string composedString() const {
if (pinyin_mode_) {
bool isEmpty() const
{
return syllable_.isEmpty();
}
const std::string composedString() const
{
if (pinyin_mode_)
{
return pinyin_sequence_;
}
return syllable_.composedString();
}
const BPMF syllable() const { return syllable_; }
const std::string standardLayoutQueryString() const {
const BPMF syllable() const
{
return syllable_;
}
const std::string standardLayoutQueryString() const
{
return BopomofoKeyboardLayout::StandardLayout()->keySequenceFromSyllable(syllable_);
}
bool hasToneMarker() const { return syllable_.hasToneMarker(); }
protected:
const BopomofoKeyboardLayout* layout_;
bool hasToneMarker() const
{
return syllable_.hasToneMarker();
}
protected:
const BopomofoKeyboardLayout *layout_;
BPMF syllable_;
bool pinyin_mode_;
std::string pinyin_sequence_;
};
} // namespace Mandarin
} // namespace Mandarin
#endif // MANDARIN_H_
#endif // MANDARIN_H_

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
//

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
//

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#import <Foundation/Foundation.h>
@ -33,7 +40,9 @@ extern InputMode imeModeNULL;
@protocol KeyHandlerDelegate <NSObject>
- (id)candidateControllerForKeyHandler:(KeyHandler *)keyHandler;
- (void)keyHandler:(KeyHandler *)keyHandler didSelectCandidateAtIndex:(NSInteger)index candidateController:(id)controller;
- (void)keyHandler:(KeyHandler *)keyHandler
didSelectCandidateAtIndex:(NSInteger)index
candidateController:(id)controller;
- (BOOL)keyHandler:(KeyHandler *)keyHandler didRequestWriteUserPhraseWithState:(InputState *)state;
@end
@ -43,7 +52,8 @@ extern InputMode imeModeNULL;
- (BOOL)handleInput:(keyParser *)input
state:(InputState *)state
stateCallback:(void (^)(InputState *))stateCallback
errorCallback:(void (^)(void))errorCallback NS_SWIFT_NAME(handle(input:state:stateCallback:errorCallback:));
errorCallback:(void (^)(void))errorCallback
NS_SWIFT_NAME(handle(input:state:stateCallback:errorCallback:));
- (void)syncWithPreferences;
- (void)fixNodeWithValue:(NSString *)value NS_SWIFT_NAME(fixNode(value:));
@ -52,8 +62,8 @@ extern InputMode imeModeNULL;
- (InputState *)buildInputtingState;
- (nullable InputState *)buildAssociatePhraseStateWithKey:(NSString *)key useVerticalMode:(BOOL)useVerticalMode;
@property (strong, nonatomic) InputMode inputMode;
@property (weak, nonatomic) id <KeyHandlerDelegate> delegate;
@property(strong, nonatomic) InputMode inputMode;
@property(weak, nonatomic) id<KeyHandlerDelegate> delegate;
@end
NS_ASSUME_NONNULL_END

File diff suppressed because it is too large Load Diff

View File

@ -1,55 +1,67 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "KeyValueBlobReader.h"
namespace vChewing {
namespace vChewing
{
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue *out)
{
static auto new_line = [](char c) { return c == '\n' || c == '\r'; };
static auto blank = [](char c) { return c == ' ' || c == '\t'; };
static auto blank_or_newline
= [](char c) { return blank(c) || new_line(c); };
static auto blank_or_newline = [](char c) { return blank(c) || new_line(c); };
static auto content_char = [](char c) { return !blank(c) && !new_line(c); };
if (state_ == State::ERROR) {
if (state_ == State::ERROR)
{
return state_;
}
const char* key_begin = nullptr;
const char *key_begin = nullptr;
size_t key_length = 0;
const char* value_begin = nullptr;
const char *value_begin = nullptr;
size_t value_length = 0;
while (true) {
while (true)
{
state_ = SkipUntilNot(blank_or_newline);
if (state_ != State::CAN_CONTINUE) {
if (state_ != State::CAN_CONTINUE)
{
return state_;
}
// Check if it's a comment line; if so, read until end of line.
if (*current_ != '#') {
if (*current_ != '#')
{
break;
}
state_ = SkipUntil(new_line);
if (state_ != State::CAN_CONTINUE) {
if (state_ != State::CAN_CONTINUE)
{
return state_;
}
}
@ -59,22 +71,26 @@ KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
key_begin = current_;
state_ = SkipUntilNot(content_char);
if (state_ != State::CAN_CONTINUE) {
if (state_ != State::CAN_CONTINUE)
{
goto error;
}
key_length = current_ - key_begin;
// There should be at least one blank character after the key string.
if (!blank(*current_)) {
if (!blank(*current_))
{
goto error;
}
state_ = SkipUntilNot(blank);
if (state_ != State::CAN_CONTINUE) {
if (state_ != State::CAN_CONTINUE)
{
goto error;
}
if (!content_char(*current_)) {
if (!content_char(*current_))
{
goto error;
}
@ -90,9 +106,9 @@ KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
// like "foo bar baz\n" where baz should not be treated as the Next key.
SkipUntil(new_line);
if (out != nullptr) {
*out = KeyValue { std::string_view { key_begin, key_length },
std::string_view { value_begin, value_length } };
if (out != nullptr)
{
*out = KeyValue{std::string_view{key_begin, key_length}, std::string_view{value_begin, value_length}};
}
state_ = State::HAS_PAIR;
return state_;
@ -102,11 +118,12 @@ error:
return state_;
}
KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(
const std::function<bool(char)>& f)
KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(const std::function<bool(char)> &f)
{
while (current_ != end_ && *current_) {
if (!f(*current_)) {
while (current_ != end_ && *current_)
{
if (!f(*current_))
{
return State::CAN_CONTINUE;
}
++current_;
@ -115,11 +132,12 @@ KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(
return State::END;
}
KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(
const std::function<bool(char)>& f)
KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(const std::function<bool(char)> &f)
{
while (current_ != end_ && *current_) {
if (f(*current_)) {
while (current_ != end_ && *current_)
{
if (f(*current_))
{
return State::CAN_CONTINUE;
}
++current_;
@ -128,8 +146,7 @@ KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(
return State::END;
}
std::ostream& operator<<(
std::ostream& os, const KeyValueBlobReader::KeyValue& kv)
std::ostream &operator<<(std::ostream &os, const KeyValueBlobReader::KeyValue &kv)
{
os << "(key: " << kv.key << ", value: " << kv.value << ")";
return os;

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
@ -39,11 +46,14 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
// std::string_view is used to allow returning results efficiently. As a result,
// the blob is a const char* and will never be mutated. This implies, for
// example, read-only mmap can be used to parse large files.
namespace vChewing {
namespace vChewing
{
class KeyValueBlobReader {
public:
enum class State : int {
class KeyValueBlobReader
{
public:
enum class State : int
{
// There are no more key-value pairs in this blob.
END = 0,
// The reader has produced a new key-value pair.
@ -54,19 +64,16 @@ public:
CAN_CONTINUE = 2
};
struct KeyValue {
constexpr KeyValue()
: key("")
, value("")
struct KeyValue
{
constexpr KeyValue() : key(""), value("")
{
}
constexpr KeyValue(std::string_view k, std::string_view v)
: key(k)
, value(v)
constexpr KeyValue(std::string_view k, std::string_view v) : key(k), value(v)
{
}
bool operator==(const KeyValue& another) const
bool operator==(const KeyValue &another) const
{
return key == another.key && value == another.value;
}
@ -75,27 +82,25 @@ public:
std::string_view value;
};
KeyValueBlobReader(const char* blob, size_t size)
: current_(blob)
, end_(blob + size)
KeyValueBlobReader(const char *blob, size_t size) : current_(blob), end_(blob + size)
{
}
// Parse the next key-value pair and return the state of the reader. If
// `out` is passed, out will be set to the produced key-value pair if there
// is one.
State Next(KeyValue* out = nullptr);
State Next(KeyValue *out = nullptr);
private:
State SkipUntil(const std::function<bool(char)>& f);
State SkipUntilNot(const std::function<bool(char)>& f);
private:
State SkipUntil(const std::function<bool(char)> &f);
State SkipUntilNot(const std::function<bool(char)> &f);
const char* current_;
const char* end_;
const char *current_;
const char *end_;
State state_ = State::CAN_CONTINUE;
};
std::ostream& operator<<(std::ostream&, const KeyValueBlobReader::KeyValue&);
std::ostream &operator<<(std::ostream &, const KeyValueBlobReader::KeyValue &);
} // namespace vChewing

View File

@ -1,40 +1,47 @@
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef LMConsolidator_hpp
#define LMConsolidator_hpp
#include <syslog.h>
#include <stdio.h>
#include <fstream>
#include <sstream>
#include <iostream>
#include <string>
#include <map>
#include <set>
#include <regex>
#include <set>
#include <sstream>
#include <stdio.h>
#include <string>
#include <syslog.h>
using namespace std;
namespace vChewing {
namespace vChewing
{
class LMConsolidator
{
public:
public:
static bool CheckPragma(const char *path);
static bool FixEOF(const char *path);
static bool ConsolidateContent(const char *path, bool shouldCheckPragma);

View File

@ -1,28 +1,35 @@
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "LMConsolidator.h"
#include "vChewing-Swift.h"
namespace vChewing {
namespace vChewing
{
constexpr std::string_view FORMATTED_PRAGMA_HEADER
= "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍";
constexpr std::string_view FORMATTED_PRAGMA_HEADER =
"# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍";
// HEADER VERIFIER. CREDIT: Shiki Suen
bool LMConsolidator::CheckPragma(const char *path)
@ -32,13 +39,17 @@ bool LMConsolidator::CheckPragma(const char *path)
{
string firstLine;
getline(zfdCheckPragma, firstLine);
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str());
if (firstLine != FORMATTED_PRAGMA_HEADER) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS.");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str());
if (firstLine != FORMATTED_PRAGMA_HEADER)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS.");
return false;
}
}
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL.");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL.");
return true;
}
@ -46,58 +57,76 @@ bool LMConsolidator::CheckPragma(const char *path)
bool LMConsolidator::FixEOF(const char *path)
{
std::fstream zfdEOFFixerIncomingStream(path);
zfdEOFFixerIncomingStream.seekg(-1,std::ios_base::end);
zfdEOFFixerIncomingStream.seekg(-1, std::ios_base::end);
char z;
zfdEOFFixerIncomingStream.get(z);
if(z!='\n'){
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n");
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n");
if (z != '\n')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n");
std::ofstream zfdEOFFixerOutput(path, std::ios_base::app);
zfdEOFFixerOutput << std::endl;
zfdEOFFixerOutput.close();
if (zfdEOFFixerOutput.fail()) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
if (zfdEOFFixerOutput.fail())
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false;
}
}
zfdEOFFixerIncomingStream.close();
if (zfdEOFFixerIncomingStream.fail()) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
if (zfdEOFFixerIncomingStream.fail())
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS,
"// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false;
}
return true;
} // END: EOF FIXER.
// CONTENT CONSOLIDATOR. CREDIT: Shiki Suen.
bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma) {
bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma)
{
bool pragmaCheckResult = LMConsolidator::CheckPragma(path);
if (pragmaCheckResult && shouldCheckPragma){
if (pragmaCheckResult && shouldCheckPragma)
{
return true;
}
ifstream zfdContentConsolidatorIncomingStream(path);
vector<string>vecEntry;
while(!zfdContentConsolidatorIncomingStream.eof())
vector<string> vecEntry;
while (!zfdContentConsolidatorIncomingStream.eof())
{ // Xcode 13 能用的 ObjCpp 與 Cpp 並無原生支援「\h」這個 Regex 參數的能力,只能逐行處理。
string zfdBuffer;
getline(zfdContentConsolidatorIncomingStream,zfdBuffer);
getline(zfdContentConsolidatorIncomingStream, zfdBuffer);
vecEntry.push_back(zfdBuffer);
}
// 第一遍 for 用來統整每行內的內容。
// regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp / objCpp 可能對某些 Regex 寫法有相容性問題。
// regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。
// regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"),
// sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp /
// objCpp 可能對某些 Regex 寫法有相容性問題。 regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "),
// sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。
regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)");
for(int i=0;i<vecEntry.size();i++) { // 第一遍 for 用來統整每行內的內容。
if (vecEntry[i].size() != 0) { // 不要理會空行,否則給空行加上 endl 等於再加空行。
// RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab 等),最後去除每行首尾空格。
// vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); // 中日韓全形空格轉為 ASCII 空格。
// vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, " ").c_str(); // Non-Break 型空格轉為 ASCII 空格。
// vecEntry[i] = regex_replace(vecEntry[i], sedWhiteSpace, " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。
// vecEntry[i] = regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。
// vecEntry[i] = regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。
for (int i = 0; i < vecEntry.size(); i++)
{ // 第一遍 for 用來統整每行內的內容。
if (vecEntry[i].size() != 0)
{ // 不要理會空行,否則給空行加上 endl 等於再加空行。
// RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab
// 等),最後去除每行首尾空格。 vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); //
// 中日韓全形空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, "
// ").c_str(); // Non-Break 型空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedWhiteSpace,
// " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。 vecEntry[i] =
// regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。 vecEntry[i] =
// regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。
// 上述命令分步驟執行容易產生效能問題,故濃縮為下述兩句。
vecEntry[i] = regex_replace(vecEntry[i], sedToConsolidate, " ").c_str();
vecEntry[i] = regex_replace(vecEntry[i], sedToTrim, "").c_str();
@ -106,27 +135,39 @@ bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma
// 在第二遍 for 運算之前,針對 vecEntry 去除重複條目。
std::reverse(vecEntry.begin(), vecEntry.end()); // 先首尾顛倒,免得破壞最新的 override 資訊。
vecEntry.erase(unique(vecEntry.begin(), vecEntry.end()), vecEntry.end()); // 去重複。
std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。
std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。
// 統整完畢。開始將統整過的內容寫入檔案。
ofstream zfdContentConsolidatorOutput(path); // 這裡是要從頭開始重寫檔案內容,所以不需要「 ios_base::app 」。
if (!pragmaCheckResult){
zfdContentConsolidatorOutput<<FORMATTED_PRAGMA_HEADER<<endl; // 寫入經過整理處理的 HEADER。
if (!pragmaCheckResult)
{
zfdContentConsolidatorOutput << FORMATTED_PRAGMA_HEADER << endl; // 寫入經過整理處理的 HEADER。
}
for(int i=0;i<vecEntry.size();i++) { // 第二遍 for 用來寫入統整過的內容。
if (vecEntry[i].size() != 0) { // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。
zfdContentConsolidatorOutput<<vecEntry[i]<<endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。
for (int i = 0; i < vecEntry.size(); i++)
{ // 第二遍 for 用來寫入統整過的內容。
if (vecEntry[i].size() != 0)
{ // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。
zfdContentConsolidatorOutput << vecEntry[i]
<< endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。
}
}
zfdContentConsolidatorOutput.close();
if (zfdContentConsolidatorOutput.fail()) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
if (zfdContentConsolidatorOutput.fail())
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS,
"// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false;
}
zfdContentConsolidatorIncomingStream.close();
if (zfdContentConsolidatorIncomingStream.fail()) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
if (zfdContentConsolidatorIncomingStream.fail())
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. "
"Insufficient Privileges?\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "// DATA FILE: %s", path);
return false;
}
return true;

View File

@ -1,28 +1,35 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef LMInstantiator_H
#define LMInstantiator_H
#include "AssociatedPhrases.h"
#include "CoreLM.h"
#include "CNSLM.h"
#include "CoreLM.h"
#include "ParselessLM.h"
#include "PhraseReplacementMap.h"
#include "SymbolLM.h"
@ -31,7 +38,8 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <stdio.h>
#include <unordered_set>
namespace vChewing {
namespace vChewing
{
using namespace Gramambular;
@ -57,58 +65,59 @@ using namespace Gramambular;
/// model while launching and to load the user phrases anytime if the custom
/// files are modified. It does not keep references to the data paths, so
/// you have to pass the paths whenever you ask it to load data.
class LMInstantiator : public Gramambular::LanguageModel {
public:
class LMInstantiator : public Gramambular::LanguageModel
{
public:
LMInstantiator();
~LMInstantiator();
/// Asks to load the primary language model at the given path.
/// @param languageModelPath The path of the language model.
void loadLanguageModel(const char* languageModelPath);
void loadLanguageModel(const char *languageModelPath);
/// If the primary language model is already loaded.
bool isDataModelLoaded();
/// Asks to load the misc data model at the given path.
/// @param miscDataPath The path of the misc data model.
void loadMiscData(const char* miscDataPath);
void loadMiscData(const char *miscDataPath);
/// If the misc data model is already loaded.
bool isMiscDataLoaded();
/// Asks to load the symbol data model at the given path.
/// @param symbolDataPath The path of the symbol data model.
void loadSymbolData(const char* symbolDataPath);
void loadSymbolData(const char *symbolDataPath);
/// If the symbol data model is already loaded.
bool isSymbolDataLoaded();
/// Asks to load the CNS data model at the given path.
/// @param cnsDataPath The path of the CNS data model.
void loadCNSData(const char* cnsDataPath);
void loadCNSData(const char *cnsDataPath);
/// If the CNS data model is already loaded.
bool isCNSDataLoaded();
/// Asks to load the user phrases and excluded phrases at the given path.
/// @param userPhrasesPath The path of user phrases.
/// @param excludedPhrasesPath The path of excluded phrases.
void loadUserPhrases(const char* userPhrasesPath, const char* excludedPhrasesPath);
void loadUserPhrases(const char *userPhrasesPath, const char *excludedPhrasesPath);
/// Asks to load the user symbol data at the given path.
/// @param userSymbolDataPath The path of user symbol data.
void loadUserSymbolData(const char* userPhrasesPath);
void loadUserSymbolData(const char *userPhrasesPath);
/// Asks to load the user associated phrases at the given path.
/// @param userAssociatedPhrasesPath The path of the user associated phrases.
void loadUserAssociatedPhrases(const char* userAssociatedPhrasesPath);
void loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath);
/// Asks to load the phrase replacement table at the given path.
/// @param phraseReplacementPath The path of the phrase replacement table.
void loadPhraseReplacementMap(const char* phraseReplacementPath);
void loadPhraseReplacementMap(const char *phraseReplacementPath);
/// Not implemented since we do not have data to provide bigram function.
const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key);
const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey, const std::string &key);
/// Returns a list of available unigram for the given key.
/// @param key A std::string representing the BPMF reading or a symbol key. For
/// example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on.
const std::vector<Gramambular::Unigram> unigramsForKey(const std::string& key);
const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
/// If the model has unigrams for the given key.
/// @param key The key.
bool hasUnigramsForKey(const std::string& key);
bool hasUnigramsForKey(const std::string &key);
/// Enables or disables phrase replacement.
void setPhraseReplacementEnabled(bool enabled);
@ -125,21 +134,20 @@ public:
/// If CNS11643 input is enabled or not.
bool cnsEnabled();
const std::vector<std::string> associatedPhrasesForKey(const std::string& key);
bool hasAssociatedPhrasesForKey(const std::string& key);
const std::vector<std::string> associatedPhrasesForKey(const std::string &key);
bool hasAssociatedPhrasesForKey(const std::string &key);
protected:
protected:
/// Filters and converts the input unigrams and return a new list of unigrams.
///
///
/// @param unigrams The unigrams to be processed.
/// @param excludedValues The values of the unigrams to be excluded.
/// @param insertedValues The values for unigrams already in the results.
/// It helps to prevent duplicated unigrams. Please note that the method
/// has a side effect that it inserts values to `insertedValues`.
const std::vector<Gramambular::Unigram> filterAndTransformUnigrams(const std::vector<Gramambular::Unigram> unigrams,
const std::unordered_set<std::string>& excludedValues,
std::unordered_set<std::string>& insertedValues);
const std::vector<Gramambular::Unigram> filterAndTransformUnigrams(
const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
std::unordered_set<std::string> &insertedValues);
ParselessLM m_languageModel;
CoreLM m_miscModel;
@ -154,6 +162,6 @@ protected:
bool m_cnsEnabled;
bool m_symbolEnabled;
};
};
}; // namespace vChewing
#endif

View File

@ -1,27 +1,35 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "LMInstantiator.h"
#include <algorithm>
#include <iterator>
namespace vChewing {
namespace vChewing
{
LMInstantiator::LMInstantiator()
{
@ -39,9 +47,10 @@ LMInstantiator::~LMInstantiator()
m_associatedPhrases.close();
}
void LMInstantiator::loadLanguageModel(const char* languageModelDataPath)
void LMInstantiator::loadLanguageModel(const char *languageModelDataPath)
{
if (languageModelDataPath) {
if (languageModelDataPath)
{
m_languageModel.close();
m_languageModel.open(languageModelDataPath);
}
@ -52,9 +61,10 @@ bool LMInstantiator::isDataModelLoaded()
return m_languageModel.isLoaded();
}
void LMInstantiator::loadCNSData(const char* cnsDataPath)
void LMInstantiator::loadCNSData(const char *cnsDataPath)
{
if (cnsDataPath) {
if (cnsDataPath)
{
m_cnsModel.close();
m_cnsModel.open(cnsDataPath);
}
@ -65,9 +75,10 @@ bool LMInstantiator::isCNSDataLoaded()
return m_cnsModel.isLoaded();
}
void LMInstantiator::loadMiscData(const char* miscDataPath)
void LMInstantiator::loadMiscData(const char *miscDataPath)
{
if (miscDataPath) {
if (miscDataPath)
{
m_miscModel.close();
m_miscModel.open(miscDataPath);
}
@ -78,9 +89,10 @@ bool LMInstantiator::isMiscDataLoaded()
return m_miscModel.isLoaded();
}
void LMInstantiator::loadSymbolData(const char* symbolDataPath)
void LMInstantiator::loadSymbolData(const char *symbolDataPath)
{
if (symbolDataPath) {
if (symbolDataPath)
{
m_symbolModel.close();
m_symbolModel.open(symbolDataPath);
}
@ -91,14 +103,15 @@ bool LMInstantiator::isSymbolDataLoaded()
return m_symbolModel.isLoaded();
}
void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath,
const char* excludedPhrasesDataPath)
void LMInstantiator::loadUserPhrases(const char *userPhrasesDataPath, const char *excludedPhrasesDataPath)
{
if (userPhrasesDataPath) {
if (userPhrasesDataPath)
{
m_userPhrases.close();
m_userPhrases.open(userPhrasesDataPath);
}
if (excludedPhrasesDataPath) {
if (excludedPhrasesDataPath)
{
m_excludedPhrases.close();
m_excludedPhrases.open(excludedPhrasesDataPath);
}
@ -106,7 +119,8 @@ void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath,
void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath)
{
if (userSymbolDataPath) {
if (userSymbolDataPath)
{
m_userSymbolModel.close();
m_userSymbolModel.open(userSymbolDataPath);
}
@ -114,28 +128,32 @@ void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath)
void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath)
{
if (userAssociatedPhrasesPath) {
if (userAssociatedPhrasesPath)
{
m_associatedPhrases.close();
m_associatedPhrases.open(userAssociatedPhrasesPath);
}
}
void LMInstantiator::loadPhraseReplacementMap(const char* phraseReplacementPath)
void LMInstantiator::loadPhraseReplacementMap(const char *phraseReplacementPath)
{
if (phraseReplacementPath) {
if (phraseReplacementPath)
{
m_phraseReplacement.close();
m_phraseReplacement.open(phraseReplacementPath);
}
}
const std::vector<Gramambular::Bigram> LMInstantiator::bigramsForKeys(const std::string& preceedingKey, const std::string& key)
const std::vector<Gramambular::Bigram> LMInstantiator::bigramsForKeys(const std::string &preceedingKey,
const std::string &key)
{
return std::vector<Gramambular::Bigram>();
}
const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string& key)
const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string &key)
{
if (key == " ") {
if (key == " ")
{
std::vector<Gramambular::Unigram> spaceUnigrams;
Gramambular::Unigram g;
g.keyValue.key = " ";
@ -152,17 +170,18 @@ const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std
std::vector<Gramambular::Unigram> userSymbolUnigrams;
std::vector<Gramambular::Unigram> cnsUnigrams;
std::unordered_set<std::string> excludedValues;
std::unordered_set<std::string> insertedValues;
std::unordered_set<std::string> excludedValues;
std::unordered_set<std::string> insertedValues;
if (m_excludedPhrases.hasUnigramsForKey(key)) {
if (m_excludedPhrases.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
transform(excludedUnigrams.begin(), excludedUnigrams.end(),
inserter(excludedValues, excludedValues.end()),
[](const Gramambular::Unigram& u) { return u.keyValue.value; });
transform(excludedUnigrams.begin(), excludedUnigrams.end(), inserter(excludedValues, excludedValues.end()),
[](const Gramambular::Unigram &u) { return u.keyValue.value; });
}
if (m_userPhrases.hasUnigramsForKey(key)) {
if (m_userPhrases.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
// 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。
// 這樣一來就可以在就地新增語彙時徹底複寫優先權。
@ -170,27 +189,32 @@ const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
}
if (m_languageModel.hasUnigramsForKey(key)) {
if (m_languageModel.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
}
if (m_miscModel.hasUnigramsForKey(key)) {
if (m_miscModel.hasUnigramsForKey(key))
{
std::vector<Gramambular::Unigram> rawMiscUnigrams = m_miscModel.unigramsForKey(key);
miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues);
}
if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled) {
if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
{
std::vector<Gramambular::Unigram> rawSymbolUnigrams = m_symbolModel.unigramsForKey(key);
symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues);
}
if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled) {
if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
{
std::vector<Gramambular::Unigram> rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key);
userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues);
}
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) {
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled)
{
std::vector<Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
}
@ -203,13 +227,15 @@ const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std
return allUnigrams;
}
bool LMInstantiator::hasUnigramsForKey(const std::string& key)
bool LMInstantiator::hasUnigramsForKey(const std::string &key)
{
if (key == " ") {
if (key == " ")
{
return true;
}
if (!m_excludedPhrases.hasUnigramsForKey(key)) {
if (!m_excludedPhrases.hasUnigramsForKey(key))
{
return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key);
}
@ -246,26 +272,33 @@ bool LMInstantiator::symbolEnabled()
return m_symbolEnabled;
}
const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string>& excludedValues, std::unordered_set<std::string>& insertedValues)
const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(
const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
std::unordered_set<std::string> &insertedValues)
{
std::vector<Gramambular::Unigram> results;
for (auto&& unigram : unigrams) {
for (auto &&unigram : unigrams)
{
// excludedValues filters out the unigrams with the original value.
// insertedValues filters out the ones with the converted value
std::string originalValue = unigram.keyValue.value;
if (excludedValues.find(originalValue) != excludedValues.end()) {
if (excludedValues.find(originalValue) != excludedValues.end())
{
continue;
}
std::string value = originalValue;
if (m_phraseReplacementEnabled) {
if (m_phraseReplacementEnabled)
{
std::string replacement = m_phraseReplacement.valueForKey(value);
if (replacement != "") {
if (replacement != "")
{
value = replacement;
}
}
if (insertedValues.find(value) == insertedValues.end()) {
if (insertedValues.find(value) == insertedValues.end())
{
Gramambular::Unigram g;
g.keyValue.value = value;
g.keyValue.key = unigram.keyValue.key;
@ -277,12 +310,12 @@ const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigra
return results;
}
const std::vector<std::string> LMInstantiator::associatedPhrasesForKey(const std::string& key)
const std::vector<std::string> LMInstantiator::associatedPhrasesForKey(const std::string &key)
{
return m_associatedPhrases.valuesForKey(key);
}
bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string& key)
bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string &key)
{
return m_associatedPhrases.hasValuesForKey(key);
}

View File

@ -1,47 +1,58 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ASSOCIATEDPHRASES_H
#define ASSOCIATEDPHRASES_H
#include <string>
#include <map>
#include <iostream>
#include <map>
#include <string>
#include <vector>
namespace vChewing {
namespace vChewing
{
class AssociatedPhrases
{
public:
public:
AssociatedPhrases();
~AssociatedPhrases();
const bool isLoaded();
bool open(const char *path);
void close();
const std::vector<std::string> valuesForKey(const std::string& key);
const bool hasValuesForKey(const std::string& key);
const std::vector<std::string> valuesForKey(const std::string &key);
const bool hasValuesForKey(const std::string &key);
protected:
struct Row {
Row(std::string_view& k, std::string_view& v) : key(k), value(v) {}
protected:
struct Row
{
Row(std::string_view &k, std::string_view &v) : key(k), value(v)
{
}
std::string_view key;
std::string_view value;
};
@ -53,6 +64,6 @@ protected:
size_t length;
};
}
} // namespace vChewing
#endif /* ASSOCIATEDPHRASES_H */

View File

@ -1,52 +1,59 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "AssociatedPhrases.h"
#include "vChewing-Swift.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <fstream>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include "KeyValueBlobReader.h"
#include "LMConsolidator.h"
namespace vChewing {
namespace vChewing
{
// Default constructor: initializes the members to fd = -1, data = 0 (null)
// and length = 0. With data null, the object is in the "not loaded" state
// that isLoaded() reports as false.
AssociatedPhrases::AssociatedPhrases()
: fd(-1)
, data(0)
, length(0)
AssociatedPhrases::AssociatedPhrases() : fd(-1), data(0), length(0)
{
}
// Destructor: calls close() when `data` is non-null; otherwise does nothing.
AssociatedPhrases::~AssociatedPhrases()
{
// A null `data` means nothing is currently loaded, so close() is skipped.
if (data) {
if (data)
{
close();
}
}
const bool AssociatedPhrases::isLoaded()
{
if (data) {
if (data)
{
return true;
}
return false;
@ -54,7 +61,8 @@ const bool AssociatedPhrases::isLoaded()
bool AssociatedPhrases::open(const char *path)
{
if (data) {
if (data)
{
return false;
}
@ -62,13 +70,15 @@ bool AssociatedPhrases::open(const char *path)
LMConsolidator::ConsolidateContent(path, true);
fd = ::open(path, O_RDONLY);
if (fd == -1) {
if (fd == -1)
{
printf("open:: file not exist");
return false;
}
struct stat sb;
if (fstat(fd, &sb) == -1) {
if (fstat(fd, &sb) == -1)
{
printf("open:: cannot open file");
return false;
}
@ -76,21 +86,25 @@ bool AssociatedPhrases::open(const char *path)
length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (!data) {
if (!data)
{
::close(fd);
return false;
}
KeyValueBlobReader reader(static_cast<char*>(data), length);
KeyValueBlobReader reader(static_cast<char *>(data), length);
KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
{
keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value);
}
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
if (state == KeyValueBlobReader::State::ERROR) {
if (state == KeyValueBlobReader::State::ERROR)
{
// close();
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n");
// return false;
}
return true;
@ -98,7 +112,8 @@ bool AssociatedPhrases::open(const char *path)
void AssociatedPhrases::close()
{
if (data) {
if (data)
{
munmap(data, length);
::close(fd);
data = 0;
@ -107,13 +122,15 @@ void AssociatedPhrases::close()
keyRowMap.clear();
}
const std::vector<std::string> AssociatedPhrases::valuesForKey(const std::string& key)
const std::vector<std::string> AssociatedPhrases::valuesForKey(const std::string &key)
{
std::vector<std::string> v;
auto iter = keyRowMap.find(key);
if (iter != keyRowMap.end()) {
const std::vector<Row>& rows = iter->second;
for (const auto& row : rows) {
if (iter != keyRowMap.end())
{
const std::vector<Row> &rows = iter->second;
for (const auto &row : rows)
{
std::string_view value = row.value;
v.push_back({value.data(), value.size()});
}
@ -121,9 +138,9 @@ const std::vector<std::string> AssociatedPhrases::valuesForKey(const std::string
return v;
}
// Returns true when keyRowMap contains an entry for `key`, false otherwise.
// NOTE(review): the top-level `const` on a by-value bool return has no effect
// for callers.
const bool AssociatedPhrases::hasValuesForKey(const std::string& key)
const bool AssociatedPhrases::hasValuesForKey(const std::string &key)
{
return keyRowMap.find(key) != keyRowMap.end();
}
}; // namespace vChewing
}; // namespace vChewing

View File

@ -1,30 +1,37 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CoreLM_H
#define CoreLM_H
#include "LanguageModel.h"
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <map>
#include <iostream>
// this class relies on the fact that we have a space-separated data
// format, and we use mmap and zero-out the separators and line feeds
@ -33,10 +40,12 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
using namespace std;
using namespace Gramambular;
namespace vChewing {
namespace vChewing
{
class CoreLM : public Gramambular::LanguageModel {
public:
class CoreLM : public Gramambular::LanguageModel
{
public:
CoreLM();
~CoreLM();
@ -45,20 +54,21 @@ public:
void close();
void dump();
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const string& preceedingKey, const string& key);
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const string& key);
virtual bool hasUnigramsForKey(const string& key);
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const string &preceedingKey, const string &key);
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const string &key);
virtual bool hasUnigramsForKey(const string &key);
protected:
protected:
struct CStringCmp
{
bool operator()(const char* s1, const char* s2) const
bool operator()(const char *s1, const char *s2) const
{
return strcmp(s1, s2) < 0;
}
};
struct Row {
struct Row
{
const char *key;
const char *value;
const char *logProbability;

View File

@ -1,50 +1,56 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CoreLM.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include "vChewing-Swift.h"
#include <fcntl.h>
#include <fstream>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <syslog.h>
#include "vChewing-Swift.h"
#include <unistd.h>
using namespace Gramambular;
// Default constructor: initializes the members to fd = -1, data = 0 (null)
// and length = 0. With data null, the object is in the "not loaded" state
// that isLoaded() reports as false.
vChewing::CoreLM::CoreLM()
: fd(-1)
, data(0)
, length(0)
vChewing::CoreLM::CoreLM() : fd(-1), data(0), length(0)
{
}
// Destructor: calls close() when `data` is non-null; otherwise does nothing.
vChewing::CoreLM::~CoreLM()
{
// A null `data` means nothing is currently loaded, so close() is skipped.
if (data) {
if (data)
{
close();
}
}
bool vChewing::CoreLM::isLoaded()
{
if (data) {
if (data)
{
return true;
}
return false;
@ -52,24 +58,28 @@ bool vChewing::CoreLM::isLoaded()
bool vChewing::CoreLM::open(const char *path)
{
if (data) {
if (data)
{
return false;
}
fd = ::open(path, O_RDONLY);
if (fd == -1) {
if (fd == -1)
{
return false;
}
struct stat sb;
if (fstat(fd, &sb) == -1) {
if (fstat(fd, &sb) == -1)
{
return false;
}
length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0);
if (!data) {
if (!data)
{
::close(fd);
return false;
}
@ -117,18 +127,22 @@ bool vChewing::CoreLM::open(const char *path)
start:
// EOF -> end
if (head == end) {
if (head == end)
{
goto end;
}
c = *head;
// \s -> error
if (c == ' ') {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error");
if (c == ' ')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error");
goto error;
}
// \n -> start
else if (c == '\n') {
else if (c == '\n')
{
head++;
goto start;
}
@ -140,19 +154,24 @@ start:
state1:
// EOF -> error
if (head == end) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error");
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error");
goto error;
}
c = *head;
// \n -> error
if (c == '\n') {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error");
if (c == '\n')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error");
goto error;
}
// \s -> state2 + zero out ending + record column start
else if (c == ' ') {
else if (c == ' ')
{
*head = 0;
head++;
row.key = head;
@ -165,15 +184,19 @@ state1:
state2:
// eof -> error
if (head == end) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error");
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error");
goto error;
}
c = *head;
// \n, \s -> error
if (c == '\n' || c == ' ') {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error");
if (c == '\n' || c == ' ')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error");
goto error;
}
@ -184,20 +207,25 @@ state2:
state3:
// eof -> error
if (head == end) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error");
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error");
goto error;
}
c = *head;
// \n -> error
if (c == '\n') {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error");
if (c == '\n')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error");
goto error;
}
// \s -> state4 + zero out ending + record column start
else if (c == ' ') {
else if (c == ' ')
{
*head = 0;
head++;
row.logProbability = head;
@ -210,15 +238,19 @@ state3:
state4:
// eof -> error
if (head == end) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error");
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error");
goto error;
}
c = *head;
// \n, \s -> error
if (c == '\n' || c == ' ') {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error");
if (c == '\n' || c == ' ')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error");
goto error;
}
@ -227,22 +259,26 @@ state4:
// fall through to state 5
state5:
// eof -> error
if (head == end) {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error");
if (head == end)
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error");
goto error;
}
c = *head;
// \s -> error
if (c == ' ') {
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error");
if (c == ' ')
{
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error");
goto error;
}
// \n -> start
else if (c == '\n') {
else if (c == '\n')
{
*head = 0;
head++;
keyRowMap[row.key].push_back(row);
@ -265,13 +301,15 @@ end:
emptyRow.value = space;
emptyRow.logProbability = zero;
keyRowMap[space].push_back(emptyRow);
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete.");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete.");
return true;
}
void vChewing::CoreLM::close()
{
if (data) {
if (data)
{
munmap(data, length);
::close(fd);
data = 0;
@ -283,30 +321,34 @@ void vChewing::CoreLM::close()
// Debug helper: writes every row held in keyRowMap to cerr, one row per line,
// formatted as "<key> <value> <logProbability>".
// NOTE(review): in this diff rendering, adjacent near-duplicate lines are the
// before/after clang-format spellings of the same statement.
void vChewing::CoreLM::dump()
{
// Incremented once per printed row; never read afterwards in this function.
size_t rows = 0;
// Outer loop walks each key's entry; the inner loop walks the rows stored
// under that key.
for (map<const char *, vector<Row> >::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i) {
const vector<Row>& r = (*i).second;
for (vector<Row>::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri) {
const Row& row = *ri;
for (map<const char *, vector<Row>>::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i)
{
const vector<Row> &r = (*i).second;
for (vector<Row>::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri)
{
const Row &row = *ri;
cerr << row.key << " " << row.value << " " << row.logProbability << "\n";
rows++;
}
}
}
// Bigram lookup. This implementation ignores both `preceedingKey` and `key`
// and always returns an empty vector.
const std::vector<Gramambular::Bigram> vChewing::CoreLM::bigramsForKeys(const string& preceedingKey, const string& key)
const std::vector<Gramambular::Bigram> vChewing::CoreLM::bigramsForKeys(const string &preceedingKey, const string &key)
{
return std::vector<Gramambular::Bigram>();
}
const std::vector<Gramambular::Unigram> vChewing::CoreLM::unigramsForKey(const string& key)
const std::vector<Gramambular::Unigram> vChewing::CoreLM::unigramsForKey(const string &key)
{
std::vector<Gramambular::Unigram> v;
map<const char *, vector<Row> >::const_iterator i = keyRowMap.find(key.c_str());
map<const char *, vector<Row>>::const_iterator i = keyRowMap.find(key.c_str());
if (i != keyRowMap.end()) {
for (vector<Row>::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri) {
if (i != keyRowMap.end())
{
for (vector<Row>::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri)
{
Unigram g;
const Row& r = *ri;
const Row &r = *ri;
g.keyValue.key = r.key;
g.keyValue.value = r.value;
g.score = atof(r.logProbability);
@ -317,7 +359,7 @@ const std::vector<Gramambular::Unigram> vChewing::CoreLM::unigramsForKey(const s
return v;
}
// Returns true when keyRowMap has an entry matching key.c_str(), false
// otherwise.
// NOTE(review): whether the match compares string contents or pointer values
// depends on keyRowMap's comparator, which is declared outside this view —
// confirm against the class declaration.
bool vChewing::CoreLM::hasUnigramsForKey(const string& key)
bool vChewing::CoreLM::hasUnigramsForKey(const string &key)
{
return keyRowMap.find(key.c_str()) != keyRowMap.end();
}

View File

@ -1,44 +1,54 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CNSLM_H
#define CNSLM_H
#include <string>
#include <map>
#include <iostream>
#include "LanguageModel.h"
#include "UserPhrasesLM.h"
#include <iostream>
#include <map>
#include <string>
// CNSLM: a UserPhrasesLM subclass that only overrides two virtual hooks,
// allowConsolidation() and overridedValue().
// NOTE(review): how the base class consumes these hooks is not visible here —
// confirm in UserPhrasesLM. In this diff rendering, adjacent near-duplicate
// lines are the before/after clang-format spellings of the same declaration.
namespace vChewing {
class CNSLM: public UserPhrasesLM
namespace vChewing
{
public:
// Always returns false for this model.
virtual bool allowConsolidation() override {
class CNSLM : public UserPhrasesLM
{
public:
virtual bool allowConsolidation() override
{
return false;
}
// Always returns the constant -11.0 for this model.
virtual float overridedValue() override {
virtual float overridedValue() override
{
return -11.0;
}
};
}
} // namespace vChewing
#endif

View File

@ -1,44 +1,54 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SYMBOLLM_H
#define SYMBOLLM_H
#include <string>
#include <map>
#include <iostream>
#include "LanguageModel.h"
#include "UserPhrasesLM.h"
#include <iostream>
#include <map>
#include <string>
// SymbolLM: a UserPhrasesLM subclass that only overrides two virtual hooks,
// allowConsolidation() and overridedValue().
// NOTE(review): how the base class consumes these hooks is not visible here —
// confirm in UserPhrasesLM. In this diff rendering, adjacent near-duplicate
// lines are the before/after clang-format spellings of the same declaration.
namespace vChewing {
class SymbolLM: public UserPhrasesLM
namespace vChewing
{
public:
// Always returns false for this model.
virtual bool allowConsolidation() override {
class SymbolLM : public UserPhrasesLM
{
public:
virtual bool allowConsolidation() override
{
return false;
}
// Always returns the constant -13.0 for this model.
virtual float overridedValue() override {
virtual float overridedValue() override
{
return -13.0;
}
};
}
} // namespace vChewing
#endif

View File

@ -1,44 +1,54 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef USERSYMBOLLM_H
#define USERSYMBOLLM_H
#include <string>
#include <map>
#include <iostream>
#include "LanguageModel.h"
#include "UserPhrasesLM.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing {
class UserSymbolLM: public UserPhrasesLM
namespace vChewing
{
public:
virtual bool allowConsolidation() override {
class UserSymbolLM : public UserPhrasesLM
{
public:
virtual bool allowConsolidation() override
{
return true;
}
virtual float overridedValue() override {
virtual float overridedValue() override
{
return -12.0;
}
};
}
} // namespace vChewing
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "ParselessLM.h"
@ -26,29 +33,36 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <memory>
// Destructor: delegates all cleanup to close().
vChewing::ParselessLM::~ParselessLM()
{
    close();
}
// Returns true while a data buffer is attached, i.e. data_ is non-null.
bool vChewing::ParselessLM::isLoaded()
{
    return data_ != nullptr;
}
bool vChewing::ParselessLM::open(const std::string_view& path)
bool vChewing::ParselessLM::open(const std::string_view &path)
{
if (data_) {
if (data_)
{
return false;
}
fd_ = ::open(path.data(), O_RDONLY);
if (fd_ == -1) {
if (fd_ == -1)
{
return false;
}
struct stat sb;
if (fstat(fd_, &sb) == -1) {
if (fstat(fd_, &sb) == -1)
{
::close(fd_);
fd_ = -1;
return false;
@ -57,21 +71,22 @@ bool vChewing::ParselessLM::open(const std::string_view& path)
length_ = static_cast<size_t>(sb.st_size);
data_ = mmap(NULL, length_, PROT_READ, MAP_SHARED, fd_, 0);
if (data_ == nullptr) {
if (data_ == nullptr)
{
::close(fd_);
fd_ = -1;
length_ = 0;
return false;
}
db_ = std::unique_ptr<ParselessPhraseDB>(new ParselessPhraseDB(
static_cast<char*>(data_), length_));
db_ = std::unique_ptr<ParselessPhraseDB>(new ParselessPhraseDB(static_cast<char *>(data_), length_));
return true;
}
void vChewing::ParselessLM::close()
{
if (data_ != nullptr) {
if (data_ != nullptr)
{
munmap(data_, length_);
::close(fd_);
fd_ = -1;
@ -80,55 +95,61 @@ void vChewing::ParselessLM::close()
}
}
// This model provides no bigrams: both keys are ignored and the result is
// always an empty vector.
const std::vector<Gramambular::Bigram> vChewing::ParselessLM::bigramsForKeys(const std::string &preceedingKey,
                                                                             const std::string &key)
{
    // Parameters are intentionally unused.
    (void)preceedingKey;
    (void)key;
    return {};
}
const std::vector<Gramambular::Unigram>
vChewing::ParselessLM::unigramsForKey(const std::string& key)
const std::vector<Gramambular::Unigram> vChewing::ParselessLM::unigramsForKey(const std::string &key)
{
if (db_ == nullptr) {
if (db_ == nullptr)
{
return std::vector<Gramambular::Unigram>();
}
std::vector<Gramambular::Unigram> results;
for (const auto& row : db_->findRows(key + " ")) {
for (const auto &row : db_->findRows(key + " "))
{
Gramambular::Unigram unigram;
// Move ahead until we encounter the first space. This is the key.
auto it = row.begin();
while (it != row.end() && *it != ' ') {
while (it != row.end() && *it != ' ')
{
++it;
}
unigram.keyValue.key = std::string(row.begin(), it);
// Read past the space.
if (it != row.end()) {
if (it != row.end())
{
++it;
}
if (it != row.end()) {
if (it != row.end())
{
// Now it is the start of the value portion.
auto value_begin = it;
// Move ahead until we encounter the second space. This is the
// value.
while (it != row.end() && *it != ' ') {
while (it != row.end() && *it != ' ')
{
++it;
}
unigram.keyValue.value = std::string(value_begin, it);
}
// Read past the space. The remainder, if it exists, is the score.
if (it != row.end()) {
if (it != row.end())
{
++it;
}
if (it != row.end()) {
if (it != row.end())
{
unigram.score = std::stod(std::string(it, row.end()));
}
results.push_back(unigram);
@ -136,9 +157,10 @@ vChewing::ParselessLM::unigramsForKey(const std::string& key)
return results;
}
bool vChewing::ParselessLM::hasUnigramsForKey(const std::string& key)
bool vChewing::ParselessLM::hasUnigramsForKey(const std::string &key)
{
if (db_ == nullptr) {
if (db_ == nullptr)
{
return false;
}

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SOURCE_ENGINE_PARSELESSLM_H_
@ -27,25 +34,26 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "LanguageModel.h"
#include "ParselessPhraseDB.h"
namespace vChewing {
namespace vChewing
{
class ParselessLM : public Gramambular::LanguageModel {
public:
class ParselessLM : public Gramambular::LanguageModel
{
public:
~ParselessLM() override;
bool isLoaded();
bool open(const std::string_view& path);
bool open(const std::string_view &path);
void close();
const std::vector<Gramambular::Bigram> bigramsForKeys(
const std::string& preceedingKey, const std::string& key) override;
const std::vector<Gramambular::Unigram> unigramsForKey(
const std::string& key) override;
bool hasUnigramsForKey(const std::string& key) override;
const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey,
const std::string &key) override;
const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key) override;
bool hasUnigramsForKey(const std::string &key) override;
private:
private:
int fd_ = -1;
void* data_ = nullptr;
void *data_ = nullptr;
size_t length_ = 0;
std::unique_ptr<ParselessPhraseDB> db_;
};

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "ParselessPhraseDB.h"
@ -22,35 +29,35 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <cassert>
#include <cstring>
namespace vChewing {
namespace vChewing
{
// Stores the half-open byte range [buf, buf + length) that the lookups scan.
// Only the two pointers are kept; the bytes themselves are not copied, so the
// buffer must stay valid for as long as this object is used.
ParselessPhraseDB::ParselessPhraseDB(const char *buf, size_t length) : begin_(buf), end_(buf + length)
{
}
std::vector<std::string_view> ParselessPhraseDB::findRows(
const std::string_view& key)
std::vector<std::string_view> ParselessPhraseDB::findRows(const std::string_view &key)
{
std::vector<std::string_view> rows;
const char* ptr = findFirstMatchingLine(key);
if (ptr == nullptr) {
const char *ptr = findFirstMatchingLine(key);
if (ptr == nullptr)
{
return rows;
}
while (ptr + key.length() <= end_
&& memcmp(ptr, key.data(), key.length()) == 0) {
const char* eol = ptr;
while (ptr + key.length() <= end_ && memcmp(ptr, key.data(), key.length()) == 0)
{
const char *eol = ptr;
while (eol != end_ && *eol != '\n') {
while (eol != end_ && *eol != '\n')
{
++eol;
}
rows.emplace_back(ptr, eol - ptr);
if (eol == end_) {
if (eol == end_)
{
break;
}
@ -66,71 +73,83 @@ std::vector<std::string_view> ParselessPhraseDB::findRows(
// current line is actually the first matching line: if the previous line is
// less to the key and the current line starts exactly with the key, then
// the current line is the first matching line.
const char* ParselessPhraseDB::findFirstMatchingLine(
const std::string_view& key)
const char *ParselessPhraseDB::findFirstMatchingLine(const std::string_view &key)
{
if (key.empty()) {
if (key.empty())
{
return begin_;
}
const char* top = begin_;
const char* bottom = end_;
const char *top = begin_;
const char *bottom = end_;
while (top < bottom) {
const char* mid = top + (bottom - top) / 2;
const char* ptr = mid;
while (top < bottom)
{
const char *mid = top + (bottom - top) / 2;
const char *ptr = mid;
if (ptr != begin_) {
if (ptr != begin_)
{
--ptr;
}
while (ptr != begin_ && *ptr != '\n') {
while (ptr != begin_ && *ptr != '\n')
{
--ptr;
}
const char* prev = nullptr;
if (*ptr == '\n') {
const char *prev = nullptr;
if (*ptr == '\n')
{
prev = ptr;
++ptr;
}
// ptr is now in the "current" line we're interested in.
if (ptr + key.length() > end_) {
if (ptr + key.length() > end_)
{
// not enough data to compare at this point, bail.
break;
}
int current_cmp = memcmp(ptr, key.data(), key.length());
if (current_cmp > 0) {
if (current_cmp > 0)
{
bottom = mid - 1;
continue;
}
if (current_cmp < 0) {
if (current_cmp < 0)
{
top = mid + 1;
continue;
}
if (!prev) {
if (!prev)
{
return ptr;
}
// Move the prev so that it reaches the previous line.
if (prev != begin_) {
if (prev != begin_)
{
--prev;
}
while (prev != begin_ && *prev != '\n') {
while (prev != begin_ && *prev != '\n')
{
--prev;
}
if (*prev == '\n') {
if (*prev == '\n')
{
++prev;
}
int prev_cmp = memcmp(prev, key.data(), key.length());
// This is the first occurrence.
if (prev_cmp < 0 && current_cmp == 0) {
if (prev_cmp < 0 && current_cmp == 0)
{
return ptr;
}

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SOURCE_ENGINE_PARSELESSPHRASEDB_H_
@ -24,28 +31,29 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <string>
#include <vector>
namespace vChewing
{

// Defines phrase database that consists of (key, value, score) rows that are
// pre-sorted by the byte value of the keys. It is way faster than FastLM
// because it does not need to parse anything. Instead, it relies on the fact
// that the database is already sorted, and binary search is used to find the
// rows.
class ParselessPhraseDB
{
  public:
    // Wraps the bytes in [buf, buf + length). The buffer is referenced, not
    // copied, so it must outlive this object and every view it hands out.
    ParselessPhraseDB(const char *buf, size_t length);

    // Find the rows that match the key. Note that prefix match is used. If you
    // need exact match, the key will need to have a delimiter (usually a space)
    // at the end. Each returned view points into the wrapped buffer and spans
    // one row without its trailing newline.
    std::vector<std::string_view> findRows(const std::string_view &key);

    // Returns a pointer to the start of the first row that begins with `key`.
    // An empty key yields the start of the buffer. findRows() treats a nullptr
    // result as "no matching row".
    const char *findFirstMatchingLine(const std::string_view &key);

  private:
    const char *begin_; // first byte of the wrapped buffer
    const char *end_;   // one past the last byte of the wrapped buffer
};

} // namespace vChewing

View File

@ -1,48 +1,56 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef PHRASEREPLACEMENTMAP_H
#define PHRASEREPLACEMENTMAP_H
#include <string>
#include <map>
#include <iostream>
#include <map>
#include <string>
namespace vChewing
{

/// Key-to-replacement lookup table loaded from a memory-mapped file.
///
/// NOTE(review): the object holds a raw file descriptor and a raw mapping but
/// does not disable copying; copying an opened instance looks unsafe (both
/// copies would release the same resources) -- confirm no caller copies it.
class PhraseReplacementMap
{
  public:
    PhraseReplacementMap();

    /// Calls close() if a mapping is still attached.
    ~PhraseReplacementMap();

    /// Maps the file at `path` and fills the lookup table from its contents.
    /// Returns false if a mapping is already attached or the file cannot be
    /// opened or mapped.
    bool open(const char *path);

    /// Releases the mapping and file descriptor and clears the lookup table.
    void close();

    /// Returns the replacement stored for `key`, or an empty string when the
    /// key is not present.
    const std::string valueForKey(const std::string &key);

  protected:
    /// Lookup table. NOTE(review): the views presumably point into the mapped
    /// file (`data`) and so are only valid while it stays mapped -- confirm
    /// against KeyValueBlobReader.
    std::map<std::string_view, std::string_view> keyValueMap;
    int fd;        ///< descriptor of the opened file; -1 after construction
    void *data;    ///< start of the mapping; null when nothing is mapped
    size_t length; ///< size of the mapping in bytes
};

} // namespace vChewing
#endif

View File

@ -1,55 +1,62 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "PhraseReplacementMap.h"
#include "vChewing-Swift.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <fstream>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <syslog.h>
#include <unistd.h>
#include "KeyValueBlobReader.h"
#include "LMConsolidator.h"
namespace vChewing {
namespace vChewing
{
using std::string;
// Starts in the "nothing mapped" state: invalid descriptor, null mapping,
// zero length.
PhraseReplacementMap::PhraseReplacementMap() : fd(-1), data(nullptr), length(0)
{
}
// Releases the mapping on destruction, but only if one is still attached.
PhraseReplacementMap::~PhraseReplacementMap()
{
    if (data != nullptr)
    {
        close();
    }
}
bool PhraseReplacementMap::open(const char *path)
{
if (data) {
if (data)
{
return false;
}
@ -57,13 +64,15 @@ bool PhraseReplacementMap::open(const char *path)
LMConsolidator::ConsolidateContent(path, true);
fd = ::open(path, O_RDONLY);
if (fd == -1) {
if (fd == -1)
{
printf("open:: file not exist");
return false;
}
struct stat sb;
if (fstat(fd, &sb) == -1) {
if (fstat(fd, &sb) == -1)
{
printf("open:: cannot open file");
return false;
}
@ -71,21 +80,25 @@ bool PhraseReplacementMap::open(const char *path)
length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
if (!data) {
if (!data)
{
::close(fd);
return false;
}
KeyValueBlobReader reader(static_cast<char*>(data), length);
KeyValueBlobReader reader(static_cast<char *>(data), length);
KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
{
keyValueMap[keyValue.key] = keyValue.value;
}
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
if (state == KeyValueBlobReader::State::ERROR) {
if (state == KeyValueBlobReader::State::ERROR)
{
// close();
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n");
// return false;
}
return true;
@ -93,7 +106,8 @@ bool PhraseReplacementMap::open(const char *path)
void PhraseReplacementMap::close()
{
if (data) {
if (data)
{
munmap(data, length);
::close(fd);
data = 0;
@ -102,15 +116,15 @@ void PhraseReplacementMap::close()
keyValueMap.clear();
}
const std::string PhraseReplacementMap::valueForKey(const std::string& key)
const std::string PhraseReplacementMap::valueForKey(const std::string &key)
{
auto iter = keyValueMap.find(key);
if (iter != keyValueMap.end()) {
if (iter != keyValueMap.end())
{
const std::string_view v = iter->second;
return {v.data(), v.size()};
}
return string("");
}
}

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "UserOverrideModel.h"
@ -23,88 +30,84 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <cmath>
#include <sstream>
namespace vChewing {
namespace vChewing
{
// About 20 generations.
static const double DecayThreshould = 1.0 / 1048576.0;
static double Score(size_t eventCount,
size_t totalCount,
double eventTimestamp,
double timestamp,
double lambda);
static bool IsEndingPunctuation(const std::string& value);
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex);
static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda);
static bool IsEndingPunctuation(const std::string &value);
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex);
UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant)
: m_capacity(capacity) {
// Builds a model that tracks at most `capacity` context keys; the assert below requires capacity > 0.
// `decayConstant` acts as a half-life expressed in the same unit as the timestamps given to
// observe()/suggest(): the stored exponent is log(0.5) / decayConstant, and Score() applies the
// factor exp(age * exponent), which halves every `decayConstant` units of age.
// NOTE(review): decayConstant is not validated here; a value of 0 would divide by zero — confirm
// that callers always pass a positive value.
UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) : m_capacity(capacity)
{
assert(m_capacity > 0);
m_decayExponent = log(0.5) / decayConstant;
}
void UserOverrideModel::observe(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex,
const std::string& candidate,
double timestamp) {
// Records that the user chose `candidate` in the context identified by walkedNodes/cursorIndex.
// The context is reduced to a string key by WalkedNodesToKey(). A key that is not yet tracked gets a
// fresh Observation placed at the front of m_lruList and indexed in m_lruMap; if that makes the list
// longer than m_capacity, the entry at the back of the list is removed from both containers.
// A key that is already tracked is moved to the front of the list and updated in place.
void UserOverrideModel::observe(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex,
const std::string &candidate, double timestamp)
{
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
auto mapIter = m_lruMap.find(key);
// Key not tracked yet: build the entry, update it, then push a copy to the front of the list.
if (mapIter == m_lruMap.end()) {
if (mapIter == m_lruMap.end())
{
auto keyValuePair = KeyObservationPair(key, Observation());
Observation& observation = keyValuePair.second;
Observation &observation = keyValuePair.second;
observation.update(candidate, timestamp);
m_lruList.push_front(keyValuePair);
auto listIter = m_lruList.begin();
auto lruKeyValue = std::pair<std::string,
std::list<KeyObservationPair>::iterator>(key, listIter);
auto lruKeyValue = std::pair<std::string, std::list<KeyObservationPair>::iterator>(key, listIter);
m_lruMap.insert(lruKeyValue);
// Over capacity: drop the entry at the back, i.e. the one observed least recently.
if (m_lruList.size() > m_capacity) {
if (m_lruList.size() > m_capacity)
{
auto lastKeyValuePair = m_lruList.end();
--lastKeyValuePair;
m_lruMap.erase(lastKeyValuePair->first);
m_lruList.pop_back();
}
} else {
}
else
{
// Key already tracked: splice relinks the existing node to the front of the same list, so the
// iterator stored in m_lruMap remains usable.
auto listIter = mapIter->second;
m_lruList.splice(m_lruList.begin(), m_lruList, listIter);
auto& keyValuePair = *listIter;
Observation& observation = keyValuePair.second;
auto &keyValuePair = *listIter;
Observation &observation = keyValuePair.second;
observation.update(candidate, timestamp);
}
}
std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex,
double timestamp) {
// Looks up the context key for walkedNodes/cursorIndex and returns the recorded override whose
// Score() at `timestamp` is highest. Returns an empty string when the key is not tracked, and the
// initially empty `candidate` is returned unchanged when no override scores above 0.
// Unlike observe(), the visible code does not reorder m_lruList on lookup.
// NOTE(review): at least one line (presumably the brace closing the loop) is elided by the diff
// hunk boundary just before `return candidate;`.
std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex,
double timestamp)
{
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
auto mapIter = m_lruMap.find(key);
if (mapIter == m_lruMap.end()) {
if (mapIter == m_lruMap.end())
{
return std::string();
}
auto listIter = mapIter->second;
auto& keyValuePair = *listIter;
const Observation& observation = keyValuePair.second;
auto &keyValuePair = *listIter;
const Observation &observation = keyValuePair.second;
std::string candidate;
double score = 0.0;
for (auto i = observation.overrides.begin();
i != observation.overrides.end();
++i) {
const Override& o = i->second;
double overrideScore = Score(o.count,
observation.count,
o.timestamp,
timestamp,
m_decayExponent);
if (overrideScore == 0.0) {
for (auto i = observation.overrides.begin(); i != observation.overrides.end(); ++i)
{
const Override &o = i->second;
double overrideScore = Score(o.count, observation.count, o.timestamp, timestamp, m_decayExponent);
// Score() returns exactly 0.0 for overrides that have decayed below the threshold; skip them.
if (overrideScore == 0.0)
{
continue;
}
// Strict comparison: among equal scores the override visited first in map order is kept.
if (overrideScore > score) {
if (overrideScore > score)
{
candidate = i->first;
score = overrideScore;
}
@ -112,21 +115,19 @@ std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor
return candidate;
}
void UserOverrideModel::Observation::update(const std::string& candidate,
double timestamp) {
// Registers one more selection of `candidate` in this context: bumps the observation-wide count,
// then bumps the per-candidate count and overwrites its timestamp with `timestamp`.
// overrides[candidate] default-constructs the Override (count 0, timestamp 0.0) on first use.
void UserOverrideModel::Observation::update(const std::string &candidate, double timestamp)
{
count++;
auto& o = overrides[candidate];
auto &o = overrides[candidate];
o.timestamp = timestamp;
o.count++;
}
static double Score(size_t eventCount,
size_t totalCount,
double eventTimestamp,
double timestamp,
double lambda) {
// Scores one override at time `timestamp`. The decay factor is exp((timestamp - eventTimestamp) *
// lambda); suggest() passes m_decayExponent (log(0.5) / decayConstant) as lambda, so the factor
// halves every decayConstant units of age. A factor below DecayThreshould (1 / 1048576, i.e. 2^-20)
// yields exactly 0.0; otherwise the result is prob * decay.
// NOTE(review): the line defining `prob` is elided by the diff hunk boundary — presumably it is
// eventCount divided by totalCount; confirm against the full file.
static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda)
{
double decay = exp((timestamp - eventTimestamp) * lambda);
if (decay < DecayThreshould) {
if (decay < DecayThreshould)
{
return 0.0;
}
@ -134,29 +135,31 @@ static double Score(size_t eventCount,
return prob * decay;
}
static bool IsEndingPunctuation(const std::string& value) {
return value == "" || value == "" || value== "" || value == "" ||
value == "" || value == "" || value== "" || value == "";
// Returns true when `value` equals any one of eight string literals.
// NOTE(review): in this copy all eight literals read as empty strings, which would make the test
// equivalent to value.empty(). The function name suggests distinct sentence-ending punctuation
// marks were intended and their (presumably non-ASCII) characters were lost in extraction —
// confirm against the repository file before relying on this text.
static bool IsEndingPunctuation(const std::string &value)
{
return value == "" || value == "" || value == "" || value == "" || value == "" || value == "" ||
value == "" || value == "";
}
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex) {
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex)
{
std::stringstream s;
std::vector<Gramambular::NodeAnchor> n;
size_t ll = 0;
for (std::vector<Gramambular::NodeAnchor>::const_iterator i = walkedNodes.begin();
i != walkedNodes.end();
++i) {
const auto& nn = *i;
for (std::vector<Gramambular::NodeAnchor>::const_iterator i = walkedNodes.begin(); i != walkedNodes.end(); ++i)
{
const auto &nn = *i;
n.push_back(nn);
ll += nn.spanningLength;
if (ll >= cursorIndex) {
if (ll >= cursorIndex)
{
break;
}
}
std::vector<Gramambular::NodeAnchor>::const_reverse_iterator r = n.rbegin();
if (r == n.rend()) {
if (r == n.rend())
{
return "";
}
@ -165,40 +168,44 @@ static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor>&
s.clear();
s.str(std::string());
if (r != n.rend()) {
if (r != n.rend())
{
std::string value = (*r).node->currentKeyValue().value;
if (IsEndingPunctuation(value)) {
if (IsEndingPunctuation(value))
{
s << "()";
r = n.rend();
} else {
s << "("
<< (*r).node->currentKeyValue().key
<< ","
<< value
<< ")";
}
else
{
s << "(" << (*r).node->currentKeyValue().key << "," << value << ")";
++r;
}
} else {
}
else
{
s << "()";
}
std::string prev = s.str();
s.clear();
s.str(std::string());
if (r != n.rend()) {
if (r != n.rend())
{
std::string value = (*r).node->currentKeyValue().value;
if (IsEndingPunctuation(value)) {
if (IsEndingPunctuation(value))
{
s << "()";
r = n.rend();
} else {
s << "("
<< (*r).node->currentKeyValue().key
<< ","
<< value
<< ")";
}
else
{
s << "(" << (*r).node->currentKeyValue().key << "," << value << ")";
++r;
}
} else {
}
else
{
s << "()";
}
std::string anterior = s.str();

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef USEROVERRIDEMODEL_H
@ -25,37 +32,41 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Gramambular.h"
namespace vChewing {
namespace vChewing
{
using namespace Gramambular;
class UserOverrideModel {
public:
class UserOverrideModel
{
public:
UserOverrideModel(size_t capacity, double decayConstant);
void observe(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex,
const std::string& candidate,
double timestamp);
void observe(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex,
const std::string &candidate, double timestamp);
std::string suggest(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
size_t cursorIndex,
double timestamp);
std::string suggest(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex, double timestamp);
private:
struct Override {
private:
// Per-candidate record inside an Observation: how many times the candidate was chosen (`count`)
// and the timestamp of its most recent selection (`timestamp`). Both start at zero.
struct Override
{
size_t count;
double timestamp;
Override() : count(0), timestamp(0.0) {}
Override() : count(0), timestamp(0.0)
{
}
};
struct Observation {
// Everything recorded for one context key: the total number of selections seen (`count`) and a
// map from candidate string to its Override record. update() adds one selection.
struct Observation
{
size_t count;
std::map<std::string, Override> overrides;
Observation() : count(0) {}
void update(const std::string& candidate, double timestamp);
Observation() : count(0)
{
}
void update(const std::string &candidate, double timestamp);
};
typedef std::pair<std::string, Observation> KeyObservationPair;
@ -69,4 +80,3 @@ private:
}; // namespace vChewing
#endif

View File

@ -1,35 +1,43 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef USERPHRASESLM_H
#define USERPHRASESLM_H
#include <string>
#include <map>
#include <iostream>
#include "LanguageModel.h"
#include <iostream>
#include <map>
#include <string>
namespace vChewing {
namespace vChewing
{
class UserPhrasesLM : public Gramambular::LanguageModel
{
public:
public:
UserPhrasesLM();
~UserPhrasesLM();
@ -38,31 +46,37 @@ public:
void close();
void dump();
virtual bool allowConsolidation() {
// Returns true in this class. UserPhrasesLM::open() consults it to decide whether to run
// LMConsolidator on the file before mapping it; being virtual, a subclass may override it.
virtual bool allowConsolidation()
{
return true;
}
virtual float overridedValue() {
// Returns the constant 0.0 in this class; virtual so a subclass may supply a different value.
// NOTE(review): where this value is consumed is not visible in this excerpt.
virtual float overridedValue()
{
return 0.0;
}
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key);
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const std::string& key);
virtual bool hasUnigramsForKey(const std::string& key);
protected:
struct Row {
Row(std::string_view& k, std::string_view& v) : key(k), value(v) {}
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey,
const std::string &key);
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
virtual bool hasUnigramsForKey(const std::string &key);
protected:
// One key/value entry of the phrase file, held as non-owning std::string_view members.
// NOTE(review): open() builds rows from a KeyValueBlobReader over the mmap'ed buffer, so these
// views presumably point into that mapping and must not be read after it is unmapped — confirm.
// The constructor takes non-const lvalue references, so it cannot be called with temporaries.
struct Row
{
Row(std::string_view &k, std::string_view &v) : key(k), value(v)
{
}
std::string_view key;
std::string_view value;
};
std::map<std::string_view, std::vector<Row>> keyRowMap;
int fd;
void *data;
size_t length;
};
}
} // namespace vChewing
#endif

View File

@ -1,53 +1,60 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "UserPhrasesLM.h"
#include "vChewing-Swift.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <fstream>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <syslog.h>
#include <unistd.h>
#include "KeyValueBlobReader.h"
#include "LMConsolidator.h"
namespace vChewing {
namespace vChewing
{
UserPhrasesLM::UserPhrasesLM()
: fd(-1)
, data(0)
, length(0)
// Starts in the "nothing loaded" state: no file descriptor (-1), null data pointer, zero length.
UserPhrasesLM::UserPhrasesLM() : fd(-1), data(0), length(0)
{
}
// Releases the loaded file, if any, by delegating to close(). A null `data` means nothing is loaded.
UserPhrasesLM::~UserPhrasesLM()
{
if (data) {
if (data)
{
close();
}
}
// Reports whether a file is currently loaded, using a non-null `data` pointer as the indicator.
// (The function's closing brace falls outside the diff hunk shown here.)
bool UserPhrasesLM::isLoaded()
{
if (data) {
if (data)
{
return true;
}
return false;
@ -55,23 +62,27 @@ bool UserPhrasesLM::isLoaded()
// Memory-maps the phrase file at `path` read-only and indexes its key/value pairs into keyRowMap.
// Returns false when a file is already loaded (data is non-null), when ::open fails, when fstat
// fails, or when the `!data` check after mmap fires. Otherwise returns true — including when the
// reader stops in the ERROR state, which is only logged (and only in debug mode).
// When allowConsolidation() is true, the file is first handed to LMConsolidator::FixEOF and
// LMConsolidator::ConsolidateContent.
bool UserPhrasesLM::open(const char *path)
{
if (data) {
if (data)
{
return false;
}
if (allowConsolidation()) {
if (allowConsolidation())
{
LMConsolidator::FixEOF(path);
LMConsolidator::ConsolidateContent(path, true);
}
fd = ::open(path, O_RDONLY);
if (fd == -1) {
if (fd == -1)
{
printf("open:: file not exist");
return false;
}
struct stat sb;
// NOTE(review): this failure path returns without ::close(fd), so the descriptor opened above
// appears to leak; the message also says "cannot open file" although it is fstat that failed.
if (fstat(fd, &sb) == -1) {
if (fstat(fd, &sb) == -1)
{
printf("open:: cannot open file");
return false;
}
@ -79,22 +90,27 @@ bool UserPhrasesLM::open(const char *path)
length = (size_t)sb.st_size;
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
// NOTE(review): POSIX mmap reports failure by returning MAP_FAILED, not a null pointer, so this
// check likely never fires on error and `data` would be left holding MAP_FAILED (for example
// for a zero-length file) — confirm and compare against MAP_FAILED instead.
if (!data) {
if (!data)
{
::close(fd);
return false;
}
KeyValueBlobReader reader(static_cast<char*>(data), length);
KeyValueBlobReader reader(static_cast<char *>(data), length);
KeyValueBlobReader::KeyValue keyValue;
KeyValueBlobReader::State state;
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading.
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
{
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF
// reading.
// Concretely: the map is keyed by keyValue.value, and each Row stores keyValue.value as its key and
// keyValue.key as its value.
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
}
// The following part could perhaps be turned into a switch that asks whether to apply a lenient
// policy to user phrases (letting the file through even if some lines are malformed).
if (state == KeyValueBlobReader::State::ERROR) {
if (state == KeyValueBlobReader::State::ERROR)
{
// close();
// NOTE(review): LOG_CONS is an openlog() option flag rather than a priority level; a priority such
// as LOG_ERR or LOG_DEBUG may have been intended as syslog's first argument — confirm.
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n");
if (mgrPrefs.isDebugModeEnabled)
syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n");
// return false;
}
return true;
@ -102,7 +118,8 @@ bool UserPhrasesLM::open(const char *path)
// Unloads the current file, if any: unmaps the buffer, closes the descriptor and nulls `data`.
// NOTE(review): the rest of this function lies outside the diff hunk shown here. Row holds
// string_views that presumably point into the mapping, so confirm that keyRowMap is cleared in the
// elided lines; otherwise its entries would dangle after munmap.
void UserPhrasesLM::close()
{
if (data) {
if (data)
{
munmap(data, length);
::close(fd);
data = 0;
@ -113,26 +130,31 @@ void UserPhrasesLM::close()
// Debug helper: writes every stored row to std::cerr as "<key> <value>", one row per line,
// visiting keyRowMap in its key order and each key's rows in insertion order.
void UserPhrasesLM::dump()
{
for (const auto& entry : keyRowMap) {
const std::vector<Row>& rows = entry.second;
for (const auto& row : rows) {
for (const auto &entry : keyRowMap)
{
const std::vector<Row> &rows = entry.second;
for (const auto &row : rows)
{
std::cerr << row.key << " " << row.value << "\n";
}
}
}
const std::vector<Gramambular::Bigram> UserPhrasesLM::bigramsForKeys(const std::string& preceedingKey, const std::string& key)
// This model provides no bigrams: both arguments are ignored and an empty vector is always returned.
const std::vector<Gramambular::Bigram> UserPhrasesLM::bigramsForKeys(const std::string &preceedingKey,
const std::string &key)
{
return std::vector<Gramambular::Bigram>();
}
const std::vector<Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std::string& key)
// Returns the unigrams stored under `key`, or an empty vector when the key is absent.
// For each Row found, a Unigram is created and its keyValue.key / keyValue.value are copied from
// the row.
// NOTE(review): the remaining lines of the loop are elided by the diff hunk boundary — presumably
// they assign a score and append `g` to `v`; confirm against the full file.
const std::vector<Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std::string &key)
{
std::vector<Gramambular::Unigram> v;
auto iter = keyRowMap.find(key);
if (iter != keyRowMap.end()) {
const std::vector<Row>& rows = iter->second;
for (const auto& row : rows) {
if (iter != keyRowMap.end())
{
const std::vector<Row> &rows = iter->second;
for (const auto &row : rows)
{
Gramambular::Unigram g;
g.keyValue.key = row.key;
g.keyValue.value = row.value;
@ -144,9 +166,9 @@ const std::vector<Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std:
return v;
}
bool UserPhrasesLM::hasUnigramsForKey(const std::string& key)
// Returns true when keyRowMap contains an entry for `key`.
bool UserPhrasesLM::hasUnigramsForKey(const std::string &key)
{
return keyRowMap.find(key) != keyRowMap.end();
}
}; // namespace vChewing
}; // namespace vChewing

View File

@ -1,24 +1,31 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#import <Foundation/Foundation.h>
#import "KeyHandler.h"
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
@ -33,8 +40,13 @@ NS_ASSUME_NONNULL_BEGIN
+ (BOOL)checkIfSpecifiedUserDataFolderValid:(NSString *)folderPath;
+ (NSString *)dataFolderPath:(bool)isDefaultFolder NS_SWIFT_NAME(dataFolderPath(isDefaultFolder:));
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase inputMode:(InputMode)mode key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:));
+ (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating areWeDeleting:(BOOL)areWeDeleting;
// Class method taking a phrase, an input mode and a key, and returning a BOOL — per its name,
// whether that user phrase already exists (the implementation is not part of this header).
// Exposed to Swift as checkIfUserPhraseExist(userPhrase:mode:key:).
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase
inputMode:(InputMode)mode
key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:));
// Class method taking a phrase, an input mode and two flags (duplicating / deleting).
// NOTE(review): the BOOL result presumably reports whether the write succeeded, and the exact
// effect of each flag is defined in the implementation, which is not visible here — confirm.
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
inputMode:(InputMode)mode
areWeDuplicating:(BOOL)areWeDuplicating
areWeDeleting:(BOOL)areWeDeleting;
+ (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled;
+ (void)setCNSEnabled:(BOOL)cnsEnabled;
+ (void)setSymbolEnabled:(BOOL)symbolEnabled;

View File

@ -1,26 +1,33 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#import "mgrLangModel.h"
#import "LMConsolidator.h"
#import "mgrLangModel_Privates.h"
#import "vChewing-Swift.h"
#import "LMConsolidator.h"
// Tuning constants for the user-override model.
// NOTE(review): presumably these are passed to vChewing::UserOverrideModel(capacity, decayConstant)
// elsewhere in this file; if so, 5400.0 is a half-life in timestamp units (90 minutes if the
// timestamps are in seconds) — confirm at the construction site.
static const int kUserOverrideModelCapacity = 500;
static const double kObservedOverrideHalflife = 5400.0;
@ -54,70 +61,90 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
// Loads every bundled data set for both language models (gLangModelCHT, then gLangModelCHS).
// Each step is guarded by the matching is...Loaded() check, so data already loaded is not reloaded.
// The main model comes from "data-cht" / "data-chs"; both models load the same "data-zhuyinwen",
// "data-symbols" and "char-kanji-cns" files, located via +specifyBundleDataPath:.
// NOTE(review): the two halves repeat the per-mode logic of +loadDataModel: — the code could be
// shared between the two methods.
+ (void)loadDataModels
{
if (!gLangModelCHT.isDataModelLoaded()) {
if (!gLangModelCHT.isDataModelLoaded())
{
LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
}
if (!gLangModelCHT.isMiscDataLoaded()) {
gLangModelCHT.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]);
if (!gLangModelCHT.isMiscDataLoaded())
{
gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
}
if (!gLangModelCHT.isSymbolDataLoaded()){
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]);
if (!gLangModelCHT.isSymbolDataLoaded())
{
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
}
if (!gLangModelCHT.isCNSDataLoaded()){
gLangModelCHT.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]);
if (!gLangModelCHT.isCNSDataLoaded())
{
gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
}
// -----------------
if (!gLangModelCHS.isDataModelLoaded()) {
if (!gLangModelCHS.isDataModelLoaded())
{
LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
}
if (!gLangModelCHS.isMiscDataLoaded()) {
gLangModelCHS.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]);
if (!gLangModelCHS.isMiscDataLoaded())
{
gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
}
if (!gLangModelCHS.isSymbolDataLoaded()){
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]);
if (!gLangModelCHS.isSymbolDataLoaded())
{
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
}
if (!gLangModelCHS.isCNSDataLoaded()){
gLangModelCHS.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]);
if (!gLangModelCHS.isCNSDataLoaded())
{
gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
}
}
// Loads the bundled data sets for a single input mode. When `mode` equals imeModeCHT the CHT model
// is filled; when it equals imeModeCHS the CHS model is filled; any other value does nothing.
// Each load is guarded by the matching is...Loaded() check, so repeated calls do not reload data.
+ (void)loadDataModel:(InputMode)mode
{
if ([mode isEqualToString:imeModeCHT]) {
if (!gLangModelCHT.isDataModelLoaded()) {
if ([mode isEqualToString:imeModeCHT])
{
if (!gLangModelCHT.isDataModelLoaded())
{
LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
}
if (!gLangModelCHT.isMiscDataLoaded()) {
gLangModelCHT.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]);
if (!gLangModelCHT.isMiscDataLoaded())
{
gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
}
if (!gLangModelCHT.isSymbolDataLoaded()){
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]);
if (!gLangModelCHT.isSymbolDataLoaded())
{
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
}
if (!gLangModelCHT.isCNSDataLoaded()){
gLangModelCHT.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]);
if (!gLangModelCHT.isCNSDataLoaded())
{
gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
}
}
if ([mode isEqualToString:imeModeCHS]) {
if (!gLangModelCHS.isDataModelLoaded()) {
if ([mode isEqualToString:imeModeCHS])
{
if (!gLangModelCHS.isDataModelLoaded())
{
LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
}
if (!gLangModelCHS.isMiscDataLoaded()) {
gLangModelCHS.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]);
if (!gLangModelCHS.isMiscDataLoaded())
{
gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
}
if (!gLangModelCHS.isSymbolDataLoaded()){
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]);
if (!gLangModelCHS.isSymbolDataLoaded())
{
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
}
if (!gLangModelCHS.isCNSDataLoaded()){
gLangModelCHS.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]);
if (!gLangModelCHS.isCNSDataLoaded())
{
gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
}
}
}
// Loads user-supplied data into both language models: for each of imeModeCHT and imeModeCHS it
// passes the user-phrases path and the excluded-phrases path to loadUserPhrases(), then passes the
// user symbol data path to loadUserSymbolData(). Unlike the bundled-data loaders in this file,
// these calls are not guarded by an "already loaded" check.
+ (void)loadUserPhrases
{
gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String], [[self excludedPhrasesDataPath:imeModeCHT] UTF8String]);
gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String], [[self excludedPhrasesDataPath:imeModeCHS] UTF8String]);
gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String],
[[self excludedPhrasesDataPath:imeModeCHT] UTF8String]);
gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String],
[[self excludedPhrasesDataPath:imeModeCHS] UTF8String]);
gLangModelCHT.loadUserSymbolData([[self userSymbolDataPath:imeModeCHT] UTF8String]);
gLangModelCHS.loadUserSymbolData([[self userSymbolDataPath:imeModeCHS] UTF8String]);
}
@ -139,19 +166,26 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
NSString *folderPath = [self dataFolderPath:false];
BOOL isFolder = NO;
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
if (folderExist && !isFolder) {
if (folderExist && !isFolder)
{
NSError *error = nil;
[[NSFileManager defaultManager] removeItemAtPath:folderPath error:&error];
if (error) {
if (error)
{
NSLog(@"Failed to remove folder %@", error);
return NO;
}
folderExist = NO;
}
if (!folderExist) {
if (!folderExist)
{
NSError *error = nil;
[[NSFileManager defaultManager] createDirectoryAtPath:folderPath withIntermediateDirectories:YES attributes:nil error:&error];
if (error) {
[[NSFileManager defaultManager] createDirectoryAtPath:folderPath
withIntermediateDirectories:YES
attributes:nil
error:&error];
if (error)
{
NSLog(@"Failed to create folder %@", error);
return NO;
}
@ -163,26 +197,34 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
{
BOOL isFolder = NO;
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
if ((folderExist && !isFolder) || (!folderExist)) {
if ((folderExist && !isFolder) || (!folderExist))
{
return NO;
}
return YES;
}
+ (BOOL)ensureFileExists:(NSString *)filePath populateWithTemplate:(NSString *)templateBasename extension:(NSString *)ext
+ (BOOL)ensureFileExists:(NSString *)filePath
populateWithTemplate:(NSString *)templateBasename
extension:(NSString *)ext
{
if (![[NSFileManager defaultManager] fileExistsAtPath:filePath]) {
if (![[NSFileManager defaultManager] fileExistsAtPath:filePath])
{
NSURL *templateURL = [[NSBundle mainBundle] URLForResource:templateBasename withExtension:ext];
NSData *templateData;
if (templateURL) {
if (templateURL)
{
templateData = [NSData dataWithContentsOfURL:templateURL];
} else {
}
else
{
templateData = [@"" dataUsingEncoding:NSUTF8StringEncoding];
}
BOOL result = [templateData writeToFile:filePath atomically:YES];
if (!result) {
if (!result)
{
NSLog(@"Failed to write file");
return NO;
}
@ -192,36 +234,76 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
// Verifies that the user data folder and every per-mode user data file are
// available. Each file goes through
// +ensureFileExists:populateWithTemplate:extension: exactly once, in this
// order: user phrases, associated phrases, excluded phrases, phrase
// replacements, user symbols.
//
// Returns NO as soon as the folder check or any file check fails, YES
// otherwise.
+ (BOOL)checkIfUserLanguageModelFilesExist
{
    if (![self checkIfUserDataFolderExists])
        return NO;
    if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHS]
            populateWithTemplate:kUserDataTemplateName
                       extension:kTemplateExtension])
        return NO;
    if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHT]
            populateWithTemplate:kUserDataTemplateName
                       extension:kTemplateExtension])
        return NO;
    if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHS]
            populateWithTemplate:kUserAssDataTemplateName
                       extension:kTemplateExtension])
        return NO;
    if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHT]
            populateWithTemplate:kUserAssDataTemplateName
                       extension:kTemplateExtension])
        return NO;
    if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHS]
            populateWithTemplate:kExcludedPhrasesvChewingTemplateName
                       extension:kTemplateExtension])
        return NO;
    if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHT]
            populateWithTemplate:kExcludedPhrasesvChewingTemplateName
                       extension:kTemplateExtension])
        return NO;
    if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHS]
            populateWithTemplate:kPhraseReplacementTemplateName
                       extension:kTemplateExtension])
        return NO;
    if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHT]
            populateWithTemplate:kPhraseReplacementTemplateName
                       extension:kTemplateExtension])
        return NO;
    if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHT]
            populateWithTemplate:kUserSymbolDataTemplateName
                       extension:kTemplateExtension])
        return NO;
    if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHS]
            populateWithTemplate:kUserSymbolDataTemplateName
                       extension:kTemplateExtension])
        return NO;
    return YES;
}
// Tells whether `userPhrase` is already among the unigrams stored under `key`.
//
// userPhrase: the phrase text to look for.
// mode:       imeModeCHT queries gLangModelCHT; any other value queries
//             gLangModelCHS.
// key:        the key handed to unigramsForKey().
//
// Returns YES when at least one returned unigram has a value equal to
// `userPhrase`, NO otherwise.
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase
                     inputMode:(InputMode)mode
                           key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:))
{
    string unigramKey = string(key.UTF8String);
    vector<vChewing::Unigram> unigrams = [mode isEqualToString:imeModeCHT] ? gLangModelCHT.unigramsForKey(unigramKey)
                                                                           : gLangModelCHS.unigramsForKey(unigramKey);
    string userPhraseString = string(userPhrase.UTF8String);
    // Iterate by const reference: the unigrams are only inspected, so there
    // is no need to copy each one.
    for (const auto &unigram : unigrams)
    {
        if (unigram.keyValue.value == userPhraseString)
        {
            return YES;
        }
    }
    return NO;
}
+ (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating areWeDeleting:(BOOL)areWeDeleting
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
inputMode:(InputMode)mode
areWeDuplicating:(BOOL)areWeDuplicating
areWeDeleting:(BOOL)areWeDeleting
{
if (![self checkIfUserLanguageModelFilesExist]) {
if (![self checkIfUserLanguageModelFilesExist])
{
return NO;
}
@ -233,7 +315,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
// [currentMarkedPhrase appendString:@"\n"];
// }
[currentMarkedPhrase appendString:userPhrase];
if (areWeDuplicating && !areWeDeleting) {
if (areWeDuplicating && !areWeDeleting)
{
// Do not use ASCII characters to comment here.
// Otherwise, it will be scrambled by cnvHYPYtoBPMF module shipped in the vChewing Phrase Editor.
[currentMarkedPhrase appendString:@"\t#𝙾𝚟𝚎𝚛𝚛𝚒𝚍𝚎"];
@ -241,7 +324,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
[currentMarkedPhrase appendString:@"\n"];
NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path];
if (!writeFile) {
if (!writeFile)
{
return NO;
}
[writeFile seekToEndOfFile];
@ -249,12 +333,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
[writeFile writeData:data];
[writeFile closeFile];
// We enforce the format consolidation here, since the pragma header will let the UserPhraseLM bypasses the consolidating process on load.
// We enforce the format consolidation here, since the pragma header lets the UserPhraseLM bypass the
// consolidating process on load.
vChewing::LMConsolidator::ConsolidateContent([path UTF8String], false);
// We use FSEventStream to monitor the change of the user phrase folder,
// so we don't have to load data here unless FSEventStream is disabled by user.
if (!mgrPrefs.shouldAutoReloadUserDataFiles) {
if (!mgrPrefs.shouldAutoReloadUserDataFiles)
{
[self loadUserPhrases];
}
return YES;
@ -263,15 +349,21 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
+ (NSString *)dataFolderPath:(bool)isDefaultFolder
{
// 此處不能用「~」來取代當前使用者目錄名稱。不然的話,一旦輸入法被系統的沙箱干預的話,則反而會定位到沙箱目錄內。
NSString *appSupportPath = [NSFileManager.defaultManager URLsForDirectory:NSApplicationSupportDirectory inDomains:NSUserDomainMask][0].path;
NSString *appSupportPath = [NSFileManager.defaultManager URLsForDirectory:NSApplicationSupportDirectory
inDomains:NSUserDomainMask][0].path;
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"].stringByExpandingTildeInPath;
if (mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath == userDictPath || isDefaultFolder) {
if (mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath == userDictPath || isDefaultFolder)
{
return userDictPath;
}
if ([mgrPrefs ifSpecifiedUserDataPathExistsInPlist]) {
if ([self checkIfSpecifiedUserDataFolderValid:mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath]) {
if ([mgrPrefs ifSpecifiedUserDataPathExistsInPlist])
{
if ([self checkIfSpecifiedUserDataFolderValid:mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath])
{
return mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath;
} else {
}
else
{
[NSUserDefaults.standardUserDefaults removeObjectForKey:@"UserDataFolderSpecified"];
}
}
@ -286,13 +378,15 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
// Returns the path of the user symbol phrase file for `mode`, located inside
// the folder reported by [self dataFolderPath:false]. imeModeCHT selects the
// "-cht" file; any other mode selects the "-chs" file.
+ (NSString *)userSymbolDataPath:(InputMode)mode
{
    NSString *fileName =
        [mode isEqualToString:imeModeCHT] ? @"usersymbolphrases-cht.txt" : @"usersymbolphrases-chs.txt";
    return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
}
// Returns the path of the user associated-phrases file for `mode`, located
// inside the folder reported by [self dataFolderPath:false]. imeModeCHT
// selects the "-cht" file; any other mode selects the "-chs" file.
+ (NSString *)userAssociatedPhrasesDataPath:(InputMode)mode
{
    NSString *fileName =
        [mode isEqualToString:imeModeCHT] ? @"associatedPhrases-cht.txt" : @"associatedPhrases-chs.txt";
    return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
}
@ -304,11 +398,12 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
// Returns the path of the phrase-replacement file for `mode`, located inside
// the folder reported by [self dataFolderPath:false]. imeModeCHT selects the
// "-cht" file; any other mode selects the "-chs" file.
+ (NSString *)phraseReplacementDataPath:(InputMode)mode
{
    NSString *fileName =
        [mode isEqualToString:imeModeCHT] ? @"phrases-replacement-cht.txt" : @"phrases-replacement-chs.txt";
    return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
}
// Class-level accessor returning the address of the file-level gLangModelCHT
// instance.
+ (vChewing::LMInstantiator *)lmCHT
{
    return &gLangModelCHT;
}

View File

@ -1,33 +1,40 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#import "mgrLangModel.h"
#import "UserOverrideModel.h"
#import "LMInstantiator.h"
#import "UserOverrideModel.h"
#import "mgrLangModel.h"
NS_ASSUME_NONNULL_BEGIN
// Class extension of mgrLangModel exposing read-only, class-level pointers to
// the C++ language-model and user-override-model objects, one per input mode
// (CHT / CHS).
@interface mgrLangModel ()
@property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHT;
@property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHS;
@property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS;
@property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT;
@end
NS_ASSUME_NONNULL_END

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BIGRAM_H_
@ -24,69 +31,80 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "KeyValuePair.h"
namespace Gramambular {
class Bigram {
public:
namespace Gramambular
{
class Bigram
{
public:
Bigram();
KeyValuePair preceedingKeyValue;
KeyValuePair keyValue;
double score;
bool operator==(const Bigram& another) const;
bool operator<(const Bigram& another) const;
bool operator==(const Bigram &another) const;
bool operator<(const Bigram &another) const;
};
inline std::ostream& operator<<(std::ostream& stream, const Bigram& gram) {
inline std::ostream &operator<<(std::ostream &stream, const Bigram &gram)
{
std::streamsize p = stream.precision();
stream.precision(6);
stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << ","
<< gram.score << ")";
stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," << gram.score << ")";
stream.precision(p);
return stream;
}
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<Bigram>& grams) {
inline std::ostream &operator<<(std::ostream &stream, const std::vector<Bigram> &grams)
{
stream << "[" << grams.size() << "]=>{";
size_t index = 0;
for (std::vector<Bigram>::const_iterator gi = grams.begin();
gi != grams.end(); ++gi, ++index) {
for (std::vector<Bigram>::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index)
{
stream << index << "=>";
stream << *gi;
if (gi + 1 != grams.end()) {
if (gi + 1 != grams.end())
{
stream << ",";
}
}
stream << "}";
return stream;
}
inline Bigram::Bigram() : score(0.0) {}
inline bool Bigram::operator==(const Bigram& another) const {
return preceedingKeyValue == another.preceedingKeyValue &&
keyValue == another.keyValue && score == another.score;
inline Bigram::Bigram() : score(0.0)
{
}
inline bool Bigram::operator<(const Bigram& another) const {
if (preceedingKeyValue < another.preceedingKeyValue) {
inline bool Bigram::operator==(const Bigram &another) const
{
return preceedingKeyValue == another.preceedingKeyValue && keyValue == another.keyValue && score == another.score;
}
inline bool Bigram::operator<(const Bigram &another) const
{
if (preceedingKeyValue < another.preceedingKeyValue)
{
return true;
} else if (preceedingKeyValue == another.preceedingKeyValue) {
if (keyValue < another.keyValue) {
}
else if (preceedingKeyValue == another.preceedingKeyValue)
{
if (keyValue < another.keyValue)
{
return true;
} else if (keyValue == another.keyValue) {
}
else if (keyValue == another.keyValue)
{
return score < another.score;
}
return false;
}
return false;
}
} // namespace Gramambular
} // namespace Gramambular
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef BLOCKREADINGBUILDER_H_
@ -26,157 +33,186 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Grid.h"
#include "LanguageModel.h"
namespace Gramambular {
namespace Gramambular
{
class BlockReadingBuilder {
public:
explicit BlockReadingBuilder(LanguageModel* lm);
class BlockReadingBuilder
{
public:
explicit BlockReadingBuilder(LanguageModel *lm);
void clear();
size_t length() const;
size_t cursorIndex() const;
void setCursorIndex(size_t newIndex);
void insertReadingAtCursor(const std::string& reading);
bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete
void insertReadingAtCursor(const std::string &reading);
bool deleteReadingBeforeCursor(); // backspace
bool deleteReadingAfterCursor(); // delete
bool removeHeadReadings(size_t count);
void setJoinSeparator(const std::string& separator);
void setJoinSeparator(const std::string &separator);
const std::string joinSeparator() const;
std::vector<std::string> readings() const;
Grid& grid();
protected:
Grid &grid();
protected:
void build();
static const std::string Join(std::vector<std::string>::const_iterator begin,
std::vector<std::string>::const_iterator end,
const std::string& separator);
std::vector<std::string>::const_iterator end, const std::string &separator);
// 規定最多可以組成的詞的字數上限為 10
static const size_t MaximumBuildSpanLength = 10;
size_t m_cursorIndex;
std::vector<std::string> m_readings;
Grid m_grid;
LanguageModel* m_LM;
LanguageModel *m_LM;
std::string m_joinSeparator;
};
// Stores the language model pointer and starts with the cursor at index 0.
// The initializers are listed in member declaration order (m_cursorIndex is
// declared before m_LM), which is the order they actually run in.
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *lm) : m_cursorIndex(0), m_LM(lm)
{
}
inline void BlockReadingBuilder::clear() {
inline void BlockReadingBuilder::clear()
{
m_cursorIndex = 0;
m_readings.clear();
m_grid.clear();
}
// Number of readings currently held.
inline size_t BlockReadingBuilder::length() const
{
    return m_readings.size();
}
// Current cursor position within the readings.
inline size_t BlockReadingBuilder::cursorIndex() const
{
    return m_cursorIndex;
}
inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) {
inline void BlockReadingBuilder::setCursorIndex(size_t newIndex)
{
m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex;
}
inline void BlockReadingBuilder::insertReadingAtCursor(
const std::string& reading) {
inline void BlockReadingBuilder::insertReadingAtCursor(const std::string &reading)
{
m_readings.insert(m_readings.begin() + m_cursorIndex, reading);
m_grid.expandGridByOneAtLocation(m_cursorIndex);
build();
m_cursorIndex++;
}
inline std::vector<std::string> BlockReadingBuilder::readings() const {
inline std::vector<std::string> BlockReadingBuilder::readings() const
{
return m_readings;
}
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() {
if (!m_cursorIndex) {
inline bool BlockReadingBuilder::deleteReadingBeforeCursor()
{
if (!m_cursorIndex)
{
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex - 1,
m_readings.begin() + m_cursorIndex);
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex);
m_cursorIndex--;
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::deleteReadingAfterCursor() {
if (m_cursorIndex == m_readings.size()) {
inline bool BlockReadingBuilder::deleteReadingAfterCursor()
{
if (m_cursorIndex == m_readings.size())
{
return false;
}
m_readings.erase(m_readings.begin() + m_cursorIndex,
m_readings.begin() + m_cursorIndex + 1);
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1);
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
build();
return true;
}
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
if (count > length()) {
inline bool BlockReadingBuilder::removeHeadReadings(size_t count)
{
if (count > length())
{
return false;
}
for (size_t i = 0; i < count; i++) {
if (m_cursorIndex) {
for (size_t i = 0; i < count; i++)
{
if (m_cursorIndex)
{
m_cursorIndex--;
}
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
m_grid.shrinkGridByOneAtLocation(0);
build();
}
return true;
}
inline void BlockReadingBuilder::setJoinSeparator(
const std::string& separator) {
inline void BlockReadingBuilder::setJoinSeparator(const std::string &separator)
{
m_joinSeparator = separator;
}
inline const std::string BlockReadingBuilder::joinSeparator() const {
inline const std::string BlockReadingBuilder::joinSeparator() const
{
return m_joinSeparator;
}
// Gives mutable access to the grid owned by this builder.
inline Grid &BlockReadingBuilder::grid()
{
    return m_grid;
}
inline void BlockReadingBuilder::build() {
if (!m_LM) {
inline void BlockReadingBuilder::build()
{
if (!m_LM)
{
return;
}
size_t begin = 0;
size_t end = m_cursorIndex + MaximumBuildSpanLength;
if (m_cursorIndex < MaximumBuildSpanLength) {
if (m_cursorIndex < MaximumBuildSpanLength)
{
begin = 0;
} else {
}
else
{
begin = m_cursorIndex - MaximumBuildSpanLength;
}
if (end > m_readings.size()) {
if (end > m_readings.size())
{
end = m_readings.size();
}
for (size_t p = begin; p < end; p++) {
for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) {
std::string combinedReading = Join(
m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q,
combinedReading)) {
for (size_t p = begin; p < end; p++)
{
for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++)
{
std::string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading))
{
std::vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
if (unigrams.size() > 0) {
if (unigrams.size() > 0)
{
Node n(combinedReading, unigrams, std::vector<Bigram>());
m_grid.insertNode(n, p, q);
}
@ -185,21 +221,22 @@ inline void BlockReadingBuilder::build() {
}
}
// Concatenates the strings in [begin, end), inserting `separator` between
// consecutive elements (never after the last one). An empty range yields an
// empty string.
inline const std::string BlockReadingBuilder::Join(std::vector<std::string>::const_iterator begin,
                                                   std::vector<std::string>::const_iterator end,
                                                   const std::string &separator)
{
    std::string result;
    for (std::vector<std::string>::const_iterator iter = begin; iter != end;)
    {
        result += *iter;
        // Advance first so the check below can tell whether another element follows.
        ++iter;
        if (iter != end)
        {
            result += separator;
        }
    }
    return result;
}
} // namespace Gramambular
} // namespace Gramambular
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef GRAMAMBULAR_H_

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef GRID_H_
@ -27,207 +34,247 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "NodeAnchor.h"
#include "Span.h"
namespace Gramambular {
namespace Gramambular
{
// Grid holds a vector of Span objects (m_spans), one per location. Nodes are
// stored in the span at their starting location, keyed by spanning length.
// NOTE(review): this text is a rendered diff, so each declaration below shows
// up once in its old formatting and once in its new formatting.
class Grid {
public:
class Grid
{
public:
// Empties the grid.
void clear();
void insertNode(const Node& node, size_t location, size_t spanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location,
size_t spanningLength,
const std::string& key);
void insertNode(const Node &node, size_t location, size_t spanningLength);
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, const std::string &key);
// Insert / remove one span at the given index.
void expandGridByOneAtLocation(size_t location);
void shrinkGridByOneAtLocation(size_t location);
// Number of spans currently held.
size_t width() const;
// Anchors for nodes that end exactly at `location`, and for nodes that end
// at or beyond it, respectively.
std::vector<NodeAnchor> nodesEndingAt(size_t location);
std::vector<NodeAnchor> nodesCrossingOrEndingAt(size_t location);
// "Freeze" the node with the unigram that represents the selected candidate
// value. After this, the node that contains the unigram will always be
// evaluated to that unigram, while all other overlapping nodes will be reset
// to their initial state (that is, if any of those nodes were "frozen" or
// fixed, they will be unfrozen.)
NodeAnchor fixNodeSelectedCandidate(size_t location,
const std::string& value);
NodeAnchor fixNodeSelectedCandidate(size_t location, const std::string &value);
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
// only boost the unigram that represents the value with an overriding score.
// This has the same side effect as fixNodeSelectedCandidate, which is that
// all other overlapping nodes will be reset to their initial state.
void overrideNodeScoreForSelectedCandidate(size_t location,
const std::string& value,
float overridingScore);
// Renders the grid as a "digraph { ... }" text block: a BOS vertex, one
// vertex per node (labelled with its current value), an edge from each node
// to every node stored in the span where it ends, and an EOS vertex.
std::string dumpDOT() {
void overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, float overridingScore);
std::string dumpDOT()
{
std::stringstream sst;
sst << "digraph {" << std::endl;
sst << "graph [ rankdir=LR ];" << std::endl;
sst << "BOS;" << std::endl;
for (size_t p = 0; p < m_spans.size(); p++) {
Span& span = m_spans[p];
for (size_t ni = 0; ni <= span.maximumLength(); ni++) {
Node* np = span.nodeOfLength(ni);
if (np) {
if (!p) {
for (size_t p = 0; p < m_spans.size(); p++)
{
Span &span = m_spans[p];
for (size_t ni = 0; ni <= span.maximumLength(); ni++)
{
Node *np = span.nodeOfLength(ni);
if (np)
{
if (!p)
{
// Nodes stored in the first span are linked from BOS.
sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl;
}
sst << np->currentKeyValue().value << ";" << std::endl;
if (p + ni < m_spans.size()) {
Span& dstSpan = m_spans[p + ni];
for (size_t q = 0; q <= dstSpan.maximumLength(); q++) {
Node* dn = dstSpan.nodeOfLength(q);
if (dn) {
sst << np->currentKeyValue().value << " -> "
<< dn->currentKeyValue().value << ";" << std::endl;
if (p + ni < m_spans.size())
{
Span &dstSpan = m_spans[p + ni];
for (size_t q = 0; q <= dstSpan.maximumLength(); q++)
{
Node *dn = dstSpan.nodeOfLength(q);
if (dn)
{
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";"
<< std::endl;
}
}
}
// A node whose end coincides with the grid width is linked to EOS.
if (p + ni == m_spans.size()) {
if (p + ni == m_spans.size())
{
sst << np->currentKeyValue().value << " -> "
<< "EOS;" << std::endl;
<< "EOS;" << std::endl;
}
}
}
}
sst << "EOS;" << std::endl;
sst << "}";
return sst.str();
}
protected:
protected:
// One Span per location; index == location.
std::vector<Span> m_spans;
};
// Removes every span, leaving the grid with width 0.
inline void Grid::clear() { m_spans.clear(); }
inline void Grid::clear()
{
m_spans.clear();
}
// Stores `node` in the span at index `location` under `spanningLength`.
// If `location` is not yet a valid index, default-constructed Spans are
// appended first until it is.
inline void Grid::insertNode(const Node& node, size_t location,
size_t spanningLength) {
if (location >= m_spans.size()) {
inline void Grid::insertNode(const Node &node, size_t location, size_t spanningLength)
{
if (location >= m_spans.size())
{
// How many spans must be appended so that m_spans[location] exists.
size_t diff = location - m_spans.size() + 1;
for (size_t i = 0; i < diff; i++) {
for (size_t i = 0; i < diff; i++)
{
m_spans.push_back(Span());
}
}
m_spans[location].insertNodeOfLength(node, spanningLength);
}
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(
size_t location, size_t spanningLength, const std::string& key) {
if (location > m_spans.size()) {
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength,
const std::string &key)
{
if (location > m_spans.size())
{
return false;
}
const Node* n = m_spans[location].nodeOfLength(spanningLength);
if (!n) {
const Node *n = m_spans[location].nodeOfLength(spanningLength);
if (!n)
{
return false;
}
return key == n->key();
}
// Inserts one empty Span at index `location`.
// When inserting at the very front or at the end, nothing else is touched.
// Otherwise each earlier span i is additionally asked to drop its nodes of
// length greater than (location - i).
// NOTE(review): `location` greater than m_spans.size() is not rejected here;
// begin() + location would then be past the end. Confirm callers never pass it.
inline void Grid::expandGridByOneAtLocation(size_t location) {
if (!location || location == m_spans.size()) {
inline void Grid::expandGridByOneAtLocation(size_t location)
{
if (!location || location == m_spans.size())
{
m_spans.insert(m_spans.begin() + location, Span());
} else {
}
else
{
m_spans.insert(m_spans.begin() + location, Span());
for (size_t i = 0; i < location; i++) {
for (size_t i = 0; i < location; i++)
{
// zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(location - i);
}
}
}
// Erases the span at index `location`; does nothing when `location` is not a
// valid index. Afterwards each earlier span i is asked to drop its nodes of
// length greater than (location - i).
inline void Grid::shrinkGridByOneAtLocation(size_t location) {
if (location >= m_spans.size()) {
inline void Grid::shrinkGridByOneAtLocation(size_t location)
{
if (location >= m_spans.size())
{
return;
}
m_spans.erase(m_spans.begin() + location);
for (size_t i = 0; i < location; i++) {
for (size_t i = 0; i < location; i++)
{
// zaps overlapping spans
m_spans[i].removeNodeOfLengthGreaterThan(location - i);
}
}
// Returns the number of spans in the grid.
inline size_t Grid::width() const { return m_spans.size(); }
inline size_t Grid::width() const
{
return m_spans.size();
}
// Collects an anchor for every node that starts at some span i < location and
// has length exactly (location - i), i.e. ends at `location`.
// Returns an empty vector when the grid is empty or `location` exceeds its
// width.
inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location) {
inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location)
{
std::vector<NodeAnchor> result;
if (m_spans.size() && location <= m_spans.size()) {
for (size_t i = 0; i < location; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= location) {
Node* np = span.nodeOfLength(location - i);
if (np) {
if (m_spans.size() && location <= m_spans.size())
{
for (size_t i = 0; i < location; i++)
{
Span &span = m_spans[i];
// Only spans whose longest node can reach `location` are inspected.
if (i + span.maximumLength() >= location)
{
Node *np = span.nodeOfLength(location - i);
if (np)
{
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = location - i;
result.push_back(na);
}
}
}
}
return result;
}
// Collects an anchor for every node that starts at some span i < location and
// whose length j satisfies i + j >= location, i.e. it ends at `location` or
// extends past it.
// Returns an empty vector when the grid is empty or `location` exceeds its
// width.
// NOTE(review): each anchor's spanningLength is recorded as (location - i),
// not the node's own length j, so for a crossing node the two differ. Confirm
// this is what callers expect.
inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location) {
inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location)
{
std::vector<NodeAnchor> result;
if (m_spans.size() && location <= m_spans.size()) {
for (size_t i = 0; i < location; i++) {
Span& span = m_spans[i];
if (i + span.maximumLength() >= location) {
for (size_t j = 1, m = span.maximumLength(); j <= m; j++) {
if (i + j < location) {
if (m_spans.size() && location <= m_spans.size())
{
for (size_t i = 0; i < location; i++)
{
Span &span = m_spans[i];
if (i + span.maximumLength() >= location)
{
for (size_t j = 1, m = span.maximumLength(); j <= m; j++)
{
// Skip lengths that stop short of `location`.
if (i + j < location)
{
continue;
}
Node* np = span.nodeOfLength(j);
if (np) {
Node *np = span.nodeOfLength(j);
if (np)
{
NodeAnchor na;
na.node = np;
na.location = i;
na.spanningLength = location - i;
result.push_back(na);
}
}
}
}
}
return result;
}
// For nodes found at the location, fix their currently-selected candidate using
// the supplied string value.
// Every node returned by nodesCrossingOrEndingAt(location) is first reset;
// a node that has a candidate whose value equals `value` then gets that
// candidate selected, and its anchor becomes the return value.
// NOTE(review): `candidates` is a by-value copy of the node's candidate
// vector, and the anchors' const Node pointers are cast to non-const in order
// to mutate the nodes.
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
const std::string& value) {
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const std::string &value)
{
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
NodeAnchor node;
for (auto nodeAnchor : nodes) {
for (auto nodeAnchor : nodes)
{
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)->selectCandidateAtIndex(i);
const_cast<Node *>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i)
{
if (candidates[i].value == value)
{
const_cast<Node *>(nodeAnchor.node)->selectCandidateAtIndex(i);
node = nodeAnchor;
break;
}
@ -236,26 +283,28 @@ inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
return node;
}
// Resets every node returned by nodesCrossingOrEndingAt(location); for each
// node that has a candidate whose value equals `value`, selects that candidate
// via selectFloatingCandidateAtIndex with `overridingScore`.
// NOTE(review): `overridingScore` is a float here but the Node method takes a
// double, so the value is widened at the call. `candidates` is a by-value copy
// of the node's candidate vector.
inline void Grid::overrideNodeScoreForSelectedCandidate(
size_t location, const std::string& value, float overridingScore) {
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value,
float overridingScore)
{
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
for (auto nodeAnchor : nodes) {
for (auto nodeAnchor : nodes)
{
auto candidates = nodeAnchor.node->candidates();
// Reset the candidate-fixed state of every node at the location.
const_cast<Node*>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
if (candidates[i].value == value) {
const_cast<Node*>(nodeAnchor.node)
->selectFloatingCandidateAtIndex(i, overridingScore);
const_cast<Node *>(nodeAnchor.node)->resetCandidate();
for (size_t i = 0, c = candidates.size(); i < c; ++i)
{
if (candidates[i].value == value)
{
const_cast<Node *>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
break;
}
}
}
}
} // namespace Gramambular
} // namespace Gramambular
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef KEYVALUEPAIR_H_
@ -23,36 +30,42 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include <ostream>
#include <string>
namespace Gramambular {
namespace Gramambular
{
// A pair of strings (key, value) with equality and a strict ordering, so it
// can be compared and ordered by key first, then by value.
class KeyValuePair {
public:
class KeyValuePair
{
public:
std::string key;
std::string value;
bool operator==(const KeyValuePair& another) const;
bool operator<(const KeyValuePair& another) const;
bool operator==(const KeyValuePair &another) const;
bool operator<(const KeyValuePair &another) const;
};
// Writes the pair as "(key,value)" and returns the stream.
inline std::ostream& operator<<(std::ostream& stream,
const KeyValuePair& pair) {
inline std::ostream &operator<<(std::ostream &stream, const KeyValuePair &pair)
{
stream << "(" << pair.key << "," << pair.value << ")";
return stream;
}
// Two pairs are equal when both their keys and their values are equal.
inline bool KeyValuePair::operator==(const KeyValuePair& another) const {
inline bool KeyValuePair::operator==(const KeyValuePair &another) const
{
return key == another.key && value == another.value;
}
// Lexicographic ordering: compare keys first; when the keys are equal,
// compare values. Returns false when this key is greater.
inline bool KeyValuePair::operator<(const KeyValuePair& another) const {
if (key < another.key) {
inline bool KeyValuePair::operator<(const KeyValuePair &another) const
{
if (key < another.key)
{
return true;
} else if (key == another.key) {
}
else if (key == another.key)
{
return value < another.value;
}
return false;
}
} // namespace Gramambular
} // namespace Gramambular
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef LANGUAGEMODEL_H_
@ -26,18 +33,20 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Bigram.h"
#include "Unigram.h"
namespace Gramambular {
namespace Gramambular
{
// Abstract interface for a language model: three pure-virtual lookups plus a
// virtual destructor so implementations can be destroyed through a base
// pointer.
// NOTE(review): the two vector-returning methods return `const` by value,
// which prevents callers from moving out of the result.
class LanguageModel {
public:
virtual ~LanguageModel() {}
virtual const std::vector<Bigram> bigramsForKeys(
const std::string& preceedingKey, const std::string& key) = 0;
virtual const std::vector<Unigram> unigramsForKey(const std::string& key) = 0;
virtual bool hasUnigramsForKey(const std::string& key) = 0;
class LanguageModel
{
public:
virtual ~LanguageModel()
{
}
virtual const std::vector<Bigram> bigramsForKeys(const std::string &preceedingKey, const std::string &key) = 0;
virtual const std::vector<Unigram> unigramsForKey(const std::string &key) = 0;
virtual bool hasUnigramsForKey(const std::string &key) = 0;
};
} // namespace Gramambular
} // namespace Gramambular
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef NODE_H_
@ -27,105 +34,105 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "LanguageModel.h"
namespace Gramambular {
namespace Gramambular
{
// A Node carries a key, the unigrams for that key, the candidate key-value
// pairs derived from them, the currently selected unigram index, a score, and
// a flag telling whether the selection has been fixed.
// NOTE(review): m_LM is not read by any member function shown in this header.
class Node {
public:
class Node
{
public:
Node();
Node(const std::string& key, const std::vector<Unigram>& unigrams,
const std::vector<Bigram>& bigrams);
void primeNodeWithPreceedingKeyValues(
const std::vector<KeyValuePair>& keyValues);
Node(const std::string &key, const std::vector<Unigram> &unigrams, const std::vector<Bigram> &bigrams);
void primeNodeWithPreceedingKeyValues(const std::vector<KeyValuePair> &keyValues);
bool isCandidateFixed() const;
const std::vector<KeyValuePair>& candidates() const;
const std::vector<KeyValuePair> &candidates() const;
// Selection mutators: select-and-optionally-fix, reset to index 0, and
// select with a caller-supplied score without fixing.
void selectCandidateAtIndex(size_t index = 0, bool fix = true);
void resetCandidate();
void selectFloatingCandidateAtIndex(size_t index, double score);
const std::string& key() const;
const std::string &key() const;
double score() const;
double scoreForCandidate(const std::string& candidate) const;
double scoreForCandidate(const std::string &candidate) const;
const KeyValuePair currentKeyValue() const;
double highestUnigramScore() const;
protected:
const LanguageModel* m_LM;
protected:
const LanguageModel *m_LM;
std::string m_key;
double m_score;
std::vector<Unigram> m_unigrams;
// Key-value pairs of m_unigrams, in the same order.
std::vector<KeyValuePair> m_candidates;
// Maps a unigram's value string to its index in m_unigrams.
std::map<std::string, size_t> m_valueUnigramIndexMap;
// Groups the bigrams passed to the constructor by their preceding key-value.
std::map<KeyValuePair, std::vector<Bigram> > m_preceedingGramBigramMap;
std::map<KeyValuePair, std::vector<Bigram>> m_preceedingGramBigramMap;
bool m_candidateFixed;
size_t m_selectedUnigramIndex;
friend std::ostream& operator<<(std::ostream& stream, const Node& node);
friend std::ostream &operator<<(std::ostream &stream, const Node &node);
};
// Writes "(node,key:<key>,fixed:<true|false>,selected:<index>,<unigrams>)".
// Streaming node.m_unigrams relies on an operator<< for std::vector<Unigram>
// that is not defined in this header (presumably alongside Unigram; confirm).
inline std::ostream& operator<<(std::ostream& stream, const Node& node) {
stream << "(node,key:" << node.m_key
<< ",fixed:" << (node.m_candidateFixed ? "true" : "false")
<< ",selected:" << node.m_selectedUnigramIndex << ","
<< node.m_unigrams << ")";
inline std::ostream &operator<<(std::ostream &stream, const Node &node)
{
stream << "(node,key:" << node.m_key << ",fixed:" << (node.m_candidateFixed ? "true" : "false")
<< ",selected:" << node.m_selectedUnigramIndex << "," << node.m_unigrams << ")";
return stream;
}
// Builds an empty node: no key, no unigrams, score 0, nothing selected or
// fixed.
//
// Initializers are listed in member declaration order (m_LM, m_score,
// m_candidateFixed, m_selectedUnigramIndex), which is the order in which they
// actually run, and m_LM is set to nullptr so the pointer never holds an
// indeterminate value.
inline Node::Node() : m_LM(nullptr), m_score(0.0), m_candidateFixed(false), m_selectedUnigramIndex(0)
{
}
// Builds a node for `key` from its unigrams and bigrams.
//
// - `unigrams` is copied and stably sorted with Unigram::ScoreCompare; the
//   node's initial score is the score of the first unigram after sorting (or
//   0 when there are none).
// - m_candidates receives each unigram's key-value in sorted order, and
//   m_valueUnigramIndexMap maps each value string to its index (a later
//   duplicate value overwrites an earlier one).
// - `bigrams` are grouped by their preceding key-value.
//
// Initializers are listed in member declaration order, and m_LM is set to
// nullptr so the pointer never holds an indeterminate value.
inline Node::Node(const std::string &key, const std::vector<Unigram> &unigrams, const std::vector<Bigram> &bigrams)
    : m_LM(nullptr), m_key(key), m_score(0.0), m_unigrams(unigrams), m_candidateFixed(false),
      m_selectedUnigramIndex(0)
{
    // Qualified call: does not depend on ADL finding the std algorithm.
    std::stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);

    if (!m_unigrams.empty())
    {
        m_score = m_unigrams[0].score;
    }

    m_candidates.reserve(m_unigrams.size());
    size_t i = 0;
    for (const Unigram &unigram : m_unigrams)
    {
        m_valueUnigramIndexMap[unigram.keyValue.value] = i;
        ++i;
        m_candidates.push_back(unigram.keyValue);
    }

    for (const Bigram &bigram : bigrams)
    {
        m_preceedingGramBigramMap[bigram.preceedingKeyValue].push_back(bigram);
    }
}
// Unless the candidate is fixed, scans the bigrams registered for each of the
// supplied preceding key-values. Whenever a bigram scores higher than the best
// score seen so far (starting from m_score) and its value is a known unigram
// value, that unigram's index and the bigram's score become the new best.
inline void Node::primeNodeWithPreceedingKeyValues(
const std::vector<KeyValuePair>& keyValues) {
inline void Node::primeNodeWithPreceedingKeyValues(const std::vector<KeyValuePair> &keyValues)
{
size_t newIndex = m_selectedUnigramIndex;
double max = m_score;
if (!isCandidateFixed()) {
for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin();
kvi != keyValues.end(); ++kvi) {
std::map<KeyValuePair, std::vector<Bigram> >::const_iterator f =
m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end()) {
const std::vector<Bigram>& bigrams = (*f).second;
for (std::vector<Bigram>::const_iterator bi = bigrams.begin();
bi != bigrams.end(); ++bi) {
const Bigram& bigram = *bi;
if (bigram.score > max) {
if (!isCandidateFixed())
{
for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin(); kvi != keyValues.end(); ++kvi)
{
std::map<KeyValuePair, std::vector<Bigram>>::const_iterator f = m_preceedingGramBigramMap.find(*kvi);
if (f != m_preceedingGramBigramMap.end())
{
const std::vector<Bigram> &bigrams = (*f).second;
for (std::vector<Bigram>::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi)
{
const Bigram &bigram = *bi;
if (bigram.score > max)
{
std::map<std::string, size_t>::const_iterator uf =
m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end()) {
m_valueUnigramIndexMap.find((*bi).keyValue.value);
if (uf != m_valueUnigramIndexMap.end())
{
newIndex = (*uf).second;
max = bigram.score;
}
@ -134,80 +141,109 @@ inline void Node::primeNodeWithPreceedingKeyValues(
}
}
}
if (m_score != max) {
if (m_score != max)
{
m_score = max;
}
if (newIndex != m_selectedUnigramIndex) {
if (newIndex != m_selectedUnigramIndex)
{
m_selectedUnigramIndex = newIndex;
}
}
// Returns whether the current selection has been fixed.
inline bool Node::isCandidateFixed() const { return m_candidateFixed; }
inline bool Node::isCandidateFixed() const
{
return m_candidateFixed;
}
// Returns a reference to the node's candidate key-value pairs.
inline const std::vector<KeyValuePair>& Node::candidates() const {
inline const std::vector<KeyValuePair> &Node::candidates() const
{
return m_candidates;
}
// Selects the unigram at `index` (falling back to index 0 when `index` is out
// of range), records `fix` as the fixed flag, and sets the score to 99.
// NOTE(review): 99 is a hard-coded score, presumably chosen to outrank any
// model-provided score; confirm and consider naming the constant.
inline void Node::selectCandidateAtIndex(size_t index, bool fix) {
if (index >= m_unigrams.size()) {
inline void Node::selectCandidateAtIndex(size_t index, bool fix)
{
if (index >= m_unigrams.size())
{
m_selectedUnigramIndex = 0;
} else {
}
else
{
m_selectedUnigramIndex = index;
}
m_candidateFixed = fix;
m_score = 99;
}
// Returns the node to its unselected state: index 0, not fixed, and, when
// there is at least one unigram, the score of the first unigram. With no
// unigrams the score is left unchanged.
inline void Node::resetCandidate() {
inline void Node::resetCandidate()
{
m_selectedUnigramIndex = 0;
m_candidateFixed = 0;
if (m_unigrams.size()) {
if (m_unigrams.size())
{
m_score = m_unigrams[0].score;
}
}
// Selects the unigram at `index` (falling back to index 0 when `index` is out
// of range) without fixing it, and sets the node's score to `score`.
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
if (index >= m_unigrams.size()) {
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score)
{
if (index >= m_unigrams.size())
{
m_selectedUnigramIndex = 0;
} else {
}
else
{
m_selectedUnigramIndex = index;
}
m_candidateFixed = false;
m_score = score;
}
// Returns a reference to the node's key.
inline const std::string& Node::key() const { return m_key; }
inline const std::string &Node::key() const
{
return m_key;
}
// Returns the node's current score.
inline double Node::score() const { return m_score; }
inline double Node::score() const
{
return m_score;
}
// Returns the score of the first unigram whose value equals `candidate`, or
// 0.0 when no unigram has that value.
inline double Node::scoreForCandidate(const std::string &candidate) const
{
    // Bind by const reference: iterating by value would copy every Unigram
    // (including its key and value strings) just to compare and read a score.
    for (const auto &unigram : m_unigrams)
    {
        if (unigram.keyValue.value == candidate)
        {
            return unigram.score;
        }
    }
    return 0.0;
}
// Returns the score of the first unigram, or 0.0 when there are none.
// The constructor sorts m_unigrams with Unigram::ScoreCompare, so the first
// entry is presumably the highest-scored one (comparator not shown; confirm).
inline double Node::highestUnigramScore() const {
if (m_unigrams.empty()) {
inline double Node::highestUnigramScore() const
{
if (m_unigrams.empty())
{
return 0.0;
}
return m_unigrams[0].score;
}
// Returns the candidate at the selected index, or a default-constructed
// KeyValuePair when the selected index is out of range.
// The bound is checked against m_unigrams while m_candidates is indexed; the
// three-argument constructor fills both with one entry per unigram.
inline const KeyValuePair Node::currentKeyValue() const {
if (m_selectedUnigramIndex >= m_unigrams.size()) {
inline const KeyValuePair Node::currentKeyValue() const
{
if (m_selectedUnigramIndex >= m_unigrams.size())
{
return KeyValuePair();
} else {
}
else
{
return m_candidates[m_selectedUnigramIndex];
}
}
} // namespace Gramambular
} // namespace Gramambular
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef NODEANCHOR_H_
@ -24,40 +31,45 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Node.h"
namespace Gramambular {
namespace Gramambular
{
// Plain record pairing a Node (held through a raw pointer that may be null)
// with a location, a spanning length and an accumulated score. A default
// constructed anchor has a null node and zero for every numeric field.
struct NodeAnchor
{
    const Node *node{nullptr};
    size_t location{0};
    size_t spanningLength{0};
    double accumulatedScore{0.0};
};
inline std::ostream& operator<<(std::ostream& stream,
const NodeAnchor& anchor) {
inline std::ostream &operator<<(std::ostream &stream, const NodeAnchor &anchor)
{
stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),";
if (anchor.node) {
if (anchor.node)
{
stream << *(anchor.node);
} else {
}
else
{
stream << "null";
}
stream << "}";
return stream;
}
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<NodeAnchor>& anchor) {
for (std::vector<NodeAnchor>::const_iterator i = anchor.begin();
i != anchor.end(); ++i) {
inline std::ostream &operator<<(std::ostream &stream, const std::vector<NodeAnchor> &anchor)
{
for (std::vector<NodeAnchor>::const_iterator i = anchor.begin(); i != anchor.end(); ++i)
{
stream << *i;
if (i + 1 != anchor.end()) {
if (i + 1 != anchor.end())
{
stream << "<-";
}
}
return stream;
}
} // namespace Gramambular
} // namespace Gramambular
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SPAN_H_
@ -26,67 +33,80 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Node.h"
namespace Gramambular {
class Span {
public:
namespace Gramambular
{
class Span
{
public:
void clear();
void insertNodeOfLength(const Node& node, size_t length);
void insertNodeOfLength(const Node &node, size_t length);
void removeNodeOfLengthGreaterThan(size_t length);
Node* nodeOfLength(size_t length);
Node *nodeOfLength(size_t length);
size_t maximumLength() const;
protected:
protected:
std::map<size_t, Node> m_lengthNodeMap;
size_t m_maximumLength = 0;
};
inline void Span::clear() {
inline void Span::clear()
{
m_lengthNodeMap.clear();
m_maximumLength = 0;
}
inline void Span::insertNodeOfLength(const Node& node, size_t length) {
inline void Span::insertNodeOfLength(const Node &node, size_t length)
{
m_lengthNodeMap[length] = node;
if (length > m_maximumLength) {
if (length > m_maximumLength)
{
m_maximumLength = length;
}
}
inline void Span::removeNodeOfLengthGreaterThan(size_t length) {
if (length > m_maximumLength) {
inline void Span::removeNodeOfLengthGreaterThan(size_t length)
{
if (length > m_maximumLength)
{
return;
}
size_t max = 0;
std::set<size_t> removeSet;
for (std::map<size_t, Node>::iterator i = m_lengthNodeMap.begin(),
e = m_lengthNodeMap.end();
i != e; ++i) {
if ((*i).first > length) {
for (std::map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end(); i != e; ++i)
{
if ((*i).first > length)
{
removeSet.insert((*i).first);
} else {
if ((*i).first > max) {
}
else
{
if ((*i).first > max)
{
max = (*i).first;
}
}
}
for (std::set<size_t>::iterator i = removeSet.begin(), e = removeSet.end();
i != e; ++i) {
for (std::set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i)
{
m_lengthNodeMap.erase(*i);
}
m_maximumLength = max;
}
// Looks up the node stored under `length`.
// Returns a pointer to the node held inside this span's map, or nullptr when
// no node is stored under that length.
inline Node *Span::nodeOfLength(size_t length)
{
    std::map<size_t, Node>::iterator found = m_lengthNodeMap.find(length);
    if (found == m_lengthNodeMap.end())
    {
        // nullptr rather than a literal 0, so the "no node" result is
        // unmistakably a pointer value.
        return nullptr;
    }
    return &found->second;
}
inline size_t Span::maximumLength() const { return m_maximumLength; }
} // namespace Gramambular
inline size_t Span::maximumLength() const
{
return m_maximumLength;
}
} // namespace Gramambular
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef UNIGRAM_H_
@ -24,22 +31,25 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "KeyValuePair.h"
namespace Gramambular {
namespace Gramambular
{
class Unigram {
public:
class Unigram
{
public:
Unigram();
KeyValuePair keyValue;
double score;
bool operator==(const Unigram& another) const;
bool operator<(const Unigram& another) const;
static bool ScoreCompare(const Unigram& a, const Unigram& b);
bool operator==(const Unigram &another) const;
bool operator<(const Unigram &another) const;
static bool ScoreCompare(const Unigram &a, const Unigram &b);
};
inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) {
inline std::ostream &operator<<(std::ostream &stream, const Unigram &gram)
{
std::streamsize p = stream.precision();
stream.precision(6);
stream << "(" << gram.keyValue << "," << gram.score << ")";
@ -47,44 +57,52 @@ inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) {
return stream;
}
inline std::ostream& operator<<(std::ostream& stream,
const std::vector<Unigram>& grams) {
inline std::ostream &operator<<(std::ostream &stream, const std::vector<Unigram> &grams)
{
stream << "[" << grams.size() << "]=>{";
size_t index = 0;
for (std::vector<Unigram>::const_iterator gi = grams.begin();
gi != grams.end(); ++gi, ++index) {
for (std::vector<Unigram>::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index)
{
stream << index << "=>";
stream << *gi;
if (gi + 1 != grams.end()) {
if (gi + 1 != grams.end())
{
stream << ",";
}
}
stream << "}";
return stream;
}
inline Unigram::Unigram() : score(0.0) {}
inline Unigram::Unigram() : score(0.0)
{
}
// Two unigrams are equal when their key-value pairs compare equal and their
// scores are exactly the same value.
inline bool Unigram::operator==(const Unigram &another) const
{
    if (!(keyValue == another.keyValue))
    {
        return false;
    }
    return score == another.score;
}
// Orders unigrams by key-value pair; unigrams whose key-value pairs compare
// equal are ordered by ascending score.
inline bool Unigram::operator<(const Unigram &another) const
{
    // Short-circuiting keeps the evaluation order: the "<" check runs first,
    // and the equality/score checks only run when it is false.
    return keyValue < another.keyValue || (keyValue == another.keyValue && score < another.score);
}
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) {
inline bool Unigram::ScoreCompare(const Unigram &a, const Unigram &b)
{
return a.score > b.score;
}
} // namespace Gramambular
} // namespace Gramambular
#endif

View File

@ -1,20 +1,27 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// All possible vChewing-specific modifications are of:
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef WALKER_H_
@ -25,60 +32,65 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#include "Grid.h"
namespace Gramambular {
namespace Gramambular
{
// Walks a Grid backwards to pick the best-scoring chain of node anchors.
class Walker
{
  public:
    // Keeps the given grid pointer as-is; the walker reads the grid through it
    // on every reverseWalk() call.
    explicit Walker(Grid *inGrid);

    // Returns the best-scoring chain of anchors ending at `location`, with
    // `accumulatedScore` as the score carried into this step. The result is
    // empty when `location` is 0 or lies beyond the grid's width.
    const std::vector<NodeAnchor> reverseWalk(size_t location, double accumulatedScore = 0.0);

  protected:
    Grid *m_grid;
};
inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {}
inline Walker::Walker(Grid *inGrid) : m_grid(inGrid)
{
}
inline const std::vector<NodeAnchor> Walker::reverseWalk(
size_t location, double accumulatedScore) {
if (!location || location > m_grid->width()) {
inline const std::vector<NodeAnchor> Walker::reverseWalk(size_t location, double accumulatedScore)
{
if (!location || location > m_grid->width())
{
return std::vector<NodeAnchor>();
}
std::vector<std::vector<NodeAnchor> > paths;
std::vector<std::vector<NodeAnchor>> paths;
std::vector<NodeAnchor> nodes = m_grid->nodesEndingAt(location);
for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end();
++ni) {
if (!(*ni).node) {
for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end(); ++ni)
{
if (!(*ni).node)
{
continue;
}
(*ni).accumulatedScore = accumulatedScore + (*ni).node->score();
std::vector<NodeAnchor> path =
reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
std::vector<NodeAnchor> path = reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
path.insert(path.begin(), *ni);
paths.push_back(path);
}
if (!paths.size()) {
if (!paths.size())
{
return std::vector<NodeAnchor>();
}
std::vector<NodeAnchor>* result = &*(paths.begin());
for (std::vector<std::vector<NodeAnchor> >::iterator pi = paths.begin();
pi != paths.end(); ++pi) {
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
std::vector<NodeAnchor> *result = &*(paths.begin());
for (std::vector<std::vector<NodeAnchor>>::iterator pi = paths.begin(); pi != paths.end(); ++pi)
{
if ((*pi).back().accumulatedScore > result->back().accumulatedScore)
{
result = &*pi;
}
}
return *result;
}
} // namespace Gramambular
} // namespace Gramambular
#endif