(Obj)C(pp) // Clang-Format.
This commit is contained in:
parent
eedf95f307
commit
3842dc5013
|
@ -0,0 +1,171 @@
|
||||||
|
---
|
||||||
|
Language: Cpp
|
||||||
|
# BasedOnStyle: Microsoft
|
||||||
|
AccessModifierOffset: -1
|
||||||
|
AlignAfterOpenBracket: Align
|
||||||
|
AlignConsecutiveMacros: false
|
||||||
|
AlignConsecutiveAssignments: false
|
||||||
|
AlignConsecutiveDeclarations: false
|
||||||
|
AlignEscapedNewlines: Left
|
||||||
|
AlignOperands: true
|
||||||
|
AlignTrailingComments: true
|
||||||
|
AllowAllArgumentsOnNextLine: true
|
||||||
|
AllowAllConstructorInitializersOnNextLine: true
|
||||||
|
AllowAllParametersOfDeclarationOnNextLine: true
|
||||||
|
AllowShortBlocksOnASingleLine: Never
|
||||||
|
AllowShortCaseLabelsOnASingleLine: false
|
||||||
|
AllowShortFunctionsOnASingleLine: All
|
||||||
|
AllowShortLambdasOnASingleLine: All
|
||||||
|
AllowShortIfStatementsOnASingleLine: WithoutElse
|
||||||
|
AllowShortLoopsOnASingleLine: true
|
||||||
|
AlwaysBreakAfterDefinitionReturnType: None
|
||||||
|
AlwaysBreakAfterReturnType: None
|
||||||
|
AlwaysBreakBeforeMultilineStrings: true
|
||||||
|
AlwaysBreakTemplateDeclarations: Yes
|
||||||
|
BinPackArguments: true
|
||||||
|
BinPackParameters: true
|
||||||
|
BraceWrapping:
|
||||||
|
AfterCaseLabel: false
|
||||||
|
AfterClass: false
|
||||||
|
AfterControlStatement: false
|
||||||
|
AfterEnum: false
|
||||||
|
AfterFunction: false
|
||||||
|
AfterNamespace: false
|
||||||
|
AfterObjCDeclaration: false
|
||||||
|
AfterStruct: false
|
||||||
|
AfterUnion: false
|
||||||
|
AfterExternBlock: false
|
||||||
|
BeforeCatch: false
|
||||||
|
BeforeElse: false
|
||||||
|
IndentBraces: false
|
||||||
|
SplitEmptyFunction: true
|
||||||
|
SplitEmptyRecord: true
|
||||||
|
SplitEmptyNamespace: true
|
||||||
|
BreakBeforeBinaryOperators: None
|
||||||
|
BreakBeforeBraces: Attach
|
||||||
|
BreakBeforeInheritanceComma: false
|
||||||
|
BreakInheritanceList: BeforeColon
|
||||||
|
BreakBeforeTernaryOperators: true
|
||||||
|
BreakConstructorInitializersBeforeComma: false
|
||||||
|
BreakConstructorInitializers: BeforeColon
|
||||||
|
BreakAfterJavaFieldAnnotations: false
|
||||||
|
BreakStringLiterals: true
|
||||||
|
ColumnLimit: 80
|
||||||
|
CommentPragmas: '^ IWYU pragma:'
|
||||||
|
CompactNamespaces: false
|
||||||
|
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||||
|
ConstructorInitializerIndentWidth: 4
|
||||||
|
ContinuationIndentWidth: 4
|
||||||
|
Cpp11BracedListStyle: true
|
||||||
|
DeriveLineEnding: true
|
||||||
|
DerivePointerAlignment: false
|
||||||
|
DisableFormat: false
|
||||||
|
ExperimentalAutoDetectBinPacking: false
|
||||||
|
FixNamespaceComments: true
|
||||||
|
ForEachMacros:
|
||||||
|
- foreach
|
||||||
|
- Q_FOREACH
|
||||||
|
- BOOST_FOREACH
|
||||||
|
IncludeBlocks: Regroup
|
||||||
|
IncludeCategories:
|
||||||
|
- Regex: '^<ext/.*\.h>'
|
||||||
|
Priority: 2
|
||||||
|
SortPriority: 0
|
||||||
|
- Regex: '^<.*\.h>'
|
||||||
|
Priority: 1
|
||||||
|
SortPriority: 0
|
||||||
|
- Regex: '^<.*'
|
||||||
|
Priority: 2
|
||||||
|
SortPriority: 0
|
||||||
|
- Regex: '.*'
|
||||||
|
Priority: 3
|
||||||
|
SortPriority: 0
|
||||||
|
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
||||||
|
IncludeIsMainSourceRegex: ''
|
||||||
|
IndentCaseLabels: true
|
||||||
|
IndentGotoLabels: true
|
||||||
|
IndentPPDirectives: None
|
||||||
|
IndentWidth: 4
|
||||||
|
IndentWrappedFunctionNames: false
|
||||||
|
JavaScriptQuotes: Leave
|
||||||
|
JavaScriptWrapImports: true
|
||||||
|
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||||
|
MacroBlockBegin: ''
|
||||||
|
MacroBlockEnd: ''
|
||||||
|
MaxEmptyLinesToKeep: 1
|
||||||
|
NamespaceIndentation: None
|
||||||
|
ObjCBinPackProtocolList: Never
|
||||||
|
ObjCBlockIndentWidth: 4
|
||||||
|
ObjCSpaceAfterProperty: false
|
||||||
|
ObjCSpaceBeforeProtocolList: true
|
||||||
|
PenaltyBreakAssignment: 2
|
||||||
|
PenaltyBreakBeforeFirstCallParameter: 1
|
||||||
|
PenaltyBreakComment: 300
|
||||||
|
PenaltyBreakFirstLessLess: 120
|
||||||
|
PenaltyBreakString: 1000
|
||||||
|
PenaltyBreakTemplateDeclaration: 10
|
||||||
|
PenaltyExcessCharacter: 1000000
|
||||||
|
PenaltyReturnTypeOnItsOwnLine: 200
|
||||||
|
PointerAlignment: Left
|
||||||
|
RawStringFormats:
|
||||||
|
- Language: Cpp
|
||||||
|
Delimiters:
|
||||||
|
- h
|
||||||
|
- m
|
||||||
|
- hh
|
||||||
|
- mm
|
||||||
|
- cc
|
||||||
|
- CC
|
||||||
|
- cpp
|
||||||
|
- Cpp
|
||||||
|
- CPP
|
||||||
|
- 'c++'
|
||||||
|
- 'C++'
|
||||||
|
CanonicalDelimiter: ''
|
||||||
|
BasedOnStyle: Microsoft
|
||||||
|
- Language: TextProto
|
||||||
|
Delimiters:
|
||||||
|
- pb
|
||||||
|
- PB
|
||||||
|
- proto
|
||||||
|
- PROTO
|
||||||
|
EnclosingFunctions:
|
||||||
|
- EqualsProto
|
||||||
|
- EquivToProto
|
||||||
|
- PARSE_PARTIAL_TEXT_PROTO
|
||||||
|
- PARSE_TEST_PROTO
|
||||||
|
- PARSE_TEXT_PROTO
|
||||||
|
- ParseTextOrDie
|
||||||
|
- ParseTextProtoOrDie
|
||||||
|
CanonicalDelimiter: ''
|
||||||
|
BasedOnStyle: Microsoft
|
||||||
|
ReflowComments: true
|
||||||
|
SortIncludes: true
|
||||||
|
SortUsingDeclarations: true
|
||||||
|
SpaceAfterCStyleCast: false
|
||||||
|
SpaceAfterLogicalNot: false
|
||||||
|
SpaceAfterTemplateKeyword: true
|
||||||
|
SpaceBeforeAssignmentOperators: true
|
||||||
|
SpaceBeforeCpp11BracedList: false
|
||||||
|
SpaceBeforeCtorInitializerColon: true
|
||||||
|
SpaceBeforeInheritanceColon: true
|
||||||
|
SpaceBeforeParens: ControlStatements
|
||||||
|
SpaceBeforeRangeBasedForLoopColon: true
|
||||||
|
SpaceInEmptyBlock: false
|
||||||
|
SpaceInEmptyParentheses: false
|
||||||
|
SpacesBeforeTrailingComments: 2
|
||||||
|
SpacesInAngles: false
|
||||||
|
SpacesInConditionalStatement: false
|
||||||
|
SpacesInContainerLiterals: true
|
||||||
|
SpacesInCStyleCastParentheses: false
|
||||||
|
SpacesInParentheses: false
|
||||||
|
SpacesInSquareBrackets: false
|
||||||
|
SpaceBeforeSquareBrackets: false
|
||||||
|
Standard: Auto
|
||||||
|
StatementMacros:
|
||||||
|
- Q_UNUSED
|
||||||
|
- QT_REQUIRE_VERSION
|
||||||
|
TabWidth: 4
|
||||||
|
UseCRLF: false
|
||||||
|
UseTab: Always
|
||||||
|
...
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@import Cocoa;
|
@import Cocoa;
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#import "Chronosphere.h"
|
#import "Chronosphere.h"
|
||||||
|
@ -27,15 +34,18 @@ BOOL appBundleChronoshiftedToARandomizedPath(NSString *bundle)
|
||||||
int entrySize = sizeof(struct statfs);
|
int entrySize = sizeof(struct statfs);
|
||||||
struct statfs *bufs = (struct statfs *)calloc(entryCount, entrySize);
|
struct statfs *bufs = (struct statfs *)calloc(entryCount, entrySize);
|
||||||
entryCount = getfsstat(bufs, entryCount * entrySize, MNT_NOWAIT);
|
entryCount = getfsstat(bufs, entryCount * entrySize, MNT_NOWAIT);
|
||||||
for (int i = 0; i < entryCount; i++) {
|
for (int i = 0; i < entryCount; i++)
|
||||||
if (!strcmp(bundleAbsPath, bufs[i].f_mntfromname)) {
|
{
|
||||||
|
if (!strcmp(bundleAbsPath, bufs[i].f_mntfromname))
|
||||||
|
{
|
||||||
free(bufs);
|
free(bufs);
|
||||||
|
|
||||||
// getfsstat() may return us a cached result, and so we need to get the stat of the mounted fs.
|
// getfsstat() may return us a cached result, and so we need to get the stat of the mounted fs.
|
||||||
// If statfs() returns an error, the mounted fs is already gone.
|
// If statfs() returns an error, the mounted fs is already gone.
|
||||||
struct statfs stat;
|
struct statfs stat;
|
||||||
int checkResult = statfs(bundleAbsPath, &stat);
|
int checkResult = statfs(bundleAbsPath, &stat);
|
||||||
if (checkResult != 0) {
|
if (checkResult != 0)
|
||||||
|
{
|
||||||
// Meaning the app's bundle is not mounted, that is it's not translocated.
|
// Meaning the app's bundle is not mounted, that is it's not translocated.
|
||||||
// It also means that the app is not loaded.
|
// It also means that the app is not loaded.
|
||||||
return NO;
|
return NO;
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "Mandarin.h"
|
#include "Mandarin.h"
|
||||||
|
@ -22,19 +29,22 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
|
||||||
namespace Mandarin {
|
namespace Mandarin
|
||||||
|
{
|
||||||
|
|
||||||
class PinyinParseHelper {
|
class PinyinParseHelper
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
static const bool ConsumePrefix(std::string& target,
|
static const bool ConsumePrefix(std::string &target, const std::string &prefix)
|
||||||
const std::string& prefix) {
|
{
|
||||||
if (target.length() < prefix.length()) {
|
if (target.length() < prefix.length())
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (target.substr(0, prefix.length()) == prefix) {
|
if (target.substr(0, prefix.length()) == prefix)
|
||||||
target =
|
{
|
||||||
target.substr(prefix.length(), target.length() - prefix.length());
|
target = target.substr(prefix.length(), target.length() - prefix.length());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,7 +52,8 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class BopomofoCharacterMap {
|
class BopomofoCharacterMap
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
static const BopomofoCharacterMap &SharedInstance();
|
static const BopomofoCharacterMap &SharedInstance();
|
||||||
|
|
||||||
|
@ -53,8 +64,10 @@ protected:
|
||||||
BopomofoCharacterMap();
|
BopomofoCharacterMap();
|
||||||
};
|
};
|
||||||
|
|
||||||
const BPMF BPMF::FromHanyuPinyin(const std::string& str) {
|
const BPMF BPMF::FromHanyuPinyin(const std::string &str)
|
||||||
if (!str.length()) {
|
{
|
||||||
|
if (!str.length())
|
||||||
|
{
|
||||||
return BPMF();
|
return BPMF();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,35 +83,53 @@ const BPMF BPMF::FromHanyuPinyin(const std::string& str) {
|
||||||
bool independentConsonant = false;
|
bool independentConsonant = false;
|
||||||
|
|
||||||
// the y exceptions first
|
// the y exceptions first
|
||||||
if (0) {
|
if (0)
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "yuan")) {
|
{
|
||||||
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "yuan"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::AN;
|
thirdComponent = BPMF::AN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ying")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ying"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::I;
|
secondComponent = BPMF::I;
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "yung")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "yung"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "yong")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "yong"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "yue")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "yue"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::E;
|
thirdComponent = BPMF::E;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "yun")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "yun"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::EN;
|
thirdComponent = BPMF::EN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "you")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "you"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::I;
|
secondComponent = BPMF::I;
|
||||||
thirdComponent = BPMF::OU;
|
thirdComponent = BPMF::OU;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "yu")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "yu"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// try the first character
|
// try the first character
|
||||||
char c = pinyin.length() ? pinyin[0] : 0;
|
char c = pinyin.length() ? pinyin[0] : 0;
|
||||||
switch (c) {
|
switch (c)
|
||||||
|
{
|
||||||
case 'b':
|
case 'b':
|
||||||
firstComponent = BPMF::B;
|
firstComponent = BPMF::B;
|
||||||
pinyin = pinyin.substr(1);
|
pinyin = pinyin.substr(1);
|
||||||
|
@ -162,7 +193,8 @@ const BPMF BPMF::FromHanyuPinyin(const std::string& str) {
|
||||||
pinyin = pinyin.substr(1);
|
pinyin = pinyin.substr(1);
|
||||||
break;
|
break;
|
||||||
case 'y':
|
case 'y':
|
||||||
if (!secondComponent && !thirdComponent) {
|
if (!secondComponent && !thirdComponent)
|
||||||
|
{
|
||||||
secondComponent = BPMF::I;
|
secondComponent = BPMF::I;
|
||||||
}
|
}
|
||||||
pinyin = pinyin.substr(1);
|
pinyin = pinyin.substr(1);
|
||||||
|
@ -170,176 +202,283 @@ const BPMF BPMF::FromHanyuPinyin(const std::string& str) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// then we try ZH, CH, SH, R, Z, C, S (in that order)
|
// then we try ZH, CH, SH, R, Z, C, S (in that order)
|
||||||
if (0) {
|
if (0)
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "zh")) {
|
{
|
||||||
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "zh"))
|
||||||
|
{
|
||||||
firstComponent = BPMF::ZH;
|
firstComponent = BPMF::ZH;
|
||||||
independentConsonant = true;
|
independentConsonant = true;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ch")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ch"))
|
||||||
|
{
|
||||||
firstComponent = BPMF::CH;
|
firstComponent = BPMF::CH;
|
||||||
independentConsonant = true;
|
independentConsonant = true;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "sh")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "sh"))
|
||||||
|
{
|
||||||
firstComponent = BPMF::SH;
|
firstComponent = BPMF::SH;
|
||||||
independentConsonant = true;
|
independentConsonant = true;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "r")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "r"))
|
||||||
|
{
|
||||||
firstComponent = BPMF::R;
|
firstComponent = BPMF::R;
|
||||||
independentConsonant = true;
|
independentConsonant = true;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "z")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "z"))
|
||||||
|
{
|
||||||
firstComponent = BPMF::Z;
|
firstComponent = BPMF::Z;
|
||||||
independentConsonant = true;
|
independentConsonant = true;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "c")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "c"))
|
||||||
|
{
|
||||||
firstComponent = BPMF::C;
|
firstComponent = BPMF::C;
|
||||||
independentConsonant = true;
|
independentConsonant = true;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "s")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "s"))
|
||||||
|
{
|
||||||
firstComponent = BPMF::S;
|
firstComponent = BPMF::S;
|
||||||
independentConsonant = true;
|
independentConsonant = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// consume exceptions first: (ien, in), (iou, iu), (uen, un), (veng, iong),
|
// consume exceptions first: (ien, in), (iou, iu), (uen, un), (veng, iong),
|
||||||
// (ven, vn), (uei, ui), ung but longer sequence takes precedence
|
// (ven, vn), (uei, ui), ung but longer sequence takes precedence
|
||||||
if (0) {
|
if (0)
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "veng")) {
|
{
|
||||||
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "veng"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "iong")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "iong"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ing")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ing"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::I;
|
secondComponent = BPMF::I;
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ien")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ien"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::I;
|
secondComponent = BPMF::I;
|
||||||
thirdComponent = BPMF::EN;
|
thirdComponent = BPMF::EN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "iou")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "iou"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::I;
|
secondComponent = BPMF::I;
|
||||||
thirdComponent = BPMF::OU;
|
thirdComponent = BPMF::OU;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "uen")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "uen"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::U;
|
secondComponent = BPMF::U;
|
||||||
thirdComponent = BPMF::EN;
|
thirdComponent = BPMF::EN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ven")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ven"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::EN;
|
thirdComponent = BPMF::EN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "uei")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "uei"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::U;
|
secondComponent = BPMF::U;
|
||||||
thirdComponent = BPMF::EI;
|
thirdComponent = BPMF::EI;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ung")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ung"))
|
||||||
|
{
|
||||||
// f exception
|
// f exception
|
||||||
if (firstComponent == BPMF::F) {
|
if (firstComponent == BPMF::F)
|
||||||
|
{
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
secondComponent = BPMF::U;
|
secondComponent = BPMF::U;
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
}
|
}
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ong")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ong"))
|
||||||
|
{
|
||||||
// f exception
|
// f exception
|
||||||
if (firstComponent == BPMF::F) {
|
if (firstComponent == BPMF::F)
|
||||||
|
{
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
secondComponent = BPMF::U;
|
secondComponent = BPMF::U;
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
}
|
}
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "un")) {
|
}
|
||||||
if (firstComponent == BPMF::J || firstComponent == BPMF::Q ||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "un"))
|
||||||
firstComponent == BPMF::X) {
|
{
|
||||||
|
if (firstComponent == BPMF::J || firstComponent == BPMF::Q || firstComponent == BPMF::X)
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
secondComponent = BPMF::U;
|
secondComponent = BPMF::U;
|
||||||
}
|
}
|
||||||
thirdComponent = BPMF::EN;
|
thirdComponent = BPMF::EN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "iu")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "iu"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::I;
|
secondComponent = BPMF::I;
|
||||||
thirdComponent = BPMF::OU;
|
thirdComponent = BPMF::OU;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "in")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "in"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::I;
|
secondComponent = BPMF::I;
|
||||||
thirdComponent = BPMF::EN;
|
thirdComponent = BPMF::EN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "vn")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "vn"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::EN;
|
thirdComponent = BPMF::EN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ui")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ui"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::U;
|
secondComponent = BPMF::U;
|
||||||
thirdComponent = BPMF::EI;
|
thirdComponent = BPMF::EI;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ue")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ue"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
thirdComponent = BPMF::E;
|
thirdComponent = BPMF::E;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, u8"ü")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, u8"ü"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// then consume the middle component...
|
// then consume the middle component...
|
||||||
if (0) {
|
if (0)
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "i")) {
|
{
|
||||||
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "i"))
|
||||||
|
{
|
||||||
secondComponent = independentConsonant ? 0 : BPMF::I;
|
secondComponent = independentConsonant ? 0 : BPMF::I;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "u")) {
|
}
|
||||||
if (firstComponent == BPMF::J || firstComponent == BPMF::Q ||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "u"))
|
||||||
firstComponent == BPMF::X) {
|
{
|
||||||
|
if (firstComponent == BPMF::J || firstComponent == BPMF::Q || firstComponent == BPMF::X)
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
secondComponent = BPMF::U;
|
secondComponent = BPMF::U;
|
||||||
}
|
}
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "v")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "v"))
|
||||||
|
{
|
||||||
secondComponent = BPMF::UE;
|
secondComponent = BPMF::UE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// the vowels, longer sequence takes precedence
|
// the vowels, longer sequence takes precedence
|
||||||
if (0) {
|
if (0)
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ang")) {
|
{
|
||||||
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ang"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::ANG;
|
thirdComponent = BPMF::ANG;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "eng")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "eng"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::ENG;
|
thirdComponent = BPMF::ENG;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "err")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "err"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::ERR;
|
thirdComponent = BPMF::ERR;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ai")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ai"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::AI;
|
thirdComponent = BPMF::AI;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ei")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ei"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::EI;
|
thirdComponent = BPMF::EI;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ao")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ao"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::AO;
|
thirdComponent = BPMF::AO;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "ou")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "ou"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::OU;
|
thirdComponent = BPMF::OU;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "an")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "an"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::AN;
|
thirdComponent = BPMF::AN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "en")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "en"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::EN;
|
thirdComponent = BPMF::EN;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "er")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "er"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::ERR;
|
thirdComponent = BPMF::ERR;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "a")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "a"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::A;
|
thirdComponent = BPMF::A;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "o")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "o"))
|
||||||
|
{
|
||||||
thirdComponent = BPMF::O;
|
thirdComponent = BPMF::O;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "e")) {
|
}
|
||||||
if (secondComponent) {
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "e"))
|
||||||
|
{
|
||||||
|
if (secondComponent)
|
||||||
|
{
|
||||||
thirdComponent = BPMF::E;
|
thirdComponent = BPMF::E;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
thirdComponent = BPMF::ER;
|
thirdComponent = BPMF::ER;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// at last!
|
// at last!
|
||||||
if (0) {
|
if (0)
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "1")) {
|
{
|
||||||
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "1"))
|
||||||
|
{
|
||||||
toneComponent = BPMF::Tone1;
|
toneComponent = BPMF::Tone1;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "2")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "2"))
|
||||||
|
{
|
||||||
toneComponent = BPMF::Tone2;
|
toneComponent = BPMF::Tone2;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "3")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "3"))
|
||||||
|
{
|
||||||
toneComponent = BPMF::Tone3;
|
toneComponent = BPMF::Tone3;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "4")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "4"))
|
||||||
|
{
|
||||||
toneComponent = BPMF::Tone4;
|
toneComponent = BPMF::Tone4;
|
||||||
} else if (PinyinParseHelper::ConsumePrefix(pinyin, "5")) {
|
}
|
||||||
|
else if (PinyinParseHelper::ConsumePrefix(pinyin, "5"))
|
||||||
|
{
|
||||||
toneComponent = BPMF::Tone5;
|
toneComponent = BPMF::Tone5;
|
||||||
}
|
}
|
||||||
|
|
||||||
return BPMF(firstComponent | secondComponent | thirdComponent |
|
return BPMF(firstComponent | secondComponent | thirdComponent | toneComponent);
|
||||||
toneComponent);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string BPMF::HanyuPinyinString(bool includesTone,
|
const std::string BPMF::HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const
|
||||||
bool useVForUUmlaut) const {
|
{
|
||||||
std::string consonant, middle, vowel, tone;
|
std::string consonant, middle, vowel, tone;
|
||||||
|
|
||||||
Component cc = consonantComponent(), mvc = middleVowelComponent(),
|
Component cc = consonantComponent(), mvc = middleVowelComponent(), vc = vowelComponent();
|
||||||
vc = vowelComponent();
|
|
||||||
bool hasNoMVCOrVC = !(mvc || vc);
|
bool hasNoMVCOrVC = !(mvc || vc);
|
||||||
|
|
||||||
switch (cc) {
|
switch (cc)
|
||||||
|
{
|
||||||
case B:
|
case B:
|
||||||
consonant = "b";
|
consonant = "b";
|
||||||
break;
|
break;
|
||||||
|
@ -375,75 +514,93 @@ const std::string BPMF::HanyuPinyinString(bool includesTone,
|
||||||
break;
|
break;
|
||||||
case J:
|
case J:
|
||||||
consonant = "j";
|
consonant = "j";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
case Q:
|
case Q:
|
||||||
consonant = "q";
|
consonant = "q";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
case X:
|
case X:
|
||||||
consonant = "x";
|
consonant = "x";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
case ZH:
|
case ZH:
|
||||||
consonant = "zh";
|
consonant = "zh";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
case CH:
|
case CH:
|
||||||
consonant = "ch";
|
consonant = "ch";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
case SH:
|
case SH:
|
||||||
consonant = "sh";
|
consonant = "sh";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
case R:
|
case R:
|
||||||
consonant = "r";
|
consonant = "r";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
case Z:
|
case Z:
|
||||||
consonant = "z";
|
consonant = "z";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
case C:
|
case C:
|
||||||
consonant = "c";
|
consonant = "c";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
case S:
|
case S:
|
||||||
consonant = "s";
|
consonant = "s";
|
||||||
if (hasNoMVCOrVC) middle = "i";
|
if (hasNoMVCOrVC)
|
||||||
|
middle = "i";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (mvc) {
|
switch (mvc)
|
||||||
|
{
|
||||||
case I:
|
case I:
|
||||||
if (!cc) {
|
if (!cc)
|
||||||
|
{
|
||||||
consonant = "y";
|
consonant = "y";
|
||||||
}
|
}
|
||||||
|
|
||||||
middle = (!vc || cc) ? "i" : "";
|
middle = (!vc || cc) ? "i" : "";
|
||||||
break;
|
break;
|
||||||
case U:
|
case U:
|
||||||
if (!cc) {
|
if (!cc)
|
||||||
|
{
|
||||||
consonant = "w";
|
consonant = "w";
|
||||||
}
|
}
|
||||||
middle = (!vc || cc) ? "u" : "";
|
middle = (!vc || cc) ? "u" : "";
|
||||||
break;
|
break;
|
||||||
case UE:
|
case UE:
|
||||||
if (!cc) {
|
if (!cc)
|
||||||
|
{
|
||||||
consonant = "y";
|
consonant = "y";
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((cc == N || cc == L) && vc != E) {
|
if ((cc == N || cc == L) && vc != E)
|
||||||
|
{
|
||||||
middle = useVForUUmlaut ? "v" : "ü";
|
middle = useVForUUmlaut ? "v" : "ü";
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
middle = "u";
|
middle = "u";
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (vc) {
|
switch (vc)
|
||||||
|
{
|
||||||
case A:
|
case A:
|
||||||
vowel = "a";
|
vowel = "a";
|
||||||
break;
|
break;
|
||||||
|
@ -488,48 +645,61 @@ const std::string BPMF::HanyuPinyinString(bool includesTone,
|
||||||
// combination rules
|
// combination rules
|
||||||
|
|
||||||
// ueng -> ong, but note "weng"
|
// ueng -> ong, but note "weng"
|
||||||
if ((mvc == U || mvc == UE) && vc == ENG) {
|
if ((mvc == U || mvc == UE) && vc == ENG)
|
||||||
|
{
|
||||||
middle = "";
|
middle = "";
|
||||||
vowel = (cc == J || cc == Q || cc == X)
|
vowel = (cc == J || cc == Q || cc == X) ? "iong" : ((!cc && mvc == U) ? "eng" : "ong");
|
||||||
? "iong"
|
|
||||||
: ((!cc && mvc == U) ? "eng" : "ong");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ien, uen, üen -> in, un, ün ; but note "wen", "yin" and "yun"
|
// ien, uen, üen -> in, un, ün ; but note "wen", "yin" and "yun"
|
||||||
if (mvc && vc == EN) {
|
if (mvc && vc == EN)
|
||||||
if (cc) {
|
{
|
||||||
|
if (cc)
|
||||||
|
{
|
||||||
vowel = "n";
|
vowel = "n";
|
||||||
} else {
|
}
|
||||||
if (mvc == UE) {
|
else
|
||||||
|
{
|
||||||
|
if (mvc == UE)
|
||||||
|
{
|
||||||
vowel = "n"; // yun
|
vowel = "n"; // yun
|
||||||
} else if (mvc == U) {
|
}
|
||||||
|
else if (mvc == U)
|
||||||
|
{
|
||||||
vowel = "en"; // wen
|
vowel = "en"; // wen
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
vowel = "in"; // yin
|
vowel = "in"; // yin
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// iou -> iu
|
// iou -> iu
|
||||||
if (cc && mvc == I && vc == OU) {
|
if (cc && mvc == I && vc == OU)
|
||||||
|
{
|
||||||
middle = "";
|
middle = "";
|
||||||
vowel = "iu";
|
vowel = "iu";
|
||||||
}
|
}
|
||||||
|
|
||||||
// ieng -> ing
|
// ieng -> ing
|
||||||
if (mvc == I && vc == ENG) {
|
if (mvc == I && vc == ENG)
|
||||||
|
{
|
||||||
middle = "";
|
middle = "";
|
||||||
vowel = "ing";
|
vowel = "ing";
|
||||||
}
|
}
|
||||||
|
|
||||||
// uei -> ui
|
// uei -> ui
|
||||||
if (cc && mvc == U && vc == EI) {
|
if (cc && mvc == U && vc == EI)
|
||||||
|
{
|
||||||
middle = "";
|
middle = "";
|
||||||
vowel = "ui";
|
vowel = "ui";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (includesTone) {
|
if (includesTone)
|
||||||
switch (toneMarkerComponent()) {
|
{
|
||||||
|
switch (toneMarkerComponent())
|
||||||
|
{
|
||||||
case Tone2:
|
case Tone2:
|
||||||
tone = "2";
|
tone = "2";
|
||||||
break;
|
break;
|
||||||
|
@ -548,44 +718,55 @@ const std::string BPMF::HanyuPinyinString(bool includesTone,
|
||||||
return consonant + middle + vowel + tone;
|
return consonant + middle + vowel + tone;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BPMF BPMF::FromComposedString(const std::string& str) {
|
const BPMF BPMF::FromComposedString(const std::string &str)
|
||||||
|
{
|
||||||
BPMF syllable;
|
BPMF syllable;
|
||||||
auto iter = str.begin();
|
auto iter = str.begin();
|
||||||
while (iter != str.end()) {
|
while (iter != str.end())
|
||||||
|
{
|
||||||
// This is a naive implementation and we bail early at anything we don't
|
// This is a naive implementation and we bail early at anything we don't
|
||||||
// recognize. A sound implementation would require to either use a trie for
|
// recognize. A sound implementation would require to either use a trie for
|
||||||
// the Bopomofo character map or to split the input by codepoints. This
|
// the Bopomofo character map or to split the input by codepoints. This
|
||||||
// suffices for now.
|
// suffices for now.
|
||||||
|
|
||||||
// Illegal.
|
// Illegal.
|
||||||
if (!(*iter & 0x80)) {
|
if (!(*iter & 0x80))
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t utf8_length = -1;
|
size_t utf8_length = -1;
|
||||||
|
|
||||||
// These are the code points for the tone markers.
|
// These are the code points for the tone markers.
|
||||||
if ((*iter & (0x80 | 0x40)) && !(*iter & 0x20)) {
|
if ((*iter & (0x80 | 0x40)) && !(*iter & 0x20))
|
||||||
|
{
|
||||||
utf8_length = 2;
|
utf8_length = 2;
|
||||||
} else if ((*iter & (0x80 | 0x40 | 0x20)) && !(*iter & 0x10)) {
|
}
|
||||||
|
else if ((*iter & (0x80 | 0x40 | 0x20)) && !(*iter & 0x10))
|
||||||
|
{
|
||||||
utf8_length = 3;
|
utf8_length = 3;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
// Illegal.
|
// Illegal.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iter + (utf8_length - 1) == str.end()) {
|
if (iter + (utf8_length - 1) == str.end())
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string component = std::string(iter, iter + utf8_length);
|
std::string component = std::string(iter, iter + utf8_length);
|
||||||
const std::map<std::string, BPMF::Component> &charToComp =
|
const std::map<std::string, BPMF::Component> &charToComp =
|
||||||
BopomofoCharacterMap::SharedInstance().characterToComponent;
|
BopomofoCharacterMap::SharedInstance().characterToComponent;
|
||||||
std::map<std::string, BPMF::Component>::const_iterator result =
|
std::map<std::string, BPMF::Component>::const_iterator result = charToComp.find(component);
|
||||||
charToComp.find(component);
|
if (result == charToComp.end())
|
||||||
if (result == charToComp.end()) {
|
{
|
||||||
break;
|
break;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
syllable += BPMF((*result).second);
|
syllable += BPMF((*result).second);
|
||||||
}
|
}
|
||||||
iter += utf8_length;
|
iter += utf8_length;
|
||||||
|
@ -593,14 +774,12 @@ const BPMF BPMF::FromComposedString(const std::string& str) {
|
||||||
return syllable;
|
return syllable;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string BPMF::composedString() const {
|
const std::string BPMF::composedString() const
|
||||||
|
{
|
||||||
std::string result;
|
std::string result;
|
||||||
#define APPEND(c) \
|
#define APPEND(c) \
|
||||||
if (syllable_ & c) \
|
if (syllable_ & c) \
|
||||||
result += \
|
result += (*BopomofoCharacterMap::SharedInstance().componentToCharacter.find(syllable_ & c)).second
|
||||||
(*BopomofoCharacterMap::SharedInstance().componentToCharacter.find( \
|
|
||||||
syllable_ & c)) \
|
|
||||||
.second
|
|
||||||
APPEND(ConsonantMask);
|
APPEND(ConsonantMask);
|
||||||
APPEND(MiddleVowelMask);
|
APPEND(MiddleVowelMask);
|
||||||
APPEND(VowelMask);
|
APPEND(VowelMask);
|
||||||
|
@ -609,14 +788,14 @@ syllable_ & c)) \
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const BopomofoCharacterMap &BopomofoCharacterMap::SharedInstance()
|
||||||
|
{
|
||||||
const BopomofoCharacterMap& BopomofoCharacterMap::SharedInstance() {
|
|
||||||
static BopomofoCharacterMap *map = new BopomofoCharacterMap();
|
static BopomofoCharacterMap *map = new BopomofoCharacterMap();
|
||||||
return *map;
|
return *map;
|
||||||
}
|
}
|
||||||
|
|
||||||
BopomofoCharacterMap::BopomofoCharacterMap() {
|
BopomofoCharacterMap::BopomofoCharacterMap()
|
||||||
|
{
|
||||||
characterToComponent[u8"ㄅ"] = BPMF::B;
|
characterToComponent[u8"ㄅ"] = BPMF::B;
|
||||||
characterToComponent[u8"ㄆ"] = BPMF::P;
|
characterToComponent[u8"ㄆ"] = BPMF::P;
|
||||||
characterToComponent[u8"ㄇ"] = BPMF::M;
|
characterToComponent[u8"ㄇ"] = BPMF::M;
|
||||||
|
@ -659,23 +838,20 @@ BopomofoCharacterMap::BopomofoCharacterMap() {
|
||||||
characterToComponent[u8"ˋ"] = BPMF::Tone4;
|
characterToComponent[u8"ˋ"] = BPMF::Tone4;
|
||||||
characterToComponent[u8"˙"] = BPMF::Tone5;
|
characterToComponent[u8"˙"] = BPMF::Tone5;
|
||||||
|
|
||||||
for (std::map<std::string, BPMF::Component>::iterator iter =
|
for (std::map<std::string, BPMF::Component>::iterator iter = characterToComponent.begin();
|
||||||
characterToComponent.begin();
|
|
||||||
iter != characterToComponent.end(); ++iter)
|
iter != characterToComponent.end(); ++iter)
|
||||||
componentToCharacter[(*iter).second] = (*iter).first;
|
componentToCharacter[(*iter).second] = (*iter).first;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ASSIGNKEY1(m, vec, k, val) \
|
#define ASSIGNKEY1(m, vec, k, val) m[k] = (vec.clear(), vec.push_back((BPMF::Component)val), vec)
|
||||||
m[k] = (vec.clear(), vec.push_back((BPMF::Component)val), vec)
|
|
||||||
#define ASSIGNKEY2(m, vec, k, val1, val2) \
|
#define ASSIGNKEY2(m, vec, k, val1, val2) \
|
||||||
m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), \
|
m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), vec)
|
||||||
vec.push_back((BPMF::Component)val2), vec)
|
|
||||||
#define ASSIGNKEY3(m, vec, k, val1, val2, val3) \
|
#define ASSIGNKEY3(m, vec, k, val1, val2, val3) \
|
||||||
m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), \
|
m[k] = (vec.clear(), vec.push_back((BPMF::Component)val1), vec.push_back((BPMF::Component)val2), \
|
||||||
vec.push_back((BPMF::Component)val2), \
|
|
||||||
vec.push_back((BPMF::Component)val3), vec)
|
vec.push_back((BPMF::Component)val3), vec)
|
||||||
|
|
||||||
static BopomofoKeyboardLayout* CreateStandardLayout() {
|
static BopomofoKeyboardLayout *CreateStandardLayout()
|
||||||
|
{
|
||||||
std::vector<BPMF::Component> vec;
|
std::vector<BPMF::Component> vec;
|
||||||
BopomofoKeyToComponentMap ktcm;
|
BopomofoKeyToComponentMap ktcm;
|
||||||
|
|
||||||
|
@ -724,7 +900,8 @@ static BopomofoKeyboardLayout* CreateStandardLayout() {
|
||||||
return new BopomofoKeyboardLayout(ktcm, "Standard");
|
return new BopomofoKeyboardLayout(ktcm, "Standard");
|
||||||
}
|
}
|
||||||
|
|
||||||
static BopomofoKeyboardLayout* CreateIBMLayout() {
|
static BopomofoKeyboardLayout *CreateIBMLayout()
|
||||||
|
{
|
||||||
std::vector<BPMF::Component> vec;
|
std::vector<BPMF::Component> vec;
|
||||||
BopomofoKeyToComponentMap ktcm;
|
BopomofoKeyToComponentMap ktcm;
|
||||||
|
|
||||||
|
@ -773,7 +950,8 @@ static BopomofoKeyboardLayout* CreateIBMLayout() {
|
||||||
return new BopomofoKeyboardLayout(ktcm, "IBM");
|
return new BopomofoKeyboardLayout(ktcm, "IBM");
|
||||||
}
|
}
|
||||||
|
|
||||||
static BopomofoKeyboardLayout* CreateMiTACLayout() {
|
static BopomofoKeyboardLayout *CreateMiTACLayout()
|
||||||
|
{
|
||||||
std::vector<BPMF::Component> vec;
|
std::vector<BPMF::Component> vec;
|
||||||
BopomofoKeyToComponentMap ktcm;
|
BopomofoKeyToComponentMap ktcm;
|
||||||
|
|
||||||
|
@ -822,7 +1000,8 @@ static BopomofoKeyboardLayout* CreateMiTACLayout() {
|
||||||
return new BopomofoKeyboardLayout(ktcm, "MiTAC");
|
return new BopomofoKeyboardLayout(ktcm, "MiTAC");
|
||||||
}
|
}
|
||||||
|
|
||||||
static BopomofoKeyboardLayout* CreateETenLayout() {
|
static BopomofoKeyboardLayout *CreateETenLayout()
|
||||||
|
{
|
||||||
std::vector<BPMF::Component> vec;
|
std::vector<BPMF::Component> vec;
|
||||||
BopomofoKeyToComponentMap ktcm;
|
BopomofoKeyToComponentMap ktcm;
|
||||||
|
|
||||||
|
@ -871,7 +1050,8 @@ static BopomofoKeyboardLayout* CreateETenLayout() {
|
||||||
return new BopomofoKeyboardLayout(ktcm, "ETen");
|
return new BopomofoKeyboardLayout(ktcm, "ETen");
|
||||||
}
|
}
|
||||||
|
|
||||||
static BopomofoKeyboardLayout* CreateHsuLayout() {
|
static BopomofoKeyboardLayout *CreateHsuLayout()
|
||||||
|
{
|
||||||
std::vector<BPMF::Component> vec;
|
std::vector<BPMF::Component> vec;
|
||||||
BopomofoKeyToComponentMap ktcm;
|
BopomofoKeyToComponentMap ktcm;
|
||||||
|
|
||||||
|
@ -904,7 +1084,8 @@ static BopomofoKeyboardLayout* CreateHsuLayout() {
|
||||||
return new BopomofoKeyboardLayout(ktcm, "Hsu");
|
return new BopomofoKeyboardLayout(ktcm, "Hsu");
|
||||||
}
|
}
|
||||||
|
|
||||||
static BopomofoKeyboardLayout* CreateETen26Layout() {
|
static BopomofoKeyboardLayout *CreateETen26Layout()
|
||||||
|
{
|
||||||
std::vector<BPMF::Component> vec;
|
std::vector<BPMF::Component> vec;
|
||||||
BopomofoKeyToComponentMap ktcm;
|
BopomofoKeyToComponentMap ktcm;
|
||||||
|
|
||||||
|
@ -938,7 +1119,8 @@ static BopomofoKeyboardLayout* CreateETen26Layout() {
|
||||||
return new BopomofoKeyboardLayout(ktcm, "ETen26");
|
return new BopomofoKeyboardLayout(ktcm, "ETen26");
|
||||||
}
|
}
|
||||||
|
|
||||||
static BopomofoKeyboardLayout* CreateFakeSeigyouLayout() {
|
static BopomofoKeyboardLayout *CreateFakeSeigyouLayout()
|
||||||
|
{
|
||||||
std::vector<BPMF::Component> vec;
|
std::vector<BPMF::Component> vec;
|
||||||
BopomofoKeyToComponentMap ktcm;
|
BopomofoKeyToComponentMap ktcm;
|
||||||
|
|
||||||
|
@ -987,51 +1169,58 @@ static BopomofoKeyboardLayout* CreateFakeSeigyouLayout() {
|
||||||
return new BopomofoKeyboardLayout(ktcm, "FakeSeigyou");
|
return new BopomofoKeyboardLayout(ktcm, "FakeSeigyou");
|
||||||
}
|
}
|
||||||
|
|
||||||
static BopomofoKeyboardLayout* CreateHanyuPinyinLayout() {
|
static BopomofoKeyboardLayout *CreateHanyuPinyinLayout()
|
||||||
|
{
|
||||||
BopomofoKeyToComponentMap ktcm;
|
BopomofoKeyToComponentMap ktcm;
|
||||||
return new BopomofoKeyboardLayout(ktcm, "HanyuPinyin");
|
return new BopomofoKeyboardLayout(ktcm, "HanyuPinyin");
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoKeyboardLayout* BopomofoKeyboardLayout::StandardLayout() {
|
const BopomofoKeyboardLayout *BopomofoKeyboardLayout::StandardLayout()
|
||||||
|
{
|
||||||
static BopomofoKeyboardLayout *layout = CreateStandardLayout();
|
static BopomofoKeyboardLayout *layout = CreateStandardLayout();
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoKeyboardLayout* BopomofoKeyboardLayout::ETenLayout() {
|
const BopomofoKeyboardLayout *BopomofoKeyboardLayout::ETenLayout()
|
||||||
|
{
|
||||||
static BopomofoKeyboardLayout *layout = CreateETenLayout();
|
static BopomofoKeyboardLayout *layout = CreateETenLayout();
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoKeyboardLayout* BopomofoKeyboardLayout::HsuLayout() {
|
const BopomofoKeyboardLayout *BopomofoKeyboardLayout::HsuLayout()
|
||||||
|
{
|
||||||
static BopomofoKeyboardLayout *layout = CreateHsuLayout();
|
static BopomofoKeyboardLayout *layout = CreateHsuLayout();
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoKeyboardLayout* BopomofoKeyboardLayout::ETen26Layout() {
|
const BopomofoKeyboardLayout *BopomofoKeyboardLayout::ETen26Layout()
|
||||||
|
{
|
||||||
static BopomofoKeyboardLayout *layout = CreateETen26Layout();
|
static BopomofoKeyboardLayout *layout = CreateETen26Layout();
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoKeyboardLayout* BopomofoKeyboardLayout::IBMLayout() {
|
const BopomofoKeyboardLayout *BopomofoKeyboardLayout::IBMLayout()
|
||||||
|
{
|
||||||
static BopomofoKeyboardLayout *layout = CreateIBMLayout();
|
static BopomofoKeyboardLayout *layout = CreateIBMLayout();
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoKeyboardLayout* BopomofoKeyboardLayout::MiTACLayout() {
|
const BopomofoKeyboardLayout *BopomofoKeyboardLayout::MiTACLayout()
|
||||||
|
{
|
||||||
static BopomofoKeyboardLayout *layout = CreateMiTACLayout();
|
static BopomofoKeyboardLayout *layout = CreateMiTACLayout();
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoKeyboardLayout* BopomofoKeyboardLayout::FakeSeigyouLayout() {
|
const BopomofoKeyboardLayout *BopomofoKeyboardLayout::FakeSeigyouLayout()
|
||||||
|
{
|
||||||
static BopomofoKeyboardLayout *layout = CreateFakeSeigyouLayout();
|
static BopomofoKeyboardLayout *layout = CreateFakeSeigyouLayout();
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoKeyboardLayout* BopomofoKeyboardLayout::HanyuPinyinLayout() {
|
const BopomofoKeyboardLayout *BopomofoKeyboardLayout::HanyuPinyinLayout()
|
||||||
|
{
|
||||||
static BopomofoKeyboardLayout *layout = CreateHanyuPinyinLayout();
|
static BopomofoKeyboardLayout *layout = CreateHanyuPinyinLayout();
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Mandarin
|
} // namespace Mandarin
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef MANDARIN_H_
|
#ifndef MANDARIN_H_
|
||||||
|
@ -25,13 +32,17 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace Mandarin {
|
namespace Mandarin
|
||||||
|
{
|
||||||
|
|
||||||
class BopomofoSyllable {
|
class BopomofoSyllable
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
typedef uint16_t Component;
|
typedef uint16_t Component;
|
||||||
|
|
||||||
explicit BopomofoSyllable(Component syllable = 0) : syllable_(syllable) {}
|
explicit BopomofoSyllable(Component syllable = 0) : syllable_(syllable)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
BopomofoSyllable(const BopomofoSyllable &) = default;
|
BopomofoSyllable(const BopomofoSyllable &) = default;
|
||||||
BopomofoSyllable(BopomofoSyllable &&another) = default;
|
BopomofoSyllable(BopomofoSyllable &&another) = default;
|
||||||
|
@ -43,61 +54,93 @@ public:
|
||||||
static const BopomofoSyllable FromHanyuPinyin(const std::string &str);
|
static const BopomofoSyllable FromHanyuPinyin(const std::string &str);
|
||||||
|
|
||||||
// TO DO: Support accented vowels
|
// TO DO: Support accented vowels
|
||||||
const std::string HanyuPinyinString(bool includesTone,
|
const std::string HanyuPinyinString(bool includesTone, bool useVForUUmlaut) const;
|
||||||
bool useVForUUmlaut) const;
|
|
||||||
|
|
||||||
static const BopomofoSyllable FromComposedString(const std::string &str);
|
static const BopomofoSyllable FromComposedString(const std::string &str);
|
||||||
const std::string composedString() const;
|
const std::string composedString() const;
|
||||||
|
|
||||||
void clear() { syllable_ = 0; }
|
void clear()
|
||||||
|
{
|
||||||
|
syllable_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
bool isEmpty() const { return !syllable_; }
|
bool isEmpty() const
|
||||||
|
{
|
||||||
|
return !syllable_;
|
||||||
|
}
|
||||||
|
|
||||||
bool hasConsonant() const { return !!(syllable_ & ConsonantMask); }
|
bool hasConsonant() const
|
||||||
|
{
|
||||||
|
return !!(syllable_ & ConsonantMask);
|
||||||
|
}
|
||||||
|
|
||||||
bool hasMiddleVowel() const { return !!(syllable_ & MiddleVowelMask); }
|
bool hasMiddleVowel() const
|
||||||
bool hasVowel() const { return !!(syllable_ & VowelMask); }
|
{
|
||||||
|
return !!(syllable_ & MiddleVowelMask);
|
||||||
|
}
|
||||||
|
bool hasVowel() const
|
||||||
|
{
|
||||||
|
return !!(syllable_ & VowelMask);
|
||||||
|
}
|
||||||
|
|
||||||
bool hasToneMarker() const { return !!(syllable_ & ToneMarkerMask); }
|
bool hasToneMarker() const
|
||||||
|
{
|
||||||
|
return !!(syllable_ & ToneMarkerMask);
|
||||||
|
}
|
||||||
|
|
||||||
Component consonantComponent() const { return syllable_ & ConsonantMask; }
|
Component consonantComponent() const
|
||||||
|
{
|
||||||
|
return syllable_ & ConsonantMask;
|
||||||
|
}
|
||||||
|
|
||||||
Component middleVowelComponent() const {
|
Component middleVowelComponent() const
|
||||||
|
{
|
||||||
return syllable_ & MiddleVowelMask;
|
return syllable_ & MiddleVowelMask;
|
||||||
}
|
}
|
||||||
|
|
||||||
Component vowelComponent() const { return syllable_ & VowelMask; }
|
Component vowelComponent() const
|
||||||
|
{
|
||||||
|
return syllable_ & VowelMask;
|
||||||
|
}
|
||||||
|
|
||||||
Component toneMarkerComponent() const { return syllable_ & ToneMarkerMask; }
|
Component toneMarkerComponent() const
|
||||||
|
{
|
||||||
|
return syllable_ & ToneMarkerMask;
|
||||||
|
}
|
||||||
|
|
||||||
bool operator==(const BopomofoSyllable& another) const {
|
bool operator==(const BopomofoSyllable &another) const
|
||||||
|
{
|
||||||
return syllable_ == another.syllable_;
|
return syllable_ == another.syllable_;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator!=(const BopomofoSyllable& another) const {
|
bool operator!=(const BopomofoSyllable &another) const
|
||||||
|
{
|
||||||
return syllable_ != another.syllable_;
|
return syllable_ != another.syllable_;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isOverlappingWith(const BopomofoSyllable& another) const {
|
bool isOverlappingWith(const BopomofoSyllable &another) const
|
||||||
|
{
|
||||||
#define IOW_SAND(mask) ((syllable_ & mask) && (another.syllable_ & mask))
|
#define IOW_SAND(mask) ((syllable_ & mask) && (another.syllable_ & mask))
|
||||||
return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) ||
|
return IOW_SAND(ConsonantMask) || IOW_SAND(MiddleVowelMask) || IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask);
|
||||||
IOW_SAND(VowelMask) || IOW_SAND(ToneMarkerMask);
|
|
||||||
#undef IOW_SAND
|
#undef IOW_SAND
|
||||||
}
|
}
|
||||||
|
|
||||||
// consonants J, Q, X all require the existence of vowel I or UE
|
// consonants J, Q, X all require the existence of vowel I or UE
|
||||||
bool belongsToJQXClass() const {
|
bool belongsToJQXClass() const
|
||||||
|
{
|
||||||
Component consonant = syllable_ & ConsonantMask;
|
Component consonant = syllable_ & ConsonantMask;
|
||||||
return (consonant == J || consonant == Q || consonant == X);
|
return (consonant == J || consonant == Q || consonant == X);
|
||||||
}
|
}
|
||||||
|
|
||||||
// zi, ci, si, chi, chi, shi, ri
|
// zi, ci, si, chi, chi, shi, ri
|
||||||
bool belongsToZCSRClass() const {
|
bool belongsToZCSRClass() const
|
||||||
|
{
|
||||||
Component consonant = syllable_ & ConsonantMask;
|
Component consonant = syllable_ & ConsonantMask;
|
||||||
return (consonant >= ZH && consonant <= S);
|
return (consonant >= ZH && consonant <= S);
|
||||||
}
|
}
|
||||||
|
|
||||||
Component maskType() const {
|
Component maskType() const
|
||||||
|
{
|
||||||
Component mask = 0;
|
Component mask = 0;
|
||||||
mask |= (syllable_ & ConsonantMask) ? ConsonantMask : 0;
|
mask |= (syllable_ & ConsonantMask) ? ConsonantMask : 0;
|
||||||
mask |= (syllable_ & MiddleVowelMask) ? MiddleVowelMask : 0;
|
mask |= (syllable_ & MiddleVowelMask) ? MiddleVowelMask : 0;
|
||||||
|
@ -106,10 +149,12 @@ public:
|
||||||
return mask;
|
return mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BopomofoSyllable operator+(const BopomofoSyllable& another) const {
|
const BopomofoSyllable operator+(const BopomofoSyllable &another) const
|
||||||
|
{
|
||||||
Component newSyllable = syllable_;
|
Component newSyllable = syllable_;
|
||||||
#define OP_SOVER(mask) \
|
#define OP_SOVER(mask) \
|
||||||
if (another.syllable_ & mask) { \
|
if (another.syllable_ & mask) \
|
||||||
|
{ \
|
||||||
newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \
|
newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \
|
||||||
}
|
}
|
||||||
OP_SOVER(ConsonantMask);
|
OP_SOVER(ConsonantMask);
|
||||||
|
@ -120,9 +165,11 @@ newSyllable = (newSyllable & ~mask) | (another.syllable_ & mask); \
|
||||||
return BopomofoSyllable(newSyllable);
|
return BopomofoSyllable(newSyllable);
|
||||||
}
|
}
|
||||||
|
|
||||||
BopomofoSyllable& operator+=(const BopomofoSyllable& another) {
|
BopomofoSyllable &operator+=(const BopomofoSyllable &another)
|
||||||
|
{
|
||||||
#define OPE_SOVER(mask) \
|
#define OPE_SOVER(mask) \
|
||||||
if (another.syllable_ & mask) { \
|
if (another.syllable_ & mask) \
|
||||||
|
{ \
|
||||||
syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \
|
syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \
|
||||||
}
|
}
|
||||||
OPE_SOVER(ConsonantMask);
|
OPE_SOVER(ConsonantMask);
|
||||||
|
@ -133,31 +180,27 @@ syllable_ = (syllable_ & ~mask) | (another.syllable_ & mask); \
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
friend std::ostream& operator<<(std::ostream& stream,
|
friend std::ostream &operator<<(std::ostream &stream, const BopomofoSyllable &syllable);
|
||||||
const BopomofoSyllable& syllable);
|
|
||||||
|
|
||||||
static constexpr Component
|
static constexpr Component ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants
|
||||||
ConsonantMask = 0x001f, // 0000 0000 0001 1111, 21 consonants
|
|
||||||
MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels
|
MiddleVowelMask = 0x0060, // 0000 0000 0110 0000, 3 middle vowels
|
||||||
VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels
|
VowelMask = 0x0780, // 0000 0111 1000 0000, 13 vowels
|
||||||
ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00)
|
ToneMarkerMask = 0x3800, // 0011 1000 0000 0000, 5 tones (tone1 = 0x00)
|
||||||
B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, D = 0x0005, T = 0x0006,
|
B = 0x0001, P = 0x0002, M = 0x0003, F = 0x0004, D = 0x0005, T = 0x0006, N = 0x0007, L = 0x0008, G = 0x0009,
|
||||||
N = 0x0007, L = 0x0008, G = 0x0009, K = 0x000a, H = 0x000b, J = 0x000c,
|
K = 0x000a, H = 0x000b, J = 0x000c, Q = 0x000d, X = 0x000e, ZH = 0x000f, CH = 0x0010,
|
||||||
Q = 0x000d, X = 0x000e, ZH = 0x000f, CH = 0x0010, SH = 0x0011, R = 0x0012,
|
SH = 0x0011, R = 0x0012, Z = 0x0013, C = 0x0014, S = 0x0015, I = 0x0020, U = 0x0040,
|
||||||
Z = 0x0013, C = 0x0014, S = 0x0015, I = 0x0020, U = 0x0040,
|
|
||||||
UE = 0x0060, // ue = u umlaut (we use the German convention here as an
|
UE = 0x0060, // ue = u umlaut (we use the German convention here as an
|
||||||
// ersatz to the /ju:/ sound)
|
// ersatz to the /ju:/ sound)
|
||||||
A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, AI = 0x0280, EI = 0x0300,
|
A = 0x0080, O = 0x0100, ER = 0x0180, E = 0x0200, AI = 0x0280, EI = 0x0300, AO = 0x0380, OU = 0x0400,
|
||||||
AO = 0x0380, OU = 0x0400, AN = 0x0480, EN = 0x0500, ANG = 0x0580,
|
AN = 0x0480, EN = 0x0500, ANG = 0x0580, ENG = 0x0600, ERR = 0x0680, Tone1 = 0x0000,
|
||||||
ENG = 0x0600, ERR = 0x0680, Tone1 = 0x0000, Tone2 = 0x0800,
|
Tone2 = 0x0800, Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000;
|
||||||
Tone3 = 0x1000, Tone4 = 0x1800, Tone5 = 0x2000;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Component syllable_;
|
Component syllable_;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& stream,
|
inline std::ostream &operator<<(std::ostream &stream, const BopomofoSyllable &syllable)
|
||||||
const BopomofoSyllable& syllable) {
|
{
|
||||||
stream << syllable.composedString();
|
stream << syllable.composedString();
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
@ -167,7 +210,8 @@ typedef BopomofoSyllable BPMF;
|
||||||
typedef std::map<char, std::vector<BPMF::Component>> BopomofoKeyToComponentMap;
|
typedef std::map<char, std::vector<BPMF::Component>> BopomofoKeyToComponentMap;
|
||||||
typedef std::map<BPMF::Component, char> BopomofoComponentToKeyMap;
|
typedef std::map<BPMF::Component, char> BopomofoComponentToKeyMap;
|
||||||
|
|
||||||
class BopomofoKeyboardLayout {
|
class BopomofoKeyboardLayout
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
static const BopomofoKeyboardLayout *StandardLayout();
|
static const BopomofoKeyboardLayout *StandardLayout();
|
||||||
static const BopomofoKeyboardLayout *ETenLayout();
|
static const BopomofoKeyboardLayout *ETenLayout();
|
||||||
|
@ -178,40 +222,44 @@ public:
|
||||||
static const BopomofoKeyboardLayout *FakeSeigyouLayout();
|
static const BopomofoKeyboardLayout *FakeSeigyouLayout();
|
||||||
static const BopomofoKeyboardLayout *HanyuPinyinLayout();
|
static const BopomofoKeyboardLayout *HanyuPinyinLayout();
|
||||||
|
|
||||||
BopomofoKeyboardLayout(const BopomofoKeyToComponentMap& ktcm,
|
BopomofoKeyboardLayout(const BopomofoKeyToComponentMap &ktcm, const std::string &name)
|
||||||
const std::string& name)
|
: m_keyToComponent(ktcm), m_name(name)
|
||||||
: m_keyToComponent(ktcm), m_name(name) {
|
{
|
||||||
for (BopomofoKeyToComponentMap::const_iterator miter =
|
for (BopomofoKeyToComponentMap::const_iterator miter = m_keyToComponent.begin();
|
||||||
m_keyToComponent.begin();
|
|
||||||
miter != m_keyToComponent.end(); ++miter)
|
miter != m_keyToComponent.end(); ++miter)
|
||||||
for (std::vector<BPMF::Component>::const_iterator viter =
|
for (std::vector<BPMF::Component>::const_iterator viter = (*miter).second.begin();
|
||||||
(*miter).second.begin();
|
|
||||||
viter != (*miter).second.end(); ++viter)
|
viter != (*miter).second.end(); ++viter)
|
||||||
m_componentToKey[*viter] = (*miter).first;
|
m_componentToKey[*viter] = (*miter).first;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string name() const { return m_name; }
|
const std::string name() const
|
||||||
|
{
|
||||||
|
return m_name;
|
||||||
|
}
|
||||||
|
|
||||||
char componentToKey(BPMF::Component component) const {
|
char componentToKey(BPMF::Component component) const
|
||||||
BopomofoComponentToKeyMap::const_iterator iter =
|
{
|
||||||
m_componentToKey.find(component);
|
BopomofoComponentToKeyMap::const_iterator iter = m_componentToKey.find(component);
|
||||||
return (iter == m_componentToKey.end()) ? 0 : (*iter).second;
|
return (iter == m_componentToKey.end()) ? 0 : (*iter).second;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<BPMF::Component> keyToComponents(char key) const {
|
const std::vector<BPMF::Component> keyToComponents(char key) const
|
||||||
|
{
|
||||||
BopomofoKeyToComponentMap::const_iterator iter = m_keyToComponent.find(key);
|
BopomofoKeyToComponentMap::const_iterator iter = m_keyToComponent.find(key);
|
||||||
return (iter == m_keyToComponent.end()) ? std::vector<BPMF::Component>()
|
return (iter == m_keyToComponent.end()) ? std::vector<BPMF::Component>() : (*iter).second;
|
||||||
: (*iter).second;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string keySequenceFromSyllable(BPMF syllable) const {
|
const std::string keySequenceFromSyllable(BPMF syllable) const
|
||||||
|
{
|
||||||
std::string sequence;
|
std::string sequence;
|
||||||
|
|
||||||
BPMF::Component c;
|
BPMF::Component c;
|
||||||
char k;
|
char k;
|
||||||
#define STKS_COMBINE(component) \
|
#define STKS_COMBINE(component) \
|
||||||
if ((c = component)) { \
|
if ((c = component)) \
|
||||||
if ((k = componentToKey(c))) sequence += std::string(1, k); \
|
{ \
|
||||||
|
if ((k = componentToKey(c))) \
|
||||||
|
sequence += std::string(1, k); \
|
||||||
}
|
}
|
||||||
STKS_COMBINE(syllable.consonantComponent());
|
STKS_COMBINE(syllable.consonantComponent());
|
||||||
STKS_COMBINE(syllable.middleVowelComponent());
|
STKS_COMBINE(syllable.middleVowelComponent());
|
||||||
|
@ -221,19 +269,22 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
|
||||||
return sequence;
|
return sequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BPMF syllableFromKeySequence(const std::string& sequence) const {
|
const BPMF syllableFromKeySequence(const std::string &sequence) const
|
||||||
|
{
|
||||||
BPMF syllable;
|
BPMF syllable;
|
||||||
|
|
||||||
for (std::string::const_iterator iter = sequence.begin();
|
for (std::string::const_iterator iter = sequence.begin(); iter != sequence.end(); ++iter)
|
||||||
iter != sequence.end(); ++iter) {
|
{
|
||||||
bool beforeSeqHasIorUE = sequenceContainsIorUE(sequence.begin(), iter);
|
bool beforeSeqHasIorUE = sequenceContainsIorUE(sequence.begin(), iter);
|
||||||
bool aheadSeqHasIorUE = sequenceContainsIorUE(iter + 1, sequence.end());
|
bool aheadSeqHasIorUE = sequenceContainsIorUE(iter + 1, sequence.end());
|
||||||
|
|
||||||
std::vector<BPMF::Component> components = keyToComponents(*iter);
|
std::vector<BPMF::Component> components = keyToComponents(*iter);
|
||||||
|
|
||||||
if (!components.size()) continue;
|
if (!components.size())
|
||||||
|
continue;
|
||||||
|
|
||||||
if (components.size() == 1) {
|
if (components.size() == 1)
|
||||||
|
{
|
||||||
syllable += BPMF(components[0]);
|
syllable += BPMF(components[0]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -243,34 +294,44 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
|
||||||
BPMF ending = components.size() > 2 ? BPMF(components[2]) : follow;
|
BPMF ending = components.size() > 2 ? BPMF(components[2]) : follow;
|
||||||
|
|
||||||
// apply the I/UE + E rule
|
// apply the I/UE + E rule
|
||||||
if (head.vowelComponent() == BPMF::E &&
|
if (head.vowelComponent() == BPMF::E && follow.vowelComponent() != BPMF::E)
|
||||||
follow.vowelComponent() != BPMF::E) {
|
{
|
||||||
syllable += beforeSeqHasIorUE ? head : follow;
|
syllable += beforeSeqHasIorUE ? head : follow;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (head.vowelComponent() != BPMF::E &&
|
if (head.vowelComponent() != BPMF::E && follow.vowelComponent() == BPMF::E)
|
||||||
follow.vowelComponent() == BPMF::E) {
|
{
|
||||||
syllable += beforeSeqHasIorUE ? follow : head;
|
syllable += beforeSeqHasIorUE ? follow : head;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// apply the J/Q/X + I/UE rule, only two components are allowed in the
|
// apply the J/Q/X + I/UE rule, only two components are allowed in the
|
||||||
// components vector here
|
// components vector here
|
||||||
if (head.belongsToJQXClass() && !follow.belongsToJQXClass()) {
|
if (head.belongsToJQXClass() && !follow.belongsToJQXClass())
|
||||||
if (!syllable.isEmpty()) {
|
{
|
||||||
if (ending != follow) syllable += ending;
|
if (!syllable.isEmpty())
|
||||||
} else {
|
{
|
||||||
|
if (ending != follow)
|
||||||
|
syllable += ending;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
syllable += aheadSeqHasIorUE ? head : follow;
|
syllable += aheadSeqHasIorUE ? head : follow;
|
||||||
}
|
}
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!head.belongsToJQXClass() && follow.belongsToJQXClass()) {
|
if (!head.belongsToJQXClass() && follow.belongsToJQXClass())
|
||||||
if (!syllable.isEmpty()) {
|
{
|
||||||
if (ending != follow) syllable += ending;
|
if (!syllable.isEmpty())
|
||||||
} else {
|
{
|
||||||
|
if (ending != follow)
|
||||||
|
syllable += ending;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
syllable += aheadSeqHasIorUE ? follow : head;
|
syllable += aheadSeqHasIorUE ? follow : head;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -278,14 +339,20 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// the nasty issue of only one char in the buffer
|
// the nasty issue of only one char in the buffer
|
||||||
if (iter == sequence.begin() && iter + 1 == sequence.end()) {
|
if (iter == sequence.begin() && iter + 1 == sequence.end())
|
||||||
if (head.hasVowel() || follow.hasToneMarker() ||
|
{
|
||||||
head.belongsToZCSRClass()) {
|
if (head.hasVowel() || follow.hasToneMarker() || head.belongsToZCSRClass())
|
||||||
|
{
|
||||||
syllable += head;
|
syllable += head;
|
||||||
} else {
|
}
|
||||||
if (follow.hasVowel() || ending.hasToneMarker()) {
|
else
|
||||||
|
{
|
||||||
|
if (follow.hasVowel() || ending.hasToneMarker())
|
||||||
|
{
|
||||||
syllable += follow;
|
syllable += follow;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
syllable += ending;
|
syllable += ending;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -293,30 +360,39 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(syllable.maskType() & head.maskType()) &&
|
if (!(syllable.maskType() & head.maskType()) && !endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()))
|
||||||
!endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end())) {
|
{
|
||||||
syllable += head;
|
syllable += head;
|
||||||
} else {
|
}
|
||||||
if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) &&
|
else
|
||||||
head.belongsToZCSRClass() && syllable.isEmpty()) {
|
{
|
||||||
|
if (endAheadOrAheadHasToneMarkKey(iter + 1, sequence.end()) && head.belongsToZCSRClass() &&
|
||||||
|
syllable.isEmpty())
|
||||||
|
{
|
||||||
syllable += head;
|
syllable += head;
|
||||||
} else if (syllable.maskType() < follow.maskType()) {
|
}
|
||||||
|
else if (syllable.maskType() < follow.maskType())
|
||||||
|
{
|
||||||
syllable += follow;
|
syllable += follow;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
syllable += ending;
|
syllable += ending;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// heuristics for Hsu keyboard layout
|
// heuristics for Hsu keyboard layout
|
||||||
if (this == HsuLayout()) {
|
if (this == HsuLayout())
|
||||||
|
{
|
||||||
// fix the left out L to ERR when it has sound, and GI, GUE -> JI, JUE
|
// fix the left out L to ERR when it has sound, and GI, GUE -> JI, JUE
|
||||||
if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() &&
|
if (syllable.vowelComponent() == BPMF::ENG && !syllable.hasConsonant() && !syllable.hasMiddleVowel())
|
||||||
!syllable.hasMiddleVowel()) {
|
{
|
||||||
syllable += BPMF(BPMF::ERR);
|
syllable += BPMF(BPMF::ERR);
|
||||||
} else if (syllable.consonantComponent() == BPMF::G &&
|
}
|
||||||
(syllable.middleVowelComponent() == BPMF::I ||
|
else if (syllable.consonantComponent() == BPMF::G &&
|
||||||
syllable.middleVowelComponent() == BPMF::UE)) {
|
(syllable.middleVowelComponent() == BPMF::I || syllable.middleVowelComponent() == BPMF::UE))
|
||||||
|
{
|
||||||
syllable += BPMF(BPMF::J);
|
syllable += BPMF(BPMF::J);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -325,9 +401,10 @@ if ((k = componentToKey(c))) sequence += std::string(1, k); \
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool endAheadOrAheadHasToneMarkKey(std::string::const_iterator ahead,
|
bool endAheadOrAheadHasToneMarkKey(std::string::const_iterator ahead, std::string::const_iterator end) const
|
||||||
std::string::const_iterator end) const {
|
{
|
||||||
if (ahead == end) return true;
|
if (ahead == end)
|
||||||
|
return true;
|
||||||
|
|
||||||
char tone1 = componentToKey(BPMF::Tone1);
|
char tone1 = componentToKey(BPMF::Tone1);
|
||||||
char tone2 = componentToKey(BPMF::Tone2);
|
char tone2 = componentToKey(BPMF::Tone2);
|
||||||
|
@ -336,22 +413,23 @@ protected:
|
||||||
char tone5 = componentToKey(BPMF::Tone5);
|
char tone5 = componentToKey(BPMF::Tone5);
|
||||||
|
|
||||||
if (tone1)
|
if (tone1)
|
||||||
if (*ahead == tone1) return true;
|
if (*ahead == tone1)
|
||||||
|
return true;
|
||||||
|
|
||||||
if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 ||
|
if (*ahead == tone2 || *ahead == tone3 || *ahead == tone4 || *ahead == tone5)
|
||||||
*ahead == tone5)
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool sequenceContainsIorUE(std::string::const_iterator start,
|
bool sequenceContainsIorUE(std::string::const_iterator start, std::string::const_iterator end) const
|
||||||
std::string::const_iterator end) const {
|
{
|
||||||
char iChar = componentToKey(BPMF::I);
|
char iChar = componentToKey(BPMF::I);
|
||||||
char ueChar = componentToKey(BPMF::UE);
|
char ueChar = componentToKey(BPMF::UE);
|
||||||
|
|
||||||
for (; start != end; ++start)
|
for (; start != end; ++start)
|
||||||
if (*start == iChar || *start == ueChar) return true;
|
if (*start == iChar || *start == ueChar)
|
||||||
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -360,36 +438,45 @@ protected:
|
||||||
BopomofoComponentToKeyMap m_componentToKey;
|
BopomofoComponentToKeyMap m_componentToKey;
|
||||||
};
|
};
|
||||||
|
|
||||||
class BopomofoReadingBuffer {
|
class BopomofoReadingBuffer
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
explicit BopomofoReadingBuffer(const BopomofoKeyboardLayout* layout)
|
explicit BopomofoReadingBuffer(const BopomofoKeyboardLayout *layout) : layout_(layout), pinyin_mode_(false)
|
||||||
: layout_(layout), pinyin_mode_(false) {
|
{
|
||||||
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) {
|
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout())
|
||||||
|
{
|
||||||
pinyin_mode_ = true;
|
pinyin_mode_ = true;
|
||||||
pinyin_sequence_ = "";
|
pinyin_sequence_ = "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void setKeyboardLayout(const BopomofoKeyboardLayout* layout) {
|
void setKeyboardLayout(const BopomofoKeyboardLayout *layout)
|
||||||
|
{
|
||||||
layout_ = layout;
|
layout_ = layout;
|
||||||
|
|
||||||
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout()) {
|
if (layout == BopomofoKeyboardLayout::HanyuPinyinLayout())
|
||||||
|
{
|
||||||
pinyin_mode_ = true;
|
pinyin_mode_ = true;
|
||||||
pinyin_sequence_ = "";
|
pinyin_sequence_ = "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isValidKey(char k) const {
|
bool isValidKey(char k) const
|
||||||
if (!pinyin_mode_) {
|
{
|
||||||
|
if (!pinyin_mode_)
|
||||||
|
{
|
||||||
return layout_ ? (layout_->keyToComponents(k)).size() > 0 : false;
|
return layout_ ? (layout_->keyToComponents(k)).size() > 0 : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
char lk = tolower(k);
|
char lk = tolower(k);
|
||||||
if (lk >= 'a' && lk <= 'z') {
|
if (lk >= 'a' && lk <= 'z')
|
||||||
|
{
|
||||||
// if a tone marker is already in place
|
// if a tone marker is already in place
|
||||||
if (pinyin_sequence_.length()) {
|
if (pinyin_sequence_.length())
|
||||||
|
{
|
||||||
char lastc = pinyin_sequence_[pinyin_sequence_.length() - 1];
|
char lastc = pinyin_sequence_[pinyin_sequence_.length() - 1];
|
||||||
if (lastc >= '2' && lastc <= '5') {
|
if (lastc >= '2' && lastc <= '5')
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -397,40 +484,47 @@ public:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pinyin_sequence_.length() && (lk >= '2' && lk <= '5')) {
|
if (pinyin_sequence_.length() && (lk >= '2' && lk <= '5'))
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool combineKey(char k) {
|
bool combineKey(char k)
|
||||||
if (!isValidKey(k)) return false;
|
{
|
||||||
|
if (!isValidKey(k))
|
||||||
|
return false;
|
||||||
|
|
||||||
if (pinyin_mode_) {
|
if (pinyin_mode_)
|
||||||
|
{
|
||||||
pinyin_sequence_ += std::string(1, tolower(k));
|
pinyin_sequence_ += std::string(1, tolower(k));
|
||||||
syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_);
|
syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string sequence =
|
std::string sequence = layout_->keySequenceFromSyllable(syllable_) + std::string(1, k);
|
||||||
layout_->keySequenceFromSyllable(syllable_) + std::string(1, k);
|
|
||||||
syllable_ = layout_->syllableFromKeySequence(sequence);
|
syllable_ = layout_->syllableFromKeySequence(sequence);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear() {
|
void clear()
|
||||||
|
{
|
||||||
pinyin_sequence_.clear();
|
pinyin_sequence_.clear();
|
||||||
syllable_.clear();
|
syllable_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
void backspace() {
|
void backspace()
|
||||||
if (!layout_) return;
|
{
|
||||||
|
if (!layout_)
|
||||||
|
return;
|
||||||
|
|
||||||
if (pinyin_mode_) {
|
if (pinyin_mode_)
|
||||||
if (pinyin_sequence_.length()) {
|
{
|
||||||
pinyin_sequence_ =
|
if (pinyin_sequence_.length())
|
||||||
pinyin_sequence_.substr(0, pinyin_sequence_.length() - 1);
|
{
|
||||||
|
pinyin_sequence_ = pinyin_sequence_.substr(0, pinyin_sequence_.length() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_);
|
syllable_ = BPMF::FromHanyuPinyin(pinyin_sequence_);
|
||||||
|
@ -438,29 +532,42 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string sequence = layout_->keySequenceFromSyllable(syllable_);
|
std::string sequence = layout_->keySequenceFromSyllable(syllable_);
|
||||||
if (sequence.length()) {
|
if (sequence.length())
|
||||||
|
{
|
||||||
sequence = sequence.substr(0, sequence.length() - 1);
|
sequence = sequence.substr(0, sequence.length() - 1);
|
||||||
syllable_ = layout_->syllableFromKeySequence(sequence);
|
syllable_ = layout_->syllableFromKeySequence(sequence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isEmpty() const { return syllable_.isEmpty(); }
|
bool isEmpty() const
|
||||||
|
{
|
||||||
|
return syllable_.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
const std::string composedString() const {
|
const std::string composedString() const
|
||||||
if (pinyin_mode_) {
|
{
|
||||||
|
if (pinyin_mode_)
|
||||||
|
{
|
||||||
return pinyin_sequence_;
|
return pinyin_sequence_;
|
||||||
}
|
}
|
||||||
|
|
||||||
return syllable_.composedString();
|
return syllable_.composedString();
|
||||||
}
|
}
|
||||||
|
|
||||||
const BPMF syllable() const { return syllable_; }
|
const BPMF syllable() const
|
||||||
|
{
|
||||||
|
return syllable_;
|
||||||
|
}
|
||||||
|
|
||||||
const std::string standardLayoutQueryString() const {
|
const std::string standardLayoutQueryString() const
|
||||||
|
{
|
||||||
return BopomofoKeyboardLayout::StandardLayout()->keySequenceFromSyllable(syllable_);
|
return BopomofoKeyboardLayout::StandardLayout()->keySequenceFromSyllable(syllable_);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool hasToneMarker() const { return syllable_.hasToneMarker(); }
|
bool hasToneMarker() const
|
||||||
|
{
|
||||||
|
return syllable_.hasToneMarker();
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const BopomofoKeyboardLayout *layout_;
|
const BopomofoKeyboardLayout *layout_;
|
||||||
|
@ -471,5 +578,4 @@ protected:
|
||||||
};
|
};
|
||||||
} // namespace Mandarin
|
} // namespace Mandarin
|
||||||
|
|
||||||
|
|
||||||
#endif // MANDARIN_H_
|
#endif // MANDARIN_H_
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#import <Foundation/Foundation.h>
|
#import <Foundation/Foundation.h>
|
||||||
|
@ -33,7 +40,9 @@ extern InputMode imeModeNULL;
|
||||||
|
|
||||||
@protocol KeyHandlerDelegate <NSObject>
|
@protocol KeyHandlerDelegate <NSObject>
|
||||||
- (id)candidateControllerForKeyHandler:(KeyHandler *)keyHandler;
|
- (id)candidateControllerForKeyHandler:(KeyHandler *)keyHandler;
|
||||||
- (void)keyHandler:(KeyHandler *)keyHandler didSelectCandidateAtIndex:(NSInteger)index candidateController:(id)controller;
|
- (void)keyHandler:(KeyHandler *)keyHandler
|
||||||
|
didSelectCandidateAtIndex:(NSInteger)index
|
||||||
|
candidateController:(id)controller;
|
||||||
- (BOOL)keyHandler:(KeyHandler *)keyHandler didRequestWriteUserPhraseWithState:(InputState *)state;
|
- (BOOL)keyHandler:(KeyHandler *)keyHandler didRequestWriteUserPhraseWithState:(InputState *)state;
|
||||||
@end
|
@end
|
||||||
|
|
||||||
|
@ -43,7 +52,8 @@ extern InputMode imeModeNULL;
|
||||||
- (BOOL)handleInput:(keyParser *)input
|
- (BOOL)handleInput:(keyParser *)input
|
||||||
state:(InputState *)state
|
state:(InputState *)state
|
||||||
stateCallback:(void (^)(InputState *))stateCallback
|
stateCallback:(void (^)(InputState *))stateCallback
|
||||||
errorCallback:(void (^)(void))errorCallback NS_SWIFT_NAME(handle(input:state:stateCallback:errorCallback:));
|
errorCallback:(void (^)(void))errorCallback
|
||||||
|
NS_SWIFT_NAME(handle(input:state:stateCallback:errorCallback:));
|
||||||
|
|
||||||
- (void)syncWithPreferences;
|
- (void)syncWithPreferences;
|
||||||
- (void)fixNodeWithValue:(NSString *)value NS_SWIFT_NAME(fixNode(value:));
|
- (void)fixNodeWithValue:(NSString *)value NS_SWIFT_NAME(fixNode(value:));
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,35 +1,43 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "KeyValueBlobReader.h"
|
#include "KeyValueBlobReader.h"
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue *out)
|
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue *out)
|
||||||
{
|
{
|
||||||
static auto new_line = [](char c) { return c == '\n' || c == '\r'; };
|
static auto new_line = [](char c) { return c == '\n' || c == '\r'; };
|
||||||
static auto blank = [](char c) { return c == ' ' || c == '\t'; };
|
static auto blank = [](char c) { return c == ' ' || c == '\t'; };
|
||||||
static auto blank_or_newline
|
static auto blank_or_newline = [](char c) { return blank(c) || new_line(c); };
|
||||||
= [](char c) { return blank(c) || new_line(c); };
|
|
||||||
static auto content_char = [](char c) { return !blank(c) && !new_line(c); };
|
static auto content_char = [](char c) { return !blank(c) && !new_line(c); };
|
||||||
|
|
||||||
if (state_ == State::ERROR) {
|
if (state_ == State::ERROR)
|
||||||
|
{
|
||||||
return state_;
|
return state_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,18 +46,22 @@ KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
|
||||||
const char *value_begin = nullptr;
|
const char *value_begin = nullptr;
|
||||||
size_t value_length = 0;
|
size_t value_length = 0;
|
||||||
|
|
||||||
while (true) {
|
while (true)
|
||||||
|
{
|
||||||
state_ = SkipUntilNot(blank_or_newline);
|
state_ = SkipUntilNot(blank_or_newline);
|
||||||
if (state_ != State::CAN_CONTINUE) {
|
if (state_ != State::CAN_CONTINUE)
|
||||||
|
{
|
||||||
return state_;
|
return state_;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if it's a comment line; if so, read until end of line.
|
// Check if it's a comment line; if so, read until end of line.
|
||||||
if (*current_ != '#') {
|
if (*current_ != '#')
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
state_ = SkipUntil(new_line);
|
state_ = SkipUntil(new_line);
|
||||||
if (state_ != State::CAN_CONTINUE) {
|
if (state_ != State::CAN_CONTINUE)
|
||||||
|
{
|
||||||
return state_;
|
return state_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,22 +71,26 @@ KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
|
||||||
|
|
||||||
key_begin = current_;
|
key_begin = current_;
|
||||||
state_ = SkipUntilNot(content_char);
|
state_ = SkipUntilNot(content_char);
|
||||||
if (state_ != State::CAN_CONTINUE) {
|
if (state_ != State::CAN_CONTINUE)
|
||||||
|
{
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
key_length = current_ - key_begin;
|
key_length = current_ - key_begin;
|
||||||
|
|
||||||
// There should be at least one blank character after the key string.
|
// There should be at least one blank character after the key string.
|
||||||
if (!blank(*current_)) {
|
if (!blank(*current_))
|
||||||
|
{
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
state_ = SkipUntilNot(blank);
|
state_ = SkipUntilNot(blank);
|
||||||
if (state_ != State::CAN_CONTINUE) {
|
if (state_ != State::CAN_CONTINUE)
|
||||||
|
{
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!content_char(*current_)) {
|
if (!content_char(*current_))
|
||||||
|
{
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,9 +106,9 @@ KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue* out)
|
||||||
// like "foo bar baz\n" where baz should not be treated as the Next key.
|
// like "foo bar baz\n" where baz should not be treated as the Next key.
|
||||||
SkipUntil(new_line);
|
SkipUntil(new_line);
|
||||||
|
|
||||||
if (out != nullptr) {
|
if (out != nullptr)
|
||||||
*out = KeyValue { std::string_view { key_begin, key_length },
|
{
|
||||||
std::string_view { value_begin, value_length } };
|
*out = KeyValue{std::string_view{key_begin, key_length}, std::string_view{value_begin, value_length}};
|
||||||
}
|
}
|
||||||
state_ = State::HAS_PAIR;
|
state_ = State::HAS_PAIR;
|
||||||
return state_;
|
return state_;
|
||||||
|
@ -102,11 +118,12 @@ error:
|
||||||
return state_;
|
return state_;
|
||||||
}
|
}
|
||||||
|
|
||||||
KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(
|
KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(const std::function<bool(char)> &f)
|
||||||
const std::function<bool(char)>& f)
|
{
|
||||||
|
while (current_ != end_ && *current_)
|
||||||
|
{
|
||||||
|
if (!f(*current_))
|
||||||
{
|
{
|
||||||
while (current_ != end_ && *current_) {
|
|
||||||
if (!f(*current_)) {
|
|
||||||
return State::CAN_CONTINUE;
|
return State::CAN_CONTINUE;
|
||||||
}
|
}
|
||||||
++current_;
|
++current_;
|
||||||
|
@ -115,11 +132,12 @@ KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(
|
||||||
return State::END;
|
return State::END;
|
||||||
}
|
}
|
||||||
|
|
||||||
KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(
|
KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(const std::function<bool(char)> &f)
|
||||||
const std::function<bool(char)>& f)
|
{
|
||||||
|
while (current_ != end_ && *current_)
|
||||||
|
{
|
||||||
|
if (f(*current_))
|
||||||
{
|
{
|
||||||
while (current_ != end_ && *current_) {
|
|
||||||
if (f(*current_)) {
|
|
||||||
return State::CAN_CONTINUE;
|
return State::CAN_CONTINUE;
|
||||||
}
|
}
|
||||||
++current_;
|
++current_;
|
||||||
|
@ -128,8 +146,7 @@ KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(
|
||||||
return State::END;
|
return State::END;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostream& operator<<(
|
std::ostream &operator<<(std::ostream &os, const KeyValueBlobReader::KeyValue &kv)
|
||||||
std::ostream& os, const KeyValueBlobReader::KeyValue& kv)
|
|
||||||
{
|
{
|
||||||
os << "(key: " << kv.key << ", value: " << kv.value << ")";
|
os << "(key: " << kv.key << ", value: " << kv.value << ")";
|
||||||
return os;
|
return os;
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
|
#ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
|
||||||
|
@ -39,11 +46,14 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
// std::string_view is used to allow returning results efficiently. As a result,
|
// std::string_view is used to allow returning results efficiently. As a result,
|
||||||
// the blob is a const char* and will never be mutated. This implies, for
|
// the blob is a const char* and will never be mutated. This implies, for
|
||||||
// example, read-only mmap can be used to parse large files.
|
// example, read-only mmap can be used to parse large files.
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class KeyValueBlobReader {
|
class KeyValueBlobReader
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
enum class State : int {
|
enum class State : int
|
||||||
|
{
|
||||||
// There are no more key-value pairs in this blob.
|
// There are no more key-value pairs in this blob.
|
||||||
END = 0,
|
END = 0,
|
||||||
// The reader has produced a new key-value pair.
|
// The reader has produced a new key-value pair.
|
||||||
|
@ -54,15 +64,12 @@ public:
|
||||||
CAN_CONTINUE = 2
|
CAN_CONTINUE = 2
|
||||||
};
|
};
|
||||||
|
|
||||||
struct KeyValue {
|
struct KeyValue
|
||||||
constexpr KeyValue()
|
{
|
||||||
: key("")
|
constexpr KeyValue() : key(""), value("")
|
||||||
, value("")
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
constexpr KeyValue(std::string_view k, std::string_view v)
|
constexpr KeyValue(std::string_view k, std::string_view v) : key(k), value(v)
|
||||||
: key(k)
|
|
||||||
, value(v)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,9 +82,7 @@ public:
|
||||||
std::string_view value;
|
std::string_view value;
|
||||||
};
|
};
|
||||||
|
|
||||||
KeyValueBlobReader(const char* blob, size_t size)
|
KeyValueBlobReader(const char *blob, size_t size) : current_(blob), end_(blob + size)
|
||||||
: current_(blob)
|
|
||||||
, end_(blob + size)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,36 +1,43 @@
|
||||||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LMConsolidator_hpp
|
#ifndef LMConsolidator_hpp
|
||||||
#define LMConsolidator_hpp
|
#define LMConsolidator_hpp
|
||||||
|
|
||||||
#include <syslog.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <sstream>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
|
||||||
#include <regex>
|
#include <regex>
|
||||||
|
#include <set>
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string>
|
||||||
|
#include <syslog.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class LMConsolidator
|
class LMConsolidator
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,28 +1,35 @@
|
||||||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "LMConsolidator.h"
|
#include "LMConsolidator.h"
|
||||||
#include "vChewing-Swift.h"
|
#include "vChewing-Swift.h"
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
constexpr std::string_view FORMATTED_PRAGMA_HEADER
|
constexpr std::string_view FORMATTED_PRAGMA_HEADER =
|
||||||
= "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍";
|
"# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍";
|
||||||
|
|
||||||
// HEADER VERIFIER. CREDIT: Shiki Suen
|
// HEADER VERIFIER. CREDIT: Shiki Suen
|
||||||
bool LMConsolidator::CheckPragma(const char *path)
|
bool LMConsolidator::CheckPragma(const char *path)
|
||||||
|
@ -32,13 +39,17 @@ bool LMConsolidator::CheckPragma(const char *path)
|
||||||
{
|
{
|
||||||
string firstLine;
|
string firstLine;
|
||||||
getline(zfdCheckPragma, firstLine);
|
getline(zfdCheckPragma, firstLine);
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str());
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
if (firstLine != FORMATTED_PRAGMA_HEADER) {
|
syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str());
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS.");
|
if (firstLine != FORMATTED_PRAGMA_HEADER)
|
||||||
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL.");
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL.");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,32 +60,45 @@ bool LMConsolidator::FixEOF(const char *path)
|
||||||
zfdEOFFixerIncomingStream.seekg(-1, std::ios_base::end);
|
zfdEOFFixerIncomingStream.seekg(-1, std::ios_base::end);
|
||||||
char z;
|
char z;
|
||||||
zfdEOFFixerIncomingStream.get(z);
|
zfdEOFFixerIncomingStream.get(z);
|
||||||
if(z!='\n'){
|
if (z != '\n')
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n");
|
{
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n");
|
syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n");
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n");
|
||||||
std::ofstream zfdEOFFixerOutput(path, std::ios_base::app);
|
std::ofstream zfdEOFFixerOutput(path, std::ios_base::app);
|
||||||
zfdEOFFixerOutput << std::endl;
|
zfdEOFFixerOutput << std::endl;
|
||||||
zfdEOFFixerOutput.close();
|
zfdEOFFixerOutput.close();
|
||||||
if (zfdEOFFixerOutput.fail()) {
|
if (zfdEOFFixerOutput.fail())
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n");
|
{
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n");
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
zfdEOFFixerIncomingStream.close();
|
zfdEOFFixerIncomingStream.close();
|
||||||
if (zfdEOFFixerIncomingStream.fail()) {
|
if (zfdEOFFixerIncomingStream.fail())
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n");
|
{
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS,
|
||||||
|
"// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n");
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
} // END: EOF FIXER.
|
} // END: EOF FIXER.
|
||||||
|
|
||||||
// CONTENT CONSOLIDATOR. CREDIT: Shiki Suen.
|
// CONTENT CONSOLIDATOR. CREDIT: Shiki Suen.
|
||||||
bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma) {
|
bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma)
|
||||||
|
{
|
||||||
bool pragmaCheckResult = LMConsolidator::CheckPragma(path);
|
bool pragmaCheckResult = LMConsolidator::CheckPragma(path);
|
||||||
if (pragmaCheckResult && shouldCheckPragma){
|
if (pragmaCheckResult && shouldCheckPragma)
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -87,17 +111,22 @@ bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma
|
||||||
vecEntry.push_back(zfdBuffer);
|
vecEntry.push_back(zfdBuffer);
|
||||||
}
|
}
|
||||||
// 第一遍 for 用來統整每行內的內容。
|
// 第一遍 for 用來統整每行內的內容。
|
||||||
// regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp / objCpp 可能對某些 Regex 寫法有相容性問題。
|
// regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"),
|
||||||
// regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。
|
// sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp /
|
||||||
|
// objCpp 可能對某些 Regex 寫法有相容性問題。 regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "),
|
||||||
|
// sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。
|
||||||
regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)");
|
regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)");
|
||||||
for(int i=0;i<vecEntry.size();i++) { // 第一遍 for 用來統整每行內的內容。
|
for (int i = 0; i < vecEntry.size(); i++)
|
||||||
if (vecEntry[i].size() != 0) { // 不要理會空行,否則給空行加上 endl 等於再加空行。
|
{ // 第一遍 for 用來統整每行內的內容。
|
||||||
// RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab 等),最後去除每行首尾空格。
|
if (vecEntry[i].size() != 0)
|
||||||
// vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); // 中日韓全形空格轉為 ASCII 空格。
|
{ // 不要理會空行,否則給空行加上 endl 等於再加空行。
|
||||||
// vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, " ").c_str(); // Non-Break 型空格轉為 ASCII 空格。
|
// RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab
|
||||||
// vecEntry[i] = regex_replace(vecEntry[i], sedWhiteSpace, " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。
|
// 等),最後去除每行首尾空格。 vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); //
|
||||||
// vecEntry[i] = regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。
|
// 中日韓全形空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, "
|
||||||
// vecEntry[i] = regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。
|
// ").c_str(); // Non-Break 型空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedWhiteSpace,
|
||||||
|
// " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。 vecEntry[i] =
|
||||||
|
// regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。 vecEntry[i] =
|
||||||
|
// regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。
|
||||||
// 上述命令分步驟執行容易產生效能問題,故濃縮為下述兩句。
|
// 上述命令分步驟執行容易產生效能問題,故濃縮為下述兩句。
|
||||||
vecEntry[i] = regex_replace(vecEntry[i], sedToConsolidate, " ").c_str();
|
vecEntry[i] = regex_replace(vecEntry[i], sedToConsolidate, " ").c_str();
|
||||||
vecEntry[i] = regex_replace(vecEntry[i], sedToTrim, "").c_str();
|
vecEntry[i] = regex_replace(vecEntry[i], sedToTrim, "").c_str();
|
||||||
|
@ -109,24 +138,36 @@ bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma
|
||||||
std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。
|
std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。
|
||||||
// 統整完畢。開始將統整過的內容寫入檔案。
|
// 統整完畢。開始將統整過的內容寫入檔案。
|
||||||
ofstream zfdContentConsolidatorOutput(path); // 這裡是要從頭開始重寫檔案內容,所以不需要「 ios_base::app 」。
|
ofstream zfdContentConsolidatorOutput(path); // 這裡是要從頭開始重寫檔案內容,所以不需要「 ios_base::app 」。
|
||||||
if (!pragmaCheckResult){
|
if (!pragmaCheckResult)
|
||||||
|
{
|
||||||
zfdContentConsolidatorOutput << FORMATTED_PRAGMA_HEADER << endl; // 寫入經過整理處理的 HEADER。
|
zfdContentConsolidatorOutput << FORMATTED_PRAGMA_HEADER << endl; // 寫入經過整理處理的 HEADER。
|
||||||
}
|
}
|
||||||
for(int i=0;i<vecEntry.size();i++) { // 第二遍 for 用來寫入統整過的內容。
|
for (int i = 0; i < vecEntry.size(); i++)
|
||||||
if (vecEntry[i].size() != 0) { // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。
|
{ // 第二遍 for 用來寫入統整過的內容。
|
||||||
zfdContentConsolidatorOutput<<vecEntry[i]<<endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。
|
if (vecEntry[i].size() != 0)
|
||||||
|
{ // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。
|
||||||
|
zfdContentConsolidatorOutput << vecEntry[i]
|
||||||
|
<< endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
zfdContentConsolidatorOutput.close();
|
zfdContentConsolidatorOutput.close();
|
||||||
if (zfdContentConsolidatorOutput.fail()) {
|
if (zfdContentConsolidatorOutput.fail())
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n");
|
{
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS,
|
||||||
|
"// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n");
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
zfdContentConsolidatorIncomingStream.close();
|
zfdContentConsolidatorIncomingStream.close();
|
||||||
if (zfdContentConsolidatorIncomingStream.fail()) {
|
if (zfdContentConsolidatorIncomingStream.fail())
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. Insufficient Privileges?\n");
|
{
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "// DATA FILE: %s", path);
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. "
|
||||||
|
"Insufficient Privileges?\n");
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -1,28 +1,35 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LMInstantiator_H
|
#ifndef LMInstantiator_H
|
||||||
#define LMInstantiator_H
|
#define LMInstantiator_H
|
||||||
|
|
||||||
#include "AssociatedPhrases.h"
|
#include "AssociatedPhrases.h"
|
||||||
#include "CoreLM.h"
|
|
||||||
#include "CNSLM.h"
|
#include "CNSLM.h"
|
||||||
|
#include "CoreLM.h"
|
||||||
#include "ParselessLM.h"
|
#include "ParselessLM.h"
|
||||||
#include "PhraseReplacementMap.h"
|
#include "PhraseReplacementMap.h"
|
||||||
#include "SymbolLM.h"
|
#include "SymbolLM.h"
|
||||||
|
@ -31,7 +38,8 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
using namespace Gramambular;
|
using namespace Gramambular;
|
||||||
|
|
||||||
|
@ -57,7 +65,8 @@ using namespace Gramambular;
|
||||||
/// model while launching and to load the user phrases anytime if the custom
|
/// model while launching and to load the user phrases anytime if the custom
|
||||||
/// files are modified. It does not keep the reference of the data pathes but
|
/// files are modified. It does not keep the reference of the data pathes but
|
||||||
/// you have to pass the paths when you ask it to do loading.
|
/// you have to pass the paths when you ask it to do loading.
|
||||||
class LMInstantiator : public Gramambular::LanguageModel {
|
class LMInstantiator : public Gramambular::LanguageModel
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
LMInstantiator();
|
LMInstantiator();
|
||||||
~LMInstantiator();
|
~LMInstantiator();
|
||||||
|
@ -128,7 +137,6 @@ public:
|
||||||
const std::vector<std::string> associatedPhrasesForKey(const std::string &key);
|
const std::vector<std::string> associatedPhrasesForKey(const std::string &key);
|
||||||
bool hasAssociatedPhrasesForKey(const std::string &key);
|
bool hasAssociatedPhrasesForKey(const std::string &key);
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/// Filters and converts the input unigrams and return a new list of unigrams.
|
/// Filters and converts the input unigrams and return a new list of unigrams.
|
||||||
///
|
///
|
||||||
|
@ -137,8 +145,8 @@ protected:
|
||||||
/// @param insertedValues The values for unigrams already in the results.
|
/// @param insertedValues The values for unigrams already in the results.
|
||||||
/// It helps to prevent duplicated unigrams. Please note that the method
|
/// It helps to prevent duplicated unigrams. Please note that the method
|
||||||
/// has a side effect that it inserts values to `insertedValues`.
|
/// has a side effect that it inserts values to `insertedValues`.
|
||||||
const std::vector<Gramambular::Unigram> filterAndTransformUnigrams(const std::vector<Gramambular::Unigram> unigrams,
|
const std::vector<Gramambular::Unigram> filterAndTransformUnigrams(
|
||||||
const std::unordered_set<std::string>& excludedValues,
|
const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
|
||||||
std::unordered_set<std::string> &insertedValues);
|
std::unordered_set<std::string> &insertedValues);
|
||||||
|
|
||||||
ParselessLM m_languageModel;
|
ParselessLM m_languageModel;
|
||||||
|
@ -154,6 +162,6 @@ protected:
|
||||||
bool m_cnsEnabled;
|
bool m_cnsEnabled;
|
||||||
bool m_symbolEnabled;
|
bool m_symbolEnabled;
|
||||||
};
|
};
|
||||||
};
|
}; // namespace vChewing
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,27 +1,35 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "LMInstantiator.h"
|
#include "LMInstantiator.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
LMInstantiator::LMInstantiator()
|
LMInstantiator::LMInstantiator()
|
||||||
{
|
{
|
||||||
|
@ -41,7 +49,8 @@ LMInstantiator::~LMInstantiator()
|
||||||
|
|
||||||
void LMInstantiator::loadLanguageModel(const char *languageModelDataPath)
|
void LMInstantiator::loadLanguageModel(const char *languageModelDataPath)
|
||||||
{
|
{
|
||||||
if (languageModelDataPath) {
|
if (languageModelDataPath)
|
||||||
|
{
|
||||||
m_languageModel.close();
|
m_languageModel.close();
|
||||||
m_languageModel.open(languageModelDataPath);
|
m_languageModel.open(languageModelDataPath);
|
||||||
}
|
}
|
||||||
|
@ -54,7 +63,8 @@ bool LMInstantiator::isDataModelLoaded()
|
||||||
|
|
||||||
void LMInstantiator::loadCNSData(const char *cnsDataPath)
|
void LMInstantiator::loadCNSData(const char *cnsDataPath)
|
||||||
{
|
{
|
||||||
if (cnsDataPath) {
|
if (cnsDataPath)
|
||||||
|
{
|
||||||
m_cnsModel.close();
|
m_cnsModel.close();
|
||||||
m_cnsModel.open(cnsDataPath);
|
m_cnsModel.open(cnsDataPath);
|
||||||
}
|
}
|
||||||
|
@ -67,7 +77,8 @@ bool LMInstantiator::isCNSDataLoaded()
|
||||||
|
|
||||||
void LMInstantiator::loadMiscData(const char *miscDataPath)
|
void LMInstantiator::loadMiscData(const char *miscDataPath)
|
||||||
{
|
{
|
||||||
if (miscDataPath) {
|
if (miscDataPath)
|
||||||
|
{
|
||||||
m_miscModel.close();
|
m_miscModel.close();
|
||||||
m_miscModel.open(miscDataPath);
|
m_miscModel.open(miscDataPath);
|
||||||
}
|
}
|
||||||
|
@ -80,7 +91,8 @@ bool LMInstantiator::isMiscDataLoaded()
|
||||||
|
|
||||||
void LMInstantiator::loadSymbolData(const char *symbolDataPath)
|
void LMInstantiator::loadSymbolData(const char *symbolDataPath)
|
||||||
{
|
{
|
||||||
if (symbolDataPath) {
|
if (symbolDataPath)
|
||||||
|
{
|
||||||
m_symbolModel.close();
|
m_symbolModel.close();
|
||||||
m_symbolModel.open(symbolDataPath);
|
m_symbolModel.open(symbolDataPath);
|
||||||
}
|
}
|
||||||
|
@ -91,14 +103,15 @@ bool LMInstantiator::isSymbolDataLoaded()
|
||||||
return m_symbolModel.isLoaded();
|
return m_symbolModel.isLoaded();
|
||||||
}
|
}
|
||||||
|
|
||||||
void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath,
|
void LMInstantiator::loadUserPhrases(const char *userPhrasesDataPath, const char *excludedPhrasesDataPath)
|
||||||
const char* excludedPhrasesDataPath)
|
{
|
||||||
|
if (userPhrasesDataPath)
|
||||||
{
|
{
|
||||||
if (userPhrasesDataPath) {
|
|
||||||
m_userPhrases.close();
|
m_userPhrases.close();
|
||||||
m_userPhrases.open(userPhrasesDataPath);
|
m_userPhrases.open(userPhrasesDataPath);
|
||||||
}
|
}
|
||||||
if (excludedPhrasesDataPath) {
|
if (excludedPhrasesDataPath)
|
||||||
|
{
|
||||||
m_excludedPhrases.close();
|
m_excludedPhrases.close();
|
||||||
m_excludedPhrases.open(excludedPhrasesDataPath);
|
m_excludedPhrases.open(excludedPhrasesDataPath);
|
||||||
}
|
}
|
||||||
|
@ -106,7 +119,8 @@ void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath,
|
||||||
|
|
||||||
void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath)
|
void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath)
|
||||||
{
|
{
|
||||||
if (userSymbolDataPath) {
|
if (userSymbolDataPath)
|
||||||
|
{
|
||||||
m_userSymbolModel.close();
|
m_userSymbolModel.close();
|
||||||
m_userSymbolModel.open(userSymbolDataPath);
|
m_userSymbolModel.open(userSymbolDataPath);
|
||||||
}
|
}
|
||||||
|
@ -114,7 +128,8 @@ void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath)
|
||||||
|
|
||||||
void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath)
|
void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath)
|
||||||
{
|
{
|
||||||
if (userAssociatedPhrasesPath) {
|
if (userAssociatedPhrasesPath)
|
||||||
|
{
|
||||||
m_associatedPhrases.close();
|
m_associatedPhrases.close();
|
||||||
m_associatedPhrases.open(userAssociatedPhrasesPath);
|
m_associatedPhrases.open(userAssociatedPhrasesPath);
|
||||||
}
|
}
|
||||||
|
@ -122,20 +137,23 @@ void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrases
|
||||||
|
|
||||||
void LMInstantiator::loadPhraseReplacementMap(const char *phraseReplacementPath)
|
void LMInstantiator::loadPhraseReplacementMap(const char *phraseReplacementPath)
|
||||||
{
|
{
|
||||||
if (phraseReplacementPath) {
|
if (phraseReplacementPath)
|
||||||
|
{
|
||||||
m_phraseReplacement.close();
|
m_phraseReplacement.close();
|
||||||
m_phraseReplacement.open(phraseReplacementPath);
|
m_phraseReplacement.open(phraseReplacementPath);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<Gramambular::Bigram> LMInstantiator::bigramsForKeys(const std::string& preceedingKey, const std::string& key)
|
const std::vector<Gramambular::Bigram> LMInstantiator::bigramsForKeys(const std::string &preceedingKey,
|
||||||
|
const std::string &key)
|
||||||
{
|
{
|
||||||
return std::vector<Gramambular::Bigram>();
|
return std::vector<Gramambular::Bigram>();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string &key)
|
const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string &key)
|
||||||
{
|
{
|
||||||
if (key == " ") {
|
if (key == " ")
|
||||||
|
{
|
||||||
std::vector<Gramambular::Unigram> spaceUnigrams;
|
std::vector<Gramambular::Unigram> spaceUnigrams;
|
||||||
Gramambular::Unigram g;
|
Gramambular::Unigram g;
|
||||||
g.keyValue.key = " ";
|
g.keyValue.key = " ";
|
||||||
|
@ -155,14 +173,15 @@ const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std
|
||||||
std::unordered_set<std::string> excludedValues;
|
std::unordered_set<std::string> excludedValues;
|
||||||
std::unordered_set<std::string> insertedValues;
|
std::unordered_set<std::string> insertedValues;
|
||||||
|
|
||||||
if (m_excludedPhrases.hasUnigramsForKey(key)) {
|
if (m_excludedPhrases.hasUnigramsForKey(key))
|
||||||
|
{
|
||||||
std::vector<Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
|
std::vector<Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
|
||||||
transform(excludedUnigrams.begin(), excludedUnigrams.end(),
|
transform(excludedUnigrams.begin(), excludedUnigrams.end(), inserter(excludedValues, excludedValues.end()),
|
||||||
inserter(excludedValues, excludedValues.end()),
|
|
||||||
[](const Gramambular::Unigram &u) { return u.keyValue.value; });
|
[](const Gramambular::Unigram &u) { return u.keyValue.value; });
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_userPhrases.hasUnigramsForKey(key)) {
|
if (m_userPhrases.hasUnigramsForKey(key))
|
||||||
|
{
|
||||||
std::vector<Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
|
std::vector<Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
|
||||||
// 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。
|
// 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。
|
||||||
// 這樣一來就可以在就地新增語彙時徹底複寫優先權。
|
// 這樣一來就可以在就地新增語彙時徹底複寫優先權。
|
||||||
|
@ -170,27 +189,32 @@ const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std
|
||||||
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
|
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_languageModel.hasUnigramsForKey(key)) {
|
if (m_languageModel.hasUnigramsForKey(key))
|
||||||
|
{
|
||||||
std::vector<Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
std::vector<Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
||||||
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
|
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_miscModel.hasUnigramsForKey(key)) {
|
if (m_miscModel.hasUnigramsForKey(key))
|
||||||
|
{
|
||||||
std::vector<Gramambular::Unigram> rawMiscUnigrams = m_miscModel.unigramsForKey(key);
|
std::vector<Gramambular::Unigram> rawMiscUnigrams = m_miscModel.unigramsForKey(key);
|
||||||
miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues);
|
miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled) {
|
if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
|
||||||
|
{
|
||||||
std::vector<Gramambular::Unigram> rawSymbolUnigrams = m_symbolModel.unigramsForKey(key);
|
std::vector<Gramambular::Unigram> rawSymbolUnigrams = m_symbolModel.unigramsForKey(key);
|
||||||
symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues);
|
symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled) {
|
if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
|
||||||
|
{
|
||||||
std::vector<Gramambular::Unigram> rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key);
|
std::vector<Gramambular::Unigram> rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key);
|
||||||
userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues);
|
userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) {
|
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled)
|
||||||
|
{
|
||||||
std::vector<Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
|
std::vector<Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
|
||||||
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
|
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
|
||||||
}
|
}
|
||||||
|
@ -205,11 +229,13 @@ const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std
|
||||||
|
|
||||||
bool LMInstantiator::hasUnigramsForKey(const std::string &key)
|
bool LMInstantiator::hasUnigramsForKey(const std::string &key)
|
||||||
{
|
{
|
||||||
if (key == " ") {
|
if (key == " ")
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m_excludedPhrases.hasUnigramsForKey(key)) {
|
if (!m_excludedPhrases.hasUnigramsForKey(key))
|
||||||
|
{
|
||||||
return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key);
|
return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,26 +272,33 @@ bool LMInstantiator::symbolEnabled()
|
||||||
return m_symbolEnabled;
|
return m_symbolEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string>& excludedValues, std::unordered_set<std::string>& insertedValues)
|
const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(
|
||||||
|
const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
|
||||||
|
std::unordered_set<std::string> &insertedValues)
|
||||||
{
|
{
|
||||||
std::vector<Gramambular::Unigram> results;
|
std::vector<Gramambular::Unigram> results;
|
||||||
|
|
||||||
for (auto&& unigram : unigrams) {
|
for (auto &&unigram : unigrams)
|
||||||
|
{
|
||||||
// excludedValues filters out the unigrams with the original value.
|
// excludedValues filters out the unigrams with the original value.
|
||||||
// insertedValues filters out the ones with the converted value
|
// insertedValues filters out the ones with the converted value
|
||||||
std::string originalValue = unigram.keyValue.value;
|
std::string originalValue = unigram.keyValue.value;
|
||||||
if (excludedValues.find(originalValue) != excludedValues.end()) {
|
if (excludedValues.find(originalValue) != excludedValues.end())
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string value = originalValue;
|
std::string value = originalValue;
|
||||||
if (m_phraseReplacementEnabled) {
|
if (m_phraseReplacementEnabled)
|
||||||
|
{
|
||||||
std::string replacement = m_phraseReplacement.valueForKey(value);
|
std::string replacement = m_phraseReplacement.valueForKey(value);
|
||||||
if (replacement != "") {
|
if (replacement != "")
|
||||||
|
{
|
||||||
value = replacement;
|
value = replacement;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (insertedValues.find(value) == insertedValues.end()) {
|
if (insertedValues.find(value) == insertedValues.end())
|
||||||
|
{
|
||||||
Gramambular::Unigram g;
|
Gramambular::Unigram g;
|
||||||
g.keyValue.value = value;
|
g.keyValue.value = value;
|
||||||
g.keyValue.key = unigram.keyValue.key;
|
g.keyValue.key = unigram.keyValue.key;
|
||||||
|
|
|
@ -1,31 +1,39 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef ASSOCIATEDPHRASES_H
|
#ifndef ASSOCIATEDPHRASES_H
|
||||||
#define ASSOCIATEDPHRASES_H
|
#define ASSOCIATEDPHRASES_H
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <map>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class AssociatedPhrases
|
class AssociatedPhrases
|
||||||
{
|
{
|
||||||
|
@ -40,8 +48,11 @@ public:
|
||||||
const bool hasValuesForKey(const std::string &key);
|
const bool hasValuesForKey(const std::string &key);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
struct Row {
|
struct Row
|
||||||
Row(std::string_view& k, std::string_view& v) : key(k), value(v) {}
|
{
|
||||||
|
Row(std::string_view &k, std::string_view &v) : key(k), value(v)
|
||||||
|
{
|
||||||
|
}
|
||||||
std::string_view key;
|
std::string_view key;
|
||||||
std::string_view value;
|
std::string_view value;
|
||||||
};
|
};
|
||||||
|
@ -53,6 +64,6 @@ protected:
|
||||||
size_t length;
|
size_t length;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
} // namespace vChewing
|
||||||
|
|
||||||
#endif /* AssociatedPhrases_hpp */
|
#endif /* AssociatedPhrases_hpp */
|
||||||
|
|
|
@ -1,52 +1,59 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "AssociatedPhrases.h"
|
#include "AssociatedPhrases.h"
|
||||||
#include "vChewing-Swift.h"
|
#include "vChewing-Swift.h"
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "KeyValueBlobReader.h"
|
#include "KeyValueBlobReader.h"
|
||||||
#include "LMConsolidator.h"
|
#include "LMConsolidator.h"
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
AssociatedPhrases::AssociatedPhrases()
|
AssociatedPhrases::AssociatedPhrases() : fd(-1), data(0), length(0)
|
||||||
: fd(-1)
|
|
||||||
, data(0)
|
|
||||||
, length(0)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
AssociatedPhrases::~AssociatedPhrases()
|
AssociatedPhrases::~AssociatedPhrases()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
close();
|
close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool AssociatedPhrases::isLoaded()
|
const bool AssociatedPhrases::isLoaded()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -54,7 +61,8 @@ const bool AssociatedPhrases::isLoaded()
|
||||||
|
|
||||||
bool AssociatedPhrases::open(const char *path)
|
bool AssociatedPhrases::open(const char *path)
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,13 +70,15 @@ bool AssociatedPhrases::open(const char *path)
|
||||||
LMConsolidator::ConsolidateContent(path, true);
|
LMConsolidator::ConsolidateContent(path, true);
|
||||||
|
|
||||||
fd = ::open(path, O_RDONLY);
|
fd = ::open(path, O_RDONLY);
|
||||||
if (fd == -1) {
|
if (fd == -1)
|
||||||
|
{
|
||||||
printf("open:: file not exist");
|
printf("open:: file not exist");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct stat sb;
|
struct stat sb;
|
||||||
if (fstat(fd, &sb) == -1) {
|
if (fstat(fd, &sb) == -1)
|
||||||
|
{
|
||||||
printf("open:: cannot open file");
|
printf("open:: cannot open file");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -76,7 +86,8 @@ bool AssociatedPhrases::open(const char *path)
|
||||||
length = (size_t)sb.st_size;
|
length = (size_t)sb.st_size;
|
||||||
|
|
||||||
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
||||||
if (!data) {
|
if (!data)
|
||||||
|
{
|
||||||
::close(fd);
|
::close(fd);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -84,13 +95,16 @@ bool AssociatedPhrases::open(const char *path)
|
||||||
KeyValueBlobReader reader(static_cast<char *>(data), length);
|
KeyValueBlobReader reader(static_cast<char *>(data), length);
|
||||||
KeyValueBlobReader::KeyValue keyValue;
|
KeyValueBlobReader::KeyValue keyValue;
|
||||||
KeyValueBlobReader::State state;
|
KeyValueBlobReader::State state;
|
||||||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
|
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
|
||||||
|
{
|
||||||
keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value);
|
keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value);
|
||||||
}
|
}
|
||||||
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
||||||
if (state == KeyValueBlobReader::State::ERROR) {
|
if (state == KeyValueBlobReader::State::ERROR)
|
||||||
|
{
|
||||||
// close();
|
// close();
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n");
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n");
|
||||||
// return false;
|
// return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -98,7 +112,8 @@ bool AssociatedPhrases::open(const char *path)
|
||||||
|
|
||||||
void AssociatedPhrases::close()
|
void AssociatedPhrases::close()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
munmap(data, length);
|
munmap(data, length);
|
||||||
::close(fd);
|
::close(fd);
|
||||||
data = 0;
|
data = 0;
|
||||||
|
@ -111,9 +126,11 @@ const std::vector<std::string> AssociatedPhrases::valuesForKey(const std::string
|
||||||
{
|
{
|
||||||
std::vector<std::string> v;
|
std::vector<std::string> v;
|
||||||
auto iter = keyRowMap.find(key);
|
auto iter = keyRowMap.find(key);
|
||||||
if (iter != keyRowMap.end()) {
|
if (iter != keyRowMap.end())
|
||||||
|
{
|
||||||
const std::vector<Row> &rows = iter->second;
|
const std::vector<Row> &rows = iter->second;
|
||||||
for (const auto& row : rows) {
|
for (const auto &row : rows)
|
||||||
|
{
|
||||||
std::string_view value = row.value;
|
std::string_view value = row.value;
|
||||||
v.push_back({value.data(), value.size()});
|
v.push_back({value.data(), value.size()});
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,30 +1,37 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef CoreLM_H
|
#ifndef CoreLM_H
|
||||||
#define CoreLM_H
|
#define CoreLM_H
|
||||||
|
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <map>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
// this class relies on the fact that we have a space-separated data
|
// this class relies on the fact that we have a space-separated data
|
||||||
// format, and we use mmap and zero-out the separators and line feeds
|
// format, and we use mmap and zero-out the separators and line feeds
|
||||||
|
@ -33,9 +40,11 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Gramambular;
|
using namespace Gramambular;
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class CoreLM : public Gramambular::LanguageModel {
|
class CoreLM : public Gramambular::LanguageModel
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
CoreLM();
|
CoreLM();
|
||||||
~CoreLM();
|
~CoreLM();
|
||||||
|
@ -58,7 +67,8 @@ protected:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Row {
|
struct Row
|
||||||
|
{
|
||||||
const char *key;
|
const char *key;
|
||||||
const char *value;
|
const char *value;
|
||||||
const char *logProbability;
|
const char *logProbability;
|
||||||
|
|
|
@ -1,50 +1,56 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "CoreLM.h"
|
#include "CoreLM.h"
|
||||||
#include <sys/mman.h>
|
#include "vChewing-Swift.h"
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <unistd.h>
|
#include <sys/mman.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
#include <syslog.h>
|
#include <syslog.h>
|
||||||
#include "vChewing-Swift.h"
|
#include <unistd.h>
|
||||||
|
|
||||||
using namespace Gramambular;
|
using namespace Gramambular;
|
||||||
|
|
||||||
vChewing::CoreLM::CoreLM()
|
vChewing::CoreLM::CoreLM() : fd(-1), data(0), length(0)
|
||||||
: fd(-1)
|
|
||||||
, data(0)
|
|
||||||
, length(0)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
vChewing::CoreLM::~CoreLM()
|
vChewing::CoreLM::~CoreLM()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
close();
|
close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool vChewing::CoreLM::isLoaded()
|
bool vChewing::CoreLM::isLoaded()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -52,24 +58,28 @@ bool vChewing::CoreLM::isLoaded()
|
||||||
|
|
||||||
bool vChewing::CoreLM::open(const char *path)
|
bool vChewing::CoreLM::open(const char *path)
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
fd = ::open(path, O_RDONLY);
|
fd = ::open(path, O_RDONLY);
|
||||||
if (fd == -1) {
|
if (fd == -1)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct stat sb;
|
struct stat sb;
|
||||||
if (fstat(fd, &sb) == -1) {
|
if (fstat(fd, &sb) == -1)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
length = (size_t)sb.st_size;
|
length = (size_t)sb.st_size;
|
||||||
|
|
||||||
data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0);
|
data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0);
|
||||||
if (!data) {
|
if (!data)
|
||||||
|
{
|
||||||
::close(fd);
|
::close(fd);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -117,18 +127,22 @@ bool vChewing::CoreLM::open(const char *path)
|
||||||
|
|
||||||
start:
|
start:
|
||||||
// EOF -> end
|
// EOF -> end
|
||||||
if (head == end) {
|
if (head == end)
|
||||||
|
{
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
|
|
||||||
c = *head;
|
c = *head;
|
||||||
// \s -> error
|
// \s -> error
|
||||||
if (c == ' ') {
|
if (c == ' ')
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
// \n -> start
|
// \n -> start
|
||||||
else if (c == '\n') {
|
else if (c == '\n')
|
||||||
|
{
|
||||||
head++;
|
head++;
|
||||||
goto start;
|
goto start;
|
||||||
}
|
}
|
||||||
|
@ -140,19 +154,24 @@ start:
|
||||||
|
|
||||||
state1:
|
state1:
|
||||||
// EOF -> error
|
// EOF -> error
|
||||||
if (head == end) {
|
if (head == end)
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
c = *head;
|
c = *head;
|
||||||
// \n -> error
|
// \n -> error
|
||||||
if (c == '\n') {
|
if (c == '\n')
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
// \s -> state2 + zero out ending + record column start
|
// \s -> state2 + zero out ending + record column start
|
||||||
else if (c == ' ') {
|
else if (c == ' ')
|
||||||
|
{
|
||||||
*head = 0;
|
*head = 0;
|
||||||
head++;
|
head++;
|
||||||
row.key = head;
|
row.key = head;
|
||||||
|
@ -165,15 +184,19 @@ state1:
|
||||||
|
|
||||||
state2:
|
state2:
|
||||||
// eof -> error
|
// eof -> error
|
||||||
if (head == end) {
|
if (head == end)
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
c = *head;
|
c = *head;
|
||||||
// \n, \s -> error
|
// \n, \s -> error
|
||||||
if (c == '\n' || c == ' ') {
|
if (c == '\n' || c == ' ')
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -184,20 +207,25 @@ state2:
|
||||||
|
|
||||||
state3:
|
state3:
|
||||||
// eof -> error
|
// eof -> error
|
||||||
if (head == end) {
|
if (head == end)
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
c = *head;
|
c = *head;
|
||||||
|
|
||||||
// \n -> error
|
// \n -> error
|
||||||
if (c == '\n') {
|
if (c == '\n')
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
// \s -> state4 + zero out ending + record column start
|
// \s -> state4 + zero out ending + record column start
|
||||||
else if (c == ' ') {
|
else if (c == ' ')
|
||||||
|
{
|
||||||
*head = 0;
|
*head = 0;
|
||||||
head++;
|
head++;
|
||||||
row.logProbability = head;
|
row.logProbability = head;
|
||||||
|
@ -210,15 +238,19 @@ state3:
|
||||||
|
|
||||||
state4:
|
state4:
|
||||||
// eof -> error
|
// eof -> error
|
||||||
if (head == end) {
|
if (head == end)
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
c = *head;
|
c = *head;
|
||||||
// \n, \s -> error
|
// \n, \s -> error
|
||||||
if (c == '\n' || c == ' ') {
|
if (c == '\n' || c == ' ')
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -227,22 +259,26 @@ state4:
|
||||||
|
|
||||||
// fall through to state 5
|
// fall through to state 5
|
||||||
|
|
||||||
|
|
||||||
state5:
|
state5:
|
||||||
// eof -> error
|
// eof -> error
|
||||||
if (head == end) {
|
if (head == end)
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
c = *head;
|
c = *head;
|
||||||
// \s -> error
|
// \s -> error
|
||||||
if (c == ' ') {
|
if (c == ' ')
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error");
|
{
|
||||||
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
// \n -> start
|
// \n -> start
|
||||||
else if (c == '\n') {
|
else if (c == '\n')
|
||||||
|
{
|
||||||
*head = 0;
|
*head = 0;
|
||||||
head++;
|
head++;
|
||||||
keyRowMap[row.key].push_back(row);
|
keyRowMap[row.key].push_back(row);
|
||||||
|
@ -265,13 +301,15 @@ end:
|
||||||
emptyRow.value = space;
|
emptyRow.value = space;
|
||||||
emptyRow.logProbability = zero;
|
emptyRow.logProbability = zero;
|
||||||
keyRowMap[space].push_back(emptyRow);
|
keyRowMap[space].push_back(emptyRow);
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete.");
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete.");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void vChewing::CoreLM::close()
|
void vChewing::CoreLM::close()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
munmap(data, length);
|
munmap(data, length);
|
||||||
::close(fd);
|
::close(fd);
|
||||||
data = 0;
|
data = 0;
|
||||||
|
@ -283,9 +321,11 @@ void vChewing::CoreLM::close()
|
||||||
void vChewing::CoreLM::dump()
|
void vChewing::CoreLM::dump()
|
||||||
{
|
{
|
||||||
size_t rows = 0;
|
size_t rows = 0;
|
||||||
for (map<const char *, vector<Row> >::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i) {
|
for (map<const char *, vector<Row>>::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i)
|
||||||
|
{
|
||||||
const vector<Row> &r = (*i).second;
|
const vector<Row> &r = (*i).second;
|
||||||
for (vector<Row>::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri) {
|
for (vector<Row>::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri)
|
||||||
|
{
|
||||||
const Row &row = *ri;
|
const Row &row = *ri;
|
||||||
cerr << row.key << " " << row.value << " " << row.logProbability << "\n";
|
cerr << row.key << " " << row.value << " " << row.logProbability << "\n";
|
||||||
rows++;
|
rows++;
|
||||||
|
@ -303,8 +343,10 @@ const std::vector<Gramambular::Unigram> vChewing::CoreLM::unigramsForKey(const s
|
||||||
std::vector<Gramambular::Unigram> v;
|
std::vector<Gramambular::Unigram> v;
|
||||||
map<const char *, vector<Row>>::const_iterator i = keyRowMap.find(key.c_str());
|
map<const char *, vector<Row>>::const_iterator i = keyRowMap.find(key.c_str());
|
||||||
|
|
||||||
if (i != keyRowMap.end()) {
|
if (i != keyRowMap.end())
|
||||||
for (vector<Row>::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri) {
|
{
|
||||||
|
for (vector<Row>::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri)
|
||||||
|
{
|
||||||
Unigram g;
|
Unigram g;
|
||||||
const Row &r = *ri;
|
const Row &r = *ri;
|
||||||
g.keyValue.key = r.key;
|
g.keyValue.key = r.key;
|
||||||
|
|
|
@ -1,44 +1,54 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef CNSLM_H
|
#ifndef CNSLM_H
|
||||||
#define CNSLM_H
|
#define CNSLM_H
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <map>
|
|
||||||
#include <iostream>
|
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
#include "UserPhrasesLM.h"
|
#include "UserPhrasesLM.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class CNSLM : public UserPhrasesLM
|
class CNSLM : public UserPhrasesLM
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual bool allowConsolidation() override {
|
virtual bool allowConsolidation() override
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
virtual float overridedValue() override {
|
virtual float overridedValue() override
|
||||||
|
{
|
||||||
return -11.0;
|
return -11.0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
} // namespace vChewing
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,44 +1,54 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef SYMBOLLM_H
|
#ifndef SYMBOLLM_H
|
||||||
#define SYMBOLLM_H
|
#define SYMBOLLM_H
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <map>
|
|
||||||
#include <iostream>
|
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
#include "UserPhrasesLM.h"
|
#include "UserPhrasesLM.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class SymbolLM : public UserPhrasesLM
|
class SymbolLM : public UserPhrasesLM
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual bool allowConsolidation() override {
|
virtual bool allowConsolidation() override
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
virtual float overridedValue() override {
|
virtual float overridedValue() override
|
||||||
|
{
|
||||||
return -13.0;
|
return -13.0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
} // namespace vChewing
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,44 +1,54 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef USERSYMBOLLM_H
|
#ifndef USERSYMBOLLM_H
|
||||||
#define USERSYMBOLLM_H
|
#define USERSYMBOLLM_H
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <map>
|
|
||||||
#include <iostream>
|
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
#include "UserPhrasesLM.h"
|
#include "UserPhrasesLM.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class UserSymbolLM : public UserPhrasesLM
|
class UserSymbolLM : public UserPhrasesLM
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual bool allowConsolidation() override {
|
virtual bool allowConsolidation() override
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
virtual float overridedValue() override {
|
virtual float overridedValue() override
|
||||||
|
{
|
||||||
return -12.0;
|
return -12.0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
} // namespace vChewing
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "ParselessLM.h"
|
#include "ParselessLM.h"
|
||||||
|
@ -26,11 +33,15 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
vChewing::ParselessLM::~ParselessLM() { close(); }
|
vChewing::ParselessLM::~ParselessLM()
|
||||||
|
{
|
||||||
|
close();
|
||||||
|
}
|
||||||
|
|
||||||
bool vChewing::ParselessLM::isLoaded()
|
bool vChewing::ParselessLM::isLoaded()
|
||||||
{
|
{
|
||||||
if (data_) {
|
if (data_)
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -38,17 +49,20 @@ bool vChewing::ParselessLM::isLoaded()
|
||||||
|
|
||||||
bool vChewing::ParselessLM::open(const std::string_view &path)
|
bool vChewing::ParselessLM::open(const std::string_view &path)
|
||||||
{
|
{
|
||||||
if (data_) {
|
if (data_)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
fd_ = ::open(path.data(), O_RDONLY);
|
fd_ = ::open(path.data(), O_RDONLY);
|
||||||
if (fd_ == -1) {
|
if (fd_ == -1)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct stat sb;
|
struct stat sb;
|
||||||
if (fstat(fd_, &sb) == -1) {
|
if (fstat(fd_, &sb) == -1)
|
||||||
|
{
|
||||||
::close(fd_);
|
::close(fd_);
|
||||||
fd_ = -1;
|
fd_ = -1;
|
||||||
return false;
|
return false;
|
||||||
|
@ -57,21 +71,22 @@ bool vChewing::ParselessLM::open(const std::string_view& path)
|
||||||
length_ = static_cast<size_t>(sb.st_size);
|
length_ = static_cast<size_t>(sb.st_size);
|
||||||
|
|
||||||
data_ = mmap(NULL, length_, PROT_READ, MAP_SHARED, fd_, 0);
|
data_ = mmap(NULL, length_, PROT_READ, MAP_SHARED, fd_, 0);
|
||||||
if (data_ == nullptr) {
|
if (data_ == nullptr)
|
||||||
|
{
|
||||||
::close(fd_);
|
::close(fd_);
|
||||||
fd_ = -1;
|
fd_ = -1;
|
||||||
length_ = 0;
|
length_ = 0;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
db_ = std::unique_ptr<ParselessPhraseDB>(new ParselessPhraseDB(
|
db_ = std::unique_ptr<ParselessPhraseDB>(new ParselessPhraseDB(static_cast<char *>(data_), length_));
|
||||||
static_cast<char*>(data_), length_));
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void vChewing::ParselessLM::close()
|
void vChewing::ParselessLM::close()
|
||||||
{
|
{
|
||||||
if (data_ != nullptr) {
|
if (data_ != nullptr)
|
||||||
|
{
|
||||||
munmap(data_, length_);
|
munmap(data_, length_);
|
||||||
::close(fd_);
|
::close(fd_);
|
||||||
fd_ = -1;
|
fd_ = -1;
|
||||||
|
@ -80,55 +95,61 @@ void vChewing::ParselessLM::close()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<Gramambular::Bigram>
|
const std::vector<Gramambular::Bigram> vChewing::ParselessLM::bigramsForKeys(const std::string &preceedingKey,
|
||||||
vChewing::ParselessLM::bigramsForKeys(
|
const std::string &key)
|
||||||
const std::string& preceedingKey, const std::string& key)
|
|
||||||
{
|
{
|
||||||
return std::vector<Gramambular::Bigram>();
|
return std::vector<Gramambular::Bigram>();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<Gramambular::Unigram>
|
const std::vector<Gramambular::Unigram> vChewing::ParselessLM::unigramsForKey(const std::string &key)
|
||||||
vChewing::ParselessLM::unigramsForKey(const std::string& key)
|
{
|
||||||
|
if (db_ == nullptr)
|
||||||
{
|
{
|
||||||
if (db_ == nullptr) {
|
|
||||||
return std::vector<Gramambular::Unigram>();
|
return std::vector<Gramambular::Unigram>();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<Gramambular::Unigram> results;
|
std::vector<Gramambular::Unigram> results;
|
||||||
for (const auto& row : db_->findRows(key + " ")) {
|
for (const auto &row : db_->findRows(key + " "))
|
||||||
|
{
|
||||||
Gramambular::Unigram unigram;
|
Gramambular::Unigram unigram;
|
||||||
|
|
||||||
// Move ahead until we encounter the first space. This is the key.
|
// Move ahead until we encounter the first space. This is the key.
|
||||||
auto it = row.begin();
|
auto it = row.begin();
|
||||||
while (it != row.end() && *it != ' ') {
|
while (it != row.end() && *it != ' ')
|
||||||
|
{
|
||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
|
|
||||||
unigram.keyValue.key = std::string(row.begin(), it);
|
unigram.keyValue.key = std::string(row.begin(), it);
|
||||||
|
|
||||||
// Read past the space.
|
// Read past the space.
|
||||||
if (it != row.end()) {
|
if (it != row.end())
|
||||||
|
{
|
||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (it != row.end()) {
|
if (it != row.end())
|
||||||
|
{
|
||||||
// Now it is the start of the value portion.
|
// Now it is the start of the value portion.
|
||||||
auto value_begin = it;
|
auto value_begin = it;
|
||||||
|
|
||||||
// Move ahead until we encounter the second space. This is the
|
// Move ahead until we encounter the second space. This is the
|
||||||
// value.
|
// value.
|
||||||
while (it != row.end() && *it != ' ') {
|
while (it != row.end() && *it != ' ')
|
||||||
|
{
|
||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
unigram.keyValue.value = std::string(value_begin, it);
|
unigram.keyValue.value = std::string(value_begin, it);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read past the space. The remainder, if it exists, is the score.
|
// Read past the space. The remainder, if it exists, is the score.
|
||||||
if (it != row.end()) {
|
if (it != row.end())
|
||||||
|
{
|
||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (it != row.end()) {
|
if (it != row.end())
|
||||||
|
{
|
||||||
unigram.score = std::stod(std::string(it, row.end()));
|
unigram.score = std::stod(std::string(it, row.end()));
|
||||||
}
|
}
|
||||||
results.push_back(unigram);
|
results.push_back(unigram);
|
||||||
|
@ -138,7 +159,8 @@ vChewing::ParselessLM::unigramsForKey(const std::string& key)
|
||||||
|
|
||||||
bool vChewing::ParselessLM::hasUnigramsForKey(const std::string &key)
|
bool vChewing::ParselessLM::hasUnigramsForKey(const std::string &key)
|
||||||
{
|
{
|
||||||
if (db_ == nullptr) {
|
if (db_ == nullptr)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef SOURCE_ENGINE_PARSELESSLM_H_
|
#ifndef SOURCE_ENGINE_PARSELESSLM_H_
|
||||||
|
@ -27,9 +34,11 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
#include "ParselessPhraseDB.h"
|
#include "ParselessPhraseDB.h"
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class ParselessLM : public Gramambular::LanguageModel {
|
class ParselessLM : public Gramambular::LanguageModel
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
~ParselessLM() override;
|
~ParselessLM() override;
|
||||||
|
|
||||||
|
@ -37,10 +46,9 @@ public:
|
||||||
bool open(const std::string_view &path);
|
bool open(const std::string_view &path);
|
||||||
void close();
|
void close();
|
||||||
|
|
||||||
const std::vector<Gramambular::Bigram> bigramsForKeys(
|
const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey,
|
||||||
const std::string& preceedingKey, const std::string& key) override;
|
|
||||||
const std::vector<Gramambular::Unigram> unigramsForKey(
|
|
||||||
const std::string &key) override;
|
const std::string &key) override;
|
||||||
|
const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key) override;
|
||||||
bool hasUnigramsForKey(const std::string &key) override;
|
bool hasUnigramsForKey(const std::string &key) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "ParselessPhraseDB.h"
|
#include "ParselessPhraseDB.h"
|
||||||
|
@ -22,35 +29,35 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
ParselessPhraseDB::ParselessPhraseDB(
|
ParselessPhraseDB::ParselessPhraseDB(const char *buf, size_t length) : begin_(buf), end_(buf + length)
|
||||||
const char* buf, size_t length)
|
|
||||||
: begin_(buf)
|
|
||||||
, end_(buf + length)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string_view> ParselessPhraseDB::findRows(
|
std::vector<std::string_view> ParselessPhraseDB::findRows(const std::string_view &key)
|
||||||
const std::string_view& key)
|
|
||||||
{
|
{
|
||||||
std::vector<std::string_view> rows;
|
std::vector<std::string_view> rows;
|
||||||
|
|
||||||
const char *ptr = findFirstMatchingLine(key);
|
const char *ptr = findFirstMatchingLine(key);
|
||||||
if (ptr == nullptr) {
|
if (ptr == nullptr)
|
||||||
|
{
|
||||||
return rows;
|
return rows;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (ptr + key.length() <= end_
|
while (ptr + key.length() <= end_ && memcmp(ptr, key.data(), key.length()) == 0)
|
||||||
&& memcmp(ptr, key.data(), key.length()) == 0) {
|
{
|
||||||
const char *eol = ptr;
|
const char *eol = ptr;
|
||||||
|
|
||||||
while (eol != end_ && *eol != '\n') {
|
while (eol != end_ && *eol != '\n')
|
||||||
|
{
|
||||||
++eol;
|
++eol;
|
||||||
}
|
}
|
||||||
|
|
||||||
rows.emplace_back(ptr, eol - ptr);
|
rows.emplace_back(ptr, eol - ptr);
|
||||||
if (eol == end_) {
|
if (eol == end_)
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,71 +73,83 @@ std::vector<std::string_view> ParselessPhraseDB::findRows(
|
||||||
// current line is actually the first matching line: if the previous line is
|
// current line is actually the first matching line: if the previous line is
|
||||||
// less than the key and the current line starts exactly with the key, then
|
// less than the key and the current line starts exactly with the key, then
|
||||||
// the current line is the first matching line.
|
// the current line is the first matching line.
|
||||||
const char* ParselessPhraseDB::findFirstMatchingLine(
|
const char *ParselessPhraseDB::findFirstMatchingLine(const std::string_view &key)
|
||||||
const std::string_view& key)
|
{
|
||||||
|
if (key.empty())
|
||||||
{
|
{
|
||||||
if (key.empty()) {
|
|
||||||
return begin_;
|
return begin_;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *top = begin_;
|
const char *top = begin_;
|
||||||
const char *bottom = end_;
|
const char *bottom = end_;
|
||||||
|
|
||||||
while (top < bottom) {
|
while (top < bottom)
|
||||||
|
{
|
||||||
const char *mid = top + (bottom - top) / 2;
|
const char *mid = top + (bottom - top) / 2;
|
||||||
const char *ptr = mid;
|
const char *ptr = mid;
|
||||||
|
|
||||||
if (ptr != begin_) {
|
if (ptr != begin_)
|
||||||
|
{
|
||||||
--ptr;
|
--ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (ptr != begin_ && *ptr != '\n') {
|
while (ptr != begin_ && *ptr != '\n')
|
||||||
|
{
|
||||||
--ptr;
|
--ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *prev = nullptr;
|
const char *prev = nullptr;
|
||||||
if (*ptr == '\n') {
|
if (*ptr == '\n')
|
||||||
|
{
|
||||||
prev = ptr;
|
prev = ptr;
|
||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ptr is now in the "current" line we're interested in.
|
// ptr is now in the "current" line we're interested in.
|
||||||
if (ptr + key.length() > end_) {
|
if (ptr + key.length() > end_)
|
||||||
|
{
|
||||||
// not enough data to compare at this point, bail.
|
// not enough data to compare at this point, bail.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
int current_cmp = memcmp(ptr, key.data(), key.length());
|
int current_cmp = memcmp(ptr, key.data(), key.length());
|
||||||
|
|
||||||
if (current_cmp > 0) {
|
if (current_cmp > 0)
|
||||||
|
{
|
||||||
bottom = mid - 1;
|
bottom = mid - 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (current_cmp < 0) {
|
if (current_cmp < 0)
|
||||||
|
{
|
||||||
top = mid + 1;
|
top = mid + 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!prev) {
|
if (!prev)
|
||||||
|
{
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Move the prev so that it reaches the previous line.
|
// Move the prev so that it reaches the previous line.
|
||||||
if (prev != begin_) {
|
if (prev != begin_)
|
||||||
|
{
|
||||||
--prev;
|
--prev;
|
||||||
}
|
}
|
||||||
while (prev != begin_ && *prev != '\n') {
|
while (prev != begin_ && *prev != '\n')
|
||||||
|
{
|
||||||
--prev;
|
--prev;
|
||||||
}
|
}
|
||||||
if (*prev == '\n') {
|
if (*prev == '\n')
|
||||||
|
{
|
||||||
++prev;
|
++prev;
|
||||||
}
|
}
|
||||||
|
|
||||||
int prev_cmp = memcmp(prev, key.data(), key.length());
|
int prev_cmp = memcmp(prev, key.data(), key.length());
|
||||||
|
|
||||||
// This is the first occurrence.
|
// This is the first occurrence.
|
||||||
if (prev_cmp < 0 && current_cmp == 0) {
|
if (prev_cmp < 0 && current_cmp == 0)
|
||||||
|
{
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef SOURCE_ENGINE_PARSELESSPHRASEDB_H_
|
#ifndef SOURCE_ENGINE_PARSELESSPHRASEDB_H_
|
||||||
|
@ -24,17 +31,18 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
// Defines phrase database that consists of (key, value, score) rows that are
|
// Defines phrase database that consists of (key, value, score) rows that are
|
||||||
// pre-sorted by the byte value of the keys. It is way faster than FastLM
|
// pre-sorted by the byte value of the keys. It is way faster than FastLM
|
||||||
// because it does not need to parse anything. Instead, it relies on the fact
|
// because it does not need to parse anything. Instead, it relies on the fact
|
||||||
// that the database is already sorted, and binary search is used to find the
|
// that the database is already sorted, and binary search is used to find the
|
||||||
// rows.
|
// rows.
|
||||||
class ParselessPhraseDB {
|
class ParselessPhraseDB
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
ParselessPhraseDB(
|
ParselessPhraseDB(const char *buf, size_t length);
|
||||||
const char* buf, size_t length);
|
|
||||||
|
|
||||||
// Find the rows that match the key. Note that prefix match is used. If you
|
// Find the rows that match the key. Note that prefix match is used. If you
|
||||||
// need exact match, the key will need to have a delimiter (usually a space)
|
// need exact match, the key will need to have a delimiter (usually a space)
|
||||||
|
|
|
@ -1,30 +1,38 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef PHRASEREPLACEMENTMAP_H
|
#ifndef PHRASEREPLACEMENTMAP_H
|
||||||
#define PHRASEREPLACEMENTMAP_H
|
#define PHRASEREPLACEMENTMAP_H
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <map>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class PhraseReplacementMap
|
class PhraseReplacementMap
|
||||||
{
|
{
|
||||||
|
@ -43,6 +51,6 @@ protected:
|
||||||
size_t length;
|
size_t length;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
} // namespace vChewing
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,55 +1,62 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "PhraseReplacementMap.h"
|
#include "PhraseReplacementMap.h"
|
||||||
#include "vChewing-Swift.h"
|
#include "vChewing-Swift.h"
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <unistd.h>
|
#include <sys/mman.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
#include <syslog.h>
|
#include <syslog.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "KeyValueBlobReader.h"
|
#include "KeyValueBlobReader.h"
|
||||||
#include "LMConsolidator.h"
|
#include "LMConsolidator.h"
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
PhraseReplacementMap::PhraseReplacementMap()
|
PhraseReplacementMap::PhraseReplacementMap() : fd(-1), data(0), length(0)
|
||||||
: fd(-1)
|
|
||||||
, data(0)
|
|
||||||
, length(0)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
PhraseReplacementMap::~PhraseReplacementMap()
|
PhraseReplacementMap::~PhraseReplacementMap()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
close();
|
close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PhraseReplacementMap::open(const char *path)
|
bool PhraseReplacementMap::open(const char *path)
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,13 +64,15 @@ bool PhraseReplacementMap::open(const char *path)
|
||||||
LMConsolidator::ConsolidateContent(path, true);
|
LMConsolidator::ConsolidateContent(path, true);
|
||||||
|
|
||||||
fd = ::open(path, O_RDONLY);
|
fd = ::open(path, O_RDONLY);
|
||||||
if (fd == -1) {
|
if (fd == -1)
|
||||||
|
{
|
||||||
printf("open:: file not exist");
|
printf("open:: file not exist");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct stat sb;
|
struct stat sb;
|
||||||
if (fstat(fd, &sb) == -1) {
|
if (fstat(fd, &sb) == -1)
|
||||||
|
{
|
||||||
printf("open:: cannot open file");
|
printf("open:: cannot open file");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -71,7 +80,8 @@ bool PhraseReplacementMap::open(const char *path)
|
||||||
length = (size_t)sb.st_size;
|
length = (size_t)sb.st_size;
|
||||||
|
|
||||||
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
||||||
if (!data) {
|
if (!data)
|
||||||
|
{
|
||||||
::close(fd);
|
::close(fd);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -79,13 +89,16 @@ bool PhraseReplacementMap::open(const char *path)
|
||||||
KeyValueBlobReader reader(static_cast<char *>(data), length);
|
KeyValueBlobReader reader(static_cast<char *>(data), length);
|
||||||
KeyValueBlobReader::KeyValue keyValue;
|
KeyValueBlobReader::KeyValue keyValue;
|
||||||
KeyValueBlobReader::State state;
|
KeyValueBlobReader::State state;
|
||||||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
|
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
|
||||||
|
{
|
||||||
keyValueMap[keyValue.key] = keyValue.value;
|
keyValueMap[keyValue.key] = keyValue.value;
|
||||||
}
|
}
|
||||||
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
||||||
if (state == KeyValueBlobReader::State::ERROR) {
|
if (state == KeyValueBlobReader::State::ERROR)
|
||||||
|
{
|
||||||
// close();
|
// close();
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n");
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n");
|
||||||
// return false;
|
// return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -93,7 +106,8 @@ bool PhraseReplacementMap::open(const char *path)
|
||||||
|
|
||||||
void PhraseReplacementMap::close()
|
void PhraseReplacementMap::close()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
munmap(data, length);
|
munmap(data, length);
|
||||||
::close(fd);
|
::close(fd);
|
||||||
data = 0;
|
data = 0;
|
||||||
|
@ -105,12 +119,12 @@ void PhraseReplacementMap::close()
|
||||||
const std::string PhraseReplacementMap::valueForKey(const std::string &key)
|
const std::string PhraseReplacementMap::valueForKey(const std::string &key)
|
||||||
{
|
{
|
||||||
auto iter = keyValueMap.find(key);
|
auto iter = keyValueMap.find(key);
|
||||||
if (iter != keyValueMap.end()) {
|
if (iter != keyValueMap.end())
|
||||||
|
{
|
||||||
const std::string_view v = iter->second;
|
const std::string_view v = iter->second;
|
||||||
return {v.data(), v.size()};
|
return {v.data(), v.size()};
|
||||||
}
|
}
|
||||||
return string("");
|
return string("");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "UserOverrideModel.h"
|
#include "UserOverrideModel.h"
|
||||||
|
@ -23,50 +30,48 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
// About 20 generations.
|
// About 20 generations.
|
||||||
static const double DecayThreshould = 1.0 / 1048576.0;
|
static const double DecayThreshould = 1.0 / 1048576.0;
|
||||||
|
|
||||||
static double Score(size_t eventCount,
|
static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda);
|
||||||
size_t totalCount,
|
|
||||||
double eventTimestamp,
|
|
||||||
double timestamp,
|
|
||||||
double lambda);
|
|
||||||
static bool IsEndingPunctuation(const std::string &value);
|
static bool IsEndingPunctuation(const std::string &value);
|
||||||
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
|
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex);
|
||||||
size_t cursorIndex);
|
|
||||||
|
|
||||||
UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant)
|
UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) : m_capacity(capacity)
|
||||||
: m_capacity(capacity) {
|
{
|
||||||
assert(m_capacity > 0);
|
assert(m_capacity > 0);
|
||||||
m_decayExponent = log(0.5) / decayConstant;
|
m_decayExponent = log(0.5) / decayConstant;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UserOverrideModel::observe(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
|
void UserOverrideModel::observe(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex,
|
||||||
size_t cursorIndex,
|
const std::string &candidate, double timestamp)
|
||||||
const std::string& candidate,
|
{
|
||||||
double timestamp) {
|
|
||||||
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
|
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
|
||||||
auto mapIter = m_lruMap.find(key);
|
auto mapIter = m_lruMap.find(key);
|
||||||
if (mapIter == m_lruMap.end()) {
|
if (mapIter == m_lruMap.end())
|
||||||
|
{
|
||||||
auto keyValuePair = KeyObservationPair(key, Observation());
|
auto keyValuePair = KeyObservationPair(key, Observation());
|
||||||
Observation &observation = keyValuePair.second;
|
Observation &observation = keyValuePair.second;
|
||||||
observation.update(candidate, timestamp);
|
observation.update(candidate, timestamp);
|
||||||
|
|
||||||
m_lruList.push_front(keyValuePair);
|
m_lruList.push_front(keyValuePair);
|
||||||
auto listIter = m_lruList.begin();
|
auto listIter = m_lruList.begin();
|
||||||
auto lruKeyValue = std::pair<std::string,
|
auto lruKeyValue = std::pair<std::string, std::list<KeyObservationPair>::iterator>(key, listIter);
|
||||||
std::list<KeyObservationPair>::iterator>(key, listIter);
|
|
||||||
m_lruMap.insert(lruKeyValue);
|
m_lruMap.insert(lruKeyValue);
|
||||||
|
|
||||||
if (m_lruList.size() > m_capacity) {
|
if (m_lruList.size() > m_capacity)
|
||||||
|
{
|
||||||
auto lastKeyValuePair = m_lruList.end();
|
auto lastKeyValuePair = m_lruList.end();
|
||||||
--lastKeyValuePair;
|
--lastKeyValuePair;
|
||||||
m_lruMap.erase(lastKeyValuePair->first);
|
m_lruMap.erase(lastKeyValuePair->first);
|
||||||
m_lruList.pop_back();
|
m_lruList.pop_back();
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
auto listIter = mapIter->second;
|
auto listIter = mapIter->second;
|
||||||
m_lruList.splice(m_lruList.begin(), m_lruList, listIter);
|
m_lruList.splice(m_lruList.begin(), m_lruList, listIter);
|
||||||
|
|
||||||
|
@ -76,12 +81,13 @@ void UserOverrideModel::observe(const std::vector<Gramambular::NodeAnchor>& walk
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
|
std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex,
|
||||||
size_t cursorIndex,
|
double timestamp)
|
||||||
double timestamp) {
|
{
|
||||||
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
|
std::string key = WalkedNodesToKey(walkedNodes, cursorIndex);
|
||||||
auto mapIter = m_lruMap.find(key);
|
auto mapIter = m_lruMap.find(key);
|
||||||
if (mapIter == m_lruMap.end()) {
|
if (mapIter == m_lruMap.end())
|
||||||
|
{
|
||||||
return std::string();
|
return std::string();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,20 +97,17 @@ std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor
|
||||||
|
|
||||||
std::string candidate;
|
std::string candidate;
|
||||||
double score = 0.0;
|
double score = 0.0;
|
||||||
for (auto i = observation.overrides.begin();
|
for (auto i = observation.overrides.begin(); i != observation.overrides.end(); ++i)
|
||||||
i != observation.overrides.end();
|
{
|
||||||
++i) {
|
|
||||||
const Override &o = i->second;
|
const Override &o = i->second;
|
||||||
double overrideScore = Score(o.count,
|
double overrideScore = Score(o.count, observation.count, o.timestamp, timestamp, m_decayExponent);
|
||||||
observation.count,
|
if (overrideScore == 0.0)
|
||||||
o.timestamp,
|
{
|
||||||
timestamp,
|
|
||||||
m_decayExponent);
|
|
||||||
if (overrideScore == 0.0) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (overrideScore > score) {
|
if (overrideScore > score)
|
||||||
|
{
|
||||||
candidate = i->first;
|
candidate = i->first;
|
||||||
score = overrideScore;
|
score = overrideScore;
|
||||||
}
|
}
|
||||||
|
@ -112,21 +115,19 @@ std::string UserOverrideModel::suggest(const std::vector<Gramambular::NodeAnchor
|
||||||
return candidate;
|
return candidate;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UserOverrideModel::Observation::update(const std::string& candidate,
|
void UserOverrideModel::Observation::update(const std::string &candidate, double timestamp)
|
||||||
double timestamp) {
|
{
|
||||||
count++;
|
count++;
|
||||||
auto &o = overrides[candidate];
|
auto &o = overrides[candidate];
|
||||||
o.timestamp = timestamp;
|
o.timestamp = timestamp;
|
||||||
o.count++;
|
o.count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
static double Score(size_t eventCount,
|
static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda)
|
||||||
size_t totalCount,
|
{
|
||||||
double eventTimestamp,
|
|
||||||
double timestamp,
|
|
||||||
double lambda) {
|
|
||||||
double decay = exp((timestamp - eventTimestamp) * lambda);
|
double decay = exp((timestamp - eventTimestamp) * lambda);
|
||||||
if (decay < DecayThreshould) {
|
if (decay < DecayThreshould)
|
||||||
|
{
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -134,29 +135,31 @@ static double Score(size_t eventCount,
|
||||||
return prob * decay;
|
return prob * decay;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool IsEndingPunctuation(const std::string& value) {
|
static bool IsEndingPunctuation(const std::string &value)
|
||||||
return value == "," || value == "。" || value== "!" || value == "?" ||
|
{
|
||||||
value == "」" || value == "』" || value== "”" || value == "’";
|
return value == "," || value == "。" || value == "!" || value == "?" || value == "」" || value == "』" ||
|
||||||
|
value == "”" || value == "’";
|
||||||
}
|
}
|
||||||
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
|
static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex)
|
||||||
size_t cursorIndex) {
|
{
|
||||||
std::stringstream s;
|
std::stringstream s;
|
||||||
std::vector<Gramambular::NodeAnchor> n;
|
std::vector<Gramambular::NodeAnchor> n;
|
||||||
size_t ll = 0;
|
size_t ll = 0;
|
||||||
for (std::vector<Gramambular::NodeAnchor>::const_iterator i = walkedNodes.begin();
|
for (std::vector<Gramambular::NodeAnchor>::const_iterator i = walkedNodes.begin(); i != walkedNodes.end(); ++i)
|
||||||
i != walkedNodes.end();
|
{
|
||||||
++i) {
|
|
||||||
const auto &nn = *i;
|
const auto &nn = *i;
|
||||||
n.push_back(nn);
|
n.push_back(nn);
|
||||||
ll += nn.spanningLength;
|
ll += nn.spanningLength;
|
||||||
if (ll >= cursorIndex) {
|
if (ll >= cursorIndex)
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<Gramambular::NodeAnchor>::const_reverse_iterator r = n.rbegin();
|
std::vector<Gramambular::NodeAnchor>::const_reverse_iterator r = n.rbegin();
|
||||||
|
|
||||||
if (r == n.rend()) {
|
if (r == n.rend())
|
||||||
|
{
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -165,40 +168,44 @@ static std::string WalkedNodesToKey(const std::vector<Gramambular::NodeAnchor>&
|
||||||
|
|
||||||
s.clear();
|
s.clear();
|
||||||
s.str(std::string());
|
s.str(std::string());
|
||||||
if (r != n.rend()) {
|
if (r != n.rend())
|
||||||
|
{
|
||||||
std::string value = (*r).node->currentKeyValue().value;
|
std::string value = (*r).node->currentKeyValue().value;
|
||||||
if (IsEndingPunctuation(value)) {
|
if (IsEndingPunctuation(value))
|
||||||
|
{
|
||||||
s << "()";
|
s << "()";
|
||||||
r = n.rend();
|
r = n.rend();
|
||||||
} else {
|
}
|
||||||
s << "("
|
else
|
||||||
<< (*r).node->currentKeyValue().key
|
{
|
||||||
<< ","
|
s << "(" << (*r).node->currentKeyValue().key << "," << value << ")";
|
||||||
<< value
|
|
||||||
<< ")";
|
|
||||||
++r;
|
++r;
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
s << "()";
|
s << "()";
|
||||||
}
|
}
|
||||||
std::string prev = s.str();
|
std::string prev = s.str();
|
||||||
|
|
||||||
s.clear();
|
s.clear();
|
||||||
s.str(std::string());
|
s.str(std::string());
|
||||||
if (r != n.rend()) {
|
if (r != n.rend())
|
||||||
|
{
|
||||||
std::string value = (*r).node->currentKeyValue().value;
|
std::string value = (*r).node->currentKeyValue().value;
|
||||||
if (IsEndingPunctuation(value)) {
|
if (IsEndingPunctuation(value))
|
||||||
|
{
|
||||||
s << "()";
|
s << "()";
|
||||||
r = n.rend();
|
r = n.rend();
|
||||||
} else {
|
}
|
||||||
s << "("
|
else
|
||||||
<< (*r).node->currentKeyValue().key
|
{
|
||||||
<< ","
|
s << "(" << (*r).node->currentKeyValue().key << "," << value << ")";
|
||||||
<< value
|
|
||||||
<< ")";
|
|
||||||
++r;
|
++r;
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
s << "()";
|
s << "()";
|
||||||
}
|
}
|
||||||
std::string anterior = s.str();
|
std::string anterior = s.str();
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef USEROVERRIDEMODEL_H
|
#ifndef USEROVERRIDEMODEL_H
|
||||||
|
@ -25,36 +32,40 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
|
|
||||||
#include "Gramambular.h"
|
#include "Gramambular.h"
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
using namespace Gramambular;
|
using namespace Gramambular;
|
||||||
|
|
||||||
class UserOverrideModel {
|
class UserOverrideModel
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
UserOverrideModel(size_t capacity, double decayConstant);
|
UserOverrideModel(size_t capacity, double decayConstant);
|
||||||
|
|
||||||
void observe(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
|
void observe(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex,
|
||||||
size_t cursorIndex,
|
const std::string &candidate, double timestamp);
|
||||||
const std::string& candidate,
|
|
||||||
double timestamp);
|
|
||||||
|
|
||||||
std::string suggest(const std::vector<Gramambular::NodeAnchor>& walkedNodes,
|
std::string suggest(const std::vector<Gramambular::NodeAnchor> &walkedNodes, size_t cursorIndex, double timestamp);
|
||||||
size_t cursorIndex,
|
|
||||||
double timestamp);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct Override {
|
struct Override
|
||||||
|
{
|
||||||
size_t count;
|
size_t count;
|
||||||
double timestamp;
|
double timestamp;
|
||||||
|
|
||||||
Override() : count(0), timestamp(0.0) {}
|
Override() : count(0), timestamp(0.0)
|
||||||
|
{
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Observation {
|
struct Observation
|
||||||
|
{
|
||||||
size_t count;
|
size_t count;
|
||||||
std::map<std::string, Override> overrides;
|
std::map<std::string, Override> overrides;
|
||||||
|
|
||||||
Observation() : count(0) {}
|
Observation() : count(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
void update(const std::string &candidate, double timestamp);
|
void update(const std::string &candidate, double timestamp);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -69,4 +80,3 @@ private:
|
||||||
}; // namespace vChewing
|
}; // namespace vChewing
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1,31 +1,39 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef USERPHRASESLM_H
|
#ifndef USERPHRASESLM_H
|
||||||
#define USERPHRASESLM_H
|
#define USERPHRASESLM_H
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <map>
|
|
||||||
#include <iostream>
|
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
class UserPhrasesLM : public Gramambular::LanguageModel
|
class UserPhrasesLM : public Gramambular::LanguageModel
|
||||||
{
|
{
|
||||||
|
@ -38,21 +46,27 @@ public:
|
||||||
void close();
|
void close();
|
||||||
void dump();
|
void dump();
|
||||||
|
|
||||||
virtual bool allowConsolidation() {
|
virtual bool allowConsolidation()
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual float overridedValue() {
|
virtual float overridedValue()
|
||||||
|
{
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string& preceedingKey, const std::string& key);
|
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey,
|
||||||
|
const std::string &key);
|
||||||
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
|
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
|
||||||
virtual bool hasUnigramsForKey(const std::string &key);
|
virtual bool hasUnigramsForKey(const std::string &key);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
struct Row {
|
struct Row
|
||||||
Row(std::string_view& k, std::string_view& v) : key(k), value(v) {}
|
{
|
||||||
|
Row(std::string_view &k, std::string_view &v) : key(k), value(v)
|
||||||
|
{
|
||||||
|
}
|
||||||
std::string_view key;
|
std::string_view key;
|
||||||
std::string_view value;
|
std::string_view value;
|
||||||
};
|
};
|
||||||
|
@ -63,6 +77,6 @@ protected:
|
||||||
size_t length;
|
size_t length;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
} // namespace vChewing
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,53 +1,60 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "UserPhrasesLM.h"
|
#include "UserPhrasesLM.h"
|
||||||
#include "vChewing-Swift.h"
|
#include "vChewing-Swift.h"
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <unistd.h>
|
#include <sys/mman.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
#include <syslog.h>
|
#include <syslog.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "KeyValueBlobReader.h"
|
#include "KeyValueBlobReader.h"
|
||||||
#include "LMConsolidator.h"
|
#include "LMConsolidator.h"
|
||||||
|
|
||||||
namespace vChewing {
|
namespace vChewing
|
||||||
|
{
|
||||||
|
|
||||||
UserPhrasesLM::UserPhrasesLM()
|
UserPhrasesLM::UserPhrasesLM() : fd(-1), data(0), length(0)
|
||||||
: fd(-1)
|
|
||||||
, data(0)
|
|
||||||
, length(0)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
UserPhrasesLM::~UserPhrasesLM()
|
UserPhrasesLM::~UserPhrasesLM()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
close();
|
close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UserPhrasesLM::isLoaded()
|
bool UserPhrasesLM::isLoaded()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -55,23 +62,27 @@ bool UserPhrasesLM::isLoaded()
|
||||||
|
|
||||||
bool UserPhrasesLM::open(const char *path)
|
bool UserPhrasesLM::open(const char *path)
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allowConsolidation()) {
|
if (allowConsolidation())
|
||||||
|
{
|
||||||
LMConsolidator::FixEOF(path);
|
LMConsolidator::FixEOF(path);
|
||||||
LMConsolidator::ConsolidateContent(path, true);
|
LMConsolidator::ConsolidateContent(path, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
fd = ::open(path, O_RDONLY);
|
fd = ::open(path, O_RDONLY);
|
||||||
if (fd == -1) {
|
if (fd == -1)
|
||||||
|
{
|
||||||
printf("open:: file not exist");
|
printf("open:: file not exist");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct stat sb;
|
struct stat sb;
|
||||||
if (fstat(fd, &sb) == -1) {
|
if (fstat(fd, &sb) == -1)
|
||||||
|
{
|
||||||
printf("open:: cannot open file");
|
printf("open:: cannot open file");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -79,7 +90,8 @@ bool UserPhrasesLM::open(const char *path)
|
||||||
length = (size_t)sb.st_size;
|
length = (size_t)sb.st_size;
|
||||||
|
|
||||||
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
||||||
if (!data) {
|
if (!data)
|
||||||
|
{
|
||||||
::close(fd);
|
::close(fd);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -87,14 +99,18 @@ bool UserPhrasesLM::open(const char *path)
|
||||||
KeyValueBlobReader reader(static_cast<char *>(data), length);
|
KeyValueBlobReader reader(static_cast<char *>(data), length);
|
||||||
KeyValueBlobReader::KeyValue keyValue;
|
KeyValueBlobReader::KeyValue keyValue;
|
||||||
KeyValueBlobReader::State state;
|
KeyValueBlobReader::State state;
|
||||||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) {
|
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
|
||||||
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF reading.
|
{
|
||||||
|
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF
|
||||||
|
// reading.
|
||||||
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
|
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
|
||||||
}
|
}
|
||||||
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
||||||
if (state == KeyValueBlobReader::State::ERROR) {
|
if (state == KeyValueBlobReader::State::ERROR)
|
||||||
|
{
|
||||||
// close();
|
// close();
|
||||||
if (mgrPrefs.isDebugModeEnabled) syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n");
|
if (mgrPrefs.isDebugModeEnabled)
|
||||||
|
syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n");
|
||||||
// return false;
|
// return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -102,7 +118,8 @@ bool UserPhrasesLM::open(const char *path)
|
||||||
|
|
||||||
void UserPhrasesLM::close()
|
void UserPhrasesLM::close()
|
||||||
{
|
{
|
||||||
if (data) {
|
if (data)
|
||||||
|
{
|
||||||
munmap(data, length);
|
munmap(data, length);
|
||||||
::close(fd);
|
::close(fd);
|
||||||
data = 0;
|
data = 0;
|
||||||
|
@ -113,15 +130,18 @@ void UserPhrasesLM::close()
|
||||||
|
|
||||||
void UserPhrasesLM::dump()
|
void UserPhrasesLM::dump()
|
||||||
{
|
{
|
||||||
for (const auto& entry : keyRowMap) {
|
for (const auto &entry : keyRowMap)
|
||||||
|
{
|
||||||
const std::vector<Row> &rows = entry.second;
|
const std::vector<Row> &rows = entry.second;
|
||||||
for (const auto& row : rows) {
|
for (const auto &row : rows)
|
||||||
|
{
|
||||||
std::cerr << row.key << " " << row.value << "\n";
|
std::cerr << row.key << " " << row.value << "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<Gramambular::Bigram> UserPhrasesLM::bigramsForKeys(const std::string& preceedingKey, const std::string& key)
|
const std::vector<Gramambular::Bigram> UserPhrasesLM::bigramsForKeys(const std::string &preceedingKey,
|
||||||
|
const std::string &key)
|
||||||
{
|
{
|
||||||
return std::vector<Gramambular::Bigram>();
|
return std::vector<Gramambular::Bigram>();
|
||||||
}
|
}
|
||||||
|
@ -130,9 +150,11 @@ const std::vector<Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std:
|
||||||
{
|
{
|
||||||
std::vector<Gramambular::Unigram> v;
|
std::vector<Gramambular::Unigram> v;
|
||||||
auto iter = keyRowMap.find(key);
|
auto iter = keyRowMap.find(key);
|
||||||
if (iter != keyRowMap.end()) {
|
if (iter != keyRowMap.end())
|
||||||
|
{
|
||||||
const std::vector<Row> &rows = iter->second;
|
const std::vector<Row> &rows = iter->second;
|
||||||
for (const auto& row : rows) {
|
for (const auto &row : rows)
|
||||||
|
{
|
||||||
Gramambular::Unigram g;
|
Gramambular::Unigram g;
|
||||||
g.keyValue.key = row.key;
|
g.keyValue.key = row.key;
|
||||||
g.keyValue.value = row.value;
|
g.keyValue.value = row.value;
|
||||||
|
|
|
@ -1,24 +1,31 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#import <Foundation/Foundation.h>
|
|
||||||
#import "KeyHandler.h"
|
#import "KeyHandler.h"
|
||||||
|
#import <Foundation/Foundation.h>
|
||||||
|
|
||||||
NS_ASSUME_NONNULL_BEGIN
|
NS_ASSUME_NONNULL_BEGIN
|
||||||
|
|
||||||
|
@ -33,8 +40,13 @@ NS_ASSUME_NONNULL_BEGIN
|
||||||
+ (BOOL)checkIfSpecifiedUserDataFolderValid:(NSString *)folderPath;
|
+ (BOOL)checkIfSpecifiedUserDataFolderValid:(NSString *)folderPath;
|
||||||
+ (NSString *)dataFolderPath:(bool)isDefaultFolder NS_SWIFT_NAME(dataFolderPath(isDefaultFolder:));
|
+ (NSString *)dataFolderPath:(bool)isDefaultFolder NS_SWIFT_NAME(dataFolderPath(isDefaultFolder:));
|
||||||
|
|
||||||
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase inputMode:(InputMode)mode key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:));
|
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase
|
||||||
+ (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating areWeDeleting:(BOOL)areWeDeleting;
|
inputMode:(InputMode)mode
|
||||||
|
key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:));
|
||||||
|
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
|
||||||
|
inputMode:(InputMode)mode
|
||||||
|
areWeDuplicating:(BOOL)areWeDuplicating
|
||||||
|
areWeDeleting:(BOOL)areWeDeleting;
|
||||||
+ (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled;
|
+ (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled;
|
||||||
+ (void)setCNSEnabled:(BOOL)cnsEnabled;
|
+ (void)setCNSEnabled:(BOOL)cnsEnabled;
|
||||||
+ (void)setSymbolEnabled:(BOOL)symbolEnabled;
|
+ (void)setSymbolEnabled:(BOOL)symbolEnabled;
|
||||||
|
|
|
@ -1,26 +1,33 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#import "mgrLangModel.h"
|
#import "mgrLangModel.h"
|
||||||
|
#import "LMConsolidator.h"
|
||||||
#import "mgrLangModel_Privates.h"
|
#import "mgrLangModel_Privates.h"
|
||||||
#import "vChewing-Swift.h"
|
#import "vChewing-Swift.h"
|
||||||
#import "LMConsolidator.h"
|
|
||||||
|
|
||||||
static const int kUserOverrideModelCapacity = 500;
|
static const int kUserOverrideModelCapacity = 500;
|
||||||
static const double kObservedOverrideHalflife = 5400.0;
|
static const double kObservedOverrideHalflife = 5400.0;
|
||||||
|
@ -54,61 +61,79 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
|
|
||||||
+ (void)loadDataModels
|
+ (void)loadDataModels
|
||||||
{
|
{
|
||||||
if (!gLangModelCHT.isDataModelLoaded()) {
|
if (!gLangModelCHT.isDataModelLoaded())
|
||||||
|
{
|
||||||
LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
|
LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHT.isMiscDataLoaded()) {
|
if (!gLangModelCHT.isMiscDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
|
gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHT.isSymbolDataLoaded()){
|
if (!gLangModelCHT.isSymbolDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
|
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHT.isCNSDataLoaded()){
|
if (!gLangModelCHT.isCNSDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
|
gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
|
||||||
}
|
}
|
||||||
// -----------------
|
// -----------------
|
||||||
if (!gLangModelCHS.isDataModelLoaded()) {
|
if (!gLangModelCHS.isDataModelLoaded())
|
||||||
|
{
|
||||||
LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
|
LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHS.isMiscDataLoaded()) {
|
if (!gLangModelCHS.isMiscDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
|
gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHS.isSymbolDataLoaded()){
|
if (!gLangModelCHS.isSymbolDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
|
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHS.isCNSDataLoaded()){
|
if (!gLangModelCHS.isCNSDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
|
gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (void)loadDataModel:(InputMode)mode
|
+ (void)loadDataModel:(InputMode)mode
|
||||||
{
|
{
|
||||||
if ([mode isEqualToString:imeModeCHT]) {
|
if ([mode isEqualToString:imeModeCHT])
|
||||||
if (!gLangModelCHT.isDataModelLoaded()) {
|
{
|
||||||
|
if (!gLangModelCHT.isDataModelLoaded())
|
||||||
|
{
|
||||||
LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
|
LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHT.isMiscDataLoaded()) {
|
if (!gLangModelCHT.isMiscDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
|
gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHT.isSymbolDataLoaded()){
|
if (!gLangModelCHT.isSymbolDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
|
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHT.isCNSDataLoaded()){
|
if (!gLangModelCHT.isCNSDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
|
gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ([mode isEqualToString:imeModeCHS]) {
|
if ([mode isEqualToString:imeModeCHS])
|
||||||
if (!gLangModelCHS.isDataModelLoaded()) {
|
{
|
||||||
|
if (!gLangModelCHS.isDataModelLoaded())
|
||||||
|
{
|
||||||
LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
|
LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHS.isMiscDataLoaded()) {
|
if (!gLangModelCHS.isMiscDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
|
gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHS.isSymbolDataLoaded()){
|
if (!gLangModelCHS.isSymbolDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
|
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
|
||||||
}
|
}
|
||||||
if (!gLangModelCHS.isCNSDataLoaded()){
|
if (!gLangModelCHS.isCNSDataLoaded())
|
||||||
|
{
|
||||||
gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
|
gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -116,8 +141,10 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
|
|
||||||
+ (void)loadUserPhrases
|
+ (void)loadUserPhrases
|
||||||
{
|
{
|
||||||
gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String], [[self excludedPhrasesDataPath:imeModeCHT] UTF8String]);
|
gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String],
|
||||||
gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String], [[self excludedPhrasesDataPath:imeModeCHS] UTF8String]);
|
[[self excludedPhrasesDataPath:imeModeCHT] UTF8String]);
|
||||||
|
gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String],
|
||||||
|
[[self excludedPhrasesDataPath:imeModeCHS] UTF8String]);
|
||||||
gLangModelCHT.loadUserSymbolData([[self userSymbolDataPath:imeModeCHT] UTF8String]);
|
gLangModelCHT.loadUserSymbolData([[self userSymbolDataPath:imeModeCHT] UTF8String]);
|
||||||
gLangModelCHS.loadUserSymbolData([[self userSymbolDataPath:imeModeCHS] UTF8String]);
|
gLangModelCHS.loadUserSymbolData([[self userSymbolDataPath:imeModeCHS] UTF8String]);
|
||||||
}
|
}
|
||||||
|
@ -139,19 +166,26 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
NSString *folderPath = [self dataFolderPath:false];
|
NSString *folderPath = [self dataFolderPath:false];
|
||||||
BOOL isFolder = NO;
|
BOOL isFolder = NO;
|
||||||
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
|
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
|
||||||
if (folderExist && !isFolder) {
|
if (folderExist && !isFolder)
|
||||||
|
{
|
||||||
NSError *error = nil;
|
NSError *error = nil;
|
||||||
[[NSFileManager defaultManager] removeItemAtPath:folderPath error:&error];
|
[[NSFileManager defaultManager] removeItemAtPath:folderPath error:&error];
|
||||||
if (error) {
|
if (error)
|
||||||
|
{
|
||||||
NSLog(@"Failed to remove folder %@", error);
|
NSLog(@"Failed to remove folder %@", error);
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
folderExist = NO;
|
folderExist = NO;
|
||||||
}
|
}
|
||||||
if (!folderExist) {
|
if (!folderExist)
|
||||||
|
{
|
||||||
NSError *error = nil;
|
NSError *error = nil;
|
||||||
[[NSFileManager defaultManager] createDirectoryAtPath:folderPath withIntermediateDirectories:YES attributes:nil error:&error];
|
[[NSFileManager defaultManager] createDirectoryAtPath:folderPath
|
||||||
if (error) {
|
withIntermediateDirectories:YES
|
||||||
|
attributes:nil
|
||||||
|
error:&error];
|
||||||
|
if (error)
|
||||||
|
{
|
||||||
NSLog(@"Failed to create folder %@", error);
|
NSLog(@"Failed to create folder %@", error);
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
|
@ -163,26 +197,34 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
{
|
{
|
||||||
BOOL isFolder = NO;
|
BOOL isFolder = NO;
|
||||||
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
|
BOOL folderExist = [[NSFileManager defaultManager] fileExistsAtPath:folderPath isDirectory:&isFolder];
|
||||||
if ((folderExist && !isFolder) || (!folderExist)) {
|
if ((folderExist && !isFolder) || (!folderExist))
|
||||||
|
{
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
return YES;
|
return YES;
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (BOOL)ensureFileExists:(NSString *)filePath populateWithTemplate:(NSString *)templateBasename extension:(NSString *)ext
|
+ (BOOL)ensureFileExists:(NSString *)filePath
|
||||||
|
populateWithTemplate:(NSString *)templateBasename
|
||||||
|
extension:(NSString *)ext
|
||||||
|
{
|
||||||
|
if (![[NSFileManager defaultManager] fileExistsAtPath:filePath])
|
||||||
{
|
{
|
||||||
if (![[NSFileManager defaultManager] fileExistsAtPath:filePath]) {
|
|
||||||
|
|
||||||
NSURL *templateURL = [[NSBundle mainBundle] URLForResource:templateBasename withExtension:ext];
|
NSURL *templateURL = [[NSBundle mainBundle] URLForResource:templateBasename withExtension:ext];
|
||||||
NSData *templateData;
|
NSData *templateData;
|
||||||
if (templateURL) {
|
if (templateURL)
|
||||||
|
{
|
||||||
templateData = [NSData dataWithContentsOfURL:templateURL];
|
templateData = [NSData dataWithContentsOfURL:templateURL];
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
templateData = [@"" dataUsingEncoding:NSUTF8StringEncoding];
|
templateData = [@"" dataUsingEncoding:NSUTF8StringEncoding];
|
||||||
}
|
}
|
||||||
|
|
||||||
BOOL result = [templateData writeToFile:filePath atomically:YES];
|
BOOL result = [templateData writeToFile:filePath atomically:YES];
|
||||||
if (!result) {
|
if (!result)
|
||||||
|
{
|
||||||
NSLog(@"Failed to write file");
|
NSLog(@"Failed to write file");
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
|
@ -192,36 +234,76 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
|
|
||||||
+ (BOOL)checkIfUserLanguageModelFilesExist
|
+ (BOOL)checkIfUserLanguageModelFilesExist
|
||||||
{
|
{
|
||||||
if (![self checkIfUserDataFolderExists]) return NO;
|
if (![self checkIfUserDataFolderExists])
|
||||||
if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHS] populateWithTemplate:kUserDataTemplateName extension:kTemplateExtension]) return NO;
|
return NO;
|
||||||
if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHT] populateWithTemplate:kUserDataTemplateName extension:kTemplateExtension]) return NO;
|
if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHS]
|
||||||
if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHS] populateWithTemplate:kUserAssDataTemplateName extension:kTemplateExtension]) return NO;
|
populateWithTemplate:kUserDataTemplateName
|
||||||
if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHT] populateWithTemplate:kUserAssDataTemplateName extension:kTemplateExtension]) return NO;
|
extension:kTemplateExtension])
|
||||||
if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHS] populateWithTemplate:kExcludedPhrasesvChewingTemplateName extension:kTemplateExtension]) return NO;
|
return NO;
|
||||||
if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHT] populateWithTemplate:kExcludedPhrasesvChewingTemplateName extension:kTemplateExtension]) return NO;
|
if (![self ensureFileExists:[self userPhrasesDataPath:imeModeCHT]
|
||||||
if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHS] populateWithTemplate:kPhraseReplacementTemplateName extension:kTemplateExtension]) return NO;
|
populateWithTemplate:kUserDataTemplateName
|
||||||
if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHT] populateWithTemplate:kPhraseReplacementTemplateName extension:kTemplateExtension]) return NO;
|
extension:kTemplateExtension])
|
||||||
if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHT] populateWithTemplate:kUserSymbolDataTemplateName extension:kTemplateExtension]) return NO;
|
return NO;
|
||||||
if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHS] populateWithTemplate:kUserSymbolDataTemplateName extension:kTemplateExtension]) return NO;
|
if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHS]
|
||||||
|
populateWithTemplate:kUserAssDataTemplateName
|
||||||
|
extension:kTemplateExtension])
|
||||||
|
return NO;
|
||||||
|
if (![self ensureFileExists:[self userAssociatedPhrasesDataPath:imeModeCHT]
|
||||||
|
populateWithTemplate:kUserAssDataTemplateName
|
||||||
|
extension:kTemplateExtension])
|
||||||
|
return NO;
|
||||||
|
if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHS]
|
||||||
|
populateWithTemplate:kExcludedPhrasesvChewingTemplateName
|
||||||
|
extension:kTemplateExtension])
|
||||||
|
return NO;
|
||||||
|
if (![self ensureFileExists:[self excludedPhrasesDataPath:imeModeCHT]
|
||||||
|
populateWithTemplate:kExcludedPhrasesvChewingTemplateName
|
||||||
|
extension:kTemplateExtension])
|
||||||
|
return NO;
|
||||||
|
if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHS]
|
||||||
|
populateWithTemplate:kPhraseReplacementTemplateName
|
||||||
|
extension:kTemplateExtension])
|
||||||
|
return NO;
|
||||||
|
if (![self ensureFileExists:[self phraseReplacementDataPath:imeModeCHT]
|
||||||
|
populateWithTemplate:kPhraseReplacementTemplateName
|
||||||
|
extension:kTemplateExtension])
|
||||||
|
return NO;
|
||||||
|
if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHT]
|
||||||
|
populateWithTemplate:kUserSymbolDataTemplateName
|
||||||
|
extension:kTemplateExtension])
|
||||||
|
return NO;
|
||||||
|
if (![self ensureFileExists:[self userSymbolDataPath:imeModeCHS]
|
||||||
|
populateWithTemplate:kUserSymbolDataTemplateName
|
||||||
|
extension:kTemplateExtension])
|
||||||
|
return NO;
|
||||||
return YES;
|
return YES;
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase inputMode:(InputMode)mode key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:))
|
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase
|
||||||
|
inputMode:(InputMode)mode
|
||||||
|
key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:))
|
||||||
{
|
{
|
||||||
string unigramKey = string(key.UTF8String);
|
string unigramKey = string(key.UTF8String);
|
||||||
vector<vChewing::Unigram> unigrams = [mode isEqualToString:imeModeCHT] ? gLangModelCHT.unigramsForKey(unigramKey): gLangModelCHS.unigramsForKey(unigramKey);
|
vector<vChewing::Unigram> unigrams = [mode isEqualToString:imeModeCHT] ? gLangModelCHT.unigramsForKey(unigramKey)
|
||||||
|
: gLangModelCHS.unigramsForKey(unigramKey);
|
||||||
string userPhraseString = string(userPhrase.UTF8String);
|
string userPhraseString = string(userPhrase.UTF8String);
|
||||||
for (auto unigram: unigrams) {
|
for (auto unigram : unigrams)
|
||||||
if (unigram.keyValue.value == userPhraseString) {
|
{
|
||||||
|
if (unigram.keyValue.value == userPhraseString)
|
||||||
|
{
|
||||||
return YES;
|
return YES;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating areWeDeleting:(BOOL)areWeDeleting
|
+ (BOOL)writeUserPhrase:(NSString *)userPhrase
|
||||||
|
inputMode:(InputMode)mode
|
||||||
|
areWeDuplicating:(BOOL)areWeDuplicating
|
||||||
|
areWeDeleting:(BOOL)areWeDeleting
|
||||||
|
{
|
||||||
|
if (![self checkIfUserLanguageModelFilesExist])
|
||||||
{
|
{
|
||||||
if (![self checkIfUserLanguageModelFilesExist]) {
|
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,7 +315,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
// [currentMarkedPhrase appendString:@"\n"];
|
// [currentMarkedPhrase appendString:@"\n"];
|
||||||
// }
|
// }
|
||||||
[currentMarkedPhrase appendString:userPhrase];
|
[currentMarkedPhrase appendString:userPhrase];
|
||||||
if (areWeDuplicating && !areWeDeleting) {
|
if (areWeDuplicating && !areWeDeleting)
|
||||||
|
{
|
||||||
// Do not use ASCII characters to comment here.
|
// Do not use ASCII characters to comment here.
|
||||||
// Otherwise, it will be scrambled by cnvHYPYtoBPMF module shipped in the vChewing Phrase Editor.
|
// Otherwise, it will be scrambled by cnvHYPYtoBPMF module shipped in the vChewing Phrase Editor.
|
||||||
[currentMarkedPhrase appendString:@"\t#𝙾𝚟𝚎𝚛𝚛𝚒𝚍𝚎"];
|
[currentMarkedPhrase appendString:@"\t#𝙾𝚟𝚎𝚛𝚛𝚒𝚍𝚎"];
|
||||||
|
@ -241,7 +324,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
[currentMarkedPhrase appendString:@"\n"];
|
[currentMarkedPhrase appendString:@"\n"];
|
||||||
|
|
||||||
NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path];
|
NSFileHandle *writeFile = [NSFileHandle fileHandleForUpdatingAtPath:path];
|
||||||
if (!writeFile) {
|
if (!writeFile)
|
||||||
|
{
|
||||||
return NO;
|
return NO;
|
||||||
}
|
}
|
||||||
[writeFile seekToEndOfFile];
|
[writeFile seekToEndOfFile];
|
||||||
|
@ -249,12 +333,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
[writeFile writeData:data];
|
[writeFile writeData:data];
|
||||||
[writeFile closeFile];
|
[writeFile closeFile];
|
||||||
|
|
||||||
// We enforce the format consolidation here, since the pragma header will let the UserPhraseLM bypasses the consolidating process on load.
|
// We enforce the format consolidation here, since the pragma header will let the UserPhraseLM bypasses the
|
||||||
|
// consolidating process on load.
|
||||||
vChewing::LMConsolidator::ConsolidateContent([path UTF8String], false);
|
vChewing::LMConsolidator::ConsolidateContent([path UTF8String], false);
|
||||||
|
|
||||||
// We use FSEventStream to monitor the change of the user phrase folder,
|
// We use FSEventStream to monitor the change of the user phrase folder,
|
||||||
// so we don't have to load data here unless FSEventStream is disabled by user.
|
// so we don't have to load data here unless FSEventStream is disabled by user.
|
||||||
if (!mgrPrefs.shouldAutoReloadUserDataFiles) {
|
if (!mgrPrefs.shouldAutoReloadUserDataFiles)
|
||||||
|
{
|
||||||
[self loadUserPhrases];
|
[self loadUserPhrases];
|
||||||
}
|
}
|
||||||
return YES;
|
return YES;
|
||||||
|
@ -263,15 +349,21 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
+ (NSString *)dataFolderPath:(bool)isDefaultFolder
|
+ (NSString *)dataFolderPath:(bool)isDefaultFolder
|
||||||
{
|
{
|
||||||
// 此處不能用「~」來取代當前使用者目錄名稱。不然的話,一旦輸入法被系統的沙箱干預的話,則反而會定位到沙箱目錄內。
|
// 此處不能用「~」來取代當前使用者目錄名稱。不然的話,一旦輸入法被系統的沙箱干預的話,則反而會定位到沙箱目錄內。
|
||||||
NSString *appSupportPath = [NSFileManager.defaultManager URLsForDirectory:NSApplicationSupportDirectory inDomains:NSUserDomainMask][0].path;
|
NSString *appSupportPath = [NSFileManager.defaultManager URLsForDirectory:NSApplicationSupportDirectory
|
||||||
|
inDomains:NSUserDomainMask][0].path;
|
||||||
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"].stringByExpandingTildeInPath;
|
NSString *userDictPath = [appSupportPath stringByAppendingPathComponent:@"vChewing"].stringByExpandingTildeInPath;
|
||||||
if (mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath == userDictPath || isDefaultFolder) {
|
if (mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath == userDictPath || isDefaultFolder)
|
||||||
|
{
|
||||||
return userDictPath;
|
return userDictPath;
|
||||||
}
|
}
|
||||||
if ([mgrPrefs ifSpecifiedUserDataPathExistsInPlist]) {
|
if ([mgrPrefs ifSpecifiedUserDataPathExistsInPlist])
|
||||||
if ([self checkIfSpecifiedUserDataFolderValid:mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath]) {
|
{
|
||||||
|
if ([self checkIfSpecifiedUserDataFolderValid:mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath])
|
||||||
|
{
|
||||||
return mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath;
|
return mgrPrefs.userDataFolderSpecified.stringByExpandingTildeInPath;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
[NSUserDefaults.standardUserDefaults removeObjectForKey:@"UserDataFolderSpecified"];
|
[NSUserDefaults.standardUserDefaults removeObjectForKey:@"UserDataFolderSpecified"];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -286,13 +378,15 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
|
|
||||||
+ (NSString *)userSymbolDataPath:(InputMode)mode;
|
+ (NSString *)userSymbolDataPath:(InputMode)mode;
|
||||||
{
|
{
|
||||||
NSString *fileName = [mode isEqualToString:imeModeCHT] ? @"usersymbolphrases-cht.txt" : @"usersymbolphrases-chs.txt";
|
NSString *fileName =
|
||||||
|
[mode isEqualToString:imeModeCHT] ? @"usersymbolphrases-cht.txt" : @"usersymbolphrases-chs.txt";
|
||||||
return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
|
return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
|
||||||
}
|
}
|
||||||
|
|
||||||
+ (NSString *)userAssociatedPhrasesDataPath:(InputMode)mode;
|
+ (NSString *)userAssociatedPhrasesDataPath:(InputMode)mode;
|
||||||
{
|
{
|
||||||
NSString *fileName = [mode isEqualToString:imeModeCHT] ? @"associatedPhrases-cht.txt" : @"associatedPhrases-chs.txt";
|
NSString *fileName =
|
||||||
|
[mode isEqualToString:imeModeCHT] ? @"associatedPhrases-cht.txt" : @"associatedPhrases-chs.txt";
|
||||||
return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
|
return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -304,7 +398,8 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
|
||||||
|
|
||||||
+ (NSString *)phraseReplacementDataPath:(InputMode)mode;
|
+ (NSString *)phraseReplacementDataPath:(InputMode)mode;
|
||||||
{
|
{
|
||||||
NSString *fileName = [mode isEqualToString:imeModeCHT] ? @"phrases-replacement-cht.txt" : @"phrases-replacement-chs.txt";
|
NSString *fileName =
|
||||||
|
[mode isEqualToString:imeModeCHT] ? @"phrases-replacement-cht.txt" : @"phrases-replacement-chs.txt";
|
||||||
return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
|
return [[self dataFolderPath:false] stringByAppendingPathComponent:fileName];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,25 +1,32 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#import "mgrLangModel.h"
|
|
||||||
#import "UserOverrideModel.h"
|
|
||||||
#import "LMInstantiator.h"
|
#import "LMInstantiator.h"
|
||||||
|
#import "UserOverrideModel.h"
|
||||||
|
#import "mgrLangModel.h"
|
||||||
|
|
||||||
NS_ASSUME_NONNULL_BEGIN
|
NS_ASSUME_NONNULL_BEGIN
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef BIGRAM_H_
|
#ifndef BIGRAM_H_
|
||||||
|
@ -24,8 +31,10 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
|
|
||||||
#include "KeyValuePair.h"
|
#include "KeyValuePair.h"
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
class Bigram {
|
{
|
||||||
|
class Bigram
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
Bigram();
|
Bigram();
|
||||||
|
|
||||||
|
@ -37,26 +46,27 @@ public:
|
||||||
bool operator<(const Bigram &another) const;
|
bool operator<(const Bigram &another) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& stream, const Bigram& gram) {
|
inline std::ostream &operator<<(std::ostream &stream, const Bigram &gram)
|
||||||
|
{
|
||||||
std::streamsize p = stream.precision();
|
std::streamsize p = stream.precision();
|
||||||
stream.precision(6);
|
stream.precision(6);
|
||||||
stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << ","
|
stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," << gram.score << ")";
|
||||||
<< gram.score << ")";
|
|
||||||
stream.precision(p);
|
stream.precision(p);
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& stream,
|
inline std::ostream &operator<<(std::ostream &stream, const std::vector<Bigram> &grams)
|
||||||
const std::vector<Bigram>& grams) {
|
{
|
||||||
stream << "[" << grams.size() << "]=>{";
|
stream << "[" << grams.size() << "]=>{";
|
||||||
|
|
||||||
size_t index = 0;
|
size_t index = 0;
|
||||||
|
|
||||||
for (std::vector<Bigram>::const_iterator gi = grams.begin();
|
for (std::vector<Bigram>::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index)
|
||||||
gi != grams.end(); ++gi, ++index) {
|
{
|
||||||
stream << index << "=>";
|
stream << index << "=>";
|
||||||
stream << *gi;
|
stream << *gi;
|
||||||
if (gi + 1 != grams.end()) {
|
if (gi + 1 != grams.end())
|
||||||
|
{
|
||||||
stream << ",";
|
stream << ",";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -65,20 +75,29 @@ inline std::ostream& operator<<(std::ostream& stream,
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Bigram::Bigram() : score(0.0) {}
|
inline Bigram::Bigram() : score(0.0)
|
||||||
|
{
|
||||||
inline bool Bigram::operator==(const Bigram& another) const {
|
|
||||||
return preceedingKeyValue == another.preceedingKeyValue &&
|
|
||||||
keyValue == another.keyValue && score == another.score;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool Bigram::operator<(const Bigram& another) const {
|
inline bool Bigram::operator==(const Bigram &another) const
|
||||||
if (preceedingKeyValue < another.preceedingKeyValue) {
|
{
|
||||||
|
return preceedingKeyValue == another.preceedingKeyValue && keyValue == another.keyValue && score == another.score;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool Bigram::operator<(const Bigram &another) const
|
||||||
|
{
|
||||||
|
if (preceedingKeyValue < another.preceedingKeyValue)
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
} else if (preceedingKeyValue == another.preceedingKeyValue) {
|
}
|
||||||
if (keyValue < another.keyValue) {
|
else if (preceedingKeyValue == another.preceedingKeyValue)
|
||||||
|
{
|
||||||
|
if (keyValue < another.keyValue)
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
} else if (keyValue == another.keyValue) {
|
}
|
||||||
|
else if (keyValue == another.keyValue)
|
||||||
|
{
|
||||||
return score < another.score;
|
return score < another.score;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -88,5 +107,4 @@ inline bool Bigram::operator<(const Bigram& another) const {
|
||||||
}
|
}
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef BLOCKREADINGBUILDER_H_
|
#ifndef BLOCKREADINGBUILDER_H_
|
||||||
|
@ -26,9 +33,11 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include "Grid.h"
|
#include "Grid.h"
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
|
{
|
||||||
|
|
||||||
class BlockReadingBuilder {
|
class BlockReadingBuilder
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
explicit BlockReadingBuilder(LanguageModel *lm);
|
explicit BlockReadingBuilder(LanguageModel *lm);
|
||||||
void clear();
|
void clear();
|
||||||
|
@ -53,8 +62,7 @@ protected:
|
||||||
void build();
|
void build();
|
||||||
|
|
||||||
static const std::string Join(std::vector<std::string>::const_iterator begin,
|
static const std::string Join(std::vector<std::string>::const_iterator begin,
|
||||||
std::vector<std::string>::const_iterator end,
|
std::vector<std::string>::const_iterator end, const std::string &separator);
|
||||||
const std::string& separator);
|
|
||||||
|
|
||||||
// 規定最多可以組成的詞的字數上限為 10
|
// 規定最多可以組成的詞的字數上限為 10
|
||||||
static const size_t MaximumBuildSpanLength = 10;
|
static const size_t MaximumBuildSpanLength = 10;
|
||||||
|
@ -67,25 +75,34 @@ protected:
|
||||||
std::string m_joinSeparator;
|
std::string m_joinSeparator;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel* lm)
|
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *lm) : m_LM(lm), m_cursorIndex(0)
|
||||||
: m_LM(lm), m_cursorIndex(0) {}
|
{
|
||||||
|
}
|
||||||
|
|
||||||
inline void BlockReadingBuilder::clear() {
|
inline void BlockReadingBuilder::clear()
|
||||||
|
{
|
||||||
m_cursorIndex = 0;
|
m_cursorIndex = 0;
|
||||||
m_readings.clear();
|
m_readings.clear();
|
||||||
m_grid.clear();
|
m_grid.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t BlockReadingBuilder::length() const { return m_readings.size(); }
|
inline size_t BlockReadingBuilder::length() const
|
||||||
|
{
|
||||||
|
return m_readings.size();
|
||||||
|
}
|
||||||
|
|
||||||
inline size_t BlockReadingBuilder::cursorIndex() const { return m_cursorIndex; }
|
inline size_t BlockReadingBuilder::cursorIndex() const
|
||||||
|
{
|
||||||
|
return m_cursorIndex;
|
||||||
|
}
|
||||||
|
|
||||||
inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) {
|
inline void BlockReadingBuilder::setCursorIndex(size_t newIndex)
|
||||||
|
{
|
||||||
m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex;
|
m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void BlockReadingBuilder::insertReadingAtCursor(
|
inline void BlockReadingBuilder::insertReadingAtCursor(const std::string &reading)
|
||||||
const std::string& reading) {
|
{
|
||||||
m_readings.insert(m_readings.begin() + m_cursorIndex, reading);
|
m_readings.insert(m_readings.begin() + m_cursorIndex, reading);
|
||||||
|
|
||||||
m_grid.expandGridByOneAtLocation(m_cursorIndex);
|
m_grid.expandGridByOneAtLocation(m_cursorIndex);
|
||||||
|
@ -93,42 +110,49 @@ inline void BlockReadingBuilder::insertReadingAtCursor(
|
||||||
m_cursorIndex++;
|
m_cursorIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::vector<std::string> BlockReadingBuilder::readings() const {
|
inline std::vector<std::string> BlockReadingBuilder::readings() const
|
||||||
|
{
|
||||||
return m_readings;
|
return m_readings;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool BlockReadingBuilder::deleteReadingBeforeCursor() {
|
inline bool BlockReadingBuilder::deleteReadingBeforeCursor()
|
||||||
if (!m_cursorIndex) {
|
{
|
||||||
|
if (!m_cursorIndex)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_readings.erase(m_readings.begin() + m_cursorIndex - 1,
|
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex);
|
||||||
m_readings.begin() + m_cursorIndex);
|
|
||||||
m_cursorIndex--;
|
m_cursorIndex--;
|
||||||
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
|
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
|
||||||
build();
|
build();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool BlockReadingBuilder::deleteReadingAfterCursor() {
|
inline bool BlockReadingBuilder::deleteReadingAfterCursor()
|
||||||
if (m_cursorIndex == m_readings.size()) {
|
{
|
||||||
|
if (m_cursorIndex == m_readings.size())
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_readings.erase(m_readings.begin() + m_cursorIndex,
|
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1);
|
||||||
m_readings.begin() + m_cursorIndex + 1);
|
|
||||||
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
|
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
|
||||||
build();
|
build();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
|
inline bool BlockReadingBuilder::removeHeadReadings(size_t count)
|
||||||
if (count > length()) {
|
{
|
||||||
|
if (count > length())
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < count; i++) {
|
for (size_t i = 0; i < count; i++)
|
||||||
if (m_cursorIndex) {
|
{
|
||||||
|
if (m_cursorIndex)
|
||||||
|
{
|
||||||
m_cursorIndex--;
|
m_cursorIndex--;
|
||||||
}
|
}
|
||||||
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
|
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
|
||||||
|
@ -139,44 +163,56 @@ inline bool BlockReadingBuilder::removeHeadReadings(size_t count) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void BlockReadingBuilder::setJoinSeparator(
|
inline void BlockReadingBuilder::setJoinSeparator(const std::string &separator)
|
||||||
const std::string& separator) {
|
{
|
||||||
m_joinSeparator = separator;
|
m_joinSeparator = separator;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const std::string BlockReadingBuilder::joinSeparator() const {
|
inline const std::string BlockReadingBuilder::joinSeparator() const
|
||||||
|
{
|
||||||
return m_joinSeparator;
|
return m_joinSeparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Grid& BlockReadingBuilder::grid() { return m_grid; }
|
inline Grid &BlockReadingBuilder::grid()
|
||||||
|
{
|
||||||
|
return m_grid;
|
||||||
|
}
|
||||||
|
|
||||||
inline void BlockReadingBuilder::build() {
|
inline void BlockReadingBuilder::build()
|
||||||
if (!m_LM) {
|
{
|
||||||
|
if (!m_LM)
|
||||||
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t begin = 0;
|
size_t begin = 0;
|
||||||
size_t end = m_cursorIndex + MaximumBuildSpanLength;
|
size_t end = m_cursorIndex + MaximumBuildSpanLength;
|
||||||
|
|
||||||
if (m_cursorIndex < MaximumBuildSpanLength) {
|
if (m_cursorIndex < MaximumBuildSpanLength)
|
||||||
|
{
|
||||||
begin = 0;
|
begin = 0;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
begin = m_cursorIndex - MaximumBuildSpanLength;
|
begin = m_cursorIndex - MaximumBuildSpanLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end > m_readings.size()) {
|
if (end > m_readings.size())
|
||||||
|
{
|
||||||
end = m_readings.size();
|
end = m_readings.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t p = begin; p < end; p++) {
|
for (size_t p = begin; p < end; p++)
|
||||||
for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) {
|
{
|
||||||
std::string combinedReading = Join(
|
for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++)
|
||||||
m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
{
|
||||||
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q,
|
std::string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
||||||
combinedReading)) {
|
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading))
|
||||||
|
{
|
||||||
std::vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
|
std::vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
|
||||||
|
|
||||||
if (unigrams.size() > 0) {
|
if (unigrams.size() > 0)
|
||||||
|
{
|
||||||
Node n(combinedReading, unigrams, std::vector<Bigram>());
|
Node n(combinedReading, unigrams, std::vector<Bigram>());
|
||||||
m_grid.insertNode(n, p, q);
|
m_grid.insertNode(n, p, q);
|
||||||
}
|
}
|
||||||
|
@ -185,15 +221,17 @@ inline void BlockReadingBuilder::build() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const std::string BlockReadingBuilder::Join(
|
inline const std::string BlockReadingBuilder::Join(std::vector<std::string>::const_iterator begin,
|
||||||
std::vector<std::string>::const_iterator begin,
|
|
||||||
std::vector<std::string>::const_iterator end,
|
std::vector<std::string>::const_iterator end,
|
||||||
const std::string& separator) {
|
const std::string &separator)
|
||||||
|
{
|
||||||
std::string result;
|
std::string result;
|
||||||
for (std::vector<std::string>::const_iterator iter = begin; iter != end;) {
|
for (std::vector<std::string>::const_iterator iter = begin; iter != end;)
|
||||||
|
{
|
||||||
result += *iter;
|
result += *iter;
|
||||||
++iter;
|
++iter;
|
||||||
if (iter != end) {
|
if (iter != end)
|
||||||
|
{
|
||||||
result += separator;
|
result += separator;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -201,5 +239,4 @@ inline const std::string BlockReadingBuilder::Join(
|
||||||
}
|
}
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef GRAMAMBULAR_H_
|
#ifndef GRAMAMBULAR_H_
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef GRID_H_
|
#ifndef GRID_H_
|
||||||
|
@ -27,15 +34,15 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include "NodeAnchor.h"
|
#include "NodeAnchor.h"
|
||||||
#include "Span.h"
|
#include "Span.h"
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
|
{
|
||||||
|
|
||||||
class Grid {
|
class Grid
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
void clear();
|
void clear();
|
||||||
void insertNode(const Node &node, size_t location, size_t spanningLength);
|
void insertNode(const Node &node, size_t location, size_t spanningLength);
|
||||||
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location,
|
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, const std::string &key);
|
||||||
size_t spanningLength,
|
|
||||||
const std::string& key);
|
|
||||||
|
|
||||||
void expandGridByOneAtLocation(size_t location);
|
void expandGridByOneAtLocation(size_t location);
|
||||||
void shrinkGridByOneAtLocation(size_t location);
|
void shrinkGridByOneAtLocation(size_t location);
|
||||||
|
@ -49,46 +56,52 @@ public:
|
||||||
// evaluated to that unigram, while all other overlapping nodes will be reset
|
// evaluated to that unigram, while all other overlapping nodes will be reset
|
||||||
// to their initial state (that is, if any of those nodes were "frozen" or
|
// to their initial state (that is, if any of those nodes were "frozen" or
|
||||||
// fixed, they will be unfrozen.)
|
// fixed, they will be unfrozen.)
|
||||||
NodeAnchor fixNodeSelectedCandidate(size_t location,
|
NodeAnchor fixNodeSelectedCandidate(size_t location, const std::string &value);
|
||||||
const std::string& value);
|
|
||||||
|
|
||||||
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
|
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
|
||||||
// only boost the unigram that represents the value with an overriding score.
|
// only boost the unigram that represents the value with an overriding score.
|
||||||
// This has the same side effect as fixNodeSelectedCandidate, which is that
|
// This has the same side effect as fixNodeSelectedCandidate, which is that
|
||||||
// all other overlapping nodes will be reset to their initial state.
|
// all other overlapping nodes will be reset to their initial state.
|
||||||
void overrideNodeScoreForSelectedCandidate(size_t location,
|
void overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, float overridingScore);
|
||||||
const std::string& value,
|
|
||||||
float overridingScore);
|
|
||||||
|
|
||||||
std::string dumpDOT() {
|
std::string dumpDOT()
|
||||||
|
{
|
||||||
std::stringstream sst;
|
std::stringstream sst;
|
||||||
sst << "digraph {" << std::endl;
|
sst << "digraph {" << std::endl;
|
||||||
sst << "graph [ rankdir=LR ];" << std::endl;
|
sst << "graph [ rankdir=LR ];" << std::endl;
|
||||||
sst << "BOS;" << std::endl;
|
sst << "BOS;" << std::endl;
|
||||||
|
|
||||||
for (size_t p = 0; p < m_spans.size(); p++) {
|
for (size_t p = 0; p < m_spans.size(); p++)
|
||||||
|
{
|
||||||
Span &span = m_spans[p];
|
Span &span = m_spans[p];
|
||||||
for (size_t ni = 0; ni <= span.maximumLength(); ni++) {
|
for (size_t ni = 0; ni <= span.maximumLength(); ni++)
|
||||||
|
{
|
||||||
Node *np = span.nodeOfLength(ni);
|
Node *np = span.nodeOfLength(ni);
|
||||||
if (np) {
|
if (np)
|
||||||
if (!p) {
|
{
|
||||||
|
if (!p)
|
||||||
|
{
|
||||||
sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl;
|
sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
sst << np->currentKeyValue().value << ";" << std::endl;
|
sst << np->currentKeyValue().value << ";" << std::endl;
|
||||||
|
|
||||||
if (p + ni < m_spans.size()) {
|
if (p + ni < m_spans.size())
|
||||||
|
{
|
||||||
Span &dstSpan = m_spans[p + ni];
|
Span &dstSpan = m_spans[p + ni];
|
||||||
for (size_t q = 0; q <= dstSpan.maximumLength(); q++) {
|
for (size_t q = 0; q <= dstSpan.maximumLength(); q++)
|
||||||
|
{
|
||||||
Node *dn = dstSpan.nodeOfLength(q);
|
Node *dn = dstSpan.nodeOfLength(q);
|
||||||
if (dn) {
|
if (dn)
|
||||||
sst << np->currentKeyValue().value << " -> "
|
{
|
||||||
<< dn->currentKeyValue().value << ";" << std::endl;
|
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";"
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p + ni == m_spans.size()) {
|
if (p + ni == m_spans.size())
|
||||||
|
{
|
||||||
sst << np->currentKeyValue().value << " -> "
|
sst << np->currentKeyValue().value << " -> "
|
||||||
<< "EOS;" << std::endl;
|
<< "EOS;" << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -105,14 +118,19 @@ protected:
|
||||||
std::vector<Span> m_spans;
|
std::vector<Span> m_spans;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void Grid::clear() { m_spans.clear(); }
|
inline void Grid::clear()
|
||||||
|
{
|
||||||
|
m_spans.clear();
|
||||||
|
}
|
||||||
|
|
||||||
inline void Grid::insertNode(const Node& node, size_t location,
|
inline void Grid::insertNode(const Node &node, size_t location, size_t spanningLength)
|
||||||
size_t spanningLength) {
|
{
|
||||||
if (location >= m_spans.size()) {
|
if (location >= m_spans.size())
|
||||||
|
{
|
||||||
size_t diff = location - m_spans.size() + 1;
|
size_t diff = location - m_spans.size() + 1;
|
||||||
|
|
||||||
for (size_t i = 0; i < diff; i++) {
|
for (size_t i = 0; i < diff; i++)
|
||||||
|
{
|
||||||
m_spans.push_back(Span());
|
m_spans.push_back(Span());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -120,55 +138,74 @@ inline void Grid::insertNode(const Node& node, size_t location,
|
||||||
m_spans[location].insertNodeOfLength(node, spanningLength);
|
m_spans[location].insertNodeOfLength(node, spanningLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(
|
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength,
|
||||||
size_t location, size_t spanningLength, const std::string& key) {
|
const std::string &key)
|
||||||
if (location > m_spans.size()) {
|
{
|
||||||
|
if (location > m_spans.size())
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Node *n = m_spans[location].nodeOfLength(spanningLength);
|
const Node *n = m_spans[location].nodeOfLength(spanningLength);
|
||||||
if (!n) {
|
if (!n)
|
||||||
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return key == n->key();
|
return key == n->key();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Grid::expandGridByOneAtLocation(size_t location) {
|
inline void Grid::expandGridByOneAtLocation(size_t location)
|
||||||
if (!location || location == m_spans.size()) {
|
{
|
||||||
|
if (!location || location == m_spans.size())
|
||||||
|
{
|
||||||
m_spans.insert(m_spans.begin() + location, Span());
|
m_spans.insert(m_spans.begin() + location, Span());
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
m_spans.insert(m_spans.begin() + location, Span());
|
m_spans.insert(m_spans.begin() + location, Span());
|
||||||
for (size_t i = 0; i < location; i++) {
|
for (size_t i = 0; i < location; i++)
|
||||||
|
{
|
||||||
// zaps overlapping spans
|
// zaps overlapping spans
|
||||||
m_spans[i].removeNodeOfLengthGreaterThan(location - i);
|
m_spans[i].removeNodeOfLengthGreaterThan(location - i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Grid::shrinkGridByOneAtLocation(size_t location) {
|
inline void Grid::shrinkGridByOneAtLocation(size_t location)
|
||||||
if (location >= m_spans.size()) {
|
{
|
||||||
|
if (location >= m_spans.size())
|
||||||
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_spans.erase(m_spans.begin() + location);
|
m_spans.erase(m_spans.begin() + location);
|
||||||
for (size_t i = 0; i < location; i++) {
|
for (size_t i = 0; i < location; i++)
|
||||||
|
{
|
||||||
// zaps overlapping spans
|
// zaps overlapping spans
|
||||||
m_spans[i].removeNodeOfLengthGreaterThan(location - i);
|
m_spans[i].removeNodeOfLengthGreaterThan(location - i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t Grid::width() const { return m_spans.size(); }
|
inline size_t Grid::width() const
|
||||||
|
{
|
||||||
|
return m_spans.size();
|
||||||
|
}
|
||||||
|
|
||||||
inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location) {
|
inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location)
|
||||||
|
{
|
||||||
std::vector<NodeAnchor> result;
|
std::vector<NodeAnchor> result;
|
||||||
|
|
||||||
if (m_spans.size() && location <= m_spans.size()) {
|
if (m_spans.size() && location <= m_spans.size())
|
||||||
for (size_t i = 0; i < location; i++) {
|
{
|
||||||
|
for (size_t i = 0; i < location; i++)
|
||||||
|
{
|
||||||
Span &span = m_spans[i];
|
Span &span = m_spans[i];
|
||||||
if (i + span.maximumLength() >= location) {
|
if (i + span.maximumLength() >= location)
|
||||||
|
{
|
||||||
Node *np = span.nodeOfLength(location - i);
|
Node *np = span.nodeOfLength(location - i);
|
||||||
if (np) {
|
if (np)
|
||||||
|
{
|
||||||
NodeAnchor na;
|
NodeAnchor na;
|
||||||
na.node = np;
|
na.node = np;
|
||||||
na.location = i;
|
na.location = i;
|
||||||
|
@ -183,21 +220,28 @@ inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location) {
|
inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location)
|
||||||
|
{
|
||||||
std::vector<NodeAnchor> result;
|
std::vector<NodeAnchor> result;
|
||||||
|
|
||||||
if (m_spans.size() && location <= m_spans.size()) {
|
if (m_spans.size() && location <= m_spans.size())
|
||||||
for (size_t i = 0; i < location; i++) {
|
{
|
||||||
|
for (size_t i = 0; i < location; i++)
|
||||||
|
{
|
||||||
Span &span = m_spans[i];
|
Span &span = m_spans[i];
|
||||||
|
|
||||||
if (i + span.maximumLength() >= location) {
|
if (i + span.maximumLength() >= location)
|
||||||
for (size_t j = 1, m = span.maximumLength(); j <= m; j++) {
|
{
|
||||||
if (i + j < location) {
|
for (size_t j = 1, m = span.maximumLength(); j <= m; j++)
|
||||||
|
{
|
||||||
|
if (i + j < location)
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
Node *np = span.nodeOfLength(j);
|
Node *np = span.nodeOfLength(j);
|
||||||
if (np) {
|
if (np)
|
||||||
|
{
|
||||||
NodeAnchor na;
|
NodeAnchor na;
|
||||||
na.node = np;
|
na.node = np;
|
||||||
na.location = i;
|
na.location = i;
|
||||||
|
@ -215,18 +259,21 @@ inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location) {
|
||||||
|
|
||||||
// For nodes found at the location, fix their currently-selected candidate using
|
// For nodes found at the location, fix their currently-selected candidate using
|
||||||
// the supplied string value.
|
// the supplied string value.
|
||||||
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
|
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const std::string &value)
|
||||||
const std::string& value) {
|
{
|
||||||
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
||||||
NodeAnchor node;
|
NodeAnchor node;
|
||||||
for (auto nodeAnchor : nodes) {
|
for (auto nodeAnchor : nodes)
|
||||||
|
{
|
||||||
auto candidates = nodeAnchor.node->candidates();
|
auto candidates = nodeAnchor.node->candidates();
|
||||||
|
|
||||||
// Reset the candidate-fixed state of every node at the location.
|
// Reset the candidate-fixed state of every node at the location.
|
||||||
const_cast<Node *>(nodeAnchor.node)->resetCandidate();
|
const_cast<Node *>(nodeAnchor.node)->resetCandidate();
|
||||||
|
|
||||||
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
|
for (size_t i = 0, c = candidates.size(); i < c; ++i)
|
||||||
if (candidates[i].value == value) {
|
{
|
||||||
|
if (candidates[i].value == value)
|
||||||
|
{
|
||||||
const_cast<Node *>(nodeAnchor.node)->selectCandidateAtIndex(i);
|
const_cast<Node *>(nodeAnchor.node)->selectCandidateAtIndex(i);
|
||||||
node = nodeAnchor;
|
node = nodeAnchor;
|
||||||
break;
|
break;
|
||||||
|
@ -236,19 +283,22 @@ inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location,
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Grid::overrideNodeScoreForSelectedCandidate(
|
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value,
|
||||||
size_t location, const std::string& value, float overridingScore) {
|
float overridingScore)
|
||||||
|
{
|
||||||
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
||||||
for (auto nodeAnchor : nodes) {
|
for (auto nodeAnchor : nodes)
|
||||||
|
{
|
||||||
auto candidates = nodeAnchor.node->candidates();
|
auto candidates = nodeAnchor.node->candidates();
|
||||||
|
|
||||||
// Reset the candidate-fixed state of every node at the location.
|
// Reset the candidate-fixed state of every node at the location.
|
||||||
const_cast<Node *>(nodeAnchor.node)->resetCandidate();
|
const_cast<Node *>(nodeAnchor.node)->resetCandidate();
|
||||||
|
|
||||||
for (size_t i = 0, c = candidates.size(); i < c; ++i) {
|
for (size_t i = 0, c = candidates.size(); i < c; ++i)
|
||||||
if (candidates[i].value == value) {
|
{
|
||||||
const_cast<Node*>(nodeAnchor.node)
|
if (candidates[i].value == value)
|
||||||
->selectFloatingCandidateAtIndex(i, overridingScore);
|
{
|
||||||
|
const_cast<Node *>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -257,5 +307,4 @@ inline void Grid::overrideNodeScoreForSelectedCandidate(
|
||||||
|
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef KEYVALUEPAIR_H_
|
#ifndef KEYVALUEPAIR_H_
|
||||||
|
@ -23,9 +30,11 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
|
{
|
||||||
|
|
||||||
class KeyValuePair {
|
class KeyValuePair
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
std::string key;
|
std::string key;
|
||||||
std::string value;
|
std::string value;
|
||||||
|
@ -34,25 +43,29 @@ public:
|
||||||
bool operator<(const KeyValuePair &another) const;
|
bool operator<(const KeyValuePair &another) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& stream,
|
inline std::ostream &operator<<(std::ostream &stream, const KeyValuePair &pair)
|
||||||
const KeyValuePair& pair) {
|
{
|
||||||
stream << "(" << pair.key << "," << pair.value << ")";
|
stream << "(" << pair.key << "," << pair.value << ")";
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool KeyValuePair::operator==(const KeyValuePair& another) const {
|
inline bool KeyValuePair::operator==(const KeyValuePair &another) const
|
||||||
|
{
|
||||||
return key == another.key && value == another.value;
|
return key == another.key && value == another.value;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool KeyValuePair::operator<(const KeyValuePair& another) const {
|
inline bool KeyValuePair::operator<(const KeyValuePair &another) const
|
||||||
if (key < another.key) {
|
{
|
||||||
|
if (key < another.key)
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
} else if (key == another.key) {
|
}
|
||||||
|
else if (key == another.key)
|
||||||
|
{
|
||||||
return value < another.value;
|
return value < another.value;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LANGUAGEMODEL_H_
|
#ifndef LANGUAGEMODEL_H_
|
||||||
|
@ -26,18 +33,20 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
#include "Bigram.h"
|
#include "Bigram.h"
|
||||||
#include "Unigram.h"
|
#include "Unigram.h"
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
|
{
|
||||||
|
|
||||||
class LanguageModel {
|
class LanguageModel
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
virtual ~LanguageModel() {}
|
virtual ~LanguageModel()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
virtual const std::vector<Bigram> bigramsForKeys(
|
virtual const std::vector<Bigram> bigramsForKeys(const std::string &preceedingKey, const std::string &key) = 0;
|
||||||
const std::string& preceedingKey, const std::string& key) = 0;
|
|
||||||
virtual const std::vector<Unigram> unigramsForKey(const std::string &key) = 0;
|
virtual const std::vector<Unigram> unigramsForKey(const std::string &key) = 0;
|
||||||
virtual bool hasUnigramsForKey(const std::string &key) = 0;
|
virtual bool hasUnigramsForKey(const std::string &key) = 0;
|
||||||
};
|
};
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef NODE_H_
|
#ifndef NODE_H_
|
||||||
|
@ -27,16 +34,16 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
|
|
||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
|
{
|
||||||
|
|
||||||
class Node {
|
class Node
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
Node();
|
Node();
|
||||||
Node(const std::string& key, const std::vector<Unigram>& unigrams,
|
Node(const std::string &key, const std::vector<Unigram> &unigrams, const std::vector<Bigram> &bigrams);
|
||||||
const std::vector<Bigram>& bigrams);
|
|
||||||
|
|
||||||
void primeNodeWithPreceedingKeyValues(
|
void primeNodeWithPreceedingKeyValues(const std::vector<KeyValuePair> &keyValues);
|
||||||
const std::vector<KeyValuePair>& keyValues);
|
|
||||||
|
|
||||||
bool isCandidateFixed() const;
|
bool isCandidateFixed() const;
|
||||||
const std::vector<KeyValuePair> &candidates() const;
|
const std::vector<KeyValuePair> &candidates() const;
|
||||||
|
@ -67,65 +74,65 @@ protected:
|
||||||
friend std::ostream &operator<<(std::ostream &stream, const Node &node);
|
friend std::ostream &operator<<(std::ostream &stream, const Node &node);
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& stream, const Node& node) {
|
inline std::ostream &operator<<(std::ostream &stream, const Node &node)
|
||||||
stream << "(node,key:" << node.m_key
|
{
|
||||||
<< ",fixed:" << (node.m_candidateFixed ? "true" : "false")
|
stream << "(node,key:" << node.m_key << ",fixed:" << (node.m_candidateFixed ? "true" : "false")
|
||||||
<< ",selected:" << node.m_selectedUnigramIndex << ","
|
<< ",selected:" << node.m_selectedUnigramIndex << "," << node.m_unigrams << ")";
|
||||||
<< node.m_unigrams << ")";
|
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Node::Node()
|
inline Node::Node() : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0)
|
||||||
: m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) {}
|
{
|
||||||
|
}
|
||||||
|
|
||||||
inline Node::Node(const std::string& key, const std::vector<Unigram>& unigrams,
|
inline Node::Node(const std::string &key, const std::vector<Unigram> &unigrams, const std::vector<Bigram> &bigrams)
|
||||||
const std::vector<Bigram>& bigrams)
|
: m_key(key), m_unigrams(unigrams), m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0)
|
||||||
: m_key(key),
|
{
|
||||||
m_unigrams(unigrams),
|
|
||||||
m_candidateFixed(false),
|
|
||||||
m_selectedUnigramIndex(0),
|
|
||||||
m_score(0.0) {
|
|
||||||
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
|
stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);
|
||||||
|
|
||||||
if (m_unigrams.size()) {
|
if (m_unigrams.size())
|
||||||
|
{
|
||||||
m_score = m_unigrams[0].score;
|
m_score = m_unigrams[0].score;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
for (std::vector<Unigram>::const_iterator ui = m_unigrams.begin();
|
for (std::vector<Unigram>::const_iterator ui = m_unigrams.begin(); ui != m_unigrams.end(); ++ui)
|
||||||
ui != m_unigrams.end(); ++ui) {
|
{
|
||||||
m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
|
m_valueUnigramIndexMap[(*ui).keyValue.value] = i;
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
m_candidates.push_back((*ui).keyValue);
|
m_candidates.push_back((*ui).keyValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (std::vector<Bigram>::const_iterator bi = bigrams.begin();
|
for (std::vector<Bigram>::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi)
|
||||||
bi != bigrams.end(); ++bi) {
|
{
|
||||||
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
|
m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Node::primeNodeWithPreceedingKeyValues(
|
inline void Node::primeNodeWithPreceedingKeyValues(const std::vector<KeyValuePair> &keyValues)
|
||||||
const std::vector<KeyValuePair>& keyValues) {
|
{
|
||||||
size_t newIndex = m_selectedUnigramIndex;
|
size_t newIndex = m_selectedUnigramIndex;
|
||||||
double max = m_score;
|
double max = m_score;
|
||||||
|
|
||||||
if (!isCandidateFixed()) {
|
if (!isCandidateFixed())
|
||||||
for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin();
|
{
|
||||||
kvi != keyValues.end(); ++kvi) {
|
for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin(); kvi != keyValues.end(); ++kvi)
|
||||||
std::map<KeyValuePair, std::vector<Bigram> >::const_iterator f =
|
{
|
||||||
m_preceedingGramBigramMap.find(*kvi);
|
std::map<KeyValuePair, std::vector<Bigram>>::const_iterator f = m_preceedingGramBigramMap.find(*kvi);
|
||||||
if (f != m_preceedingGramBigramMap.end()) {
|
if (f != m_preceedingGramBigramMap.end())
|
||||||
|
{
|
||||||
const std::vector<Bigram> &bigrams = (*f).second;
|
const std::vector<Bigram> &bigrams = (*f).second;
|
||||||
|
|
||||||
for (std::vector<Bigram>::const_iterator bi = bigrams.begin();
|
for (std::vector<Bigram>::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi)
|
||||||
bi != bigrams.end(); ++bi) {
|
{
|
||||||
const Bigram &bigram = *bi;
|
const Bigram &bigram = *bi;
|
||||||
if (bigram.score > max) {
|
if (bigram.score > max)
|
||||||
|
{
|
||||||
std::map<std::string, size_t>::const_iterator uf =
|
std::map<std::string, size_t>::const_iterator uf =
|
||||||
m_valueUnigramIndexMap.find((*bi).keyValue.value);
|
m_valueUnigramIndexMap.find((*bi).keyValue.value);
|
||||||
if (uf != m_valueUnigramIndexMap.end()) {
|
if (uf != m_valueUnigramIndexMap.end())
|
||||||
|
{
|
||||||
newIndex = (*uf).second;
|
newIndex = (*uf).second;
|
||||||
max = bigram.score;
|
max = bigram.score;
|
||||||
}
|
}
|
||||||
|
@ -135,25 +142,35 @@ inline void Node::primeNodeWithPreceedingKeyValues(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_score != max) {
|
if (m_score != max)
|
||||||
|
{
|
||||||
m_score = max;
|
m_score = max;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newIndex != m_selectedUnigramIndex) {
|
if (newIndex != m_selectedUnigramIndex)
|
||||||
|
{
|
||||||
m_selectedUnigramIndex = newIndex;
|
m_selectedUnigramIndex = newIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool Node::isCandidateFixed() const { return m_candidateFixed; }
|
inline bool Node::isCandidateFixed() const
|
||||||
|
{
|
||||||
|
return m_candidateFixed;
|
||||||
|
}
|
||||||
|
|
||||||
inline const std::vector<KeyValuePair>& Node::candidates() const {
|
inline const std::vector<KeyValuePair> &Node::candidates() const
|
||||||
|
{
|
||||||
return m_candidates;
|
return m_candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Node::selectCandidateAtIndex(size_t index, bool fix) {
|
inline void Node::selectCandidateAtIndex(size_t index, bool fix)
|
||||||
if (index >= m_unigrams.size()) {
|
{
|
||||||
|
if (index >= m_unigrams.size())
|
||||||
|
{
|
||||||
m_selectedUnigramIndex = 0;
|
m_selectedUnigramIndex = 0;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
m_selectedUnigramIndex = index;
|
m_selectedUnigramIndex = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,53 +178,72 @@ inline void Node::selectCandidateAtIndex(size_t index, bool fix) {
|
||||||
m_score = 99;
|
m_score = 99;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Node::resetCandidate() {
|
inline void Node::resetCandidate()
|
||||||
|
{
|
||||||
m_selectedUnigramIndex = 0;
|
m_selectedUnigramIndex = 0;
|
||||||
m_candidateFixed = 0;
|
m_candidateFixed = 0;
|
||||||
if (m_unigrams.size()) {
|
if (m_unigrams.size())
|
||||||
|
{
|
||||||
m_score = m_unigrams[0].score;
|
m_score = m_unigrams[0].score;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) {
|
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score)
|
||||||
if (index >= m_unigrams.size()) {
|
{
|
||||||
|
if (index >= m_unigrams.size())
|
||||||
|
{
|
||||||
m_selectedUnigramIndex = 0;
|
m_selectedUnigramIndex = 0;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
m_selectedUnigramIndex = index;
|
m_selectedUnigramIndex = index;
|
||||||
}
|
}
|
||||||
m_candidateFixed = false;
|
m_candidateFixed = false;
|
||||||
m_score = score;
|
m_score = score;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const std::string& Node::key() const { return m_key; }
|
inline const std::string &Node::key() const
|
||||||
|
{
|
||||||
|
return m_key;
|
||||||
|
}
|
||||||
|
|
||||||
inline double Node::score() const { return m_score; }
|
inline double Node::score() const
|
||||||
|
{
|
||||||
|
return m_score;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline double Node::scoreForCandidate(const std::string &candidate) const
|
||||||
inline double Node::scoreForCandidate(const std::string& candidate) const {
|
{
|
||||||
for (auto unigram : m_unigrams) {
|
for (auto unigram : m_unigrams)
|
||||||
if (unigram.keyValue.value == candidate) {
|
{
|
||||||
|
if (unigram.keyValue.value == candidate)
|
||||||
|
{
|
||||||
return unigram.score;
|
return unigram.score;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline double Node::highestUnigramScore() const {
|
inline double Node::highestUnigramScore() const
|
||||||
if (m_unigrams.empty()) {
|
{
|
||||||
|
if (m_unigrams.empty())
|
||||||
|
{
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
return m_unigrams[0].score;
|
return m_unigrams[0].score;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline const KeyValuePair Node::currentKeyValue() const {
|
inline const KeyValuePair Node::currentKeyValue() const
|
||||||
if (m_selectedUnigramIndex >= m_unigrams.size()) {
|
{
|
||||||
|
if (m_selectedUnigramIndex >= m_unigrams.size())
|
||||||
|
{
|
||||||
return KeyValuePair();
|
return KeyValuePair();
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
return m_candidates[m_selectedUnigramIndex];
|
return m_candidates[m_selectedUnigramIndex];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef NODEANCHOR_H_
|
#ifndef NODEANCHOR_H_
|
||||||
|
@ -24,33 +31,39 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
|
|
||||||
#include "Node.h"
|
#include "Node.h"
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
|
{
|
||||||
|
|
||||||
struct NodeAnchor {
|
struct NodeAnchor
|
||||||
|
{
|
||||||
const Node *node = nullptr;
|
const Node *node = nullptr;
|
||||||
size_t location = 0;
|
size_t location = 0;
|
||||||
size_t spanningLength = 0;
|
size_t spanningLength = 0;
|
||||||
double accumulatedScore = 0.0;
|
double accumulatedScore = 0.0;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& stream,
|
inline std::ostream &operator<<(std::ostream &stream, const NodeAnchor &anchor)
|
||||||
const NodeAnchor& anchor) {
|
{
|
||||||
stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),";
|
stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),";
|
||||||
if (anchor.node) {
|
if (anchor.node)
|
||||||
|
{
|
||||||
stream << *(anchor.node);
|
stream << *(anchor.node);
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
stream << "null";
|
stream << "null";
|
||||||
}
|
}
|
||||||
stream << "}";
|
stream << "}";
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& stream,
|
inline std::ostream &operator<<(std::ostream &stream, const std::vector<NodeAnchor> &anchor)
|
||||||
const std::vector<NodeAnchor>& anchor) {
|
{
|
||||||
for (std::vector<NodeAnchor>::const_iterator i = anchor.begin();
|
for (std::vector<NodeAnchor>::const_iterator i = anchor.begin(); i != anchor.end(); ++i)
|
||||||
i != anchor.end(); ++i) {
|
{
|
||||||
stream << *i;
|
stream << *i;
|
||||||
if (i + 1 != anchor.end()) {
|
if (i + 1 != anchor.end())
|
||||||
|
{
|
||||||
stream << "<-";
|
stream << "<-";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,5 +72,4 @@ inline std::ostream& operator<<(std::ostream& stream,
|
||||||
}
|
}
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef SPAN_H_
|
#ifndef SPAN_H_
|
||||||
|
@ -26,8 +33,10 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
|
|
||||||
#include "Node.h"
|
#include "Node.h"
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
class Span {
|
{
|
||||||
|
class Span
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
void clear();
|
void clear();
|
||||||
void insertNodeOfLength(const Node &node, size_t length);
|
void insertNodeOfLength(const Node &node, size_t length);
|
||||||
|
@ -41,52 +50,63 @@ protected:
|
||||||
size_t m_maximumLength = 0;
|
size_t m_maximumLength = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline void Span::clear() {
|
inline void Span::clear()
|
||||||
|
{
|
||||||
m_lengthNodeMap.clear();
|
m_lengthNodeMap.clear();
|
||||||
m_maximumLength = 0;
|
m_maximumLength = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Span::insertNodeOfLength(const Node& node, size_t length) {
|
inline void Span::insertNodeOfLength(const Node &node, size_t length)
|
||||||
|
{
|
||||||
m_lengthNodeMap[length] = node;
|
m_lengthNodeMap[length] = node;
|
||||||
if (length > m_maximumLength) {
|
if (length > m_maximumLength)
|
||||||
|
{
|
||||||
m_maximumLength = length;
|
m_maximumLength = length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void Span::removeNodeOfLengthGreaterThan(size_t length) {
|
inline void Span::removeNodeOfLengthGreaterThan(size_t length)
|
||||||
if (length > m_maximumLength) {
|
{
|
||||||
|
if (length > m_maximumLength)
|
||||||
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t max = 0;
|
size_t max = 0;
|
||||||
std::set<size_t> removeSet;
|
std::set<size_t> removeSet;
|
||||||
for (std::map<size_t, Node>::iterator i = m_lengthNodeMap.begin(),
|
for (std::map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end(); i != e; ++i)
|
||||||
e = m_lengthNodeMap.end();
|
{
|
||||||
i != e; ++i) {
|
if ((*i).first > length)
|
||||||
if ((*i).first > length) {
|
{
|
||||||
removeSet.insert((*i).first);
|
removeSet.insert((*i).first);
|
||||||
} else {
|
}
|
||||||
if ((*i).first > max) {
|
else
|
||||||
|
{
|
||||||
|
if ((*i).first > max)
|
||||||
|
{
|
||||||
max = (*i).first;
|
max = (*i).first;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (std::set<size_t>::iterator i = removeSet.begin(), e = removeSet.end();
|
for (std::set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i)
|
||||||
i != e; ++i) {
|
{
|
||||||
m_lengthNodeMap.erase(*i);
|
m_lengthNodeMap.erase(*i);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_maximumLength = max;
|
m_maximumLength = max;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Node* Span::nodeOfLength(size_t length) {
|
inline Node *Span::nodeOfLength(size_t length)
|
||||||
|
{
|
||||||
std::map<size_t, Node>::iterator f = m_lengthNodeMap.find(length);
|
std::map<size_t, Node>::iterator f = m_lengthNodeMap.find(length);
|
||||||
return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
|
return f == m_lengthNodeMap.end() ? 0 : &(*f).second;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t Span::maximumLength() const { return m_maximumLength; }
|
inline size_t Span::maximumLength() const
|
||||||
|
{
|
||||||
|
return m_maximumLength;
|
||||||
|
}
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef UNIGRAM_H_
|
#ifndef UNIGRAM_H_
|
||||||
|
@ -24,9 +31,11 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
|
|
||||||
#include "KeyValuePair.h"
|
#include "KeyValuePair.h"
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
|
{
|
||||||
|
|
||||||
class Unigram {
|
class Unigram
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
Unigram();
|
Unigram();
|
||||||
|
|
||||||
|
@ -39,7 +48,8 @@ public:
|
||||||
static bool ScoreCompare(const Unigram &a, const Unigram &b);
|
static bool ScoreCompare(const Unigram &a, const Unigram &b);
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) {
|
inline std::ostream &operator<<(std::ostream &stream, const Unigram &gram)
|
||||||
|
{
|
||||||
std::streamsize p = stream.precision();
|
std::streamsize p = stream.precision();
|
||||||
stream.precision(6);
|
stream.precision(6);
|
||||||
stream << "(" << gram.keyValue << "," << gram.score << ")";
|
stream << "(" << gram.keyValue << "," << gram.score << ")";
|
||||||
|
@ -47,17 +57,18 @@ inline std::ostream& operator<<(std::ostream& stream, const Unigram& gram) {
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline std::ostream& operator<<(std::ostream& stream,
|
inline std::ostream &operator<<(std::ostream &stream, const std::vector<Unigram> &grams)
|
||||||
const std::vector<Unigram>& grams) {
|
{
|
||||||
stream << "[" << grams.size() << "]=>{";
|
stream << "[" << grams.size() << "]=>{";
|
||||||
|
|
||||||
size_t index = 0;
|
size_t index = 0;
|
||||||
|
|
||||||
for (std::vector<Unigram>::const_iterator gi = grams.begin();
|
for (std::vector<Unigram>::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index)
|
||||||
gi != grams.end(); ++gi, ++index) {
|
{
|
||||||
stream << index << "=>";
|
stream << index << "=>";
|
||||||
stream << *gi;
|
stream << *gi;
|
||||||
if (gi + 1 != grams.end()) {
|
if (gi + 1 != grams.end())
|
||||||
|
{
|
||||||
stream << ",";
|
stream << ",";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -66,25 +77,32 @@ inline std::ostream& operator<<(std::ostream& stream,
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Unigram::Unigram() : score(0.0) {}
|
inline Unigram::Unigram() : score(0.0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
inline bool Unigram::operator==(const Unigram& another) const {
|
inline bool Unigram::operator==(const Unigram &another) const
|
||||||
|
{
|
||||||
return keyValue == another.keyValue && score == another.score;
|
return keyValue == another.keyValue && score == another.score;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool Unigram::operator<(const Unigram& another) const {
|
inline bool Unigram::operator<(const Unigram &another) const
|
||||||
if (keyValue < another.keyValue) {
|
{
|
||||||
|
if (keyValue < another.keyValue)
|
||||||
|
{
|
||||||
return true;
|
return true;
|
||||||
} else if (keyValue == another.keyValue) {
|
}
|
||||||
|
else if (keyValue == another.keyValue)
|
||||||
|
{
|
||||||
return score < another.score;
|
return score < another.score;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool Unigram::ScoreCompare(const Unigram& a, const Unigram& b) {
|
inline bool Unigram::ScoreCompare(const Unigram &a, const Unigram &b)
|
||||||
|
{
|
||||||
return a.score > b.score;
|
return a.score > b.score;
|
||||||
}
|
}
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +1,27 @@
|
||||||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||||
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
// All possible vChewing-specific modifications are of:
|
||||||
|
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||||
/*
|
/*
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
|
the Software without restriction, including without limitation the rights to
|
||||||
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
1. The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
|
2. No trademark license is granted to use the trade names, trademarks, service
|
||||||
except as required to fulfill notice requirements above.
|
marks, or product names of Contributor, except as required to fulfill notice
|
||||||
|
requirements above.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef WALKER_H_
|
#ifndef WALKER_H_
|
||||||
|
@ -25,23 +32,27 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
|
||||||
|
|
||||||
#include "Grid.h"
|
#include "Grid.h"
|
||||||
|
|
||||||
namespace Gramambular {
|
namespace Gramambular
|
||||||
|
{
|
||||||
|
|
||||||
class Walker {
|
class Walker
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
explicit Walker(Grid *inGrid);
|
explicit Walker(Grid *inGrid);
|
||||||
const std::vector<NodeAnchor> reverseWalk(size_t location,
|
const std::vector<NodeAnchor> reverseWalk(size_t location, double accumulatedScore = 0.0);
|
||||||
double accumulatedScore = 0.0);
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Grid *m_grid;
|
Grid *m_grid;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline Walker::Walker(Grid* inGrid) : m_grid(inGrid) {}
|
inline Walker::Walker(Grid *inGrid) : m_grid(inGrid)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
inline const std::vector<NodeAnchor> Walker::reverseWalk(
|
inline const std::vector<NodeAnchor> Walker::reverseWalk(size_t location, double accumulatedScore)
|
||||||
size_t location, double accumulatedScore) {
|
{
|
||||||
if (!location || location > m_grid->width()) {
|
if (!location || location > m_grid->width())
|
||||||
|
{
|
||||||
return std::vector<NodeAnchor>();
|
return std::vector<NodeAnchor>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,29 +60,31 @@ inline const std::vector<NodeAnchor> Walker::reverseWalk(
|
||||||
|
|
||||||
std::vector<NodeAnchor> nodes = m_grid->nodesEndingAt(location);
|
std::vector<NodeAnchor> nodes = m_grid->nodesEndingAt(location);
|
||||||
|
|
||||||
for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end();
|
for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end(); ++ni)
|
||||||
++ni) {
|
{
|
||||||
if (!(*ni).node) {
|
if (!(*ni).node)
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
(*ni).accumulatedScore = accumulatedScore + (*ni).node->score();
|
(*ni).accumulatedScore = accumulatedScore + (*ni).node->score();
|
||||||
|
|
||||||
std::vector<NodeAnchor> path =
|
std::vector<NodeAnchor> path = reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
|
||||||
reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
|
|
||||||
path.insert(path.begin(), *ni);
|
path.insert(path.begin(), *ni);
|
||||||
|
|
||||||
paths.push_back(path);
|
paths.push_back(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!paths.size()) {
|
if (!paths.size())
|
||||||
|
{
|
||||||
return std::vector<NodeAnchor>();
|
return std::vector<NodeAnchor>();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<NodeAnchor> *result = &*(paths.begin());
|
std::vector<NodeAnchor> *result = &*(paths.begin());
|
||||||
for (std::vector<std::vector<NodeAnchor> >::iterator pi = paths.begin();
|
for (std::vector<std::vector<NodeAnchor>>::iterator pi = paths.begin(); pi != paths.end(); ++pi)
|
||||||
pi != paths.end(); ++pi) {
|
{
|
||||||
if ((*pi).back().accumulatedScore > result->back().accumulatedScore) {
|
if ((*pi).back().accumulatedScore > result->back().accumulatedScore)
|
||||||
|
{
|
||||||
result = &*pi;
|
result = &*pi;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -80,5 +93,4 @@ inline const std::vector<NodeAnchor> Walker::reverseWalk(
|
||||||
}
|
}
|
||||||
} // namespace Gramambular
|
} // namespace Gramambular
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue