LM // Add SymbolLM, plus Re-Enable CoreLM.

This commit is contained in:
ShikiSuen 2022-02-25 23:05:12 +08:00
parent a6692413fa
commit 3e40494781
6 changed files with 148 additions and 12 deletions

View File

@ -21,9 +21,11 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR TH
#define LMInstantiator_H
#include "AssociatedPhrases.h"
#include "CoreLM.h"
#include "CNSLM.h"
#include "ParselessLM.h"
#include "PhraseReplacementMap.h"
#include "SymbolLM.h"
#include "UserPhrasesLM.h"
#include <stdio.h>
#include <unordered_set>
@ -65,6 +67,18 @@ public:
/// If the data model is already loaded.
bool isDataModelLoaded();
/// Asks to load the misc data model at the given path.
/// @param miscDataPath The path of the misc data model; a null pointer is ignored.
void loadMiscData(const char* miscDataPath);
/// If the misc data model is already loaded.
bool isMiscDataLoaded();
/// Asks to load the symbol data model at the given path.
/// @param symbolDataPath The path of the symbol data model; a null pointer is ignored.
void loadSymbolData(const char* symbolDataPath);
/// If the symbol data model is already loaded.
bool isSymbolDataLoaded();
/// Asks to load the CNS data model at the given path.
/// @param cnsDataPath The path of the CNS data model.
void loadCNSData(const char* cnsDataPath);
@ -126,6 +140,8 @@ protected:
std::unordered_set<std::string>& insertedValues);
ParselessLM m_languageModel;
/// Misc data model: opened by loadMiscData() and consulted by unigramsForKey().
CoreLM m_miscModel;
/// Symbol data model: opened by loadSymbolData() and consulted by unigramsForKey().
SymbolLM m_symbolModel;
CNSLM m_cnsModel;
UserPhrasesLM m_userPhrases;
UserPhrasesLM m_excludedPhrases;

View File

@ -30,6 +30,7 @@ LMInstantiator::LMInstantiator()
LMInstantiator::~LMInstantiator()
{
m_languageModel.close();
m_miscModel.close();
m_userPhrases.close();
m_cnsModel.close();
m_excludedPhrases.close();
@ -63,6 +64,32 @@ bool LMInstantiator::isCNSDataLoaded()
return m_cnsModel.isLoaded();
}
/// Replaces the currently opened misc data model with the file at the given path.
/// @param miscDataPath Path of the misc data file. When null, nothing happens
///        and whatever is currently open stays open.
void LMInstantiator::loadMiscData(const char* miscDataPath)
{
    if (!miscDataPath) {
        return;
    }
    // Close first so the model never holds two files at once.
    m_miscModel.close();
    m_miscModel.open(miscDataPath);
}
/// Reports whether the misc data model currently has data loaded.
/// @return The value of m_miscModel.isLoaded().
bool LMInstantiator::isMiscDataLoaded()
{
    const bool miscLoaded = m_miscModel.isLoaded();
    return miscLoaded;
}
/// Replaces the currently opened symbol data model with the file at the given path.
/// @param symbolDataPath Path of the symbol data file. When null, nothing happens
///        and whatever is currently open stays open.
void LMInstantiator::loadSymbolData(const char* symbolDataPath)
{
    if (!symbolDataPath) {
        return;
    }
    // Close first so the model never holds two files at once.
    m_symbolModel.close();
    m_symbolModel.open(symbolDataPath);
}
/// Reports whether the symbol data model currently has data loaded.
/// @return The value of m_symbolModel.isLoaded().
bool LMInstantiator::isSymbolDataLoaded()
{
    const bool symbolLoaded = m_symbolModel.isLoaded();
    return symbolLoaded;
}
void LMInstantiator::loadUserPhrases(const char* userPhrasesDataPath,
const char* excludedPhrasesDataPath)
{
@ -110,6 +137,8 @@ const std::vector<Taiyan::Gramambular::Unigram> LMInstantiator::unigramsForKey(c
}
std::vector<Taiyan::Gramambular::Unigram> allUnigrams;
std::vector<Taiyan::Gramambular::Unigram> miscUnigrams;
std::vector<Taiyan::Gramambular::Unigram> symbolUnigrams;
std::vector<Taiyan::Gramambular::Unigram> userUnigrams;
std::vector<Taiyan::Gramambular::Unigram> cnsUnigrams;
@ -136,6 +165,16 @@ const std::vector<Taiyan::Gramambular::Unigram> LMInstantiator::unigramsForKey(c
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
}
if (m_miscModel.hasUnigramsForKey(key)) {
std::vector<Taiyan::Gramambular::Unigram> rawMiscUnigrams = m_miscModel.unigramsForKey(key);
miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues);
}
if (m_symbolModel.hasUnigramsForKey(key)) {
std::vector<Taiyan::Gramambular::Unigram> rawSymbolUnigrams = m_symbolModel.unigramsForKey(key);
symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues);
}
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) {
std::vector<Taiyan::Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
@ -143,6 +182,8 @@ const std::vector<Taiyan::Gramambular::Unigram> LMInstantiator::unigramsForKey(c
allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end());
allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end());
allUnigrams.insert(allUnigrams.begin(), miscUnigrams.begin(), miscUnigrams.end());
allUnigrams.insert(allUnigrams.end(), symbolUnigrams.begin(), symbolUnigrams.end());
return allUnigrams;
}

View File

@ -0,0 +1,44 @@
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
// All possible vChewing-specific modifications are (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
to permit persons to whom the Software is furnished to do so, subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service marks, or product names of Contributor,
except as required to fulfill notice requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SYMBOLLM_H
#define SYMBOLLM_H
#include <string>
#include <map>
#include <iostream>
#include "LanguageModel.h"
#include "UserPhrasesLM.h"
namespace vChewing {
class SymbolLM: public UserPhrasesLM
{
public:
virtual bool allowConsolidation() override {
return false;
}
virtual float overridedValue() override {
return -12.0;
}
};
}
#endif

View File

@ -36,13 +36,13 @@ NS_ASSUME_NONNULL_BEGIN
+ (BOOL)writeUserPhrase:(NSString *)userPhrase inputMode:(InputMode)mode areWeDuplicating:(BOOL)areWeDuplicating;
+ (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled;
+ (void)setCNSEnabled:(BOOL)cnsEnabled;
/// Returns the path of a ".txt" data file shipped inside the input method's bundle.
/// @param filenameWithoutExtension Resource name WITHOUT the ".txt" extension
///        (for example @"data-symbols"); the implementation supplies the extension.
+ (NSString *)specifyBundleDataPath:(NSString *)filenameWithoutExtension;
+ (NSString *)userPhrasesDataPath:(InputMode)mode;
+ (NSString *)userAssociatedPhrasesDataPath:(InputMode)mode;
+ (NSString *)excludedPhrasesDataPath:(InputMode)mode;
+ (NSString *)phraseReplacementDataPath:(InputMode)mode;
@property (class, readonly, nonatomic) NSString *dataFolderPath;
@property (class, readonly, nonatomic) NSString *cnsDataPath;
@end

View File

@ -45,19 +45,38 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
lm.loadLanguageModel([dataPath UTF8String]);
}
/// Resolves the path of a ".txt" resource inside the bundle that contains the
/// ctlInputMethod class.
/// @param filenameWithoutExtension Resource name without the ".txt" extension.
/// @return The resource's full path; -pathForResource:ofType: yields nil when the
///         bundle has no such resource.
+ (NSString *)specifyBundleDataPath:(NSString *)filenameWithoutExtension
{
    // The class is looked up by name at runtime, then used to locate its bundle.
    Class cls = NSClassFromString(@"ctlInputMethod");
    return [[NSBundle bundleForClass:cls] pathForResource:filenameWithoutExtension ofType:@"txt"];
}
/// Loads every bundled data model (main, misc "data-zhuyinwen", symbols and CNS)
/// into both gLangModelCHT and gLangModelCHS. Each model is loaded only when its
/// own is...Loaded() check reports that it is not loaded yet, so calling this
/// repeatedly does not reload data that is already present.
+ (void)loadDataModels
{
    // ----- gLangModelCHT -----
    if (!gLangModelCHT.isDataModelLoaded()) {
        LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
    }
    if (!gLangModelCHT.isMiscDataLoaded()) {
        gLangModelCHT.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
    }
    if (!gLangModelCHT.isSymbolDataLoaded()) {
        gLangModelCHT.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
    }
    // CNS data is loaded exactly once per model, through the shared bundle-path helper.
    if (!gLangModelCHT.isCNSDataLoaded()) {
        gLangModelCHT.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
    }

    // ----- gLangModelCHS -----
    if (!gLangModelCHS.isDataModelLoaded()) {
        LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
    }
    if (!gLangModelCHS.isMiscDataLoaded()) {
        gLangModelCHS.loadMiscData([[self specifyBundleDataPath:@"data-zhuyinwen"] UTF8String]);
    }
    if (!gLangModelCHS.isSymbolDataLoaded()) {
        gLangModelCHS.loadSymbolData([[self specifyBundleDataPath:@"data-symbols"] UTF8String]);
    }
    if (!gLangModelCHS.isCNSDataLoaded()) {
        gLangModelCHS.loadCNSData([[self specifyBundleDataPath:@"char-kanji-cns"] UTF8String]);
    }
}
@ -67,8 +86,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
if (!gLangModelCHT.isDataModelLoaded()) {
LTLoadLanguageModelFile(@"data-cht", gLangModelCHT);
}
if (!gLangModelCHT.isMiscDataLoaded()) {
gLangModelCHT.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]);
}
if (!gLangModelCHT.isSymbolDataLoaded()){
gLangModelCHT.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]);
}
if (!gLangModelCHT.isCNSDataLoaded()){
gLangModelCHT.loadCNSData([[self cnsDataPath] UTF8String]);
gLangModelCHT.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]);
}
}
@ -76,8 +101,14 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
if (!gLangModelCHS.isDataModelLoaded()) {
LTLoadLanguageModelFile(@"data-chs", gLangModelCHS);
}
if (!gLangModelCHS.isMiscDataLoaded()) {
gLangModelCHS.loadMiscData([[self specifyBundleDataPath: @"data-zhuyinwen"] UTF8String]);
}
if (!gLangModelCHS.isSymbolDataLoaded()){
gLangModelCHS.loadSymbolData([[self specifyBundleDataPath: @"data-symbols"] UTF8String]);
}
if (!gLangModelCHS.isCNSDataLoaded()){
gLangModelCHS.loadCNSData([[self cnsDataPath] UTF8String]);
gLangModelCHS.loadCNSData([[self specifyBundleDataPath: @"char-kanji-cns"] UTF8String]);
}
}
}
@ -308,12 +339,6 @@ static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing
return [[self dataFolderPath] stringByAppendingPathComponent:fileName];
}
/// Path of the "char-kanji-cns.txt" resource inside the bundle that contains the
/// ctlInputMethod class.
+ (NSString *)cnsDataPath
{
    NSBundle *hostBundle = [NSBundle bundleForClass:NSClassFromString(@"ctlInputMethod")];
    return [hostBundle pathForResource:@"char-kanji-cns" ofType:@"txt"];
}
+ (vChewing::LMInstantiator *)lmCHT
{
return &gLangModelCHT;

View File

@ -11,6 +11,8 @@
5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B11328827B94CFB00E58451 /* AppleKeyboardConverter.swift */; };
5B2DB16F27AF6891006D874E /* data-chs.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B2DB16D27AF6891006D874E /* data-chs.txt */; };
5B2DB17027AF6891006D874E /* data-cht.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B2DB16E27AF6891006D874E /* data-cht.txt */; };
5B4D47C127C9304000220DDC /* data-zhuyinwen.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B4D47BD27C9304000220DDC /* data-zhuyinwen.txt */; };
5B4D47C227C9304000220DDC /* data-symbols.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B4D47BE27C9304000220DDC /* data-symbols.txt */; };
5B62A31727AE73A700A19448 /* unzip.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A30927AE73A700A19448 /* unzip.m */; };
5B62A31827AE73A700A19448 /* zip.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A30A27AE73A700A19448 /* zip.m */; };
5B62A31927AE73A700A19448 /* ioapi.m in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A30B27AE73A700A19448 /* ioapi.m */; };
@ -174,6 +176,8 @@
5B2DB16E27AF6891006D874E /* data-cht.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = "data-cht.txt"; path = "Data/data-cht.txt"; sourceTree = "<group>"; };
5B2DB17127AF8771006D874E /* Makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; name = Makefile; path = Data/Makefile; sourceTree = "<group>"; };
5B30F11227BA568800484E24 /* vChewingKeyLayout.bundle */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.plug-in"; path = vChewingKeyLayout.bundle; sourceTree = "<group>"; };
5B4D47BD27C9304000220DDC /* data-zhuyinwen.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = "data-zhuyinwen.txt"; path = "../../libvchewing-data/components/common/data-zhuyinwen.txt"; sourceTree = "<group>"; };
5B4D47BE27C9304000220DDC /* data-symbols.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = "data-symbols.txt"; path = "../../libvchewing-data/components/common/data-symbols.txt"; sourceTree = "<group>"; };
5B62A30927AE73A700A19448 /* unzip.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = unzip.m; sourceTree = "<group>"; };
5B62A30A27AE73A700A19448 /* zip.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = zip.m; sourceTree = "<group>"; };
5B62A30B27AE73A700A19448 /* ioapi.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = ioapi.m; sourceTree = "<group>"; };
@ -206,6 +210,7 @@
5B73FB5F27B2BE1300E9BF49 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = "<group>"; };
5B7BC4AF27AFFBE800F66C24 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Source/WindowNIBs/Base.lproj/frmPrefWindow.xib; sourceTree = "<group>"; };
5B7BC4B227AFFC0B00F66C24 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = Source/WindowNIBs/en.lproj/frmPrefWindow.strings; sourceTree = "<group>"; };
5B8F43ED27C9BC220069AC27 /* SymbolLM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SymbolLM.h; sourceTree = "<group>"; };
5BBBB75D27AED54C0023B93A /* Beep.m4a */ = {isa = PBXFileReference; lastKnownFileType = file; path = Beep.m4a; sourceTree = "<group>"; };
5BBBB75E27AED54C0023B93A /* Fart.m4a */ = {isa = PBXFileReference; lastKnownFileType = file; path = Fart.m4a; sourceTree = "<group>"; };
5BBBB76627AED5DB0023B93A /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/frmNonModalAlertWindow.xib; sourceTree = "<group>"; };
@ -365,6 +370,7 @@
5B4D47B627C9186900220DDC /* InstantiatedModels */ = {
isa = PBXGroup;
children = (
5B8F43ED27C9BC220069AC27 /* SymbolLM.h */,
5B62A32B27AE78B000A19448 /* CNSLM.h */,
);
path = InstantiatedModels;
@ -596,6 +602,8 @@
5BD05B8027B22F3C004C4F1D /* char-kanji-cns.txt */,
5B2DB16D27AF6891006D874E /* data-chs.txt */,
5B2DB16E27AF6891006D874E /* data-cht.txt */,
5B4D47BE27C9304000220DDC /* data-symbols.txt */,
5B4D47BD27C9304000220DDC /* data-zhuyinwen.txt */,
5B2DB17127AF8771006D874E /* Makefile */,
);
name = Data;
@ -962,6 +970,7 @@
D4E33D8A27A838CF006DB1CF /* Localizable.strings in Resources */,
5BDCBB2E27B4E67A00D0CC59 /* vChewingPhraseEditor.app in Resources */,
5BBBB76027AED54C0023B93A /* Fart.m4a in Resources */,
5B4D47C227C9304000220DDC /* data-symbols.txt in Resources */,
6A2E40F6253A69DA00D1AE1D /* Images.xcassets in Resources */,
D4E33D8F27A838F0006DB1CF /* InfoPlist.strings in Resources */,
5BBBB76B27AED5DB0023B93A /* frmNonModalAlertWindow.xib in Resources */,
@ -969,6 +978,7 @@
5BBBB77527AED70B0023B93A /* MenuIcon-SCVIM.png in Resources */,
5B7BC4B027AFFBE800F66C24 /* frmPrefWindow.xib in Resources */,
5BD05B8127B22F3C004C4F1D /* char-kanji-cns.txt in Resources */,
5B4D47C127C9304000220DDC /* data-zhuyinwen.txt in Resources */,
5B2DB17027AF6891006D874E /* data-cht.txt in Resources */,
5BBBB77327AED70B0023B93A /* MenuIcon-TCVIM@2x.png in Resources */,
5BBBB77627AED70B0023B93A /* MenuIcon-TCVIM.png in Resources */,