Pre Merge pull request !26 from ShikiSuen/upd/1.5.4
This commit is contained in:
commit
df42ddade4
|
@ -3,7 +3,7 @@
|
|||
"accessLevel" : "private"
|
||||
},
|
||||
"indentation" : {
|
||||
"tabs" : 1
|
||||
"spaces" : 2
|
||||
},
|
||||
"indentConditionalCompilationBlocks" : true,
|
||||
"indentSwitchCaseLabels" : true,
|
||||
|
@ -51,6 +51,6 @@
|
|||
"UseWhereClausesInForLoops" : false,
|
||||
"ValidateDocumentationComments" : false
|
||||
},
|
||||
"tabWidth" : 4,
|
||||
"tabWidth" : 8,
|
||||
"version" : 1
|
||||
}
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
# 威注音輸入法研發參與相關說明
|
||||
|
||||
威注音輸入法歡迎有人參與。但為了不讓參與者們浪費各自的熱情,特設此文以說明該專案目前最需要協助的地方。
|
||||
威注音輸入法歡迎有熱心的志願者們參與。
|
||||
|
||||
1. 有人能用 Swift 將該專案內這兩個源自 LibFormosa 的組件套件重寫:
|
||||
威注音目前的 codebase 更能代表一個先進的 macOS 輸入法雛形專案的形態。目前的 dev 分支除了 Mandarin 模組(以及其與 KeyHandler 的對接的部分)以外被威注音使用的部分全都是清一色的 Swift codebase,一目了然,方便他人參與,比某些其它開源品牌旗下的專案更具程式方面的生命力。為什麼這樣講呢?那些傳統開源品牌的專案主要使用 C++ 這門不太友好的語言(Mandarin 模組現在對我而言仍舊是天書,一大堆針對記憶體指針的操作完全看不懂。搞不清楚在這一層之上的功能邏輯的話,就無法制定 Swift 版的 coding 策略),這也是我這次用 Swift 重寫了語言模型引擎的原因(也是為後來者行方便)。
|
||||
|
||||
為了不讓參與者們浪費各自的熱情,特設此文以說明該專案目前最需要協助的地方。
|
||||
|
||||
1. 有人能用 Swift 將該專案內的這個源自 LibFormosa 的組件套件重寫:
|
||||
- Mandarin 組件,用以分析普通話音韻數據、創建且控制 Syllable Composer 注音拼識組件。
|
||||
- Gramambular 套裝,這包括了 Source 資料夾下的其餘全部的 (Obj)C(++) 檔案(LMConsolidator 除外)。
|
||||
- LMConsolidator 有 Swift 版本,已經用於威注音語彙編輯器內。給主程式用 C++ 版本僅為了與 Gramambular 協作方便。
|
||||
- 這也包括了所有與 Language Model 有關的實現,因為都是 Gramambular 內的某個語言模組 Protocol 衍生出來的東西。
|
||||
- LMInstantiator 是用來將語言模組副本化的組件,原本不屬於 Gramambular,但與其衍生的各類語言模組高度耦合。
|
||||
- KeyValueBlobReader 不屬於 Gramambular,但與其衍生的各類語言模組高度耦合、也與 KeyHandler 高度耦合。
|
||||
- 一堆記憶體指針操作,實在看不懂這個組件的處理邏輯是什麼,無能為力。
|
||||
2. 讓 Alt+波浪鍵選單能夠在諸如 MS Word 以及終端機內正常工作(可以用方向鍵控制高亮候選內容,等)。
|
||||
- 原理上而言恐怕得欺騙當前正在接受輸入的應用、使其誤以為當前有組字區。這只是推測。
|
||||
3. SQLite 實現。
|
||||
|
@ -25,11 +25,9 @@
|
|||
該專案對源碼格式有規範,且 Swift 與其他 (Obj)C(++) 系語言持不同規範:
|
||||
|
||||
- Swift: 採 [Apple 官方 Swift-Format](https://github.com/apple/swift-format),且施加如下例外修改項目:
|
||||
- Indentation 僅使用 `"indentation" : { "tabs" : 1 },`,不以空格來縮進。
|
||||
- `"indentSwitchCaseLabels" : true,`
|
||||
- `"lineLength" : 120,`
|
||||
- `"NoBlockComments" : false,`
|
||||
- `"tabWidth" : 4,`
|
||||
- `"OnlyOneTrailingClosureArgument" : false,` // SwiftUI 相容
|
||||
- `"UseTripleSlashForDocumentationComments" : false,`
|
||||
- `"DontRepeatTypeInStaticProperties" : false,`
|
||||
|
@ -37,6 +35,6 @@
|
|||
- 該規範以四個西文半形空格為行縮進單位。
|
||||
- 由於今後不會再用這類語言給該倉庫新增內容,所以相關規範就不改動了。
|
||||
|
||||
至於對 Swift 檔案改採 1-Tab 縮進,則是為了在尊重所有用戶的需求的同時、最大程度上節約檔案體積。使用者可自行修改 Xcode 的預設 Tab 縮進尺寸。
|
||||
之前,為了節省檔案體積,曾經對 Swift 檔案改採 1-Tab 縮進。然而,這會導致 Gitee 等線上 git 專案管理網站內的顯示變成 8-Space 縮進。於是,該專案對 Swift 檔案又改回了 2-Spaces 縮進。
|
||||
|
||||
$ EOF.
|
|
@ -56,8 +56,8 @@
|
|||
/* Class = "NSTextFieldCell"; title = "Derived from OpenVanilla McBopopmofo Project."; ObjectID = "QYf-Nf-hoi"; */
|
||||
"QYf-Nf-hoi.title" = "Derived from OpenVanilla McBopopmofo Project.";
|
||||
|
||||
/* Class = "NSTextFieldCell"; title = "McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen."; ObjectID = "VW8-s5-Wpn"; */
|
||||
"VW8-s5-Wpn.title" = "McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.";
|
||||
/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "VW8-s5-Wpn"; */
|
||||
"VW8-s5-Wpn.title" = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine.";
|
||||
|
||||
/* Class = "NSTextFieldCell"; title = "Placeholder for showing copyright information."; ObjectID = "eo3-TK-0rB"; */
|
||||
// "eo3-TK-0rB.title" = "Placeholder for showing copyright information.";
|
||||
|
|
|
@ -56,8 +56,8 @@
|
|||
/* Class = "NSTextFieldCell"; title = "Derived from OpenVanilla McBopopmofo Project."; ObjectID = "QYf-Nf-hoi"; */
|
||||
"QYf-Nf-hoi.title" = "OpenVanilla 小麦注音プロジェクトから派生。";
|
||||
|
||||
/* Class = "NSTextFieldCell"; title = "McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen."; ObjectID = "VW8-s5-Wpn"; */
|
||||
"VW8-s5-Wpn.title" = "小麦注音入力エンジン開発:Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, など。\nmacOS 版威注音の開発:Shiki Suen, Hiraku Wang, など。\n威注音語彙データの維持:Shiki Suen。";
|
||||
/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "VW8-s5-Wpn"; */
|
||||
"VW8-s5-Wpn.title" = "ボポモフォエンジン開発:Lukhnos Liu。\n入力状態管理システム開発:Zonble Yang。\nmacOS 版威注音の開発:Shiki Suen, Hiraku Wang, など。\n威注音語彙データの維持:Shiki Suen。\nMegrez 辞書処理エンジン:Shiki Suen(Lukhnos の Gramambular C++ エンジンを Swift で再開発したものである)。";
|
||||
|
||||
/* Class = "NSTextFieldCell"; title = "Placeholder for showing copyright information."; ObjectID = "eo3-TK-0rB"; */
|
||||
"eo3-TK-0rB.title" = "Placeholder for showing copyright information.";
|
||||
|
|
|
@ -58,7 +58,7 @@
|
|||
|
||||
/* Class = "NSTextFieldCell"; title = "McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.
|
||||
vChewing Phrase Database Maintained by Shiki Suen."; ObjectID = "VW8-s5-Wpn"; */
|
||||
"VW8-s5-Wpn.title" = "小麦注音引擎研发:Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, 等。\n威注音 macOS 程式研发:Shiki Suen, Hiraku Wang, 等。\n威注音词库维护:Shiki Suen。";
|
||||
"VW8-s5-Wpn.title" = "注音拼音输入处理引擎研发:Lukhnos Liu。\n输入法状态管理引擎研发:Zonble Yang。\n威注音 macOS 程式研发:Shiki Suen, Hiraku Wang, 等。\n威注音词库维护:Shiki Suen。\n天权星语汇引擎:Shiki Suen,用 Swift 将 Lukhnos 的 C++ Gramambular 重写而得。";
|
||||
|
||||
/* Class = "NSTextFieldCell"; title = "Placeholder for showing copyright information."; ObjectID = "eo3-TK-0rB"; */
|
||||
// "eo3-TK-0rB.title" = "Placeholder for showing copyright information.";
|
||||
|
|
|
@ -58,7 +58,7 @@
|
|||
|
||||
/* Class = "NSTextFieldCell"; title = "McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.
|
||||
vChewing Phrase Database Maintained by Shiki Suen."; ObjectID = "VW8-s5-Wpn"; */
|
||||
"VW8-s5-Wpn.title" = "小麥注音引擎研發:Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, 等。\n威注音 macOS 程式研發:Shiki Suen, Hiraku Wang, 等。\n威注音詞庫維護:Shiki Suen。";
|
||||
"VW8-s5-Wpn.title" = "注音拼音輸入處理引擎研發:Lukhnos Liu。\n輸入法狀態管理引擎研發:Zonble Yang。\n威注音 macOS 程式研發:Shiki Suen, Hiraku Wang, 等。\n威注音詞庫維護:Shiki Suen。\n天權星語彙引擎:Shiki Suen,用 Swift 將 Lukhnos 的 C++ Gramambular 重寫而得。";
|
||||
|
||||
/* Class = "NSTextFieldCell"; title = "Placeholder for showing copyright information."; ObjectID = "eo3-TK-0rB"; */
|
||||
// "eo3-TK-0rB.title" = "Placeholder for showing copyright information.";
|
||||
|
|
|
@ -3,9 +3,11 @@
|
|||
vChewing macOS: MIT-NTL License 麻理(去商标)授权合约
|
||||
|
||||
© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project.
|
||||
小麦注音引擎研发:Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, 等。
|
||||
注音拼音输入处理引擎研发:Lukhnos Liu。
|
||||
输入法状态管理引擎研发:Zonble Yang。
|
||||
威注音 macOS 程式研发:Shiki Suen, Hiraku Wang, 等。
|
||||
威注音词库维护:Shiki Suen。
|
||||
天权星语汇引擎:Shiki Suen,用 Swift 将 Lukhnos 的 C++ Gramambular 重写而得。
|
||||
|
||||
软件之著作权利人依此麻理授权条款,将其对于软件之著作权利授权释出,只须使用者践履以下二项麻理授权条款叙明之义务性规定,其即享有对此软件程式及其相关说明文档自由不受限制地进行利用之权利,范围包括「使用、重制、修改、合并、出版、散布、再授权、及贩售程式重制作品」等诸多方面之应用,而散布程式之人、更可将上述权利传递予其后收受程式之后手,倘若其后收受程式之人亦服膺以下二项麻理授权条款之义务性规定,则其对程式亦享有与前手运用范围相同之同一权利。
|
||||
|
||||
|
|
|
@ -3,9 +3,11 @@
|
|||
vChewing macOS: MIT-NTL License 麻理(去商標)授權合約
|
||||
|
||||
© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project.
|
||||
小麥注音引擎研發:Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, 等。
|
||||
注音拼音輸入處理引擎研發:Lukhnos Liu。
|
||||
輸入法狀態管理引擎研發:Zonble Yang。
|
||||
威注音 macOS 程式研發:Shiki Suen, Hiraku Wang, 等。
|
||||
威注音詞庫維護:Shiki Suen。
|
||||
天權星語彙引擎:Shiki Suen,用 Swift 將 Lukhnos 的 C++ Gramambular 重寫而得。
|
||||
|
||||
軟體之著作權利人依此麻理授權條款,將其對於軟體之著作權利授權釋出,只須使用者踐履以下二項麻理授權條款敘明之義務性規定,其即享有對此軟體程式及其相關說明文檔自由不受限制地進行利用之權利,範圍包括「使用、重製、修改、合併、出版、散布、再授權、及販售程式重製作品」等諸多方面之應用,而散布程式之人、更可將上述權利傳遞予其後收受程式之後手,倘若其後收受程式之人亦服膺以下二項麻理授權條款之義務性規定,則其對程式亦享有與前手運用範圍相同之同一權利。
|
||||
|
||||
|
|
|
@ -2,10 +2,11 @@
|
|||
|
||||
vChewing macOS: MIT商標不許可ライセンス (MIT-NTL License)
|
||||
|
||||
© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project.
|
||||
小麦注音入力エンジン開発:Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, など。
|
||||
macOS 版威注音の開発:Hiraku Wang, Shiki Suen, など。
|
||||
ボポモフォエンジン開発:Lukhnos Liu。
|
||||
入力状態管理システム開発:Zonble Yang。
|
||||
macOS 版威注音の開発:Shiki Suen, Hiraku Wang, など。
|
||||
威注音語彙データの維持:Shiki Suen。
|
||||
Megrez 辞書処理エンジン:Shiki Suen(Lukhnos の Gramambular C++ エンジンを Swift で再開発したものである)。
|
||||
|
||||
以下に定める条件に従い、本ソフトウェアおよび関連文書のファイル(以下「ソフトウェア」)の複製を取得するすべての人に対し、ソフトウェアを無制限に扱うことを無償で許可します。これには、ソフトウェアの複製を使用、複写、変更、結合、掲載、頒布、サブライセンス、および/または販売する権利、およびソフトウェアを提供する相手に同じことを許可する権利も無制限に含まれます。
|
||||
|
||||
|
|
|
@ -3,9 +3,11 @@ DISCLAIMER: The vChewing project, having no relationship of cooperation or affil
|
|||
vChewing macOS: MIT-NTL License
|
||||
|
||||
© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project.
|
||||
McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.
|
||||
Mandarin Syllable Composer Engine by Lukhnos Liu.
|
||||
Input State Management Architecture by Zonble Yang.
|
||||
vChewing macOS Development: Shiki Suen, Hiraku Wang, etc.
|
||||
vChewing Phrase Database Maintained by Shiki Suen.
|
||||
Megrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
|
|
1
Makefile
1
Makefile
|
@ -28,6 +28,7 @@ clang-format: clang-format-swift clang-format-cpp
|
|||
|
||||
clang-format-swift:
|
||||
@git ls-files --exclude-standard | grep -E '\.swift$$' | xargs swift-format format --in-place --configuration ./.clang-format-swift.json --parallel
|
||||
@git ls-files --exclude-standard | grep -E '\.swift$$' | xargs swift-format lint --configuration ./.clang-format-swift.json --parallel
|
||||
|
||||
clang-format-cpp:
|
||||
@git ls-files --exclude-standard | grep -E '\.(cpp|hpp|c|cc|cxx|hxx|ixx|h|m|mm|hh)$$' | xargs clang-format -i
|
||||
|
|
|
@ -48,10 +48,6 @@ let package = Package(
|
|||
"src/UTF8StringSliceTest.cpp",
|
||||
"src/UTF8UtilTest.cpp",
|
||||
"deps/google-benchmark",
|
||||
"deps/gtest-1.11.0",
|
||||
"deps/pybind11-2.5.0",
|
||||
"deps/rapidjson-1.1.0",
|
||||
"deps/tclap-1.2.2",
|
||||
|
||||
"src/CmdLineOutput.hpp",
|
||||
"src/Config.hpp",
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
// Copyright (c) 2019 and onwards Robert Muckle-Jones (Apache 2.0 License).
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Reads a file one line at a time without loading the whole file into memory.
///
/// Data is pulled from the file handle in pieces of `chunkSize` bytes and
/// accumulated in an internal buffer until the encoded form of `"\n"` shows up.
/// Everything before that delimiter is decoded and handed back as one line; the
/// delimiter itself is dropped.
public class LineReader {
  /// Text encoding used both for the delimiter and for decoding each line.
  let encoding: String.Encoding
  /// Number of bytes requested from the file handle per read (always >= 1).
  let chunkSize: Int
  /// The handle the lines are read from.
  var fileHandle: FileHandle
  /// The line delimiter `"\n"`, encoded with `encoding`.
  let delimData: Data
  /// Bytes read from the file that have not been returned as a line yet.
  var buffer: Data
  /// Set once a read returned no data; `nextLine()` returns `nil` from then on.
  var atEof: Bool

  /// Creates a reader on top of an already opened file handle.
  ///
  /// - Parameters:
  ///   - file: Handle to read from.
  ///   - encoding: Text encoding of the file. Defaults to UTF-8.
  ///   - chunkSize: Bytes to request per read. Values below 1 are raised to 1,
  ///     because a zero-length read returns no data and would be taken for EOF.
  /// - Throws: `CocoaError(.fileReadInapplicableStringEncoding)` when the line
  ///   delimiter cannot be represented in `encoding` (previously a crash).
  public init(
    file: FileHandle, encoding: String.Encoding = .utf8,
    chunkSize: Int = 4096
  ) throws {
    guard let delimiter = "\n".data(using: encoding) else {
      throw CocoaError(.fileReadInapplicableStringEncoding)
    }
    let effectiveChunkSize = max(chunkSize, 1)
    self.encoding = encoding
    self.chunkSize = effectiveChunkSize
    fileHandle = file
    delimData = delimiter
    buffer = Data(capacity: effectiveChunkSize)
    atEof = false
  }

  /// Return next line, or nil on EOF.
  ///
  /// A trailing line that is not terminated by a delimiter is still returned.
  /// Bytes that cannot be decoded with `encoding` no longer crash the reader:
  /// such a line is decoded leniently as UTF-8 instead (see `decodeLine(_:)`).
  public func nextLine() -> String? {
    // Read data chunks from file until a line delimiter is found:
    while !atEof {
      if let delimiterRange = buffer.range(of: delimData) {
        // Everything in front of the delimiter is the line; drop both afterwards.
        let line = decodeLine(buffer.subdata(in: buffer.startIndex..<delimiterRange.lowerBound))
        buffer.removeSubrange(buffer.startIndex..<delimiterRange.upperBound)
        return line
      }

      let nextData = fileHandle.readData(ofLength: chunkSize)
      guard nextData.isEmpty else {
        buffer.append(nextData)
        continue
      }

      // End of file or read error.
      atEof = true
      if !buffer.isEmpty {
        // Buffer contains last line in file (not terminated by delimiter).
        let line = decodeLine(buffer)
        buffer.count = 0
        return line
      }
    }
    return nil
  }

  /// Start reading from the beginning of file.
  public func rewind() {
    fileHandle.seek(toFileOffset: 0)
    buffer.count = 0
    atEof = false
  }

  /// Decodes one line worth of bytes.
  ///
  /// Strict decoding with `encoding` is tried first. When that fails, the bytes
  /// are decoded as UTF-8 with invalid sequences replaced by U+FFFD, so that one
  /// malformed line neither crashes the process nor ends the iteration early.
  private func decodeLine(_ data: Data) -> String {
    String(data: data, encoding: encoding) ?? String(decoding: data, as: UTF8.self)
  }
}
|
||||
|
||||
// MARK: - Sequence

/// Allows a `LineReader` to be consumed with `for line in reader { ... }`.
extension LineReader: Sequence {
  /// Returns an iterator whose every step forwards to `nextLine()`.
  ///
  /// The iterator holds no position of its own: it keeps a reference to this
  /// reader and advances the reader's state, ending when `nextLine()` yields `nil`.
  public func makeIterator() -> AnyIterator<String> {
    let pullNextLine: () -> String? = { self.nextLine() }
    return AnyIterator(pullNextLine)
  }
}
|
|
@ -24,31 +24,20 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef USERSYMBOLLM_H
|
||||
#define USERSYMBOLLM_H
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
#include "LanguageModel.h"
|
||||
#include "UserPhrasesLM.h"
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
@interface Composer : NSObject
|
||||
+ (BOOL)chkKeyValidity:(UniChar)charCode;
|
||||
+ (BOOL)isBufferEmpty;
|
||||
+ (void)clearBuffer;
|
||||
+ (void)combineReadingKey:(UniChar)charCode;
|
||||
+ (BOOL)checkWhetherToneMarkerConfirms;
|
||||
+ (NSString *)getSyllableComposition;
|
||||
+ (void)doBackSpaceToBuffer;
|
||||
+ (NSString *)getComposition;
|
||||
+ (void)ensureParser;
|
||||
@end
|
||||
|
||||
class UserSymbolLM : public UserPhrasesLM
|
||||
{
|
||||
public:
|
||||
bool allowConsolidation() override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
float overridedValue() override
|
||||
{
|
||||
return -12.0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace vChewing
|
||||
|
||||
#endif
|
||||
NS_ASSUME_NONNULL_END
|
|
@ -0,0 +1,117 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#import "Composer.hh"
|
||||
#import "Mandarin.h"
|
||||
#import "vChewing-Swift.h"
|
||||
|
||||
static Mandarin::BopomofoReadingBuffer *PhoneticBuffer;
|
||||
|
||||
/// Objective-C class-method facade over the single file-level
/// Mandarin::BopomofoReadingBuffer (`PhoneticBuffer`), so that callers which
/// cannot see the C++ type can still drive the reading buffer.
@implementation Composer

/// Returns the shared reading buffer, allocating it with the standard keyboard
/// layout on first use. Every method goes through this accessor so that none of
/// them dereferences a NULL `PhoneticBuffer` when called before +ensureParser.
static Mandarin::BopomofoReadingBuffer *SharedPhoneticBuffer()
{
    if (!PhoneticBuffer)
    {
        PhoneticBuffer = new Mandarin::BopomofoReadingBuffer(Mandarin::BopomofoKeyboardLayout::StandardLayout());
    }
    return PhoneticBuffer;
}

/// Asks the reading buffer whether `charCode` is a valid key.
/// NOTE(review): the UniChar is narrowed to `char`, so the high byte of a
/// non-ASCII code unit is discarded before the lookup - confirm callers only
/// pass ASCII-range codes.
+ (BOOL)chkKeyValidity:(UniChar)charCode
{
    return SharedPhoneticBuffer()->isValidKey((char)charCode);
}

/// Returns whether the reading buffer reports itself as empty.
+ (BOOL)isBufferEmpty
{
    return SharedPhoneticBuffer()->isEmpty();
}

/// Clears the reading buffer.
+ (void)clearBuffer
{
    SharedPhoneticBuffer()->clear();
}

/// Feeds one key into the reading buffer (same `char` narrowing as
/// +chkKeyValidity:).
+ (void)combineReadingKey:(UniChar)charCode
{
    SharedPhoneticBuffer()->combineKey((char)charCode);
}

/// Returns whether the reading buffer currently has a tone marker.
+ (BOOL)checkWhetherToneMarkerConfirms
{
    return SharedPhoneticBuffer()->hasToneMarker();
}

/// Returns the composed string of the buffer's current syllable.
+ (NSString *)getSyllableComposition
{
    return [NSString stringWithUTF8String:SharedPhoneticBuffer()->syllable().composedString().c_str()];
}

/// Performs a backspace on the reading buffer.
+ (void)doBackSpaceToBuffer
{
    SharedPhoneticBuffer()->backspace();
}

/// Returns the buffer's composed string.
+ (NSString *)getComposition
{
    return [NSString stringWithUTF8String:SharedPhoneticBuffer()->composedString().c_str()];
}

/// Makes sure the reading buffer exists and uses the keyboard layout selected
/// in `mgrPrefs.mandarinParser`, then clears it.
///
/// Previously the very first call only allocated a standard-layout buffer and
/// returned, so the user's parser preference was not applied until the second
/// call. The layout is now applied on every call, including the first one.
/// An unrecognized preference value falls back to the standard layout and is
/// written back to the preferences as MandarinParserOfStandard.
+ (void)ensureParser
{
    Mandarin::BopomofoReadingBuffer *buffer = SharedPhoneticBuffer();
    switch (mgrPrefs.mandarinParser)
    {
    case MandarinParserOfStandard:
        buffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::StandardLayout());
        break;
    case MandarinParserOfEten:
        buffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::ETenLayout());
        break;
    case MandarinParserOfHsu:
        buffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::HsuLayout());
        break;
    case MandarinParserOfEen26:
        buffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::ETen26Layout());
        break;
    case MandarinParserOfIBM:
        buffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::IBMLayout());
        break;
    case MandarinParserOfMiTAC:
        buffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::MiTACLayout());
        break;
    case MandarinParserOfFakeSeigyou:
        buffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::FakeSeigyouLayout());
        break;
    case MandarinParserOfHanyuPinyin:
        buffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::HanyuPinyinLayout());
        break;
    default:
        buffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::StandardLayout());
        mgrPrefs.mandarinParser = MandarinParserOfStandard;
    }
    buffer->clear();
}

@end
|
|
@ -46,7 +46,7 @@ public class OpenCCBridge: NSObject {
|
|||
///
|
||||
/// - Parameter string: Text in Original Script.
|
||||
/// - Returns: Text converted to Different Script.
|
||||
@objc public static func crossConvert(_ string: String) -> String? {
|
||||
public static func crossConvert(_ string: String) -> String? {
|
||||
switch ctlInputMethod.currentKeyHandler.inputMode {
|
||||
case InputMode.imeModeCHS:
|
||||
return shared.traditionalize?.convert(string)
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 8199254d3abbf63e3b7535bbc975f8519a2d6834
|
||||
Subproject commit 4065cb727373ab12a3401eb3526e4a6208671e59
|
|
@ -30,5 +30,5 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
|
||||
@import Foundation;
|
||||
|
||||
#import "KeyHandler.h"
|
||||
#import "mgrLangModel.h"
|
||||
#import "CTools.h"
|
||||
#import "Composer.hh"
|
||||
|
|
|
@ -47,7 +47,7 @@ class AppDelegate: NSObject, NSApplicationDelegate, ctlNonModalAlertWindowDelega
|
|||
private var ctlAboutWindowInstance: ctlAboutWindow? // New About Window
|
||||
private var checkTask: URLSessionTask?
|
||||
private var updateNextStepURL: URL?
|
||||
private var fsStreamHelper = FSEventStreamHelper(
|
||||
public var fsStreamHelper = FSEventStreamHelper(
|
||||
path: mgrLangModel.dataFolderPath(isDefaultFolder: false),
|
||||
queue: DispatchQueue(label: "vChewing User Phrases")
|
||||
)
|
||||
|
@ -76,7 +76,11 @@ class AppDelegate: NSObject, NSApplicationDelegate, ctlNonModalAlertWindowDelega
|
|||
}
|
||||
}
|
||||
|
||||
@objc func showPreferences() {
|
||||
func updateStreamHelperPath() {
|
||||
fsStreamHelper.path = mgrPrefs.userDataFolderSpecified
|
||||
}
|
||||
|
||||
func showPreferences() {
|
||||
if ctlPrefWindowInstance == nil {
|
||||
ctlPrefWindowInstance = ctlPrefWindow.init(windowNibName: "frmPrefWindow")
|
||||
}
|
||||
|
@ -88,7 +92,7 @@ class AppDelegate: NSObject, NSApplicationDelegate, ctlNonModalAlertWindowDelega
|
|||
}
|
||||
|
||||
// New About Window
|
||||
@objc func showAbout() {
|
||||
func showAbout() {
|
||||
if ctlAboutWindowInstance == nil {
|
||||
ctlAboutWindowInstance = ctlAboutWindow.init(windowNibName: "frmAboutWindow")
|
||||
}
|
||||
|
@ -98,12 +102,10 @@ class AppDelegate: NSObject, NSApplicationDelegate, ctlNonModalAlertWindowDelega
|
|||
NSApp.setActivationPolicy(.accessory)
|
||||
}
|
||||
|
||||
@objc(checkForUpdate)
|
||||
func checkForUpdate() {
|
||||
checkForUpdate(forced: false)
|
||||
}
|
||||
|
||||
@objc(checkForUpdateForced:)
|
||||
func checkForUpdate(forced: Bool) {
|
||||
if checkTask != nil {
|
||||
// busy
|
||||
|
|
|
@ -39,12 +39,12 @@ class AppleKeyboardConverter: NSObject {
|
|||
"org.unknown.keylayout.vChewingIBM",
|
||||
"org.unknown.keylayout.vChewingMiTAC",
|
||||
]
|
||||
@objc class func isDynamicBasicKeyboardLayoutEnabled() -> Bool {
|
||||
class func isDynamicBasicKeyboardLayoutEnabled() -> Bool {
|
||||
AppleKeyboardConverter.arrDynamicBasicKeyLayout.contains(mgrPrefs.basicKeyboardLayout)
|
||||
}
|
||||
|
||||
// 處理 Apple 注音鍵盤佈局類型。
|
||||
@objc class func cnvApple2ABC(_ charCode: UniChar) -> UniChar {
|
||||
class func cnvApple2ABC(_ charCode: UniChar) -> UniChar {
|
||||
var charCode = charCode
|
||||
// 在按鍵資訊被送往 OVMandarin 之前,先轉換為可以被 OVMandarin 正常處理的資訊。
|
||||
if isDynamicBasicKeyboardLayoutEnabled() {
|
||||
|
@ -185,7 +185,7 @@ class AppleKeyboardConverter: NSObject {
|
|||
return charCode
|
||||
}
|
||||
|
||||
@objc class func cnvStringApple2ABC(_ strProcessed: String) -> String {
|
||||
class func cnvStringApple2ABC(_ strProcessed: String) -> String {
|
||||
var strProcessed = strProcessed
|
||||
if isDynamicBasicKeyboardLayoutEnabled() {
|
||||
// 針對不同的 Apple 動態鍵盤佈局糾正大寫英文輸入。
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
|
@ -24,18 +22,12 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef GRAMAMBULAR_H_
|
||||
#define GRAMAMBULAR_H_
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
#include "Bigram.h"
|
||||
#include "BlockReadingBuilder.h"
|
||||
#include "Grid.h"
|
||||
#include "KeyValuePair.h"
|
||||
#include "LanguageModel.h"
|
||||
#include "Node.h"
|
||||
#include "NodeAnchor.h"
|
||||
#include "Span.h"
|
||||
#include "Unigram.h"
|
||||
#include "Walker.h"
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
#endif
|
||||
@interface CTools : NSObject
|
||||
+ (BOOL)isPrintable:(UniChar)charCode;
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
|
@ -22,30 +22,11 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LMConsolidator_hpp
|
||||
#define LMConsolidator_hpp
|
||||
#import "CTools.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <syslog.h>
|
||||
|
||||
using namespace std;
|
||||
namespace vChewing
|
||||
@implementation CTools
|
||||
+ (BOOL)isPrintable:(UniChar)charCode
|
||||
{
|
||||
|
||||
class LMConsolidator
|
||||
{
|
||||
public:
|
||||
static bool CheckPragma(const char *path);
|
||||
static bool FixEOF(const char *path);
|
||||
static bool ConsolidateContent(const char *path, bool shouldCheckPragma);
|
||||
};
|
||||
|
||||
} // namespace vChewing
|
||||
#endif /* LMConsolidator_hpp */
|
||||
return isprint(charCode);
|
||||
}
|
||||
@end
|
|
@ -29,7 +29,7 @@ import Cocoa
|
|||
// Use KeyCodes as much as possible since its recognition won't be affected by macOS Base Keyboard Layouts.
|
||||
// KeyCodes: https://eastmanreference.com/complete-list-of-applescript-key-codes
|
||||
// Also: HIToolbox.framework/Versions/A/Headers/Events.h
|
||||
@objc enum KeyCode: UInt16 {
|
||||
enum KeyCode: UInt16 {
|
||||
case kNone = 0
|
||||
case kCarriageReturn = 36 // Renamed from "kReturn" to avoid nomenclatural confusions.
|
||||
case kTab = 48
|
||||
|
@ -91,11 +91,11 @@ enum CharCode: UInt /* 16 */ {
|
|||
}
|
||||
|
||||
class InputHandler: NSObject {
|
||||
@objc private(set) var useVerticalMode: Bool
|
||||
@objc private(set) var inputText: String?
|
||||
@objc private(set) var inputTextIgnoringModifiers: String?
|
||||
@objc private(set) var charCode: UInt16
|
||||
@objc private(set) var keyCode: UInt16
|
||||
private(set) var useVerticalMode: Bool
|
||||
private(set) var inputText: String?
|
||||
private(set) var inputTextIgnoringModifiers: String?
|
||||
private(set) var charCode: UInt16
|
||||
private(set) var keyCode: UInt16
|
||||
private var isFlagChanged: Bool
|
||||
private var flags: NSEvent.ModifierFlags
|
||||
private var cursorForwardKey: KeyCode
|
||||
|
@ -104,9 +104,9 @@ class InputHandler: NSObject {
|
|||
private var extraChooseCandidateKeyReverse: KeyCode
|
||||
private var absorbedArrowKey: KeyCode
|
||||
private var verticalModeOnlyChooseCandidateKey: KeyCode
|
||||
@objc private(set) var emacsKey: vChewingEmacsKey
|
||||
private(set) var emacsKey: vChewingEmacsKey
|
||||
|
||||
@objc init(
|
||||
init(
|
||||
inputText: String?, keyCode: UInt16, charCode: UInt16, flags: NSEvent.ModifierFlags,
|
||||
isVerticalMode: Bool, inputTextIgnoringModifiers: String? = nil
|
||||
) {
|
||||
|
@ -133,7 +133,7 @@ class InputHandler: NSObject {
|
|||
super.init()
|
||||
}
|
||||
|
||||
@objc init(event: NSEvent, isVerticalMode: Bool) {
|
||||
init(event: NSEvent, isVerticalMode: Bool) {
|
||||
inputText = AppleKeyboardConverter.cnvStringApple2ABC(event.characters ?? "")
|
||||
inputTextIgnoringModifiers = AppleKeyboardConverter.cnvStringApple2ABC(
|
||||
event.charactersIgnoringModifiers ?? "")
|
||||
|
@ -172,143 +172,143 @@ class InputHandler: NSObject {
|
|||
"<\(super.description) inputText:\(String(describing: inputText)), inputTextIgnoringModifiers:\(String(describing: inputTextIgnoringModifiers)) charCode:\(charCode), keyCode:\(keyCode), flags:\(flags), cursorForwardKey:\(cursorForwardKey), cursorBackwardKey:\(cursorBackwardKey), extraChooseCandidateKey:\(extraChooseCandidateKey), extraChooseCandidateKeyReverse:\(extraChooseCandidateKeyReverse), absorbedArrowKey:\(absorbedArrowKey), verticalModeOnlyChooseCandidateKey:\(verticalModeOnlyChooseCandidateKey), emacsKey:\(emacsKey), useVerticalMode:\(useVerticalMode)>"
|
||||
}
|
||||
|
||||
@objc var isShiftHold: Bool {
|
||||
var isShiftHold: Bool {
|
||||
flags.contains([.shift])
|
||||
}
|
||||
|
||||
@objc var isCommandHold: Bool {
|
||||
var isCommandHold: Bool {
|
||||
flags.contains([.command])
|
||||
}
|
||||
|
||||
@objc var isControlHold: Bool {
|
||||
var isControlHold: Bool {
|
||||
flags.contains([.control])
|
||||
}
|
||||
|
||||
@objc var isControlHotKey: Bool {
|
||||
var isControlHotKey: Bool {
|
||||
flags.contains([.control]) && inputText?.first?.isLetter ?? false
|
||||
}
|
||||
|
||||
@objc var isOptionHotKey: Bool {
|
||||
var isOptionHotKey: Bool {
|
||||
flags.contains([.option]) && inputText?.first?.isLetter ?? false
|
||||
}
|
||||
|
||||
@objc var isOptionHold: Bool {
|
||||
var isOptionHold: Bool {
|
||||
flags.contains([.option])
|
||||
}
|
||||
|
||||
@objc var isCapsLockOn: Bool {
|
||||
var isCapsLockOn: Bool {
|
||||
flags.contains([.capsLock])
|
||||
}
|
||||
|
||||
@objc var isNumericPad: Bool {
|
||||
var isNumericPad: Bool {
|
||||
flags.contains([.numericPad])
|
||||
}
|
||||
|
||||
@objc var isFunctionKeyHold: Bool {
|
||||
var isFunctionKeyHold: Bool {
|
||||
flags.contains([.function])
|
||||
}
|
||||
|
||||
@objc var isReservedKey: Bool {
|
||||
var isReservedKey: Bool {
|
||||
guard let code = KeyCode(rawValue: keyCode) else {
|
||||
return false
|
||||
}
|
||||
return code.rawValue != KeyCode.kNone.rawValue
|
||||
}
|
||||
|
||||
@objc var isTab: Bool {
|
||||
var isTab: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kTab
|
||||
}
|
||||
|
||||
@objc var isEnter: Bool {
|
||||
var isEnter: Bool {
|
||||
(KeyCode(rawValue: keyCode) == KeyCode.kCarriageReturn)
|
||||
|| (KeyCode(rawValue: keyCode) == KeyCode.kLineFeed)
|
||||
}
|
||||
|
||||
@objc var isUp: Bool {
|
||||
var isUp: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kUpArrow
|
||||
}
|
||||
|
||||
@objc var isDown: Bool {
|
||||
var isDown: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kDownArrow
|
||||
}
|
||||
|
||||
@objc var isLeft: Bool {
|
||||
var isLeft: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kLeftArrow
|
||||
}
|
||||
|
||||
@objc var isRight: Bool {
|
||||
var isRight: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kRightArrow
|
||||
}
|
||||
|
||||
@objc var isPageUp: Bool {
|
||||
var isPageUp: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kPageUp
|
||||
}
|
||||
|
||||
@objc var isPageDown: Bool {
|
||||
var isPageDown: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kPageDown
|
||||
}
|
||||
|
||||
@objc var isSpace: Bool {
|
||||
var isSpace: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kSpace
|
||||
}
|
||||
|
||||
@objc var isBackSpace: Bool {
|
||||
var isBackSpace: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kBackSpace
|
||||
}
|
||||
|
||||
@objc var isESC: Bool {
|
||||
var isESC: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kEscape
|
||||
}
|
||||
|
||||
@objc var isHome: Bool {
|
||||
var isHome: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kHome
|
||||
}
|
||||
|
||||
@objc var isEnd: Bool {
|
||||
var isEnd: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kEnd
|
||||
}
|
||||
|
||||
@objc var isDelete: Bool {
|
||||
var isDelete: Bool {
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kWindowDelete
|
||||
}
|
||||
|
||||
@objc var isCursorBackward: Bool {
|
||||
var isCursorBackward: Bool {
|
||||
KeyCode(rawValue: keyCode) == cursorBackwardKey
|
||||
}
|
||||
|
||||
@objc var isCursorForward: Bool {
|
||||
var isCursorForward: Bool {
|
||||
KeyCode(rawValue: keyCode) == cursorForwardKey
|
||||
}
|
||||
|
||||
@objc var isAbsorbedArrowKey: Bool {
|
||||
var isAbsorbedArrowKey: Bool {
|
||||
KeyCode(rawValue: keyCode) == absorbedArrowKey
|
||||
}
|
||||
|
||||
@objc var isExtraChooseCandidateKey: Bool {
|
||||
var isExtraChooseCandidateKey: Bool {
|
||||
KeyCode(rawValue: keyCode) == extraChooseCandidateKey
|
||||
}
|
||||
|
||||
@objc var isExtraChooseCandidateKeyReverse: Bool {
|
||||
var isExtraChooseCandidateKeyReverse: Bool {
|
||||
KeyCode(rawValue: keyCode) == extraChooseCandidateKeyReverse
|
||||
}
|
||||
|
||||
@objc var isVerticalModeOnlyChooseCandidateKey: Bool {
|
||||
var isVerticalModeOnlyChooseCandidateKey: Bool {
|
||||
KeyCode(rawValue: keyCode) == verticalModeOnlyChooseCandidateKey
|
||||
}
|
||||
|
||||
@objc var isUpperCaseASCIILetterKey: Bool {
|
||||
var isUpperCaseASCIILetterKey: Bool {
|
||||
// 這裡必須加上「flags == .shift」,否則會出現某些情況下輸入法「誤判當前鍵入的非 Shift 字符為大寫」的問題。
|
||||
charCode >= 65 && charCode <= 90 && flags == .shift
|
||||
}
|
||||
|
||||
@objc var isSymbolMenuPhysicalKey: Bool {
|
||||
var isSymbolMenuPhysicalKey: Bool {
|
||||
// 這裡必須用 KeyCode,這樣才不會受隨 macOS 版本更動的 Apple 動態注音鍵盤排列內容的影響。
|
||||
// 只是必須得與 ![input isShift] 搭配使用才可以(也就是僅判定 Shift 沒被摁下的情形)。
|
||||
KeyCode(rawValue: keyCode) == KeyCode.kSymbolMenuPhysicalKey
|
||||
}
|
||||
}
|
||||
|
||||
@objc enum vChewingEmacsKey: UInt16 {
|
||||
enum vChewingEmacsKey: UInt16 {
|
||||
case none = 0
|
||||
case forward = 6 // F
|
||||
case backward = 2 // B
|
||||
|
@ -319,7 +319,7 @@ class InputHandler: NSObject {
|
|||
}
|
||||
|
||||
class EmacsKeyHelper: NSObject {
|
||||
@objc static func detect(charCode: UniChar, flags: NSEvent.ModifierFlags) -> vChewingEmacsKey {
|
||||
static func detect(charCode: UniChar, flags: NSEvent.ModifierFlags) -> vChewingEmacsKey {
|
||||
let charCode = AppleKeyboardConverter.cnvApple2ABC(charCode)
|
||||
if flags.contains(.control) {
|
||||
return vChewingEmacsKey(rawValue: charCode) ?? .none
|
||||
|
|
|
@ -59,7 +59,6 @@ import Cocoa
|
|||
/// one among the candidates.
|
||||
class InputState: NSObject {
|
||||
/// Represents that the input controller is deactivated.
|
||||
@objc(InputStateDeactivated)
|
||||
class Deactivated: InputState {
|
||||
override var description: String {
|
||||
"<InputState.Deactivated>"
|
||||
|
@ -69,9 +68,8 @@ class InputState: NSObject {
|
|||
// MARK: -
|
||||
|
||||
/// Represents that the composing buffer is empty.
|
||||
@objc(InputStateEmpty)
|
||||
class Empty: InputState {
|
||||
@objc var composingBuffer: String {
|
||||
var composingBuffer: String {
|
||||
""
|
||||
}
|
||||
|
||||
|
@ -83,9 +81,8 @@ class InputState: NSObject {
|
|||
// MARK: -
|
||||
|
||||
/// Represents that the composing buffer is empty.
|
||||
@objc(InputStateEmptyIgnoringPreviousState)
|
||||
class EmptyIgnoringPreviousState: InputState {
|
||||
@objc var composingBuffer: String {
|
||||
var composingBuffer: String {
|
||||
""
|
||||
}
|
||||
|
||||
|
@ -97,11 +94,10 @@ class InputState: NSObject {
|
|||
// MARK: -
|
||||
|
||||
/// Represents that the input controller is committing text into client app.
|
||||
@objc(InputStateCommitting)
|
||||
class Committing: InputState {
|
||||
@objc private(set) var poppedText: String = ""
|
||||
private(set) var poppedText: String = ""
|
||||
|
||||
@objc convenience init(poppedText: String) {
|
||||
convenience init(poppedText: String) {
|
||||
self.init()
|
||||
self.poppedText = poppedText
|
||||
}
|
||||
|
@ -114,12 +110,11 @@ class InputState: NSObject {
|
|||
// MARK: -
|
||||
|
||||
/// Represents that the composing buffer is not empty.
|
||||
@objc(InputStateNotEmpty)
|
||||
class NotEmpty: InputState {
|
||||
@objc private(set) var composingBuffer: String
|
||||
@objc private(set) var cursorIndex: UInt
|
||||
private(set) var composingBuffer: String
|
||||
private(set) var cursorIndex: UInt
|
||||
|
||||
@objc init(composingBuffer: String, cursorIndex: UInt) {
|
||||
init(composingBuffer: String, cursorIndex: UInt) {
|
||||
self.composingBuffer = composingBuffer
|
||||
self.cursorIndex = cursorIndex
|
||||
}
|
||||
|
@ -132,16 +127,15 @@ class InputState: NSObject {
|
|||
// MARK: -
|
||||
|
||||
/// Represents that the user is inputting text.
|
||||
@objc(InputStateInputting)
|
||||
class Inputting: NotEmpty {
|
||||
@objc var poppedText: String = ""
|
||||
@objc var tooltip: String = ""
|
||||
var poppedText: String = ""
|
||||
var tooltip: String = ""
|
||||
|
||||
@objc override init(composingBuffer: String, cursorIndex: UInt) {
|
||||
override init(composingBuffer: String, cursorIndex: UInt) {
|
||||
super.init(composingBuffer: composingBuffer, cursorIndex: cursorIndex)
|
||||
}
|
||||
|
||||
@objc var attributedString: NSAttributedString {
|
||||
var attributedString: NSAttributedString {
|
||||
let attributedSting = NSAttributedString(
|
||||
string: composingBuffer,
|
||||
attributes: [
|
||||
|
@ -163,12 +157,11 @@ class InputState: NSObject {
|
|||
private let kMaxMarkRangeLength = mgrPrefs.maxCandidateLength
|
||||
|
||||
/// Represents that the user is marking a range in the composing buffer.
|
||||
@objc(InputStateMarking)
|
||||
class Marking: NotEmpty {
|
||||
@objc private(set) var markerIndex: UInt
|
||||
@objc private(set) var markedRange: NSRange
|
||||
@objc private var deleteTargetExists = false
|
||||
@objc var tooltip: String {
|
||||
private(set) var markerIndex: UInt
|
||||
private(set) var markedRange: NSRange
|
||||
private var deleteTargetExists = false
|
||||
var tooltip: String {
|
||||
if composingBuffer.count != readings.count {
|
||||
TooltipController.backgroundColor = NSColor(
|
||||
red: 0.55, green: 0.00, blue: 0.00, alpha: 1.00
|
||||
|
@ -251,10 +244,10 @@ class InputState: NSObject {
|
|||
)
|
||||
}
|
||||
|
||||
@objc var tooltipForInputting: String = ""
|
||||
@objc private(set) var readings: [String]
|
||||
var tooltipForInputting: String = ""
|
||||
private(set) var readings: [String]
|
||||
|
||||
@objc init(composingBuffer: String, cursorIndex: UInt, markerIndex: UInt, readings: [String]) {
|
||||
init(composingBuffer: String, cursorIndex: UInt, markerIndex: UInt, readings: [String]) {
|
||||
self.markerIndex = markerIndex
|
||||
let begin = min(cursorIndex, markerIndex)
|
||||
let end = max(cursorIndex, markerIndex)
|
||||
|
@ -263,7 +256,7 @@ class InputState: NSObject {
|
|||
super.init(composingBuffer: composingBuffer, cursorIndex: cursorIndex)
|
||||
}
|
||||
|
||||
@objc var attributedString: NSAttributedString {
|
||||
var attributedString: NSAttributedString {
|
||||
let attributedSting = NSMutableAttributedString(string: composingBuffer)
|
||||
let end = markedRange.location + markedRange.length
|
||||
|
||||
|
@ -296,13 +289,13 @@ class InputState: NSObject {
|
|||
"<InputState.Marking, composingBuffer:\(composingBuffer), cursorIndex:\(cursorIndex), markedRange:\(markedRange)>"
|
||||
}
|
||||
|
||||
@objc func convertToInputting() -> Inputting {
|
||||
func convertToInputting() -> Inputting {
|
||||
let state = Inputting(composingBuffer: composingBuffer, cursorIndex: cursorIndex)
|
||||
state.tooltip = tooltipForInputting
|
||||
return state
|
||||
}
|
||||
|
||||
@objc var validToWrite: Bool {
|
||||
var validToWrite: Bool {
|
||||
/// vChewing allows users to input a string whose length differs
|
||||
/// from the amount of Bopomofo readings. In this case, the range
|
||||
/// in the composing buffer and the readings could not match, so
|
||||
|
@ -323,7 +316,7 @@ class InputState: NSObject {
|
|||
&& markedRange.length <= kMaxMarkRangeLength
|
||||
}
|
||||
|
||||
@objc var chkIfUserPhraseExists: Bool {
|
||||
var chkIfUserPhraseExists: Bool {
|
||||
let text = (composingBuffer as NSString).substring(with: markedRange)
|
||||
let (exactBegin, _) = (composingBuffer as NSString).characterIndex(
|
||||
from: markedRange.location)
|
||||
|
@ -337,7 +330,7 @@ class InputState: NSObject {
|
|||
== true
|
||||
}
|
||||
|
||||
@objc var userPhrase: String {
|
||||
var userPhrase: String {
|
||||
let text = (composingBuffer as NSString).substring(with: markedRange)
|
||||
let (exactBegin, _) = (composingBuffer as NSString).characterIndex(
|
||||
from: markedRange.location)
|
||||
|
@ -348,7 +341,7 @@ class InputState: NSObject {
|
|||
return "\(text) \(joined)"
|
||||
}
|
||||
|
||||
@objc var userPhraseConverted: String {
|
||||
var userPhraseConverted: String {
|
||||
let text =
|
||||
OpenCCBridge.crossConvert(
|
||||
(composingBuffer as NSString).substring(with: markedRange)) ?? ""
|
||||
|
@ -366,18 +359,17 @@ class InputState: NSObject {
|
|||
// MARK: -
|
||||
|
||||
/// Represents that the user is choosing in a candidates list.
|
||||
@objc(InputStateChoosingCandidate)
|
||||
class ChoosingCandidate: NotEmpty {
|
||||
@objc private(set) var candidates: [String]
|
||||
@objc private(set) var useVerticalMode: Bool
|
||||
private(set) var candidates: [String]
|
||||
private(set) var useVerticalMode: Bool
|
||||
|
||||
@objc init(composingBuffer: String, cursorIndex: UInt, candidates: [String], useVerticalMode: Bool) {
|
||||
init(composingBuffer: String, cursorIndex: UInt, candidates: [String], useVerticalMode: Bool) {
|
||||
self.candidates = candidates
|
||||
self.useVerticalMode = useVerticalMode
|
||||
super.init(composingBuffer: composingBuffer, cursorIndex: cursorIndex)
|
||||
}
|
||||
|
||||
@objc var attributedString: NSAttributedString {
|
||||
var attributedString: NSAttributedString {
|
||||
let attributedSting = NSAttributedString(
|
||||
string: composingBuffer,
|
||||
attributes: [
|
||||
|
@ -397,11 +389,10 @@ class InputState: NSObject {
|
|||
|
||||
/// Represents that the user is choosing in a candidates list
|
||||
/// in the associated phrases mode.
|
||||
@objc(InputStateAssociatedPhrases)
|
||||
class AssociatedPhrases: InputState {
|
||||
@objc private(set) var candidates: [String] = []
|
||||
@objc private(set) var useVerticalMode: Bool = false
|
||||
@objc init(candidates: [String], useVerticalMode: Bool) {
|
||||
private(set) var candidates: [String] = []
|
||||
private(set) var useVerticalMode: Bool = false
|
||||
init(candidates: [String], useVerticalMode: Bool) {
|
||||
self.candidates = candidates
|
||||
self.useVerticalMode = useVerticalMode
|
||||
super.init()
|
||||
|
@ -412,11 +403,10 @@ class InputState: NSObject {
|
|||
}
|
||||
}
|
||||
|
||||
@objc(InputStateSymbolTable)
|
||||
class SymbolTable: ChoosingCandidate {
|
||||
@objc var node: SymbolNode
|
||||
var node: SymbolNode
|
||||
|
||||
@objc init(node: SymbolNode, useVerticalMode: Bool) {
|
||||
init(node: SymbolNode, useVerticalMode: Bool) {
|
||||
self.node = node
|
||||
let candidates = node.children?.map(\.title) ?? [String]()
|
||||
super.init(
|
||||
|
@ -432,53 +422,53 @@ class InputState: NSObject {
|
|||
}
|
||||
|
||||
class SymbolNode: NSObject {
|
||||
@objc var title: String
|
||||
@objc var children: [SymbolNode]?
|
||||
var title: String
|
||||
var children: [SymbolNode]?
|
||||
|
||||
@objc init(_ title: String, _ children: [SymbolNode]? = nil) {
|
||||
init(_ title: String, _ children: [SymbolNode]? = nil) {
|
||||
self.title = title
|
||||
self.children = children
|
||||
super.init()
|
||||
}
|
||||
|
||||
@objc init(_ title: String, symbols: String) {
|
||||
init(_ title: String, symbols: String) {
|
||||
self.title = title
|
||||
children = Array(symbols).map { SymbolNode(String($0), nil) }
|
||||
super.init()
|
||||
}
|
||||
|
||||
@objc static let catCommonSymbols = String(
|
||||
static let catCommonSymbols = String(
|
||||
format: NSLocalizedString("catCommonSymbols", comment: ""))
|
||||
@objc static let catHoriBrackets = String(
|
||||
static let catHoriBrackets = String(
|
||||
format: NSLocalizedString("catHoriBrackets", comment: ""))
|
||||
@objc static let catVertBrackets = String(
|
||||
static let catVertBrackets = String(
|
||||
format: NSLocalizedString("catVertBrackets", comment: ""))
|
||||
@objc static let catGreekLetters = String(
|
||||
static let catGreekLetters = String(
|
||||
format: NSLocalizedString("catGreekLetters", comment: ""))
|
||||
@objc static let catMathSymbols = String(
|
||||
static let catMathSymbols = String(
|
||||
format: NSLocalizedString("catMathSymbols", comment: ""))
|
||||
@objc static let catCurrencyUnits = String(
|
||||
static let catCurrencyUnits = String(
|
||||
format: NSLocalizedString("catCurrencyUnits", comment: ""))
|
||||
@objc static let catSpecialSymbols = String(
|
||||
static let catSpecialSymbols = String(
|
||||
format: NSLocalizedString("catSpecialSymbols", comment: ""))
|
||||
@objc static let catUnicodeSymbols = String(
|
||||
static let catUnicodeSymbols = String(
|
||||
format: NSLocalizedString("catUnicodeSymbols", comment: ""))
|
||||
@objc static let catCircledKanjis = String(
|
||||
static let catCircledKanjis = String(
|
||||
format: NSLocalizedString("catCircledKanjis", comment: ""))
|
||||
@objc static let catCircledKataKana = String(
|
||||
static let catCircledKataKana = String(
|
||||
format: NSLocalizedString("catCircledKataKana", comment: ""))
|
||||
@objc static let catBracketKanjis = String(
|
||||
static let catBracketKanjis = String(
|
||||
format: NSLocalizedString("catBracketKanjis", comment: ""))
|
||||
@objc static let catSingleTableLines = String(
|
||||
static let catSingleTableLines = String(
|
||||
format: NSLocalizedString("catSingleTableLines", comment: ""))
|
||||
@objc static let catDoubleTableLines = String(
|
||||
static let catDoubleTableLines = String(
|
||||
format: NSLocalizedString("catDoubleTableLines", comment: ""))
|
||||
@objc static let catFillingBlocks = String(
|
||||
static let catFillingBlocks = String(
|
||||
format: NSLocalizedString("catFillingBlocks", comment: ""))
|
||||
@objc static let catLineSegments = String(
|
||||
static let catLineSegments = String(
|
||||
format: NSLocalizedString("catLineSegments", comment: ""))
|
||||
|
||||
@objc static let root: SymbolNode = .init(
|
||||
static let root: SymbolNode = .init(
|
||||
"/",
|
||||
[
|
||||
SymbolNode("`"),
|
||||
|
|
|
@ -1,102 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
@class InputHandler;
|
||||
@class InputState;
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
typedef NSString *const InputMode NS_TYPED_ENUM;
|
||||
extern InputMode imeModeCHT;
|
||||
extern InputMode imeModeCHS;
|
||||
extern InputMode imeModeNULL;
|
||||
|
||||
struct BufferStatePackage
|
||||
{
|
||||
NSString *composedText;
|
||||
NSInteger cursorIndex;
|
||||
NSString *resultOfRear;
|
||||
NSString *resultOfFront;
|
||||
};
|
||||
|
||||
@class KeyHandler;
|
||||
|
||||
@protocol KeyHandlerDelegate <NSObject>
|
||||
- (id)ctlCandidateForKeyHandler:(KeyHandler *)keyHandler;
|
||||
- (void)keyHandler:(KeyHandler *)keyHandler didSelectCandidateAtIndex:(NSInteger)index ctlCandidate:(id)controller;
|
||||
- (BOOL)keyHandler:(KeyHandler *)keyHandler didRequestWriteUserPhraseWithState:(InputState *)state;
|
||||
@end
|
||||
|
||||
@interface KeyHandler : NSObject
|
||||
|
||||
- (BOOL)isBuilderEmpty;
|
||||
|
||||
- (void)fixNodeWithValue:(NSString *)value NS_SWIFT_NAME(fixNode(value:));
|
||||
- (void)clear;
|
||||
|
||||
@property(strong, nonatomic) InputMode inputMode;
|
||||
@property(weak, nonatomic) id<KeyHandlerDelegate> delegate;
|
||||
|
||||
// The following items need to be exposed to Swift:
|
||||
- (void)_walk;
|
||||
- (NSString *)_popOverflowComposingTextAndWalk;
|
||||
- (NSArray<NSString *> *)_currentReadings;
|
||||
|
||||
- (BOOL)checkWhetherToneMarkerConfirmsPhoneticReadingBuffer;
|
||||
- (BOOL)chkKeyValidity:(UniChar)value;
|
||||
- (BOOL)ifLangModelHasUnigramsForKey:(NSString *)reading;
|
||||
- (BOOL)isPhoneticReadingBufferEmpty;
|
||||
- (BOOL)isPrintable:(UniChar)charCode;
|
||||
- (NSArray<NSString *> *)buildAssociatePhraseArrayWithKey:(NSString *)key;
|
||||
- (NSArray<NSString *> *)getCandidatesArray;
|
||||
- (NSInteger)getKeyLengthAtIndexZero;
|
||||
- (NSInteger)getBuilderCursorIndex;
|
||||
- (NSInteger)getBuilderLength;
|
||||
- (NSInteger)getPackagedCursorIndex;
|
||||
- (NSString *)getComposedText;
|
||||
- (NSString *)getCompositionFromPhoneticReadingBuffer;
|
||||
- (NSString *)getStrLocationResult:(BOOL)isFront NS_SWIFT_NAME(getStrLocationResult(isFront:));
|
||||
- (NSString *)getSyllableCompositionFromPhoneticReadingBuffer;
|
||||
- (void)clearPhoneticReadingBuffer;
|
||||
- (void)combinePhoneticReadingBufferKey:(UniChar)charCode;
|
||||
- (void)createNewBuilder;
|
||||
- (void)dealWithOverrideModelSuggestions;
|
||||
- (void)deleteBuilderReadingAfterCursor;
|
||||
- (void)deleteBuilderReadingInFrontOfCursor;
|
||||
- (void)doBackSpaceToPhoneticReadingBuffer;
|
||||
- (void)ensurePhoneticParser;
|
||||
- (void)insertReadingToBuilderAtCursor:(NSString *)reading;
|
||||
- (void)packageBufferStateMaterials;
|
||||
- (void)removeBuilderAndReset:(BOOL)shouldReset;
|
||||
- (void)setBuilderCursorIndex:(NSInteger)value;
|
||||
- (void)setInputModesToLM:(BOOL)isCHS;
|
||||
- (void)syncBaseLMPrefs;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
|
@ -1,637 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#import "KeyHandler.h"
|
||||
#import "Gramambular.h"
|
||||
#import "LMInstantiator.h"
|
||||
#import "Mandarin.h"
|
||||
#import "UserOverrideModel.h"
|
||||
#import "mgrLangModel_Privates.h"
|
||||
#import "vChewing-Swift.h"
|
||||
#import <string>
|
||||
|
||||
InputMode imeModeCHS = ctlInputMethod.kIMEModeCHS;
|
||||
InputMode imeModeCHT = ctlInputMethod.kIMEModeCHT;
|
||||
InputMode imeModeNULL = ctlInputMethod.kIMEModeNULL;
|
||||
|
||||
typedef vChewing::LMInstantiator BaseLM;
|
||||
typedef vChewing::UserOverrideModel UserOverrideLM;
|
||||
typedef Gramambular::BlockReadingBuilder BlockBuilder;
|
||||
typedef Mandarin::BopomofoReadingBuffer PhoneticBuffer;
|
||||
|
||||
static const double kEpsilon = 0.000001;
|
||||
|
||||
NSString *packagedComposedText;
|
||||
NSInteger packagedCursorIndex;
|
||||
NSString *packagedResultOfRear;
|
||||
NSString *packagedResultOfFront;
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
static double FindHighestScore(const std::vector<Gramambular::NodeAnchor> &nodes, double epsilon)
|
||||
{
|
||||
double highestScore = 0.0;
|
||||
for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni)
|
||||
{
|
||||
double score = ni->node->highestUnigramScore();
|
||||
if (score > highestScore)
|
||||
highestScore = score;
|
||||
}
|
||||
return highestScore + epsilon;
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
class NodeAnchorDescendingSorter
|
||||
{
|
||||
public:
|
||||
bool operator()(const Gramambular::NodeAnchor &a, const Gramambular::NodeAnchor &b) const
|
||||
{
|
||||
return a.node->key().length() > b.node->key().length();
|
||||
}
|
||||
};
|
||||
|
||||
// if DEBUG is defined, a DOT file (GraphViz format) will be written to the
|
||||
// specified path every time the grid is walked
|
||||
#if DEBUG
|
||||
static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot";
|
||||
#endif
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
@implementation KeyHandler
|
||||
{
|
||||
// the reading buffer that takes user input
|
||||
PhoneticBuffer *_bpmfReadingBuffer;
|
||||
|
||||
// language model
|
||||
BaseLM *_languageModel;
|
||||
|
||||
// user override model
|
||||
UserOverrideLM *_userOverrideModel;
|
||||
|
||||
// the grid (lattice) builder for the unigrams (and bigrams)
|
||||
BlockBuilder *_builder;
|
||||
|
||||
// latest walked path (trellis) using the Viterbi algorithm
|
||||
std::vector<Gramambular::NodeAnchor> _walkedNodes;
|
||||
|
||||
NSString *_inputMode;
|
||||
}
|
||||
|
||||
@synthesize delegate = _delegate;
|
||||
|
||||
// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE.
|
||||
// VARIABLE: "_inputMode"
|
||||
- (NSString *)inputMode
|
||||
{
|
||||
return _inputMode;
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
- (BOOL)isBuilderEmpty
|
||||
{
|
||||
return (_builder->grid().width() == 0);
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE.
|
||||
// VARIABLE: "_inputMode"
|
||||
- (void)setInputMode:(NSString *)value
|
||||
{
|
||||
// 下面這句的「isKindOfClass」是做類型檢查,
|
||||
// 為了應對出現輸入法 plist 被改壞掉這樣的極端情況。
|
||||
BOOL isCHS = [value isKindOfClass:[NSString class]] && [value isEqual:imeModeCHS];
|
||||
|
||||
// 緊接著將新的簡繁輸入模式提報給 ctlInputMethod:
|
||||
ctlInputMethod.currentInputMode = isCHS ? imeModeCHS : imeModeCHT;
|
||||
mgrPrefs.mostRecentInputMode = ctlInputMethod.currentInputMode;
|
||||
|
||||
// 拿當前的 _inputMode 與 ctlInputMethod 的提報結果對比,不同的話則套用新設定:
|
||||
if (![_inputMode isEqualToString:ctlInputMethod.currentInputMode])
|
||||
{
|
||||
// Reinitiate language models if necessary
|
||||
[self setInputModesToLM:isCHS];
|
||||
|
||||
// Synchronize the sub-languageModel state settings to the new LM.
|
||||
[self syncBaseLMPrefs];
|
||||
|
||||
[self removeBuilderAndReset:YES];
|
||||
|
||||
if (![self isPhoneticReadingBufferEmpty])
|
||||
[self clearPhoneticReadingBuffer];
|
||||
}
|
||||
_inputMode = ctlInputMethod.currentInputMode;
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE: Required by an ObjC(pp)-based class.
|
||||
- (void)dealloc
|
||||
{ // clean up everything
|
||||
if (_bpmfReadingBuffer)
|
||||
delete _bpmfReadingBuffer;
|
||||
if (_builder)
|
||||
[self removeBuilderAndReset:NO];
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE: Not placeable in swift extensions.
|
||||
- (instancetype)init
|
||||
{
|
||||
self = [super init];
|
||||
if (self)
|
||||
{
|
||||
[self ensurePhoneticParser];
|
||||
[self setInputMode:ctlInputMethod.currentInputMode];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
- (void)fixNodeWithValue:(NSString *)value
|
||||
{
|
||||
NSInteger cursorIndex = [self getActualCandidateCursorIndex];
|
||||
std::string stringValue(value.UTF8String);
|
||||
Gramambular::NodeAnchor selectedNode = _builder->grid().fixNodeSelectedCandidate(cursorIndex, stringValue);
|
||||
if (!mgrPrefs.useSCPCTypingMode)
|
||||
{ // 不要針對逐字選字模式啟用臨時半衰記憶模型。
|
||||
// If the length of the readings and the characters do not match,
|
||||
// it often means it is a special symbol and it should not be stored
|
||||
// in the user override model.
|
||||
BOOL addToOverrideModel = YES;
|
||||
if (selectedNode.spanningLength != [value count])
|
||||
addToOverrideModel = NO;
|
||||
|
||||
if (addToOverrideModel)
|
||||
{
|
||||
double score = selectedNode.node->scoreForCandidate(stringValue);
|
||||
if (score <= -12) // 威注音的 SymbolLM 的 Score 是 -12。
|
||||
addToOverrideModel = NO;
|
||||
}
|
||||
if (addToOverrideModel)
|
||||
_userOverrideModel->observe(_walkedNodes, cursorIndex, stringValue, [[NSDate date] timeIntervalSince1970]);
|
||||
}
|
||||
[self _walk];
|
||||
|
||||
if (mgrPrefs.moveCursorAfterSelectingCandidate)
|
||||
{
|
||||
size_t nextPosition = 0;
|
||||
for (auto node : _walkedNodes)
|
||||
{
|
||||
if (nextPosition >= cursorIndex)
|
||||
break;
|
||||
nextPosition += node.spanningLength;
|
||||
}
|
||||
if (nextPosition <= [self getBuilderLength])
|
||||
[self setBuilderCursorIndex:nextPosition];
|
||||
}
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
- (void)clear
|
||||
{
|
||||
[self clearPhoneticReadingBuffer];
|
||||
_builder->clear();
|
||||
_walkedNodes.clear();
|
||||
}
|
||||
|
||||
#pragma mark - States Building
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
- (void)packageBufferStateMaterials
|
||||
{
|
||||
// We gather the data through this function, package it,
|
||||
// and send it to our Swift extension to build the InputState.Inputting there.
|
||||
// Otherwise, ObjC++ always bugs for "expecting a type".
|
||||
|
||||
// "updating the composing buffer" means to request the client to "refresh" the text input buffer
|
||||
// with our "composing text"
|
||||
NSMutableString *composingBuffer = [[NSMutableString alloc] init];
|
||||
NSInteger composedStringCursorIndex = 0;
|
||||
|
||||
// we must do some Unicode codepoint counting to find the actual cursor location for the client
|
||||
// i.e. we need to take UTF-16 into consideration, for which a surrogate pair takes 2 UniChars
|
||||
// locations
|
||||
|
||||
size_t readingCursorIndex = 0;
|
||||
size_t builderCursorIndex = [self getBuilderCursorIndex];
|
||||
|
||||
NSString *resultOfRear = @"";
|
||||
NSString *resultOfFront = @"";
|
||||
|
||||
for (std::vector<Gramambular::NodeAnchor>::iterator wi = _walkedNodes.begin(), we = _walkedNodes.end(); wi != we;
|
||||
++wi)
|
||||
{
|
||||
if ((*wi).node)
|
||||
{
|
||||
std::string nodeStr = (*wi).node->currentKeyValue().value;
|
||||
NSString *valueString = [NSString stringWithUTF8String:nodeStr.c_str()];
|
||||
[composingBuffer appendString:valueString];
|
||||
|
||||
NSArray<NSString *> *splited = [valueString split];
|
||||
NSInteger codepointCount = splited.count;
|
||||
|
||||
// this re-aligns the cursor index in the composed string
|
||||
// (the actual cursor on the screen) with the builder's logical
|
||||
// (reading) cursor; each built node has a "spanning length"
|
||||
// (e.g. two reading blocks have a spanning length of 2), and we
|
||||
// accumulate those lengths to calculate the displayed cursor
|
||||
// index
|
||||
size_t spanningLength = (*wi).spanningLength;
|
||||
if (readingCursorIndex + spanningLength <= builderCursorIndex)
|
||||
{
|
||||
composedStringCursorIndex += [valueString length];
|
||||
readingCursorIndex += spanningLength;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (codepointCount == spanningLength)
|
||||
{
|
||||
for (size_t i = 0; i < codepointCount && readingCursorIndex < builderCursorIndex; i++)
|
||||
{
|
||||
composedStringCursorIndex += [splited[i] length];
|
||||
readingCursorIndex++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (readingCursorIndex < builderCursorIndex)
|
||||
{
|
||||
composedStringCursorIndex += [valueString length];
|
||||
readingCursorIndex += spanningLength;
|
||||
if (readingCursorIndex > builderCursorIndex)
|
||||
{
|
||||
readingCursorIndex = builderCursorIndex;
|
||||
}
|
||||
if (builderCursorIndex == 0)
|
||||
{
|
||||
resultOfFront =
|
||||
[NSString stringWithUTF8String:_builder->readings()[builderCursorIndex].c_str()];
|
||||
}
|
||||
else if (builderCursorIndex >= _builder->readings().size())
|
||||
{
|
||||
resultOfRear = [NSString
|
||||
stringWithUTF8String:_builder->readings()[_builder->readings().size() - 1].c_str()];
|
||||
}
|
||||
else
|
||||
{
|
||||
resultOfFront =
|
||||
[NSString stringWithUTF8String:_builder->readings()[builderCursorIndex].c_str()];
|
||||
resultOfRear =
|
||||
[NSString stringWithUTF8String:_builder->readings()[builderCursorIndex - 1].c_str()];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// now that we have gathered all the info, we separate the composing buffer into two parts, head and tail,
|
||||
// and insert the reading text (the Mandarin syllable) in between them;
|
||||
// the reading text is what the user is typing
|
||||
NSString *head = [composingBuffer substringToIndex:composedStringCursorIndex];
|
||||
NSString *reading = [self getCompositionFromPhoneticReadingBuffer];
|
||||
NSString *tail = [composingBuffer substringFromIndex:composedStringCursorIndex];
|
||||
NSString *composedText = [head stringByAppendingString:[reading stringByAppendingString:tail]];
|
||||
NSInteger cursorIndex = composedStringCursorIndex + [reading length];
|
||||
|
||||
packagedComposedText = composedText;
|
||||
packagedCursorIndex = cursorIndex;
|
||||
packagedResultOfRear = resultOfRear;
|
||||
packagedResultOfFront = resultOfFront;
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE.
|
||||
- (NSString *)getStrLocationResult:(BOOL)isFront
|
||||
{
|
||||
if (isFront)
|
||||
return packagedResultOfFront;
|
||||
else
|
||||
return packagedResultOfRear;
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE.
|
||||
- (NSString *)getComposedText
|
||||
{
|
||||
return packagedComposedText;
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE.
|
||||
- (NSInteger)getPackagedCursorIndex
|
||||
{
|
||||
return packagedCursorIndex;
|
||||
}
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
/// Re-walks the builder's grid and stores the resulting node anchors in
/// `_walkedNodes`, ordered from the head of the grid to its tail.
- (void)_walk
{
    // The Gramambular walker picks the most likely path through the grid
    // (a maximum-likelihood estimate of the characters for the typed syllables).
    Gramambular::Walker walker(&_builder->grid());

    // reverseWalk() traces the trellis starting from its end, so the result
    // comes back tail-first; flip it to obtain forward order.
    _walkedNodes = walker.reverseWalk(_builder->grid().width());
    std::reverse(_walkedNodes.begin(), _walkedNodes.end());

#if DEBUG
    // Debug builds additionally dump the grid as GraphViz DOT text to
    // kGraphVizOutputfile. The write result is deliberately ignored.
    NSError *error = nil;
    NSString *dotStr = [NSString stringWithUTF8String:_builder->grid().dumpDOT().c_str()];
    BOOL __unused success = [dotStr writeToFile:kGraphVizOutputfile
                                     atomically:YES
                                       encoding:NSUTF8StringEncoding
                                          error:&error];
#endif
}
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
/// Trims the head of the builder when the grid is wider than the configured
/// composing-buffer size, then re-walks the grid.
///
/// @return The text of the first walked node if it was popped off the head,
///         otherwise an empty string.
- (NSString *)_popOverflowComposingTextAndWalk
{
    // Ideally the user could type forever, but the walk gets slower as the
    // number of nodes grows. Text at the rear of the buffer has usually lost
    // its influence on the overall result anyway, so it is committed
    // ("popped out") once the buffer overflows.
    NSString *poppedText = @"";
    const size_t bufferLimit = (size_t)mgrPrefs.composingBufferSize;

    if (_builder->grid().width() > bufferLimit && !_walkedNodes.empty())
    {
        Gramambular::NodeAnchor &headAnchor = _walkedNodes.front();
        poppedText = [NSString stringWithUTF8String:headAnchor.node->currentKeyValue().value.c_str()];
        _builder->removeHeadReadings(headAnchor.spanningLength);
    }

    [self _walk];
    return poppedText;
}
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
/// Returns the builder's current readings converted to NSString objects,
/// in the order the builder reports them.
- (NSArray<NSString *> *)_currentReadings
{
    NSMutableArray<NSString *> *converted = [[NSMutableArray alloc] init];
    const std::vector<std::string> readings = _builder->readings();
    for (const std::string &reading : readings)
    {
        [converted addObject:[NSString stringWithUTF8String:reading.c_str()]];
    }
    return converted;
}
|
||||
|
||||
// NON-SWIFTIFIABLE
|
||||
/// Collects the associated phrases the language model holds for `key`.
///
/// @param key The lookup key. A nil key yields an empty array.
/// @return The phrases as NSStrings, in the order the language model returns
///         them; empty when the model has no entry for the key.
- (NSArray<NSString *> *)buildAssociatePhraseArrayWithKey:(NSString *)key
{
    NSMutableArray<NSString *> *array = [NSMutableArray array];

    // Messaging nil returns NULL here, and constructing std::string from NULL
    // is undefined behaviour, so bail out early.
    const char *rawKey = key.UTF8String;
    if (rawKey == NULL)
    {
        return array;
    }

    const std::string cppKey(rawKey);
    if (!_languageModel->hasAssociatedPhrasesForKey(cppKey))
    {
        return array;
    }

    const std::vector<std::string> phrases = _languageModel->associatedPhrasesForKey(cppKey);
    for (const std::string &phrase : phrases)
    {
        // initWithUTF8String: returns nil for bytes that are not valid UTF-8,
        // and -addObject: raises on nil; skip such entries instead.
        NSString *item = [[NSString alloc] initWithUTF8String:phrase.c_str()];
        if (item != nil)
        {
            [array addObject:item];
        }
    }
    return array;
}
|
||||
|
||||
#pragma mark - 必須用 ObjCpp 處理的部分: Mandarin
|
||||
|
||||
/// Asks the reading buffer whether `charCode` is a valid key.
///
/// The buffer API takes a `char`, while `charCode` is a 16-bit UniChar.
/// Narrowing a value above 0x7F would silently map it onto some unrelated
/// 8-bit value, so such code units are rejected before the cast.
- (BOOL)chkKeyValidity:(UniChar)charCode
{
    if (charCode > 0x7F)
    {
        return NO;
    }
    return _bpmfReadingBuffer->isValidKey((char)charCode);
}
|
||||
|
||||
// Forwards to the reading buffer's isEmpty().
- (BOOL)isPhoneticReadingBufferEmpty
|
||||
{
|
||||
return _bpmfReadingBuffer->isEmpty();
|
||||
}
|
||||
|
||||
// Forwards to the reading buffer's clear().
- (void)clearPhoneticReadingBuffer
|
||||
{
|
||||
_bpmfReadingBuffer->clear();
|
||||
}
|
||||
|
||||
// Feeds one key into the reading buffer via combineKey().
// NOTE(review): the 16-bit UniChar is narrowed to char before the call;
// presumably callers only pass keys already accepted by chkKeyValidity: — confirm.
- (void)combinePhoneticReadingBufferKey:(UniChar)charCode
|
||||
{
|
||||
_bpmfReadingBuffer->combineKey((char)charCode);
|
||||
}
|
||||
|
||||
// Reports whether the reading buffer currently has a tone marker (hasToneMarker()).
- (BOOL)checkWhetherToneMarkerConfirmsPhoneticReadingBuffer
|
||||
{
|
||||
return _bpmfReadingBuffer->hasToneMarker();
|
||||
}
|
||||
|
||||
// Returns the composed string of the buffer's current syllable
// (syllable().composedString()) converted from UTF-8 to NSString.
- (NSString *)getSyllableCompositionFromPhoneticReadingBuffer
|
||||
{
|
||||
return [NSString stringWithUTF8String:_bpmfReadingBuffer->syllable().composedString().c_str()];
|
||||
}
|
||||
|
||||
// Forwards to the reading buffer's backspace().
- (void)doBackSpaceToPhoneticReadingBuffer
|
||||
{
|
||||
_bpmfReadingBuffer->backspace();
|
||||
}
|
||||
|
||||
// Returns the reading buffer's own composedString() converted from UTF-8 to
// NSString (as opposed to the syllable-level string returned by
// getSyllableCompositionFromPhoneticReadingBuffer).
- (NSString *)getCompositionFromPhoneticReadingBuffer
|
||||
{
|
||||
return [NSString stringWithUTF8String:_bpmfReadingBuffer->composedString().c_str()];
|
||||
}
|
||||
|
||||
/// Creates the Bopomofo reading buffer on first use; on later calls,
/// re-applies the keyboard layout selected by `mgrPrefs.mandarinParser`
/// to the existing buffer.
- (void)ensurePhoneticParser
{
    // No buffer yet: create one with the standard layout and stop.
    // NOTE(review): this path does not consult mgrPrefs.mandarinParser —
    // confirm callers invoke this method again after the buffer exists.
    if (!_bpmfReadingBuffer)
    {
        _bpmfReadingBuffer = new Mandarin::BopomofoReadingBuffer(Mandarin::BopomofoKeyboardLayout::StandardLayout());
        return;
    }

    switch (mgrPrefs.mandarinParser)
    {
    case MandarinParserOfStandard:
        _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::StandardLayout());
        break;
    case MandarinParserOfEten:
        _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::ETenLayout());
        break;
    case MandarinParserOfHsu:
        _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::HsuLayout());
        break;
    case MandarinParserOfEen26:
        _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::ETen26Layout());
        break;
    case MandarinParserOfIBM:
        _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::IBMLayout());
        break;
    case MandarinParserOfMiTAC:
        _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::MiTACLayout());
        break;
    case MandarinParserOfFakeSeigyou:
        _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::FakeSeigyouLayout());
        break;
    case MandarinParserOfHanyuPinyin:
        _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::HanyuPinyinLayout());
        break;
    default:
        // Unrecognised preference value: fall back to the standard layout and
        // write that choice back to the preferences.
        _bpmfReadingBuffer->setKeyboardLayout(Mandarin::BopomofoKeyboardLayout::StandardLayout());
        mgrPrefs.mandarinParser = MandarinParserOfStandard;
    }
}
|
||||
|
||||
#pragma mark - 必須用 ObjCpp 處理的部分: Gramambular 等
|
||||
|
||||
/// Destroys the current grid builder, optionally replacing it with a new one.
///
/// @param shouldReset When YES, a new builder is created via
///        -createNewBuilder after the old one (if any) has been deleted.
- (void)removeBuilderAndReset:(BOOL)shouldReset
{
    if (_builder)
    {
        delete _builder;
        // Clear the pointer so it never dangles: without this, a later call
        // with shouldReset == NO would see a non-null pointer and delete the
        // same object a second time.
        _builder = nullptr;
    }

    if (shouldReset)
    {
        [self createNewBuilder];
    }
}
|
||||
|
||||
// Allocates a new BlockReadingBuilder bound to the current `_languageModel`
// and stores it in `_builder`, then sets "-" as its join separator.
// NOTE(review): any previous `_builder` value is overwritten without being
// deleted here; presumably callers go through removeBuilderAndReset: — confirm.
- (void)createNewBuilder
|
||||
{
|
||||
_builder = new Gramambular::BlockReadingBuilder(_languageModel);
|
||||
// Each Mandarin syllable is separated by a hyphen.
|
||||
_builder->setJoinSeparator("-");
|
||||
}
|
||||
|
||||
/// Points `_languageModel` and `_userOverrideModel` at the CHS or CHT
/// instances provided by `mgrLangModel`.
///
/// @param isCHS YES selects the CHS models, NO selects the CHT models.
- (void)setInputModesToLM:(BOOL)isCHS
{
    if (isCHS)
    {
        _languageModel = [mgrLangModel lmCHS];
        _userOverrideModel = [mgrLangModel userOverrideModelCHS];
    }
    else
    {
        _languageModel = [mgrLangModel lmCHT];
        _userOverrideModel = [mgrLangModel userOverrideModelCHT];
    }
}
|
||||
|
||||
/// Copies the phrase-replacement, symbol-input and CNS11643 switches from
/// `mgrPrefs` into the current language model. Does nothing when no
/// language model is set.
- (void)syncBaseLMPrefs
{
    if (!_languageModel)
    {
        return;
    }
    _languageModel->setPhraseReplacementEnabled(mgrPrefs.phraseReplacementEnabled);
    _languageModel->setSymbolEnabled(mgrPrefs.symbolInputEnabled);
    _languageModel->setCNSEnabled(mgrPrefs.cns11643Enabled);
}
|
||||
|
||||
// ----
|
||||
|
||||
// Reports whether the current language model has unigrams for `reading`
// (converted to a UTF-8 std::string before the lookup).
- (BOOL)ifLangModelHasUnigramsForKey:(NSString *)reading
|
||||
{
|
||||
return _languageModel->hasUnigramsForKey((std::string)[reading UTF8String]);
|
||||
}
|
||||
|
||||
// Inserts `reading` (converted to a UTF-8 std::string) into the builder at
// its current cursor position.
- (void)insertReadingToBuilderAtCursor:(NSString *)reading
|
||||
{
|
||||
_builder->insertReadingAtCursor((std::string)[reading UTF8String]);
|
||||
}
|
||||
|
||||
/// Asks the user override model for a suggestion at the current cursor and,
/// if it returns one, overrides the score of that candidate in the grid.
/// Skipped entirely when `mgrPrefs.useSCPCTypingMode` is on.
- (void)dealWithOverrideModelSuggestions
{
    // This whole routine is heavily C++-flavoured and used in one place only,
    // so it is kept as a single unit; splitting it up would only complicate things.
    if (mgrPrefs.useSCPCTypingMode)
    {
        return;
    }

    const std::string overrideValue = _userOverrideModel->suggest(_walkedNodes, [self getBuilderCursorIndex],
                                                                  [[NSDate date] timeIntervalSince1970]);
    if (overrideValue.empty())
    {
        return;
    }

    NSInteger cursorIndex = [self getActualCandidateCursorIndex];
    std::vector<Gramambular::NodeAnchor> nodes = mgrPrefs.setRearCursorMode
                                                     ? _builder->grid().nodesCrossingOrEndingAt(cursorIndex)
                                                     : _builder->grid().nodesEndingAt(cursorIndex);

    // The overriding score is the highest score among those nodes plus kEpsilon.
    double highestScore = FindHighestScore(nodes, kEpsilon);
    _builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue,
                                                           static_cast<float>(highestScore));
}
|
||||
|
||||
// Forwards `value` to the builder's setCursorIndex().
- (void)setBuilderCursorIndex:(NSInteger)value
|
||||
{
|
||||
_builder->setCursorIndex(value);
|
||||
}
|
||||
|
||||
// Returns the builder's current cursorIndex().
- (NSInteger)getBuilderCursorIndex
|
||||
{
|
||||
return _builder->cursorIndex();
|
||||
}
|
||||
|
||||
// Returns the builder's length().
- (NSInteger)getBuilderLength
|
||||
{
|
||||
return _builder->length();
|
||||
}
|
||||
|
||||
// Forwards to the builder's deleteReadingBeforeCursor().
- (void)deleteBuilderReadingInFrontOfCursor
|
||||
{
|
||||
_builder->deleteReadingBeforeCursor();
|
||||
}
|
||||
|
||||
// Forwards to the builder's deleteReadingAfterCursor().
- (void)deleteBuilderReadingAfterCursor
|
||||
{
|
||||
_builder->deleteReadingAfterCursor();
|
||||
}
|
||||
|
||||
/// Builds the flat candidate list for the current candidate cursor position.
///
/// Nodes are fetched with nodesCrossingOrEndingAt() when
/// `mgrPrefs.setRearCursorMode` is on, nodesEndingAt() otherwise, ordered with
/// NodeAnchorDescendingSorter, and each node's candidates are appended in turn.
- (NSArray<NSString *> *)getCandidatesArray
{
    NSMutableArray<NSString *> *candidatesArray = [[NSMutableArray alloc] init];

    NSInteger cursorIndex = [self getActualCandidateCursorIndex];
    std::vector<Gramambular::NodeAnchor> nodes = mgrPrefs.setRearCursorMode
                                                     ? _builder->grid().nodesCrossingOrEndingAt(cursorIndex)
                                                     : _builder->grid().nodesEndingAt(cursorIndex);

    // Stable sort so that longer nodes (longer phrases) come first in the list.
    std::stable_sort(nodes.begin(), nodes.end(), NodeAnchorDescendingSorter());

    // Flatten: every candidate of every node, in sorted node order.
    for (const Gramambular::NodeAnchor &anchor : nodes)
    {
        const std::vector<Gramambular::KeyValuePair> &candidates = anchor.node->candidates();
        for (const Gramambular::KeyValuePair &candidate : candidates)
        {
            [candidatesArray addObject:[NSString stringWithUTF8String:candidate.value.c_str()]];
        }
    }
    return candidatesArray;
}
|
||||
|
||||
/// Returns the NSString length (UTF-16 code units) of the current value of
/// the first walked node.
///
/// @return 0 when there are no walked nodes or the first anchor carries no
///         node; indexing an empty vector would be undefined behaviour.
- (NSInteger)getKeyLengthAtIndexZero
{
    if (_walkedNodes.empty() || _walkedNodes[0].node == nullptr)
    {
        return 0;
    }
    return [NSString stringWithUTF8String:_walkedNodes[0].node->currentKeyValue().value.c_str()].length;
}
|
||||
|
||||
#pragma mark - 威注音認為有必要單獨拿出來處理的部分,交給 Swift 則有些困難。
|
||||
|
||||
// Returns the result of the C library's isprint() for `charCode`.
// NOTE(review): isprint() is specified for unsigned-char-range values (or EOF);
// presumably callers pass ASCII only — confirm.
- (BOOL)isPrintable:(UniChar)charCode
|
||||
{
|
||||
return isprint(charCode);
|
||||
}
|
||||
|
||||
@end
|
|
@ -0,0 +1,324 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Refactored from the ObjCpp-version of this class by:
|
||||
// (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import Cocoa
|
||||
|
||||
/// Input modes known to the key handler. Each case's raw value is the string
/// that `KeyHandler` stores and compares against; `imeModeNULL` is the empty string.
public enum InputMode: String {
|
||||
case imeModeCHS = "org.atelierInmu.inputmethod.vChewing.IMECHS"
|
||||
case imeModeCHT = "org.atelierInmu.inputmethod.vChewing.IMECHT"
|
||||
case imeModeNULL = ""
|
||||
}
|
||||
|
||||
// MARK: - Delegate.
|
||||
|
||||
/// Callbacks a `KeyHandler` expects from its delegate. Judging by the
/// signatures: one returns the candidate controller, one reports that a
/// candidate index was selected, and one requests that a user phrase be
/// written for a given state and returns a Bool result.
protocol KeyHandlerDelegate: NSObjectProtocol {
|
||||
func ctlCandidate(for _: KeyHandler) -> Any
|
||||
func keyHandler(
|
||||
_: KeyHandler, didSelectCandidateAt index: Int,
|
||||
ctlCandidate controller: Any
|
||||
)
|
||||
func keyHandler(_ keyHandler: KeyHandler, didRequestWriteUserPhraseWith state: InputState)
|
||||
-> Bool
|
||||
}
|
||||
|
||||
// MARK: - Kernel.
|
||||
|
||||
class KeyHandler: NSObject {
|
||||
let kEpsilon: Double = 0.000001
|
||||
var _inputMode: String = ""
|
||||
var _languageModel: vChewing.LMInstantiator = .init()
|
||||
var _userOverrideModel: vChewing.LMUserOverride = .init()
|
||||
var _builder: Megrez.BlockReadingBuilder
|
||||
var _walkedNodes: [Megrez.NodeAnchor] = []
|
||||
|
||||
weak var delegate: KeyHandlerDelegate?
|
||||
|
||||
/// Typed view of the stored `_inputMode` string.
///
/// Reading maps the stored string onto the `InputMode` case with the same raw
/// value, falling back to `.imeModeNULL` for anything else. Writing forwards
/// the new case's raw value to `setInputMode(_:)`.
var inputMode: InputMode {
  get { InputMode(rawValue: _inputMode) ?? .imeModeNULL }
  set { setInputMode(newValue.rawValue) }
}
|
||||
|
||||
/// Creates the grid builder from the initial `_languageModel`, then (after
/// `super.init()`) calls `Composer.ensureParser()` and applies
/// `ctlInputMethod.currentInputMode` through `setInputMode(_:)`.
override init() {
|
||||
_builder = Megrez.BlockReadingBuilder(lm: _languageModel)
|
||||
super.init()
|
||||
Composer.ensureParser()
|
||||
setInputMode(ctlInputMethod.currentInputMode)
|
||||
}
|
||||
|
||||
/// Resets typing state: clears the Composer buffer, clears the grid builder,
/// and empties the list of walked nodes.
func clear() {
|
||||
Composer.clearBuffer()
|
||||
_builder.clear()
|
||||
_walkedNodes.removeAll()
|
||||
}
|
||||
|
||||
// This function has to stay standalone so that ObjC code can use it.
/// Applies an input mode given as its raw identifier string.
///
/// Any value other than the CHS identifier is treated as CHT. When the
/// resulting mode differs from the stored `_inputMode`, the language models,
/// their preference switches and the grid builder are rebuilt and any pending
/// Composer buffer content is cleared.
func setInputMode(_ value: String) {
  // Only an exact match with the CHS identifier selects CHS; every other
  // string (e.g. a value from a damaged input-method plist) selects CHT.
  let isCHS = value == InputMode.imeModeCHS.rawValue

  // Report the resolved mode to ctlInputMethod and remember it in the prefs.
  ctlInputMethod.currentInputMode = (isCHS ? InputMode.imeModeCHS : InputMode.imeModeCHT).rawValue
  mgrPrefs.mostRecentInputMode = ctlInputMethod.currentInputMode

  // Compare the stored mode with what ctlInputMethod now reports; apply the
  // new settings only if they differ.
  if _inputMode != ctlInputMethod.currentInputMode {
    // Re-point the language models at the new mode.
    setInputModesToLM(isCHS: isCHS)

    // Synchronize the sub-language-model switches to the new LM.
    syncBaseLMPrefs()

    // Create a new grid builder.
    createNewBuilder()

    if !Composer.isBufferEmpty() {
      Composer.clearBuffer()
    }
  }

  // Store the raw string directly, which avoids a type conversion.
  _inputMode = ctlInputMethod.currentInputMode
}
|
||||
|
||||
// MARK: - Functions dealing with Megrez.
|
||||
|
||||
/// Re-walks the builder's grid and replaces `_walkedNodes` with the result,
/// ordered from the head of the grid to its tail.
func walk() {
  // The Megrez walker picks the most likely path through the grid, i.e. a
  // maximum-likelihood estimate of the characters for the typed syllables.
  let walker = Megrez.Walker(grid: _builder.grid())

  // reverseWalk(at:) traces the trellis from its end, so the anchors come
  // back tail-first; reversing them yields forward order.
  let tailFirst: [Megrez.NodeAnchor] = walker.reverseWalk(at: _builder.grid().width())
  _walkedNodes = tailFirst.reversed()
}
|
||||
|
||||
/// Trims the head of the builder when the grid is wider than
/// `mgrPrefs.composingBufferSize`, then re-walks the grid.
///
/// - Returns: The text of the first walked node if it was popped off the
///   head, otherwise an empty string.
func popOverflowComposingTextAndWalk() -> String {
  // Ideally users could type without limit in one buffer. However, the
  // Viterbi walk gets slower as the number of nodes increases, so overflowing
  // text at the rear side of the buffer, which has usually lost its influence
  // over the whole result anyway, is committed out (i.e. popped).
  var poppedText = ""
  if _builder.grid().width() > mgrPrefs.composingBufferSize, let headAnchor = _walkedNodes.first {
    poppedText = headAnchor.node?.currentKeyValue().value ?? ""
    _builder.removeHeadReadings(count: headAnchor.spanningLength)
  }
  walk()
  return poppedText
}
|
||||
|
||||
/// Returns the associated phrases the language model holds for `key`, or an
/// empty array when the model reports none for it.
func buildAssociatePhraseArray(withKey key: String) -> [String] {
  guard _languageModel.hasAssociatedPhrasesForKey(key) else { return [] }
  return Array(_languageModel.associatedPhrasesForKey(key))
}
|
||||
|
||||
/// Fixes `value` as the selected candidate at the current candidate cursor,
/// optionally records the choice in the user override model, re-walks the
/// grid, and optionally moves the builder cursor past the selection.
func fixNode(value: String) {
  let cursorIndex: Int = getActualCandidateCursorIndex()
  let selectedNode: Megrez.NodeAnchor = _builder.grid().fixNodeSelectedCandidate(
    location: cursorIndex, value: value
  )

  // The user override model is not fed in SCPC typing mode. A mismatch between
  // the number of readings and the number of characters often means a special
  // symbol, which should not be stored in the user override model either.
  if !mgrPrefs.useSCPCTypingMode, selectedNode.spanningLength == value.count {
    // vChewing's SymbolLM entries score -12; anything at or below that is
    // treated as a symbol and skipped. A missing node does not block recording.
    let scoredAsSymbol = selectedNode.node.map { $0.scoreFor(candidate: value) <= -12 } ?? false
    if !scoredAsSymbol {
      _userOverrideModel.observe(
        walkedNodes: _walkedNodes, cursorIndex: cursorIndex, candidate: value,
        timestamp: NSDate().timeIntervalSince1970
      )
    }
  }

  walk()

  guard mgrPrefs.moveCursorAfterSelectingCandidate else { return }

  // Accumulate spanning lengths until reaching or passing the candidate
  // cursor; that sum is where the builder cursor should land.
  var nextPosition = 0
  for anchor in _walkedNodes {
    guard nextPosition < cursorIndex else { break }
    nextPosition += anchor.spanningLength
  }
  if nextPosition <= getBuilderLength() {
    setBuilderCursorIndex(value: nextPosition)
  }
}
|
||||
|
||||
/// Builds the flat candidate list for the current candidate cursor position.
///
/// How it works: `getRawNodes()` yields one anchor per phrase length, each
/// carrying its own candidate list. The anchors are ordered so that the one
/// with the longest key comes first, and the candidates of each anchor are
/// then appended in turn; the order inside each candidate list is untouched.
func getCandidatesArray() -> [String] {
  // Longer nodes (longer phrases) go to the top of the candidate list.
  let anchorsLongestFirst = getRawNodes().sorted { $0.keyLength > $1.keyLength }

  var collected: [String] = []
  for anchor in anchorsLongestFirst {
    // Anchors without a node contribute nothing.
    guard let node = anchor.node else { continue }
    collected += node.candidates().map { $0.value }
  }
  return collected
}
|
||||
|
||||
/// Asks the user override model for a suggestion at the builder cursor and,
/// if it returns a non-empty value, overrides that candidate's score in the
/// grid. Skipped entirely when `mgrPrefs.useSCPCTypingMode` is on.
func dealWithOverrideModelSuggestions() {
  guard !mgrPrefs.useSCPCTypingMode else { return }

  let suggestion = _userOverrideModel.suggest(
    walkedNodes: _walkedNodes, cursorIndex: getBuilderCursorIndex(),
    timestamp: NSDate().timeIntervalSince1970
  )
  guard !suggestion.isEmpty else { return }

  // The overriding score is the highest unigram score among the nodes at the
  // candidate cursor, plus kEpsilon.
  _builder.grid().overrideNodeScoreForSelectedCandidate(
    location: getActualCandidateCursorIndex(),
    value: suggestion,
    overridingScore: findHighestScore(nodes: getRawNodes(), epsilon: kEpsilon)
  )
}
|
||||
|
||||
/// Returns the largest `highestUnigramScore()` among `nodes`, plus `epsilon`.
///
/// The running maximum starts at 0, so the result is never below `epsilon`;
/// anchors without a node are ignored.
///
/// - Parameters:
///   - nodes: The anchors to inspect.
///   - epsilon: The margin added on top of the maximum.
func findHighestScore(nodes: [Megrez.NodeAnchor], epsilon: Double) -> Double {
  let best = nodes
    .compactMap { $0.node?.highestUnigramScore() }
    .reduce(0.0) { Swift.max($0, $1) }
  return best + epsilon
}
|
||||
|
||||
// MARK: - Extracted methods and functions.
|
||||
|
||||
/// True when the builder's grid has a width of 0.
func isBuilderEmpty() -> Bool { _builder.grid().width() == 0 }
|
||||
|
||||
/// Returns the grid's node anchors at the current candidate cursor position:
/// crossing-or-ending nodes when `mgrPrefs.setRearCursorMode` is on, nodes
/// ending there otherwise.
func getRawNodes() -> [Megrez.NodeAnchor] {
  /// Warning: do not use nodesCrossing for the front-cursor style, or the
  /// cursor will behave differently from the macOS built-in Zhuyin input method.
  /// The rear-cursor style of Microsoft New Phonetic does not allow node
  /// crossing either, but Megrez currently lacks support for that behaviour.
  /// For now vChewing's rear-cursor style can therefore only be described as
  /// "the same as Yahoo! KeyKey for Windows".
  if mgrPrefs.setRearCursorMode {
    return _builder.grid().nodesCrossingOrEndingAt(location: getActualCandidateCursorIndex())
  }
  return _builder.grid().nodesEndingAt(location: getActualCandidateCursorIndex())
}
|
||||
|
||||
/// Points `_languageModel` and `_userOverrideModel` at the CHS or CHT
/// instances provided by `mgrLangModel`.
///
/// - Parameter isCHS: `true` selects the CHS models, `false` the CHT models.
func setInputModesToLM(isCHS: Bool) {
  if isCHS {
    _languageModel = mgrLangModel.lmCHS
    _userOverrideModel = mgrLangModel.uomCHS
  } else {
    _languageModel = mgrLangModel.lmCHT
    _userOverrideModel = mgrLangModel.uomCHT
  }
}
|
||||
|
||||
/// Copies the phrase-replacement, CNS11643 and symbol-input switches from
/// `mgrPrefs` onto the current language model.
func syncBaseLMPrefs() {
|
||||
_languageModel.isPhraseReplacementEnabled = mgrPrefs.phraseReplacementEnabled
|
||||
_languageModel.isCNSEnabled = mgrPrefs.cns11643Enabled
|
||||
_languageModel.isSymbolEnabled = mgrPrefs.symbolInputEnabled
|
||||
}
|
||||
|
||||
/// Replaces `_builder` with a new builder bound to the current
/// `_languageModel` and sets "-" as its join separator.
func createNewBuilder() {
|
||||
_builder = Megrez.BlockReadingBuilder(lm: _languageModel)
|
||||
// Each Mandarin syllable is separated by a hyphen.
|
||||
_builder.setJoinSeparator(separator: "-")
|
||||
}
|
||||
|
||||
/// Returns the builder's current readings.
func currentReadings() -> [String] { _builder.readings() }
|
||||
|
||||
/// Reports whether the current language model has unigrams for `reading`.
func ifLangModelHasUnigrams(forKey reading: String) -> Bool {
|
||||
_languageModel.hasUnigramsFor(key: reading)
|
||||
}
|
||||
|
||||
/// Inserts `reading` into the builder at its current cursor position.
func insertReadingToBuilderAtCursor(reading: String) {
|
||||
_builder.insertReadingAtCursor(reading: reading)
|
||||
}
|
||||
|
||||
/// Forwards `value` to the builder's `setCursorIndex(newIndex:)`.
func setBuilderCursorIndex(value: Int) {
|
||||
_builder.setCursorIndex(newIndex: value)
|
||||
}
|
||||
|
||||
/// Returns the builder's current cursor index.
func getBuilderCursorIndex() -> Int {
|
||||
_builder.cursorIndex()
|
||||
}
|
||||
|
||||
/// Returns the builder's `length()`.
func getBuilderLength() -> Int {
|
||||
_builder.length()
|
||||
}
|
||||
|
||||
/// Forwards to the builder's `deleteReadingBeforeCursor()`.
func deleteBuilderReadingInFrontOfCursor() {
|
||||
_builder.deleteReadingBeforeCursor()
|
||||
}
|
||||
|
||||
/// Forwards to the builder's `deleteReadingAfterCursor()`.
func deleteBuilderReadingAfterCursor() {
|
||||
_builder.deleteReadingAfterCursor()
|
||||
}
|
||||
|
||||
/// Returns the character count of the current value of the first walked node.
///
/// - Returns: 0 when there are no walked nodes or the first anchor has no
///   node. `first` is used instead of `[0]` because subscripting an empty
///   array traps at runtime.
func getKeyLengthAtIndexZero() -> Int {
  _walkedNodes.first?.node?.currentKeyValue().value.count ?? 0
}
|
||||
}
|
|
@ -28,7 +28,7 @@ import Cocoa
|
|||
|
||||
// MARK: - § Handle Candidate State.
|
||||
|
||||
@objc extension KeyHandler {
|
||||
extension KeyHandler {
|
||||
func handleCandidate(
|
||||
state: InputState,
|
||||
input: InputHandler,
|
||||
|
@ -331,7 +331,7 @@ import Cocoa
|
|||
let punctuation: String = arrPunctuations.joined(separator: "")
|
||||
|
||||
var shouldAutoSelectCandidate: Bool =
|
||||
chkKeyValidity(charCode) || ifLangModelHasUnigrams(forKey: customPunctuation)
|
||||
Composer.chkKeyValidity(charCode) || ifLangModelHasUnigrams(forKey: customPunctuation)
|
||||
|| ifLangModelHasUnigrams(forKey: punctuation)
|
||||
|
||||
if !shouldAutoSelectCandidate, input.isUpperCaseASCIILetterKey {
|
||||
|
|
|
@ -28,7 +28,7 @@ import Cocoa
|
|||
|
||||
// MARK: - § Handle Input with States.
|
||||
|
||||
@objc extension KeyHandler {
|
||||
extension KeyHandler {
|
||||
func handle(
|
||||
input: InputHandler,
|
||||
state: InputState,
|
||||
|
@ -75,7 +75,7 @@ import Cocoa
|
|||
|
||||
// If ASCII but not printable, don't use insertText:replacementRange:
|
||||
// Certain apps don't handle non-ASCII char insertions.
|
||||
if charCode < 0x80, !isPrintable(charCode) {
|
||||
if charCode < 0x80, !CTools.isPrintable(charCode) {
|
||||
return false
|
||||
}
|
||||
|
||||
|
@ -90,7 +90,7 @@ import Cocoa
|
|||
|
||||
if input.isNumericPad {
|
||||
if !input.isLeft, !input.isRight, !input.isDown,
|
||||
!input.isUp, !input.isSpace, isPrintable(charCode)
|
||||
!input.isUp, !input.isSpace, CTools.isPrintable(charCode)
|
||||
{
|
||||
clear()
|
||||
stateCallback(InputState.Empty())
|
||||
|
@ -139,13 +139,13 @@ import Cocoa
|
|||
let skipPhoneticHandling = input.isReservedKey || input.isControlHold || input.isOptionHold
|
||||
|
||||
// See if Phonetic reading is valid.
|
||||
if !skipPhoneticHandling && chkKeyValidity(charCode) {
|
||||
combinePhoneticReadingBufferKey(charCode)
|
||||
if !skipPhoneticHandling && Composer.chkKeyValidity(charCode) {
|
||||
Composer.combineReadingKey(charCode)
|
||||
|
||||
// If we have a tone marker, we have to insert the reading to the
|
||||
// builder in other words, if we don't have a tone marker, we just
|
||||
// update the composing buffer.
|
||||
composeReading = checkWhetherToneMarkerConfirmsPhoneticReadingBuffer()
|
||||
composeReading = Composer.checkWhetherToneMarkerConfirms()
|
||||
if !composeReading {
|
||||
stateCallback(buildInputtingState())
|
||||
return true
|
||||
|
@ -155,28 +155,28 @@ import Cocoa
|
|||
// See if we have composition if Enter/Space is hit and buffer is not empty.
|
||||
// We use "|=" conditioning so that the tone marker key is also taken into account.
|
||||
// However, Swift does not support "|=".
|
||||
composeReading = composeReading || (!isPhoneticReadingBufferEmpty() && (input.isSpace || input.isEnter))
|
||||
composeReading = composeReading || (!Composer.isBufferEmpty() && (input.isSpace || input.isEnter))
|
||||
if composeReading {
|
||||
let reading = getSyllableCompositionFromPhoneticReadingBuffer()
|
||||
let reading = Composer.getSyllableComposition()
|
||||
|
||||
if !ifLangModelHasUnigrams(forKey: reading) {
|
||||
IME.prtDebugIntel("B49C0979")
|
||||
IME.prtDebugIntel("B49C0979:語彙庫內無「\(reading)」的匹配記錄。")
|
||||
errorCallback()
|
||||
stateCallback(buildInputtingState())
|
||||
return true
|
||||
}
|
||||
|
||||
// ... and insert it into the lattice grid...
|
||||
insertReadingToBuilder(atCursor: reading)
|
||||
insertReadingToBuilderAtCursor(reading: reading)
|
||||
|
||||
// ... then walk the lattice grid...
|
||||
let poppedText = _popOverflowComposingTextAndWalk()
|
||||
let poppedText = popOverflowComposingTextAndWalk()
|
||||
|
||||
// ... get and tweak override model suggestion if possible...
|
||||
dealWithOverrideModelSuggestions()
|
||||
|
||||
// ... then update the text.
|
||||
clearPhoneticReadingBuffer()
|
||||
Composer.clearBuffer()
|
||||
|
||||
let inputting = buildInputtingState()
|
||||
inputting.poppedText = poppedText
|
||||
|
@ -216,7 +216,7 @@ import Cocoa
|
|||
// MARK: Calling candidate window using Space or Down or PageUp / PageDn.
|
||||
|
||||
if let currentState = state as? InputState.NotEmpty {
|
||||
if isPhoneticReadingBufferEmpty(),
|
||||
if Composer.isBufferEmpty(),
|
||||
input.isExtraChooseCandidateKey || input.isExtraChooseCandidateKeyReverse || input.isSpace
|
||||
|| input.isPageDown || input.isPageUp || input.isTab
|
||||
|| (input.useVerticalMode && (input.isVerticalModeOnlyChooseCandidateKey))
|
||||
|
@ -233,8 +233,8 @@ import Cocoa
|
|||
stateCallback(InputState.Committing(poppedText: " "))
|
||||
stateCallback(InputState.Empty())
|
||||
} else if ifLangModelHasUnigrams(forKey: " ") {
|
||||
insertReadingToBuilder(atCursor: " ")
|
||||
let poppedText = _popOverflowComposingTextAndWalk()
|
||||
insertReadingToBuilderAtCursor(reading: " ")
|
||||
let poppedText = popOverflowComposingTextAndWalk()
|
||||
let inputting = buildInputtingState()
|
||||
inputting.poppedText = poppedText
|
||||
stateCallback(inputting)
|
||||
|
@ -329,9 +329,9 @@ import Cocoa
|
|||
if input.isSymbolMenuPhysicalKey && !input.isShiftHold {
|
||||
if !input.isOptionHold {
|
||||
if ifLangModelHasUnigrams(forKey: "_punctuation_list") {
|
||||
if isPhoneticReadingBufferEmpty() {
|
||||
insertReadingToBuilder(atCursor: "_punctuation_list")
|
||||
let poppedText: String! = _popOverflowComposingTextAndWalk()
|
||||
if Composer.isBufferEmpty() {
|
||||
insertReadingToBuilderAtCursor(reading: "_punctuation_list")
|
||||
let poppedText: String! = popOverflowComposingTextAndWalk()
|
||||
let inputting = buildInputtingState()
|
||||
inputting.poppedText = poppedText
|
||||
stateCallback(inputting)
|
||||
|
@ -354,7 +354,7 @@ import Cocoa
|
|||
|
||||
// MARK: Punctuation
|
||||
|
||||
// if nothing is matched, see if it's a punctuation key for current layout.
|
||||
// If nothing is matched, see if it's a punctuation key for current layout.
|
||||
|
||||
var punctuationNamePrefix = ""
|
||||
|
||||
|
@ -418,7 +418,7 @@ import Cocoa
|
|||
// "thinking" that the key is not actually consumed.
|
||||
// 砍掉這一段會導致「F1-F12 按鍵干擾組字區」的問題。
|
||||
// 暫時只能先恢復這段,且補上偵錯彙報機制,方便今後排查故障。
|
||||
if (state is InputState.NotEmpty) || !isPhoneticReadingBufferEmpty() {
|
||||
if (state is InputState.NotEmpty) || !Composer.isBufferEmpty() {
|
||||
IME.prtDebugIntel(
|
||||
"Blocked data: charCode: \(charCode), keyCode: \(input.keyCode)")
|
||||
IME.prtDebugIntel("A9BFF20E")
|
||||
|
|
|
@ -28,7 +28,7 @@ import Cocoa
|
|||
|
||||
// MARK: - § Misc functions.
|
||||
|
||||
@objc extension KeyHandler {
|
||||
extension KeyHandler {
|
||||
func getCurrentMandarinParser() -> String {
|
||||
mgrPrefs.mandarinParserName + "_"
|
||||
}
|
||||
|
@ -43,7 +43,7 @@ import Cocoa
|
|||
&& (cursorIndex < getBuilderLength()))
|
||||
|| cursorIndex == 0
|
||||
{
|
||||
if cursorIndex == 0 && !mgrPrefs.setRearCursorMode {
|
||||
if cursorIndex == 0, !mgrPrefs.setRearCursorMode {
|
||||
cursorIndex += getKeyLengthAtIndexZero()
|
||||
} else {
|
||||
cursorIndex += 1
|
||||
|
|
|
@ -28,49 +28,71 @@ import Cocoa
|
|||
|
||||
// MARK: - § State managements.
|
||||
|
||||
@objc extension KeyHandler {
|
||||
extension KeyHandler {
|
||||
// MARK: - 構築狀態(State Building)
|
||||
|
||||
func buildInputtingState() -> InputState.Inputting {
|
||||
// 觸發資料封裝更新,否則下文拿到的資料會是過期的。
|
||||
packageBufferStateMaterials()
|
||||
// 獲取封裝好的資料
|
||||
let composedText = getComposedText()
|
||||
let packagedCursorIndex = UInt(getPackagedCursorIndex())
|
||||
let resultOfRear = getStrLocationResult(isFront: false)
|
||||
let resultOfFront = getStrLocationResult(isFront: true)
|
||||
// "Updating the composing buffer" means to request the client
|
||||
// to "refresh" the text input buffer with our "composing text"
|
||||
var composingBuffer = ""
|
||||
var composedStringCursorIndex = 0
|
||||
|
||||
// 初期化狀態
|
||||
let newState = InputState.Inputting(composingBuffer: composedText, cursorIndex: packagedCursorIndex)
|
||||
var readingCursorIndex: size_t = 0
|
||||
let builderCursorIndex: size_t = getBuilderCursorIndex()
|
||||
|
||||
// 組建提示文本
|
||||
var tooltip = ""
|
||||
// We must do some Unicode codepoint counting to find the actual cursor location for the client
|
||||
// i.e. we need to take UTF-16 into consideration, for which a surrogate pair takes 2 UniChars
|
||||
// locations. These processes are inherited from the ObjC++ version of this class and might be
|
||||
// unnecessary in Swift, but this deduction requires further experiments.
|
||||
for walkedNode in _walkedNodes {
|
||||
if let theNode = walkedNode.node {
|
||||
let strNodeValue = theNode.currentKeyValue().value
|
||||
composingBuffer += strNodeValue
|
||||
|
||||
// 如果在用特定的模式的話,則始終顯示對應的提示。
|
||||
// TODO: 該功能無法正常運作,暫時註釋掉。
|
||||
// if ctlInputMethod.currentKeyHandler.inputMode == InputMode.imeModeCHT {
|
||||
// if mgrPrefs.chineseConversionEnabled && !mgrPrefs.shiftJISShinjitaiOutputEnabled {
|
||||
// tooltip = String(
|
||||
// format: "%@%@%@", NSLocalizedString("Force KangXi Writing", comment: ""), "\n",
|
||||
// NSLocalizedString("NotificationSwitchON", comment: ""))
|
||||
// } else if mgrPrefs.shiftJISShinjitaiOutputEnabled {
|
||||
// tooltip = String(
|
||||
// format: "%@%@%@", NSLocalizedString("JIS Shinjitai Output", comment: ""), "\n",
|
||||
// NSLocalizedString("NotificationSwitchON", comment: ""))
|
||||
// }
|
||||
// }
|
||||
let arrSplit: [NSString] = (strNodeValue as NSString).split()
|
||||
let codepointCount = arrSplit.count
|
||||
|
||||
// 備註:因為目前的輸入法已經有了 NSString Emoji 支援,所以這個工具提示可能不會出現了。
|
||||
// 姑且留下來用作萬一時的偵錯用途。
|
||||
if resultOfRear != "" || resultOfFront != "" {
|
||||
tooltip = String(
|
||||
format: NSLocalizedString("Cursor is between \"%@\" and \"%@\".", comment: ""),
|
||||
resultOfFront, resultOfRear
|
||||
)
|
||||
// This re-aligns the cursor index in the composed string
|
||||
// (the actual cursor on the screen) with the builder's logical
|
||||
// (reading) cursor; each built node has a "spanning length"
|
||||
// (e.g. two reading blocks has a spanning length of 2), and we
|
||||
// accumulate those lengths to calculate the displayed cursor
|
||||
// index.
|
||||
let spanningLength: Int = walkedNode.spanningLength
|
||||
if readingCursorIndex + spanningLength <= builderCursorIndex {
|
||||
composedStringCursorIndex += (strNodeValue as NSString).length
|
||||
readingCursorIndex += spanningLength
|
||||
} else {
|
||||
if codepointCount == spanningLength {
|
||||
var i = 0
|
||||
while i < codepointCount, readingCursorIndex < builderCursorIndex {
|
||||
composedStringCursorIndex += arrSplit[i].length
|
||||
readingCursorIndex += 1
|
||||
i += 1
|
||||
}
|
||||
} else {
|
||||
if readingCursorIndex < builderCursorIndex {
|
||||
composedStringCursorIndex += (strNodeValue as NSString).length
|
||||
readingCursorIndex += spanningLength
|
||||
if readingCursorIndex > builderCursorIndex {
|
||||
readingCursorIndex = builderCursorIndex
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now, we gather all the intel, separate the composing buffer to two parts (head and tail),
|
||||
// and insert the reading text (the Mandarin syllable) in between them.
|
||||
// The reading text is what the user is typing.
|
||||
|
||||
newState.tooltip = tooltip
|
||||
return newState
|
||||
let head = String((composingBuffer as NSString).substring(to: composedStringCursorIndex))
|
||||
let reading = Composer.getComposition()
|
||||
let tail = String((composingBuffer as NSString).substring(from: composedStringCursorIndex))
|
||||
let composedText = head + reading + tail
|
||||
let cursorIndex = composedStringCursorIndex + reading.count
|
||||
|
||||
return InputState.Inputting(composingBuffer: composedText, cursorIndex: UInt(cursorIndex))
|
||||
}
|
||||
|
||||
// MARK: - 用以生成候選詞陣列及狀態
|
||||
|
@ -102,7 +124,8 @@ import Cocoa
|
|||
) -> InputState.AssociatedPhrases! {
|
||||
// 上一行必須要用驚嘆號,否則 Xcode 會誤導你砍掉某些實際上必需的語句。
|
||||
InputState.AssociatedPhrases(
|
||||
candidates: buildAssociatePhraseArray(withKey: key), useVerticalMode: useVerticalMode)
|
||||
candidates: buildAssociatePhraseArray(withKey: key), useVerticalMode: useVerticalMode
|
||||
)
|
||||
}
|
||||
|
||||
// MARK: - 用以處理就地新增自訂語彙時的行為
|
||||
|
@ -190,14 +213,14 @@ import Cocoa
|
|||
return false
|
||||
}
|
||||
|
||||
if isPhoneticReadingBufferEmpty() {
|
||||
insertReadingToBuilder(atCursor: customPunctuation)
|
||||
let poppedText = _popOverflowComposingTextAndWalk()
|
||||
if Composer.isBufferEmpty() {
|
||||
insertReadingToBuilderAtCursor(reading: customPunctuation)
|
||||
let poppedText = popOverflowComposingTextAndWalk()
|
||||
let inputting = buildInputtingState()
|
||||
inputting.poppedText = poppedText
|
||||
stateCallback(inputting)
|
||||
|
||||
if mgrPrefs.useSCPCTypingMode, isPhoneticReadingBufferEmpty() {
|
||||
if mgrPrefs.useSCPCTypingMode, Composer.isBufferEmpty() {
|
||||
let candidateState = buildCandidate(
|
||||
state: inputting,
|
||||
useVerticalMode: useVerticalMode
|
||||
|
@ -256,7 +279,7 @@ import Cocoa
|
|||
return false
|
||||
}
|
||||
|
||||
let readings: [String] = _currentReadings()
|
||||
let readings: [String] = currentReadings()
|
||||
let composingBuffer =
|
||||
(IME.areWeUsingOurOwnPhraseEditor)
|
||||
? readings.joined(separator: "-")
|
||||
|
@ -280,10 +303,10 @@ import Cocoa
|
|||
return false
|
||||
}
|
||||
|
||||
if isPhoneticReadingBufferEmpty() {
|
||||
if Composer.isBufferEmpty() {
|
||||
if getBuilderCursorIndex() >= 0 {
|
||||
deleteBuilderReadingInFrontOfCursor()
|
||||
_walk()
|
||||
walk()
|
||||
} else {
|
||||
IME.prtDebugIntel("9D69908D")
|
||||
errorCallback()
|
||||
|
@ -291,10 +314,10 @@ import Cocoa
|
|||
return true
|
||||
}
|
||||
} else {
|
||||
doBackSpaceToPhoneticReadingBuffer()
|
||||
Composer.doBackSpaceToBuffer()
|
||||
}
|
||||
|
||||
if isPhoneticReadingBufferEmpty(), getBuilderLength() == 0 {
|
||||
if Composer.isBufferEmpty(), getBuilderLength() == 0 {
|
||||
stateCallback(InputState.EmptyIgnoringPreviousState())
|
||||
} else {
|
||||
stateCallback(buildInputtingState())
|
||||
|
@ -313,10 +336,10 @@ import Cocoa
|
|||
return false
|
||||
}
|
||||
|
||||
if isPhoneticReadingBufferEmpty() {
|
||||
if Composer.isBufferEmpty() {
|
||||
if getBuilderCursorIndex() != getBuilderLength() {
|
||||
deleteBuilderReadingAfterCursor()
|
||||
_walk()
|
||||
walk()
|
||||
let inputting = buildInputtingState()
|
||||
// 這裡不用「count > 0」,因為該整數變數只要「!isEmpty」那就必定滿足這個條件。
|
||||
if !inputting.composingBuffer.isEmpty {
|
||||
|
@ -348,7 +371,7 @@ import Cocoa
|
|||
if !(state is InputState.Inputting) {
|
||||
return false
|
||||
}
|
||||
if !isPhoneticReadingBufferEmpty() {
|
||||
if !Composer.isBufferEmpty() {
|
||||
IME.prtDebugIntel("9B6F908D")
|
||||
errorCallback()
|
||||
}
|
||||
|
@ -367,7 +390,7 @@ import Cocoa
|
|||
return false
|
||||
}
|
||||
|
||||
if !isPhoneticReadingBufferEmpty() {
|
||||
if !Composer.isBufferEmpty() {
|
||||
IME.prtDebugIntel("ABC44080")
|
||||
errorCallback()
|
||||
stateCallback(state)
|
||||
|
@ -375,7 +398,7 @@ import Cocoa
|
|||
}
|
||||
|
||||
if getBuilderCursorIndex() != 0 {
|
||||
setBuilderCursorIndex(0)
|
||||
setBuilderCursorIndex(value: 0)
|
||||
stateCallback(buildInputtingState())
|
||||
} else {
|
||||
IME.prtDebugIntel("66D97F90")
|
||||
|
@ -397,7 +420,7 @@ import Cocoa
|
|||
return false
|
||||
}
|
||||
|
||||
if !isPhoneticReadingBufferEmpty() {
|
||||
if !Composer.isBufferEmpty() {
|
||||
IME.prtDebugIntel("9B69908D")
|
||||
errorCallback()
|
||||
stateCallback(state)
|
||||
|
@ -405,7 +428,7 @@ import Cocoa
|
|||
}
|
||||
|
||||
if getBuilderCursorIndex() != getBuilderLength() {
|
||||
setBuilderCursorIndex(getBuilderLength())
|
||||
setBuilderCursorIndex(value: getBuilderLength())
|
||||
stateCallback(buildInputtingState())
|
||||
} else {
|
||||
IME.prtDebugIntel("9B69908E")
|
||||
|
@ -436,8 +459,8 @@ import Cocoa
|
|||
stateCallback(InputState.EmptyIgnoringPreviousState())
|
||||
} else {
|
||||
// If reading is not empty, we cancel the reading.
|
||||
if !isPhoneticReadingBufferEmpty() {
|
||||
clearPhoneticReadingBuffer()
|
||||
if !Composer.isBufferEmpty() {
|
||||
Composer.clearBuffer()
|
||||
if getBuilderLength() == 0 {
|
||||
stateCallback(InputState.Empty())
|
||||
} else {
|
||||
|
@ -458,7 +481,7 @@ import Cocoa
|
|||
) -> Bool {
|
||||
if !(state is InputState.Inputting) { return false }
|
||||
|
||||
if !isPhoneticReadingBufferEmpty() {
|
||||
if !Composer.isBufferEmpty() {
|
||||
IME.prtDebugIntel("B3BA5257")
|
||||
errorCallback()
|
||||
stateCallback(state)
|
||||
|
@ -475,7 +498,7 @@ import Cocoa
|
|||
composingBuffer: currentState.composingBuffer,
|
||||
cursorIndex: currentState.cursorIndex,
|
||||
markerIndex: UInt(nextPosition),
|
||||
readings: _currentReadings()
|
||||
readings: currentReadings()
|
||||
)
|
||||
marking.tooltipForInputting = currentState.tooltip
|
||||
stateCallback(marking)
|
||||
|
@ -486,7 +509,7 @@ import Cocoa
|
|||
}
|
||||
} else {
|
||||
if getBuilderCursorIndex() < getBuilderLength() {
|
||||
setBuilderCursorIndex(getBuilderCursorIndex() + 1)
|
||||
setBuilderCursorIndex(value: getBuilderCursorIndex() + 1)
|
||||
stateCallback(buildInputtingState())
|
||||
} else {
|
||||
IME.prtDebugIntel("A96AAD58")
|
||||
|
@ -509,7 +532,7 @@ import Cocoa
|
|||
) -> Bool {
|
||||
if !(state is InputState.Inputting) { return false }
|
||||
|
||||
if !isPhoneticReadingBufferEmpty() {
|
||||
if !Composer.isBufferEmpty() {
|
||||
IME.prtDebugIntel("6ED95318")
|
||||
errorCallback()
|
||||
stateCallback(state)
|
||||
|
@ -526,7 +549,7 @@ import Cocoa
|
|||
composingBuffer: currentState.composingBuffer,
|
||||
cursorIndex: currentState.cursorIndex,
|
||||
markerIndex: UInt(previousPosition),
|
||||
readings: _currentReadings()
|
||||
readings: currentReadings()
|
||||
)
|
||||
marking.tooltipForInputting = currentState.tooltip
|
||||
stateCallback(marking)
|
||||
|
@ -537,7 +560,7 @@ import Cocoa
|
|||
}
|
||||
} else {
|
||||
if getBuilderCursorIndex() > 0 {
|
||||
setBuilderCursorIndex(getBuilderCursorIndex() - 1)
|
||||
setBuilderCursorIndex(value: getBuilderCursorIndex() - 1)
|
||||
stateCallback(buildInputtingState())
|
||||
} else {
|
||||
IME.prtDebugIntel("7045E6F3")
|
||||
|
|
|
@ -1,155 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "KeyValueBlobReader.h"
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
// Parses the next key-value pair out of the blob.
//
// Blank characters, line breaks and whole comment lines (those whose first
// non-blank character is '#') in front of the key are skipped. A pair is
// "<key><blanks><value>"; whatever follows the value on the same line is
// discarded.
//
// Parameters:
//   out - optional; when not null it receives views into the blob for the
//         parsed key and value.
// Returns:
//   State::HAS_PAIR when a pair was produced, State::END when the blob ran
//   out before a key started, State::ERROR when a key has no value (the
//   reader then stays in ERROR on every later call).
KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue *out)
{
    static const auto isLineBreak = [](char c) { return c == '\n' || c == '\r'; };
    static const auto isBlank = [](char c) { return c == ' ' || c == '\t'; };
    static const auto isBlankOrLineBreak = [](char c) { return isBlank(c) || isLineBreak(c); };
    static const auto isContent = [](char c) { return !isBlank(c) && !isLineBreak(c); };

    // Single exit path for every malformed-input case.
    const auto fail = [this]() -> State {
        state_ = State::ERROR;
        return state_;
    };

    // Once broken, always broken: do not try to resynchronise.
    if (state_ == State::ERROR)
    {
        return state_;
    }

    // Skip leading whitespace and comment lines until a key starts.
    for (;;)
    {
        state_ = SkipUntilNot(isBlankOrLineBreak);
        if (state_ != State::CAN_CONTINUE)
        {
            return state_;
        }

        // Anything other than '#' is the first character of a key.
        if (*current_ != '#')
        {
            break;
        }

        // Comment line: consume it up to its line break.
        state_ = SkipUntil(isLineBreak);
        if (state_ != State::CAN_CONTINUE)
        {
            return state_;
        }
    }

    // No need to check whether *current_ is a content character here: the loop
    // above only exits on a character that is neither blank nor a line break.
    const char *const keyStart = current_;
    state_ = SkipUntilNot(isContent);
    if (state_ != State::CAN_CONTINUE)
    {
        return fail();
    }
    const size_t keyLength = current_ - keyStart;

    // There must be at least one blank character after the key string.
    if (!isBlank(*current_))
    {
        return fail();
    }

    state_ = SkipUntilNot(isBlank);
    if (state_ != State::CAN_CONTINUE)
    {
        return fail();
    }

    // A line break right after the blanks means the key has no value.
    if (!isContent(*current_))
    {
        return fail();
    }

    const char *const valueStart = current_;
    // The value consists of content characters only; blanks are not allowed.
    // The returned state is deliberately ignored: the value is always emitted,
    // and stopping at the first blank keeps trailing spaces out of the value.
    SkipUntilNot(isContent);
    const size_t valueLength = current_ - valueStart;

    // Unconditionally skip to the end of the line, so that in a line such as
    // "foo bar baz\n" the trailing "baz" is not mistaken for the next key.
    SkipUntil(isLineBreak);

    if (out != nullptr)
    {
        *out = KeyValue{std::string_view{keyStart, keyLength}, std::string_view{valueStart, valueLength}};
    }

    state_ = State::HAS_PAIR;
    return state_;
}
|
||||
|
||||
// Advances the read position past every character for which `f` returns true.
// Returns State::CAN_CONTINUE with the position left on the first character
// rejected by `f`, or State::END when the end of the blob or a NUL byte is
// reached first.
KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(const std::function<bool(char)> &f)
{
    for (; current_ != end_ && *current_ != '\0'; ++current_)
    {
        if (!f(*current_))
        {
            return State::CAN_CONTINUE;
        }
    }
    return State::END;
}
|
||||
|
||||
// Advances the read position until `f` returns true for the current character.
// Returns State::CAN_CONTINUE with the position left on that character, or
// State::END when the end of the blob or a NUL byte is reached first.
KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(const std::function<bool(char)> &f)
{
    for (; current_ != end_ && *current_ != '\0'; ++current_)
    {
        if (f(*current_))
        {
            return State::CAN_CONTINUE;
        }
    }
    return State::END;
}
|
||||
|
||||
// Streams a key-value pair in the form "(key: K, value: V)"; returns `os` so
// that insertions can be chained.
std::ostream &operator<<(std::ostream &os, const KeyValueBlobReader::KeyValue &kv)
{
    return os << "(key: " << kv.key << ", value: " << kv.value << ")";
}
|
||||
|
||||
} // namespace vChewing
|
|
@ -1,107 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
|
||||
#define SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <string_view>
|
||||
|
||||
// A reader for text-based, blank-separated key-value pairs in a binary blob.
|
||||
//
|
||||
// This reader is suitable for reading language model files that entirely
|
||||
// consist of key-value pairs. Leading or trailing spaces are ignored.
|
||||
// Lines that start with "#" are treated as comments. Values cannot contain
|
||||
// spaces. Any space after the value string is parsed is ignored. This implies
|
||||
// that after a blank, anything that comes after the value can be used as
|
||||
// comment. Both ' ' and '\t' are treated as blank characters, and the parser
|
||||
// is agnostic to how lines are ended, and so LF, CR LF, and CR are all valid
|
||||
// line endings.
|
||||
//
|
||||
// std::string_view is used to allow returning results efficiently. As a result,
|
||||
// the blob is a const char* and will never be mutated. This implies, for
|
||||
// example, read-only mmap can be used to parse large files.
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
// Pull-style reader over a text blob of blank-separated key-value pairs.
// The reader never copies or mutates the blob; results are views into it.
class KeyValueBlobReader
{
  public:
    // Outcome of a call to Next().
    enum class State : int
    {
        // An error was encountered and parsing has stopped.
        ERROR = -1,
        // There are no more key-value pairs in this blob.
        END = 0,
        // The reader has produced a new key-value pair.
        HAS_PAIR = 1,
        // Internal-only state: the parser can continue parsing.
        CAN_CONTINUE = 2
    };

    // One parsed pair. Both members are views, so they are only valid while
    // the storage they point into is alive.
    struct KeyValue
    {
        // An empty pair: both views refer to the empty string literal.
        constexpr KeyValue() : key(""), value("") {}

        constexpr KeyValue(std::string_view k, std::string_view v) : key(k), value(v) {}

        // Two pairs are equal when both key and value compare equal.
        bool operator==(const KeyValue &another) const
        {
            if (key != another.key)
            {
                return false;
            }
            return value == another.value;
        }

        std::string_view key;
        std::string_view value;
    };

    // Creates a reader over the `size` bytes starting at `blob`.
    KeyValueBlobReader(const char *blob, size_t size) : current_(blob), end_(blob + size) {}

    // Parses the next key-value pair and returns the state of the reader. If
    // `out` is passed, it is set to the produced key-value pair when there is
    // one.
    State Next(KeyValue *out = nullptr);

  private:
    // Advance until `f` accepts the current character.
    State SkipUntil(const std::function<bool(char)> &f);
    // Advance until `f` rejects the current character.
    State SkipUntilNot(const std::function<bool(char)> &f);

    // Current read position inside the blob.
    const char *current_;
    // One past the last byte of the blob.
    const char *end_;
    // Result of the most recent parsing step.
    State state_ = State::CAN_CONTINUE;
};
|
||||
|
||||
std::ostream &operator<<(std::ostream &, const KeyValueBlobReader::KeyValue &);
|
||||
|
||||
} // namespace vChewing
|
||||
|
||||
#endif // SOURCE_ENGINE_KEYVALUEBLOBREADER_H_
|
|
@ -47,7 +47,7 @@ extension NSString {
|
|||
return (string.count, string)
|
||||
}
|
||||
|
||||
@objc public func nextUtf16Position(for index: Int) -> Int {
|
||||
public func nextUtf16Position(for index: Int) -> Int {
|
||||
var (fixedIndex, string) = characterIndex(from: index)
|
||||
if fixedIndex < string.count {
|
||||
fixedIndex += 1
|
||||
|
@ -55,7 +55,7 @@ extension NSString {
|
|||
return string[..<string.index(string.startIndex, offsetBy: fixedIndex)].utf16.count
|
||||
}
|
||||
|
||||
@objc public func previousUtf16Position(for index: Int) -> Int {
|
||||
public func previousUtf16Position(for index: Int) -> Int {
|
||||
var (fixedIndex, string) = characterIndex(from: index)
|
||||
if fixedIndex > 0 {
|
||||
fixedIndex -= 1
|
||||
|
@ -63,11 +63,11 @@ extension NSString {
|
|||
return string[..<string.index(string.startIndex, offsetBy: fixedIndex)].utf16.count
|
||||
}
|
||||
|
||||
@objc public var count: Int {
|
||||
public var count: Int {
|
||||
(self as String).count
|
||||
}
|
||||
|
||||
@objc public func split() -> [NSString] {
|
||||
public func split() -> [NSString] {
|
||||
Array(self as String).map {
|
||||
NSString(string: String($0))
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ extension String {
|
|||
}
|
||||
|
||||
class vChewingKanjiConverter: NSObject {
|
||||
@objc class func cnvTradToKangXi(_ strObj: String) -> String {
|
||||
class func cnvTradToKangXi(_ strObj: String) -> String {
|
||||
var strObj = strObj
|
||||
strObj.selfReplace("偽", "僞")
|
||||
strObj.selfReplace("啟", "啓")
|
||||
|
@ -217,7 +217,7 @@ class vChewingKanjiConverter: NSObject {
|
|||
return strObj
|
||||
}
|
||||
|
||||
@objc class func cnvTradToJIS(_ strObj: String) -> String {
|
||||
class func cnvTradToJIS(_ strObj: String) -> String {
|
||||
// 該轉換是由康熙繁體轉換至日語當用漢字的,所以需要先跑一遍康熙轉換。
|
||||
var strObj = cnvTradToKangXi(strObj)
|
||||
strObj.selfReplace("兩", "両")
|
||||
|
|
|
@ -37,7 +37,7 @@ public class FSEventStreamHelper: NSObject {
|
|||
var id: FSEventStreamEventId
|
||||
}
|
||||
|
||||
public let path: String
|
||||
public var path: String
|
||||
public let dispatchQueue: DispatchQueue
|
||||
public weak var delegate: FSEventStreamHelperDelegate?
|
||||
|
||||
|
|
|
@ -1,176 +0,0 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "LMConsolidator.h"
#include "vChewing-Swift.h"

#include <unordered_set>
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
constexpr std::string_view FORMATTED_PRAGMA_HEADER =
|
||||
"# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍";
|
||||
|
||||
// HEADER VERIFIER. CREDIT: Shiki Suen
|
||||
bool LMConsolidator::CheckPragma(const char *path)
|
||||
{
|
||||
ifstream zfdCheckPragma(path);
|
||||
if (zfdCheckPragma.good())
|
||||
{
|
||||
string firstLine;
|
||||
getline(zfdCheckPragma, firstLine);
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str());
|
||||
if (firstLine != FORMATTED_PRAGMA_HEADER)
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL.");
|
||||
return true;
|
||||
}
|
||||
|
||||
// EOF FIXER. CREDIT: Shiki Suen.
|
||||
bool LMConsolidator::FixEOF(const char *path)
|
||||
{
|
||||
std::fstream zfdEOFFixerIncomingStream(path);
|
||||
zfdEOFFixerIncomingStream.seekg(-1, std::ios_base::end);
|
||||
char z;
|
||||
zfdEOFFixerIncomingStream.get(z);
|
||||
if (z != '\n')
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n");
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n");
|
||||
std::ofstream zfdEOFFixerOutput(path, std::ios_base::app);
|
||||
zfdEOFFixerOutput << std::endl;
|
||||
zfdEOFFixerOutput.close();
|
||||
if (zfdEOFFixerOutput.fail())
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. Insufficient Privileges?\n");
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
zfdEOFFixerIncomingStream.close();
|
||||
if (zfdEOFFixerIncomingStream.fail())
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS,
|
||||
"// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n");
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
} // END: EOF FIXER.
|
||||
|
||||
// CONTENT CONSOLIDATOR. CREDIT: Shiki Suen.
|
||||
bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma)
|
||||
{
|
||||
bool pragmaCheckResult = LMConsolidator::CheckPragma(path);
|
||||
if (pragmaCheckResult && shouldCheckPragma)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
ifstream zfdContentConsolidatorIncomingStream(path);
|
||||
vector<string> vecEntry;
|
||||
while (!zfdContentConsolidatorIncomingStream.eof())
|
||||
{ // Xcode 13 能用的 ObjCpp 與 Cpp 並無原生支援「\h」這個 Regex 參數的能力,只能逐行處理。
|
||||
string zfdBuffer;
|
||||
getline(zfdContentConsolidatorIncomingStream, zfdBuffer);
|
||||
vecEntry.push_back(zfdBuffer);
|
||||
}
|
||||
// 第一遍 for 用來統整每行內的內容。
|
||||
// regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"),
|
||||
// sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp /
|
||||
// objCpp 可能對某些 Regex 寫法有相容性問題。 regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "),
|
||||
// sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。
|
||||
regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)");
|
||||
for (int i = 0; i < vecEntry.size(); i++)
|
||||
{ // 第一遍 for 用來統整每行內的內容。
|
||||
if (vecEntry[i].size() != 0)
|
||||
{ // 不要理會空行,否則給空行加上 endl 等於再加空行。
|
||||
// RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab
|
||||
// 等),最後去除每行首尾空格。 vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); //
|
||||
// 中日韓全形空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, "
|
||||
// ").c_str(); // Non-Break 型空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedWhiteSpace,
|
||||
// " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。 vecEntry[i] =
|
||||
// regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。 vecEntry[i] =
|
||||
// regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。
|
||||
// 上述命令分步驟執行容易產生效能問題,故濃縮為下述兩句。
|
||||
vecEntry[i] = regex_replace(vecEntry[i], sedToConsolidate, " ").c_str();
|
||||
vecEntry[i] = regex_replace(vecEntry[i], sedToTrim, "").c_str();
|
||||
}
|
||||
}
|
||||
// 在第二遍 for 運算之前,針對 vecEntry 去除重複條目。
|
||||
std::reverse(vecEntry.begin(), vecEntry.end()); // 先首尾顛倒,免得破壞最新的 override 資訊。
|
||||
vecEntry.erase(unique(vecEntry.begin(), vecEntry.end()), vecEntry.end()); // 去重複。
|
||||
std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。
|
||||
// 統整完畢。開始將統整過的內容寫入檔案。
|
||||
ofstream zfdContentConsolidatorOutput(path); // 這裡是要從頭開始重寫檔案內容,所以不需要「 ios_base::app 」。
|
||||
if (!pragmaCheckResult)
|
||||
{
|
||||
zfdContentConsolidatorOutput << FORMATTED_PRAGMA_HEADER << endl; // 寫入經過整理處理的 HEADER。
|
||||
}
|
||||
for (int i = 0; i < vecEntry.size(); i++)
|
||||
{ // 第二遍 for 用來寫入統整過的內容。
|
||||
if (vecEntry[i].size() != 0)
|
||||
{ // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。
|
||||
zfdContentConsolidatorOutput << vecEntry[i]
|
||||
<< endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。
|
||||
}
|
||||
}
|
||||
zfdContentConsolidatorOutput.close();
|
||||
if (zfdContentConsolidatorOutput.fail())
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS,
|
||||
"// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n");
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||
return false;
|
||||
}
|
||||
zfdContentConsolidatorIncomingStream.close();
|
||||
if (zfdContentConsolidatorIncomingStream.fail())
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. "
|
||||
"Insufficient Privileges?\n");
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "// DATA FILE: %s", path);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
} // END: CONTENT CONSOLIDATOR.
|
||||
|
||||
} // namespace vChewing
|
|
@ -25,13 +25,16 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
import Carbon
|
||||
import Cocoa
|
||||
|
||||
// The namespace of this input method.
|
||||
public enum vChewing {}
|
||||
|
||||
public class IME: NSObject {
|
||||
static let arrSupportedLocales = ["en", "zh-Hant", "zh-Hans", "ja"]
|
||||
static let dlgOpenPath = NSOpenPanel()
|
||||
|
||||
// MARK: - 開關判定當前應用究竟是?
|
||||
|
||||
@objc static var areWeUsingOurOwnPhraseEditor: Bool = false
|
||||
static var areWeUsingOurOwnPhraseEditor: Bool = false
|
||||
|
||||
// MARK: - 自 ctlInputMethod 讀取當前輸入法的簡繁體模式
|
||||
|
||||
|
@ -46,7 +49,7 @@ public class IME: NSObject {
|
|||
|
||||
// MARK: - Print debug information to the console.
|
||||
|
||||
@objc static func prtDebugIntel(_ strPrint: String) {
|
||||
static func prtDebugIntel(_ strPrint: String) {
|
||||
if mgrPrefs.isDebugModeEnabled {
|
||||
NSLog("vChewingErrorCallback: %@", strPrint)
|
||||
}
|
||||
|
@ -54,27 +57,29 @@ public class IME: NSObject {
|
|||
|
||||
// MARK: - Tell whether this IME is running with Root privileges.
|
||||
|
||||
@objc static var isSudoMode: Bool {
|
||||
static var isSudoMode: Bool {
|
||||
NSUserName() == "root"
|
||||
}
|
||||
|
||||
// MARK: - Initializing Language Models.
|
||||
|
||||
@objc static func initLangModels(userOnly: Bool) {
|
||||
if !userOnly {
|
||||
mgrLangModel.loadDataModels() // 這句還是不要砍了。
|
||||
}
|
||||
static func initLangModels(userOnly: Bool) {
|
||||
DispatchQueue.global(qos: .userInitiated).async {
|
||||
// mgrLangModel 的 loadUserPhrases 等函數在自動讀取 dataFolderPath 時,
|
||||
// 如果發現自訂目錄不可用,則會自動抹去自訂目錄設定、改採預設目錄。
|
||||
// 所以這裡不需要特別處理。
|
||||
mgrLangModel.loadUserPhrases()
|
||||
mgrLangModel.loadUserPhraseReplacement()
|
||||
mgrLangModel.loadUserAssociatedPhrases()
|
||||
mgrLangModel.loadUserPhraseReplacement()
|
||||
mgrLangModel.loadUserPhrases()
|
||||
}
|
||||
if !userOnly {
|
||||
// mgrLangModel.loadDataModels()
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - System Dark Mode Status Detector.
|
||||
|
||||
@objc static func isDarkMode() -> Bool {
|
||||
static func isDarkMode() -> Bool {
|
||||
if #available(macOS 10.15, *) {
|
||||
let appearanceDescription = NSApplication.shared.effectiveAppearance.debugDescription
|
||||
.lowercased()
|
||||
|
|
|
@ -37,7 +37,6 @@ public class InputSourceHelper: NSObject {
|
|||
TISCreateInputSourceList(nil, true).takeRetainedValue() as! [TISInputSource]
|
||||
}
|
||||
|
||||
@objc(inputSourceForProperty:stringValue:)
|
||||
public static func inputSource(for propertyKey: CFString, stringValue: String)
|
||||
-> TISInputSource?
|
||||
{
|
||||
|
@ -57,12 +56,10 @@ public class InputSourceHelper: NSObject {
|
|||
return nil
|
||||
}
|
||||
|
||||
@objc(inputSourceForInputSourceID:)
|
||||
/// Looks up an input source by its input-source identifier.
/// - Parameter sourceID: The identifier to match against `kTISPropertyInputSourceID`.
/// - Returns: Whatever `inputSource(for:stringValue:)` returns for that property and value.
public static func inputSource(for sourceID: String) -> TISInputSource? {
  let matchedSource = inputSource(for: kTISPropertyInputSourceID, stringValue: sourceID)
  return matchedSource
}
|
||||
|
||||
@objc(inputSourceEnabled:)
|
||||
public static func inputSourceEnabled(for source: TISInputSource) -> Bool {
|
||||
if let valuePts = TISGetInputSourceProperty(source, kTISPropertyInputSourceIsEnabled) {
|
||||
let value = Unmanaged<CFBoolean>.fromOpaque(valuePts).takeUnretainedValue()
|
||||
|
@ -71,13 +68,11 @@ public class InputSourceHelper: NSObject {
|
|||
return false
|
||||
}
|
||||
|
||||
@objc(enableInputSource:)
|
||||
/// Asks the Text Input Sources service to enable the given input source.
/// - Parameter inputSource: The input source to enable.
/// - Returns: `true` when `TISEnableInputSource` reports `noErr`.
public static func enable(inputSource: TISInputSource) -> Bool {
  TISEnableInputSource(inputSource) == noErr
}
|
||||
|
||||
@objc(enableAllInputModesForInputSourceBundleID:)
|
||||
public static func enableAllInputMode(for inputSourceBundleD: String) -> Bool {
|
||||
var enabled = false
|
||||
for source in allInstalledInputSources() {
|
||||
|
@ -99,7 +94,6 @@ public class InputSourceHelper: NSObject {
|
|||
return enabled
|
||||
}
|
||||
|
||||
@objc(enableInputMode:forInputSourceBundleID:)
|
||||
public static func enable(inputMode modeID: String, for bundleID: String) -> Bool {
|
||||
for source in allInstalledInputSources() {
|
||||
guard let bundleIDPtr = TISGetInputSourceProperty(source, kTISPropertyBundleID),
|
||||
|
@ -122,13 +116,11 @@ public class InputSourceHelper: NSObject {
|
|||
return false
|
||||
}
|
||||
|
||||
@objc(disableInputSource:)
|
||||
/// Asks the Text Input Sources API to disable the given input source.
/// - Parameter inputSource: The input source to disable.
/// - Returns: `true` when `TISDisableInputSource` reports `noErr`.
public static func disable(inputSource: TISInputSource) -> Bool {
  TISDisableInputSource(inputSource) == noErr
}
|
||||
|
||||
@objc(registerInputSource:)
|
||||
public static func registerTnputSource(at url: URL) -> Bool {
|
||||
let status = TISRegisterInputSource(url as CFURL)
|
||||
return status == noErr
|
||||
|
|
|
@ -38,10 +38,6 @@ extension ctlCandidate {
|
|||
|
||||
@objc(ctlInputMethod)
|
||||
class ctlInputMethod: IMKInputController {
|
||||
@objc static let kIMEModeCHS = "org.atelierInmu.inputmethod.vChewing.IMECHS"
|
||||
@objc static let kIMEModeCHT = "org.atelierInmu.inputmethod.vChewing.IMECHT"
|
||||
@objc static let kIMEModeNULL = "org.atelierInmu.inputmethod.vChewing.IMENULL"
|
||||
|
||||
@objc static var areWeDeleting = false
|
||||
|
||||
private static let tooltipController = TooltipController()
|
||||
|
@ -97,7 +93,7 @@ class ctlInputMethod: IMKInputController {
|
|||
currentClient = client
|
||||
|
||||
keyHandler.clear()
|
||||
keyHandler.ensurePhoneticParser()
|
||||
Composer.ensureParser()
|
||||
if let bundleCheckID = (client as? IMKTextInput)?.bundleIdentifier() {
|
||||
if bundleCheckID != Bundle.main.bundleIdentifier {
|
||||
// Override the keyboard layout to the basic one.
|
||||
|
@ -116,7 +112,7 @@ class ctlInputMethod: IMKInputController {
|
|||
}
|
||||
|
||||
override func setValue(_ value: Any!, forTag _: Int, client: Any!) {
|
||||
var newInputMode = InputMode(rawValue: value as? String ?? InputMode.imeModeNULL.rawValue)
|
||||
var newInputMode = InputMode(rawValue: value as? String ?? "") ?? InputMode.imeModeNULL
|
||||
switch newInputMode {
|
||||
case InputMode.imeModeCHS:
|
||||
newInputMode = InputMode.imeModeCHS
|
||||
|
|
|
@ -86,13 +86,15 @@ extension ctlInputMethod {
|
|||
halfWidthPunctuationItem.keyEquivalentModifierMask = [.command, .control]
|
||||
halfWidthPunctuationItem.state = mgrPrefs.halfWidthPunctuationEnabled.state
|
||||
|
||||
if optionKeyPressed {
|
||||
if optionKeyPressed || mgrPrefs.phraseReplacementEnabled {
|
||||
let phaseReplacementItem = menu.addItem(
|
||||
withTitle: NSLocalizedString("Use Phrase Replacement", comment: ""),
|
||||
action: #selector(togglePhraseReplacement(_:)), keyEquivalent: ""
|
||||
)
|
||||
phaseReplacementItem.state = mgrPrefs.phraseReplacementEnabled.state
|
||||
}
|
||||
|
||||
if optionKeyPressed {
|
||||
let toggleSymbolInputItem = menu.addItem(
|
||||
withTitle: NSLocalizedString("Symbol & Emoji Input", comment: ""),
|
||||
action: #selector(toggleSymbolEnabled(_:)), keyEquivalent: ""
|
||||
|
@ -345,8 +347,7 @@ extension ctlInputMethod {
|
|||
}
|
||||
|
||||
@objc func reloadUserPhrases(_: Any?) {
|
||||
mgrLangModel.loadUserPhrases()
|
||||
mgrLangModel.loadUserPhraseReplacement()
|
||||
IME.initLangModels(userOnly: true)
|
||||
}
|
||||
|
||||
@objc func showAbout(_: Any?) {
|
||||
|
|
|
@ -80,7 +80,7 @@ private let kDefaultKeys = "123456789"
|
|||
|
||||
// MARK: - UserDefaults extension.
|
||||
|
||||
@objc extension UserDefaults {
|
||||
extension UserDefaults {
|
||||
func setDefault(_ value: Any?, forKey defaultName: String) {
|
||||
if object(forKey: defaultName) == nil {
|
||||
set(value, forKey: defaultName)
|
||||
|
@ -236,7 +236,7 @@ public class mgrPrefs: NSObject {
|
|||
|
||||
// MARK: - 既然 Preferences Module 的預設屬性不自動寫入 plist,那這邊就先寫入了。
|
||||
|
||||
@objc public static func setMissingDefaults() {
|
||||
public static func setMissingDefaults() {
|
||||
UserDefaults.standard.setDefault(mgrPrefs.isDebugModeEnabled, forKey: UserDef.kIsDebugModeEnabled)
|
||||
UserDefaults.standard.setDefault(mgrPrefs.mostRecentInputMode, forKey: UserDef.kMostRecentInputMode)
|
||||
UserDefaults.standard.setDefault(mgrPrefs.checkUpdateAutomatically, forKey: UserDef.kCheckUpdateAutomatically)
|
||||
|
@ -278,90 +278,90 @@ public class mgrPrefs: NSObject {
|
|||
}
|
||||
|
||||
@UserDefault(key: UserDef.kIsDebugModeEnabled, defaultValue: false)
|
||||
@objc static var isDebugModeEnabled: Bool
|
||||
static var isDebugModeEnabled: Bool
|
||||
|
||||
@UserDefault(key: UserDef.kMostRecentInputMode, defaultValue: "")
|
||||
@objc static var mostRecentInputMode: String
|
||||
static var mostRecentInputMode: String
|
||||
|
||||
@UserDefault(key: UserDef.kCheckUpdateAutomatically, defaultValue: false)
|
||||
@objc static var checkUpdateAutomatically: Bool
|
||||
static var checkUpdateAutomatically: Bool
|
||||
|
||||
@UserDefault(key: UserDef.kUserDataFolderSpecified, defaultValue: "")
|
||||
@objc static var userDataFolderSpecified: String
|
||||
static var userDataFolderSpecified: String
|
||||
|
||||
@objc static func ifSpecifiedUserDataPathExistsInPlist() -> Bool {
|
||||
/// Tells whether the standard user defaults hold any object under `UserDef.kUserDataFolderSpecified`.
/// - Returns: `true` when an object is stored for that key, `false` otherwise.
static func ifSpecifiedUserDataPathExistsInPlist() -> Bool {
  let storedFolder = UserDefaults.standard.object(forKey: UserDef.kUserDataFolderSpecified)
  return storedFolder != nil
}
|
||||
|
||||
@objc static func resetSpecifiedUserDataFolder() {
|
||||
/// Removes the stored user-data-folder entry from the standard user defaults,
/// then re-initializes the user-only language models.
static func resetSpecifiedUserDataFolder() {
  let defaults = UserDefaults.standard
  defaults.removeObject(forKey: "UserDataFolderSpecified")
  // Reload with `userOnly: true` once the entry is gone.
  IME.initLangModels(userOnly: true)
}
|
||||
|
||||
@UserDefault(key: UserDef.kAppleLanguages, defaultValue: [])
|
||||
@objc static var appleLanguages: [String]
|
||||
static var appleLanguages: [String]
|
||||
|
||||
@UserDefault(key: UserDef.kMandarinParser, defaultValue: 0)
|
||||
@objc static var mandarinParser: Int
|
||||
|
||||
@objc static var mandarinParserName: String {
|
||||
/// The `name` of the parser selected by `mandarinParser`.
///
/// Falls back to `MandarinParser.ofStandard` when the stored raw value does not map to a parser.
static var mandarinParserName: String {
  let selectedParser = MandarinParser(rawValue: mandarinParser) ?? MandarinParser.ofStandard
  return selectedParser.name
}
|
||||
|
||||
@UserDefault(
|
||||
key: UserDef.kBasicKeyboardLayout, defaultValue: "com.apple.keylayout.ZhuyinBopomofo"
|
||||
)
|
||||
@objc static var basicKeyboardLayout: String
|
||||
static var basicKeyboardLayout: String
|
||||
|
||||
@UserDefault(key: UserDef.kShowPageButtonsInCandidateWindow, defaultValue: true)
|
||||
@objc static var showPageButtonsInCandidateWindow: Bool
|
||||
static var showPageButtonsInCandidateWindow: Bool
|
||||
|
||||
@CandidateListTextSize(key: UserDef.kCandidateListTextSize)
|
||||
@objc static var candidateListTextSize: CGFloat
|
||||
static var candidateListTextSize: CGFloat
|
||||
|
||||
@UserDefault(key: UserDef.kShouldAutoReloadUserDataFiles, defaultValue: true)
|
||||
@objc static var shouldAutoReloadUserDataFiles: Bool
|
||||
static var shouldAutoReloadUserDataFiles: Bool
|
||||
|
||||
@UserDefault(key: UserDef.kSetRearCursorMode, defaultValue: false)
|
||||
@objc static var setRearCursorMode: Bool
|
||||
static var setRearCursorMode: Bool
|
||||
|
||||
@UserDefault(key: UserDef.kMoveCursorAfterSelectingCandidate, defaultValue: true)
|
||||
@objc static var moveCursorAfterSelectingCandidate: Bool
|
||||
static var moveCursorAfterSelectingCandidate: Bool
|
||||
|
||||
@UserDefault(key: UserDef.kUseHorizontalCandidateList, defaultValue: true)
|
||||
@objc static var useHorizontalCandidateList: Bool
|
||||
static var useHorizontalCandidateList: Bool
|
||||
|
||||
@ComposingBufferSize(key: UserDef.kComposingBufferSize)
|
||||
@objc static var composingBufferSize: Int
|
||||
static var composingBufferSize: Int
|
||||
|
||||
@UserDefault(key: UserDef.kChooseCandidateUsingSpace, defaultValue: true)
|
||||
@objc static var chooseCandidateUsingSpace: Bool
|
||||
static var chooseCandidateUsingSpace: Bool
|
||||
|
||||
@UserDefault(key: UserDef.kUseSCPCTypingMode, defaultValue: false)
|
||||
@objc static var useSCPCTypingMode: Bool
|
||||
static var useSCPCTypingMode: Bool
|
||||
|
||||
@objc static func toggleSCPCTypingModeEnabled() -> Bool {
|
||||
/// Flips `useSCPCTypingMode`, writes the new value to the standard user defaults
/// under `UserDef.kUseSCPCTypingMode`, and returns it.
/// - Returns: The value of `useSCPCTypingMode` after the flip.
static func toggleSCPCTypingModeEnabled() -> Bool {
  useSCPCTypingMode.toggle()
  let updatedValue = useSCPCTypingMode
  UserDefaults.standard.set(updatedValue, forKey: UserDef.kUseSCPCTypingMode)
  return updatedValue
}
|
||||
|
||||
@UserDefault(key: UserDef.kMaxCandidateLength, defaultValue: kDefaultComposingBufferSize * 2)
|
||||
@objc static var maxCandidateLength: Int
|
||||
static var maxCandidateLength: Int
|
||||
|
||||
@UserDefault(key: UserDef.kShouldNotFartInLieuOfBeep, defaultValue: true)
|
||||
@objc static var shouldNotFartInLieuOfBeep: Bool
|
||||
static var shouldNotFartInLieuOfBeep: Bool
|
||||
|
||||
@objc static func toggleShouldNotFartInLieuOfBeep() -> Bool {
|
||||
/// Flips `shouldNotFartInLieuOfBeep`, writes the new value to the standard user defaults
/// under `UserDef.kShouldNotFartInLieuOfBeep`, and returns it.
/// - Returns: The value of `shouldNotFartInLieuOfBeep` after the flip.
static func toggleShouldNotFartInLieuOfBeep() -> Bool {
  shouldNotFartInLieuOfBeep.toggle()
  let updatedValue = shouldNotFartInLieuOfBeep
  UserDefaults.standard.set(updatedValue, forKey: UserDef.kShouldNotFartInLieuOfBeep)
  return updatedValue
}
|
||||
|
||||
@UserDefault(key: UserDef.kCNS11643Enabled, defaultValue: false)
|
||||
@objc static var cns11643Enabled: Bool
|
||||
static var cns11643Enabled: Bool
|
||||
|
||||
@objc static func toggleCNS11643Enabled() -> Bool {
|
||||
static func toggleCNS11643Enabled() -> Bool {
|
||||
cns11643Enabled = !cns11643Enabled
|
||||
mgrLangModel.setCNSEnabled(cns11643Enabled) // 很重要
|
||||
UserDefaults.standard.set(cns11643Enabled, forKey: UserDef.kCNS11643Enabled)
|
||||
|
@ -369,9 +369,9 @@ public class mgrPrefs: NSObject {
|
|||
}
|
||||
|
||||
@UserDefault(key: UserDef.kSymbolInputEnabled, defaultValue: true)
|
||||
@objc static var symbolInputEnabled: Bool
|
||||
static var symbolInputEnabled: Bool
|
||||
|
||||
@objc static func toggleSymbolInputEnabled() -> Bool {
|
||||
static func toggleSymbolInputEnabled() -> Bool {
|
||||
symbolInputEnabled = !symbolInputEnabled
|
||||
mgrLangModel.setSymbolEnabled(symbolInputEnabled) // 很重要
|
||||
UserDefaults.standard.set(symbolInputEnabled, forKey: UserDef.kSymbolInputEnabled)
|
||||
|
@ -379,9 +379,9 @@ public class mgrPrefs: NSObject {
|
|||
}
|
||||
|
||||
@UserDefault(key: UserDef.kChineseConversionEnabled, defaultValue: false)
|
||||
@objc static var chineseConversionEnabled: Bool
|
||||
static var chineseConversionEnabled: Bool
|
||||
|
||||
@objc @discardableResult static func toggleChineseConversionEnabled() -> Bool {
|
||||
@discardableResult static func toggleChineseConversionEnabled() -> Bool {
|
||||
chineseConversionEnabled = !chineseConversionEnabled
|
||||
// 康熙轉換與 JIS 轉換不能同時開啟,否則會出現某些奇奇怪怪的情況
|
||||
if chineseConversionEnabled, shiftJISShinjitaiOutputEnabled {
|
||||
|
@ -395,9 +395,9 @@ public class mgrPrefs: NSObject {
|
|||
}
|
||||
|
||||
@UserDefault(key: UserDef.kShiftJISShinjitaiOutputEnabled, defaultValue: false)
|
||||
@objc static var shiftJISShinjitaiOutputEnabled: Bool
|
||||
static var shiftJISShinjitaiOutputEnabled: Bool
|
||||
|
||||
@objc @discardableResult static func toggleShiftJISShinjitaiOutputEnabled() -> Bool {
|
||||
@discardableResult static func toggleShiftJISShinjitaiOutputEnabled() -> Bool {
|
||||
shiftJISShinjitaiOutputEnabled = !shiftJISShinjitaiOutputEnabled
|
||||
// 康熙轉換與 JIS 轉換不能同時開啟,否則會出現某些奇奇怪怪的情況
|
||||
if shiftJISShinjitaiOutputEnabled, chineseConversionEnabled {
|
||||
|
@ -410,42 +410,42 @@ public class mgrPrefs: NSObject {
|
|||
}
|
||||
|
||||
@UserDefault(key: UserDef.kHalfWidthPunctuationEnabled, defaultValue: false)
|
||||
@objc static var halfWidthPunctuationEnabled: Bool
|
||||
static var halfWidthPunctuationEnabled: Bool
|
||||
|
||||
@objc static func toggleHalfWidthPunctuationEnabled() -> Bool {
|
||||
/// Flips `halfWidthPunctuationEnabled` and returns the new value.
/// - Returns: The value of `halfWidthPunctuationEnabled` after the flip.
static func toggleHalfWidthPunctuationEnabled() -> Bool {
  halfWidthPunctuationEnabled.toggle()
  return halfWidthPunctuationEnabled
}
|
||||
|
||||
@UserDefault(key: UserDef.kEscToCleanInputBuffer, defaultValue: true)
|
||||
@objc static var escToCleanInputBuffer: Bool
|
||||
static var escToCleanInputBuffer: Bool
|
||||
|
||||
@UserDefault(key: UserDef.kSpecifyShiftTabKeyBehavior, defaultValue: false)
|
||||
@objc static var specifyShiftTabKeyBehavior: Bool
|
||||
static var specifyShiftTabKeyBehavior: Bool
|
||||
|
||||
@UserDefault(key: UserDef.kSpecifyShiftSpaceKeyBehavior, defaultValue: false)
|
||||
@objc static var specifyShiftSpaceKeyBehavior: Bool
|
||||
static var specifyShiftSpaceKeyBehavior: Bool
|
||||
|
||||
// MARK: - Optional settings
|
||||
|
||||
@UserDefault(key: UserDef.kCandidateTextFontName, defaultValue: nil)
|
||||
@objc static var candidateTextFontName: String?
|
||||
static var candidateTextFontName: String?
|
||||
|
||||
@UserDefault(key: UserDef.kCandidateKeyLabelFontName, defaultValue: nil)
|
||||
@objc static var candidateKeyLabelFontName: String?
|
||||
static var candidateKeyLabelFontName: String?
|
||||
|
||||
@UserDefault(key: UserDef.kCandidateKeys, defaultValue: kDefaultKeys)
|
||||
@objc static var candidateKeys: String
|
||||
static var candidateKeys: String
|
||||
|
||||
@objc static var defaultCandidateKeys: String {
|
||||
/// The default candidate-key string, i.e. the file-level `kDefaultKeys` constant.
static var defaultCandidateKeys: String {
  return kDefaultKeys
}
|
||||
|
||||
@objc static var suggestedCandidateKeys: [String] {
|
||||
/// Candidate-key strings offered as suggestions: the default set first, followed by five alternatives.
static var suggestedCandidateKeys: [String] {
  let alternativeSets = ["234567890", "QWERTYUIO", "QWERTASDF", "ASDFGHJKL", "ASDFZXCVB"]
  return [kDefaultKeys] + alternativeSets
}
|
||||
|
||||
@objc static func validate(candidateKeys: String) throws {
|
||||
static func validate(candidateKeys: String) throws {
|
||||
let trimmed = candidateKeys.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
if trimmed.isEmpty {
|
||||
throw CandidateKeyError.empty
|
||||
|
@ -500,9 +500,9 @@ public class mgrPrefs: NSObject {
|
|||
}
|
||||
|
||||
@UserDefault(key: UserDef.kPhraseReplacementEnabled, defaultValue: false)
|
||||
@objc static var phraseReplacementEnabled: Bool
|
||||
static var phraseReplacementEnabled: Bool
|
||||
|
||||
@objc static func togglePhraseReplacementEnabled() -> Bool {
|
||||
static func togglePhraseReplacementEnabled() -> Bool {
|
||||
phraseReplacementEnabled = !phraseReplacementEnabled
|
||||
mgrLangModel.setPhraseReplacementEnabled(phraseReplacementEnabled)
|
||||
UserDefaults.standard.set(phraseReplacementEnabled, forKey: UserDef.kPhraseReplacementEnabled)
|
||||
|
@ -510,9 +510,9 @@ public class mgrPrefs: NSObject {
|
|||
}
|
||||
|
||||
@UserDefault(key: UserDef.kAssociatedPhrasesEnabled, defaultValue: false)
|
||||
@objc static var associatedPhrasesEnabled: Bool
|
||||
static var associatedPhrasesEnabled: Bool
|
||||
|
||||
@objc static func toggleAssociatedPhrasesEnabled() -> Bool {
|
||||
static func toggleAssociatedPhrasesEnabled() -> Bool {
|
||||
associatedPhrasesEnabled = !associatedPhrasesEnabled
|
||||
UserDefaults.standard.set(associatedPhrasesEnabled, forKey: UserDef.kAssociatedPhrasesEnabled)
|
||||
return associatedPhrasesEnabled
|
||||
|
|
|
@ -0,0 +1,167 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import Foundation
|
||||
|
||||
extension vChewing {
  /// Helpers that verify and normalize the on-disk format of a user language-model data file.
  ///
  /// A file is treated as already consolidated when its first line equals `kPragmaHeader`.
  public enum LMConsolidator {
    /// Marker line expected at the very top of a consolidated file.
    public static let kPragmaHeader = "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍"

    /// Checks whether the first line of the file at `path` is exactly `kPragmaHeader`.
    /// - Parameter path: File-system path of the data file.
    /// - Returns: `true` only when the file can be read and its first line matches the header;
    ///   `false` when the file is missing, unreadable, empty, or starts with anything else.
    public static func checkPragma(path: String) -> Bool {
      guard FileManager.default.fileExists(atPath: path) else {
        IME.prtDebugIntel("Header Verification Failed: File Missing.")
        return false
      }
      // The initializer yields nil for a file that exists but cannot be opened; report that
      // as an access error instead of force-unwrapping and crashing.
      guard let fileHandle = FileHandle(forReadingAtPath: path) else {
        IME.prtDebugIntel("Header Verification Failed: File Access Error.")
        return false
      }
      do {
        let lineReader = try LineReader(file: fileHandle)
        // Only the first line matters, so the loop body always returns on its first pass.
        for strLine in lineReader {
          if strLine != kPragmaHeader {
            IME.prtDebugIntel("Header Mismatch, Starting In-Place Consolidation.")
            return false
          }
          IME.prtDebugIntel("Header Verification Succeeded: \(strLine).")
          return true
        }
      } catch {
        IME.prtDebugIntel("Header Verification Failed: File Access Error.")
        return false
      }
      // Reaching this point means the reader produced no line at all.
      IME.prtDebugIntel("Header Verification Failed: File Empty.")
      return false
    }

    /// Makes sure the file at `path` ends with a line feed, appending one if necessary.
    /// - Parameter path: File-system path of the data file.
    /// - Returns: `true` when the file ends with a line feed after the call; `false` when the file
    ///   is missing, is not readable as UTF-8, or cannot be opened for updating.
    @discardableResult public static func fixEOF(path: String) -> Bool {
      guard FileManager.default.fileExists(atPath: path) else {
        IME.prtDebugIntel("EOF Fix Failed: File Missing at \(path).")
        return false
      }
      do {
        let strIncoming = try String(contentsOf: URL(fileURLWithPath: path), encoding: .utf8)
        // Compare the final byte rather than the final Character: a trailing CRLF forms a single
        // Character, and such a file is already newline-terminated.
        if strIncoming.utf8.last != UInt8(ascii: "\n") {
          IME.prtDebugIntel("EOF Fix Necessity Confirmed, Start Fixing.")
          guard let writeFile = FileHandle(forUpdatingAtPath: path) else {
            IME.prtDebugIntel("EOF Fix Failed: Unable to Open File for Updating at \(path).")
            return false
          }
          defer { writeFile.closeFile() }
          writeFile.seekToEndOfFile()
          writeFile.write(Data("\n".utf8))
        }
      } catch {
        IME.prtDebugIntel("EOF Fix Failed w/ File: \(path)")
        IME.prtDebugIntel("EOF Fix Failed w/ Error: \(error).")
        return false
      }
      IME.prtDebugIntel("EOF Successfully Ensured (with possible autofixes performed).")
      return true
    }

    /// Rewrites the file at `path` in consolidated form: normalized whitespace, one entry per line,
    /// duplicate lines removed (the last occurrence wins), and `kPragmaHeader` as the first line.
    /// - Parameters:
    ///   - path: File-system path of the data file.
    ///   - shouldCheckPragma: When `true`, a file whose first line already is the header is left
    ///     untouched and the call returns `true` immediately.
    /// - Returns: `true` when the file was consolidated or needed no work; `false` when it is
    ///   missing or reading/writing failed.
    @discardableResult public static func consolidate(path: String, pragma shouldCheckPragma: Bool) -> Bool {
      if shouldCheckPragma, checkPragma(path: path) {
        return true
      }
      guard FileManager.default.fileExists(atPath: path) else {
        IME.prtDebugIntel("Consolidation Failed: File Missing at \(path).")
        return false
      }

      let urlPath = URL(fileURLWithPath: path)
      do {
        var strProcessed = try String(contentsOf: urlPath, encoding: .utf8)

        // Step 1: Normalize whitespace.
        // Collapse every run of ASCII space, ideographic space (U+3000), no-break space (U+00A0)
        // and tab into one ASCII space. The code points are spelled as escapes so that the
        // pattern does not depend on invisible characters surviving in the source file.
        strProcessed.regReplace(pattern: #"[ \x{3000}\x{A0}\t]+"#, replaceWith: " ")
        // Drop the space left at the start or end of each line.
        strProcessed.regReplace(pattern: #"(^ | $)"#, replaceWith: "")
        // Turn CR and FF into LF and collapse consecutive line breaks (removes blank lines).
        strProcessed.regReplace(pattern: #"[\f\r\n]+"#, replaceWith: "\n")
        if strProcessed.first == " " {  // Leading space of the whole file.
          strProcessed.removeFirst()
        }
        if strProcessed.last == " " {  // Trailing space of the whole file.
          strProcessed.removeLast()
        }

        // Step 2: Deduplicate.
        // Blank lines and any existing header line are set aside first; the header is put back
        // on top afterwards, so it stays on line 1 even if the same text also occurs further down.
        let contentLines = strProcessed.components(separatedBy: "\n").filter {
          !$0.isEmpty && $0 != kPragmaHeader
        }
        // Deduplicating the reversed list keeps the LAST occurrence of each line, so the most
        // recent override entry is the one that survives.
        let newestFirst = NSOrderedSet(array: contentLines.reversed()).array.compactMap { $0 as? String }

        // Step 3: Put the header on top and terminate the file with exactly one line feed.
        let outputLines = [kPragmaHeader] + Array(newestFirst.reversed())
        strProcessed = outputLines.joined(separator: "\n") + "\n"

        // Step 4: Write the consolidated contents back.
        try strProcessed.write(to: urlPath, atomically: false, encoding: .utf8)
      } catch {
        IME.prtDebugIntel("Consolidation Failed w/ File: \(path)")
        IME.prtDebugIntel("Consolidation Failed w/ Error: \(error).")
        return false
      }
      IME.prtDebugIntel("Either Consolidation Successful Or No-Need-To-Consolidate.")
      return true
    }
  }
}
|
||||
|
||||
// MARK: - String Extension
|
||||
|
||||
extension String {
  /// Replaces, in place, every match of a regular expression in this string.
  ///
  /// Matching is case-insensitive, and `^` / `$` anchor at line boundaries. A pattern that fails
  /// to compile leaves the string unchanged.
  /// - Parameters:
  ///   - pattern: The regular-expression pattern to search for.
  ///   - replaceWith: The replacement template; defaults to the empty string.
  fileprivate mutating func regReplace(pattern: String, replaceWith: String = "") {
    // Ref: https://stackoverflow.com/a/40993403/4162914 && https://stackoverflow.com/a/71291137/4162914
    let matchingOptions: NSRegularExpression.Options = [.caseInsensitive, .anchorsMatchLines]
    guard let compiled = try? NSRegularExpression(pattern: pattern, options: matchingOptions) else {
      return
    }
    let wholeString = NSRange(startIndex..., in: self)
    self = compiled.stringByReplacingMatches(
      in: self, options: [], range: wholeString, withTemplate: replaceWith
    )
  }
}
|
|
@ -1,167 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LMInstantiator_H
|
||||
#define LMInstantiator_H
|
||||
|
||||
#include "AssociatedPhrases.h"
|
||||
#include "CNSLM.h"
|
||||
#include "CoreLM.h"
|
||||
#include "ParselessLM.h"
|
||||
#include "PhraseReplacementMap.h"
|
||||
#include "SymbolLM.h"
|
||||
#include "UserPhrasesLM.h"
|
||||
#include "UserSymbolLM.h"
|
||||
#include <stdio.h>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
using namespace Gramambular;
|
||||
|
||||
/// LMInstantiator is a facade for managing a set of models including
|
||||
/// the input method language model, user phrases and excluded phrases.
|
||||
///
|
||||
/// It is the primary model class that the input controller and grammar builder
|
||||
/// of vChewing talks to. When the grammar builder starts to build a sentence
|
||||
/// from a series of BPMF readings, it passes the readings to the model to see
|
||||
/// if there are valid unigrams, and use returned unigrams to produce the final
|
||||
/// results.
|
||||
///
|
||||
/// LMInstantiator combines and transforms the unigrams from the primary language
|
||||
/// model and user phrases. The process is
|
||||
///
|
||||
/// 1) Get the original unigrams.
|
||||
/// 2) Drop the unigrams whose value is contained in the exclusion map.
|
||||
/// 3) Replace the values of the unigrams using the phrase replacement map.
|
||||
/// 4) Replace the values of the unigrams using an external converter lambda.
|
||||
/// 5) Drop the duplicated phrases.
|
||||
///
|
||||
/// The controller can ask the model to load the primary input method language
|
||||
/// model while launching and to load the user phrases anytime if the custom
|
||||
/// files are modified. It does not keep references to the data paths, so
|
||||
/// you have to pass the paths when you ask it to do loading.
|
||||
class LMInstantiator : public Gramambular::LanguageModel
|
||||
{
|
||||
public:
|
||||
LMInstantiator();
|
||||
~LMInstantiator();
|
||||
|
||||
/// Asks to load the primary language model at the given path.
|
||||
/// @param languageModelPath The path of the language model.
|
||||
void loadLanguageModel(const char *languageModelPath);
|
||||
/// If the data model is already loaded.
|
||||
bool isDataModelLoaded();
|
||||
|
||||
/// Asks to load the misc data model at the given path.
|
||||
/// @param miscDataPath The path of the misc data model.
|
||||
void loadMiscData(const char *miscDataPath);
|
||||
/// If the misc data model is already loaded.
|
||||
bool isMiscDataLoaded();
|
||||
|
||||
/// Asks to load the symbol data model at the given path.
|
||||
/// @param symbolDataPath The path of the symbol data model.
|
||||
void loadSymbolData(const char *symbolDataPath);
|
||||
/// If the symbol data model is already loaded.
|
||||
bool isSymbolDataLoaded();
|
||||
|
||||
/// Asks to load the CNS data model at the given path.
|
||||
/// @param cnsDataPath The path of the CNS data model.
|
||||
void loadCNSData(const char *cnsDataPath);
|
||||
/// If the CNS data model is already loaded.
|
||||
bool isCNSDataLoaded();
|
||||
|
||||
/// Asks to load the user phrases and excluded phrases at the given path.
|
||||
/// @param userPhrasesPath The path of user phrases.
|
||||
/// @param excludedPhrasesPath The path of excluded phrases.
|
||||
void loadUserPhrases(const char *userPhrasesPath, const char *excludedPhrasesPath);
|
||||
/// Asks to load the user symbol data at the given path.
|
||||
/// @param userPhrasesPath The path of user symbol data.
|
||||
void loadUserSymbolData(const char *userPhrasesPath);
|
||||
/// Asks to load the user associated phrases at the given path.
|
||||
/// @param userAssociatedPhrasesPath The path of the user associated phrases.
|
||||
void loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath);
|
||||
/// Asks to load the phrase replacement table at the given path.
|
||||
/// @param phraseReplacementPath The path of the phrase replacement table.
|
||||
void loadPhraseReplacementMap(const char *phraseReplacementPath);
|
||||
|
||||
/// Not implemented since we do not have data to provide bigram function.
|
||||
const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey, const std::string &key);
|
||||
/// Returns a list of available unigram for the given key.
|
||||
/// @param key A std::string representing the BPMF reading or a symbol key. For
|
||||
/// example, if you pass "ㄇㄚ", it returns "嗎", "媽", and so on.
|
||||
const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
|
||||
/// If the model has unigrams for the given key.
|
||||
/// @param key The key.
|
||||
bool hasUnigramsForKey(const std::string &key);
|
||||
|
||||
/// Enables or disables phrase replacement.
|
||||
void setPhraseReplacementEnabled(bool enabled);
|
||||
/// If phrase replacement is enabled or not.
|
||||
bool phraseReplacementEnabled();
|
||||
|
||||
/// Enables or disables symbol input.
|
||||
void setSymbolEnabled(bool enabled);
|
||||
/// If symbol input is enabled or not.
|
||||
bool symbolEnabled();
|
||||
|
||||
/// Enables or disables CNS11643 input.
|
||||
void setCNSEnabled(bool enabled);
|
||||
/// If CNS11643 input is enabled or not.
|
||||
bool cnsEnabled();
|
||||
|
||||
const std::vector<std::string> associatedPhrasesForKey(const std::string &key);
|
||||
bool hasAssociatedPhrasesForKey(const std::string &key);
|
||||
|
||||
protected:
|
||||
/// Filters and converts the input unigrams and return a new list of unigrams.
|
||||
///
|
||||
/// @param unigrams The unigrams to be processed.
|
||||
/// @param excludedValues The values of the unigrams to exclude.
|
||||
/// @param insertedValues The values for unigrams already in the results.
|
||||
/// It helps to prevent duplicated unigrams. Please note that the method
|
||||
/// has a side effect that it inserts values to `insertedValues`.
|
||||
const std::vector<Gramambular::Unigram> filterAndTransformUnigrams(
|
||||
const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
|
||||
std::unordered_set<std::string> &insertedValues);
|
||||
|
||||
ParselessLM m_languageModel;
|
||||
CoreLM m_miscModel;
|
||||
SymbolLM m_symbolModel;
|
||||
CNSLM m_cnsModel;
|
||||
UserPhrasesLM m_userPhrases;
|
||||
UserPhrasesLM m_excludedPhrases;
|
||||
UserSymbolLM m_userSymbolModel;
|
||||
PhraseReplacementMap m_phraseReplacement;
|
||||
AssociatedPhrases m_associatedPhrases;
|
||||
bool m_phraseReplacementEnabled;
|
||||
bool m_cnsEnabled;
|
||||
bool m_symbolEnabled;
|
||||
};
|
||||
}; // namespace vChewing
|
||||
|
||||
#endif
|
|
@ -1,323 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "LMInstantiator.h"
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
/// Constructs an instantiator with phrase replacement, CNS11643 input and symbol input
/// all switched off.
///
/// The three flags are plain `bool` members, so they are given explicit values here instead of
/// being left indeterminate until the matching setter is first called.
// NOTE(review): `false` is chosen as the neutral starting value; confirm that callers configure
// the flags through setPhraseReplacementEnabled / setCNSEnabled / setSymbolEnabled after construction.
LMInstantiator::LMInstantiator()
    : m_phraseReplacementEnabled(false), m_cnsEnabled(false), m_symbolEnabled(false)
{
}
|
||||
|
||||
/// Closes every model owned by this instantiator.
LMInstantiator::~LMInstantiator()
{
    m_languageModel.close();
    m_miscModel.close();
    // The symbol model is opened by loadSymbolData() just like the other models,
    // so it is closed here together with them.
    m_symbolModel.close();
    m_userPhrases.close();
    m_userSymbolModel.close();
    m_cnsModel.close();
    m_excludedPhrases.close();
    m_phraseReplacement.close();
    m_associatedPhrases.close();
}
|
||||
|
||||
void LMInstantiator::loadLanguageModel(const char *languageModelDataPath)
|
||||
{
|
||||
if (languageModelDataPath)
|
||||
{
|
||||
m_languageModel.close();
|
||||
m_languageModel.open(languageModelDataPath);
|
||||
}
|
||||
}
|
||||
|
||||
bool LMInstantiator::isDataModelLoaded()
|
||||
{
|
||||
return m_languageModel.isLoaded();
|
||||
}
|
||||
|
||||
void LMInstantiator::loadCNSData(const char *cnsDataPath)
|
||||
{
|
||||
if (cnsDataPath)
|
||||
{
|
||||
m_cnsModel.close();
|
||||
m_cnsModel.open(cnsDataPath);
|
||||
}
|
||||
}
|
||||
|
||||
bool LMInstantiator::isCNSDataLoaded()
|
||||
{
|
||||
return m_cnsModel.isLoaded();
|
||||
}
|
||||
|
||||
void LMInstantiator::loadMiscData(const char *miscDataPath)
|
||||
{
|
||||
if (miscDataPath)
|
||||
{
|
||||
m_miscModel.close();
|
||||
m_miscModel.open(miscDataPath);
|
||||
}
|
||||
}
|
||||
|
||||
bool LMInstantiator::isMiscDataLoaded()
|
||||
{
|
||||
return m_miscModel.isLoaded();
|
||||
}
|
||||
|
||||
void LMInstantiator::loadSymbolData(const char *symbolDataPath)
|
||||
{
|
||||
if (symbolDataPath)
|
||||
{
|
||||
m_symbolModel.close();
|
||||
m_symbolModel.open(symbolDataPath);
|
||||
}
|
||||
}
|
||||
|
||||
bool LMInstantiator::isSymbolDataLoaded()
|
||||
{
|
||||
return m_symbolModel.isLoaded();
|
||||
}
|
||||
|
||||
void LMInstantiator::loadUserPhrases(const char *userPhrasesDataPath, const char *excludedPhrasesDataPath)
|
||||
{
|
||||
if (userPhrasesDataPath)
|
||||
{
|
||||
m_userPhrases.close();
|
||||
m_userPhrases.open(userPhrasesDataPath);
|
||||
}
|
||||
if (excludedPhrasesDataPath)
|
||||
{
|
||||
m_excludedPhrases.close();
|
||||
m_excludedPhrases.open(excludedPhrasesDataPath);
|
||||
}
|
||||
}
|
||||
|
||||
void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath)
|
||||
{
|
||||
if (userSymbolDataPath)
|
||||
{
|
||||
m_userSymbolModel.close();
|
||||
m_userSymbolModel.open(userSymbolDataPath);
|
||||
}
|
||||
}
|
||||
|
||||
void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath)
|
||||
{
|
||||
if (userAssociatedPhrasesPath)
|
||||
{
|
||||
m_associatedPhrases.close();
|
||||
m_associatedPhrases.open(userAssociatedPhrasesPath);
|
||||
}
|
||||
}
|
||||
|
||||
void LMInstantiator::loadPhraseReplacementMap(const char *phraseReplacementPath)
|
||||
{
|
||||
if (phraseReplacementPath)
|
||||
{
|
||||
m_phraseReplacement.close();
|
||||
m_phraseReplacement.open(phraseReplacementPath);
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<Gramambular::Bigram> LMInstantiator::bigramsForKeys(const std::string &preceedingKey,
|
||||
const std::string &key)
|
||||
{
|
||||
return std::vector<Gramambular::Bigram>();
|
||||
}
|
||||
|
||||
const std::vector<Gramambular::Unigram> LMInstantiator::unigramsForKey(const std::string &key)
|
||||
{
|
||||
if (key == " ")
|
||||
{
|
||||
std::vector<Gramambular::Unigram> spaceUnigrams;
|
||||
Gramambular::Unigram g;
|
||||
g.keyValue.key = " ";
|
||||
g.keyValue.value = " ";
|
||||
g.score = 0;
|
||||
spaceUnigrams.push_back(g);
|
||||
return spaceUnigrams;
|
||||
}
|
||||
|
||||
std::vector<Gramambular::Unigram> allUnigrams;
|
||||
std::vector<Gramambular::Unigram> miscUnigrams;
|
||||
std::vector<Gramambular::Unigram> symbolUnigrams;
|
||||
std::vector<Gramambular::Unigram> userUnigrams;
|
||||
std::vector<Gramambular::Unigram> userSymbolUnigrams;
|
||||
std::vector<Gramambular::Unigram> cnsUnigrams;
|
||||
|
||||
std::unordered_set<std::string> excludedValues;
|
||||
std::unordered_set<std::string> insertedValues;
|
||||
|
||||
if (m_excludedPhrases.hasUnigramsForKey(key))
|
||||
{
|
||||
std::vector<Gramambular::Unigram> excludedUnigrams = m_excludedPhrases.unigramsForKey(key);
|
||||
transform(excludedUnigrams.begin(), excludedUnigrams.end(), inserter(excludedValues, excludedValues.end()),
|
||||
[](const Gramambular::Unigram &u) { return u.keyValue.value; });
|
||||
}
|
||||
|
||||
if (m_userPhrases.hasUnigramsForKey(key))
|
||||
{
|
||||
std::vector<Gramambular::Unigram> rawUserUnigrams = m_userPhrases.unigramsForKey(key);
|
||||
// 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。
|
||||
// 這樣一來就可以在就地新增語彙時徹底複寫優先權。
|
||||
std::reverse(rawUserUnigrams.begin(), rawUserUnigrams.end());
|
||||
userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
if (m_languageModel.hasUnigramsForKey(key))
|
||||
{
|
||||
std::vector<Gramambular::Unigram> rawGlobalUnigrams = m_languageModel.unigramsForKey(key);
|
||||
allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
if (m_miscModel.hasUnigramsForKey(key))
|
||||
{
|
||||
std::vector<Gramambular::Unigram> rawMiscUnigrams = m_miscModel.unigramsForKey(key);
|
||||
miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
|
||||
{
|
||||
std::vector<Gramambular::Unigram> rawSymbolUnigrams = m_symbolModel.unigramsForKey(key);
|
||||
symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled)
|
||||
{
|
||||
std::vector<Gramambular::Unigram> rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key);
|
||||
userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled)
|
||||
{
|
||||
std::vector<Gramambular::Unigram> rawCNSUnigrams = m_cnsModel.unigramsForKey(key);
|
||||
cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues);
|
||||
}
|
||||
|
||||
allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end());
|
||||
allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end());
|
||||
allUnigrams.insert(allUnigrams.begin(), miscUnigrams.begin(), miscUnigrams.end());
|
||||
allUnigrams.insert(allUnigrams.end(), userSymbolUnigrams.begin(), userSymbolUnigrams.end());
|
||||
allUnigrams.insert(allUnigrams.end(), symbolUnigrams.begin(), symbolUnigrams.end());
|
||||
return allUnigrams;
|
||||
}
|
||||
|
||||
bool LMInstantiator::hasUnigramsForKey(const std::string &key)
|
||||
{
|
||||
if (key == " ")
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!m_excludedPhrases.hasUnigramsForKey(key))
|
||||
{
|
||||
return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key);
|
||||
}
|
||||
|
||||
return unigramsForKey(key).size() > 0;
|
||||
}
|
||||
|
||||
void LMInstantiator::setPhraseReplacementEnabled(bool enabled)
|
||||
{
|
||||
m_phraseReplacementEnabled = enabled;
|
||||
}
|
||||
|
||||
bool LMInstantiator::phraseReplacementEnabled()
|
||||
{
|
||||
return m_phraseReplacementEnabled;
|
||||
}
|
||||
|
||||
void LMInstantiator::setCNSEnabled(bool enabled)
|
||||
{
|
||||
m_cnsEnabled = enabled;
|
||||
}
|
||||
|
||||
bool LMInstantiator::cnsEnabled()
|
||||
{
|
||||
return m_cnsEnabled;
|
||||
}
|
||||
|
||||
void LMInstantiator::setSymbolEnabled(bool enabled)
|
||||
{
|
||||
m_symbolEnabled = enabled;
|
||||
}
|
||||
|
||||
bool LMInstantiator::symbolEnabled()
|
||||
{
|
||||
return m_symbolEnabled;
|
||||
}
|
||||
|
||||
const std::vector<Gramambular::Unigram> LMInstantiator::filterAndTransformUnigrams(
|
||||
const std::vector<Gramambular::Unigram> unigrams, const std::unordered_set<std::string> &excludedValues,
|
||||
std::unordered_set<std::string> &insertedValues)
|
||||
{
|
||||
std::vector<Gramambular::Unigram> results;
|
||||
|
||||
for (auto &&unigram : unigrams)
|
||||
{
|
||||
// excludedValues filters out the unigrams with the original value.
|
||||
// insertedValues filters out the ones with the converted value
|
||||
std::string originalValue = unigram.keyValue.value;
|
||||
if (excludedValues.find(originalValue) != excludedValues.end())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string value = originalValue;
|
||||
if (m_phraseReplacementEnabled)
|
||||
{
|
||||
std::string replacement = m_phraseReplacement.valueForKey(value);
|
||||
if (replacement != "")
|
||||
{
|
||||
value = replacement;
|
||||
}
|
||||
}
|
||||
if (insertedValues.find(value) == insertedValues.end())
|
||||
{
|
||||
Gramambular::Unigram g;
|
||||
g.keyValue.value = value;
|
||||
g.keyValue.key = unigram.keyValue.key;
|
||||
g.score = unigram.score;
|
||||
results.push_back(g);
|
||||
insertedValues.insert(value);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
const std::vector<std::string> LMInstantiator::associatedPhrasesForKey(const std::string &key)
|
||||
{
|
||||
return m_associatedPhrases.valuesForKey(key);
|
||||
}
|
||||
|
||||
bool LMInstantiator::hasAssociatedPhrasesForKey(const std::string &key)
|
||||
{
|
||||
return m_associatedPhrases.hasValuesForKey(key);
|
||||
}
|
||||
|
||||
} // namespace vChewing
|
|
@ -0,0 +1,301 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Refactored from the ObjCpp-version of this class by:
|
||||
// (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// NOTE: We still keep some of the comments left by Zonble,
|
||||
// even though he is not in charge of this Swift module.
|
||||
|
||||
import Foundation
|
||||
|
||||
// The Simplified Chinese mode and the Traditional Chinese mode share the CNS extension data, hence it is handled separately here.
|
||||
// If it lived inside LMInstantiator, each mode would load its own copy of the CNS data, costing about 100MB of extra memory.
|
||||
private var lmCNS = vChewing.LMLite(consolidate: false)
|
||||
private var lmSymbols = vChewing.LMCore(reverse: true, consolidate: false, defaultScore: -13.0, forceDefaultScore: true)
|
||||
|
||||
extension vChewing {
  /// LMInstantiator is a facade for managing a set of models including
  /// the input method language model, user phrases and excluded phrases.
  ///
  /// It is the primary model class that the input controller and grammar builder
  /// of vChewing talk to. When the grammar builder starts to build a sentence
  /// from a series of BPMF readings, it passes the readings to the model to see
  /// if there are valid unigrams, and uses the returned unigrams to produce the
  /// final results.
  ///
  /// LMInstantiator combines and transforms the unigrams from the primary
  /// language model and user phrases. The process is
  ///
  /// 1) Get the original unigrams.
  /// 2) Drop the unigrams whose value is contained in the exclusion map.
  /// 3) Replace the values of the unigrams using the phrase replacement map.
  /// 4) Drop the duplicated phrases from the generated unigram array.
  ///
  /// The controller can ask the model to load the primary input method language
  /// model while launching and to load the user phrases anytime if the custom
  /// files are modified. It does not keep the reference of the data paths but
  /// you have to pass the paths when you ask it to load.
  public class LMInstantiator: Megrez.LanguageModel {
    // Switches recording which optional features the lookups should apply.
    public var isPhraseReplacementEnabled = false
    public var isCNSEnabled = false
    public var isSymbolEnabled = false

    /// An overview of the general-purpose language model types:
    /// LMCore is the full-featured one: each dictionary record is stored under a key
    /// and holds a [Unigram] array. It suits models whose records carry individual
    /// scores, although a default score can be forced as well. The drawback is that
    /// forcing the score does not reduce the memory footprint.
    /// For models such as the CNS data, where all records share one score, LMLite can
    /// be used to save memory: it stores no score and applies the score you pass in
    /// each time records are read.
    /// NOTE(review): the original note says both LMLite and LMCore load their data with
    /// multiple threads to speed up loading (about eight seconds otherwise) -- confirm
    /// against their implementations.
    /// LMReplacements and LMAssociates are special-purpose models holding the phrase
    /// replacement table and the user associated phrases respectively.

    // The factory language models.
    /// With `reverse: true`, LMCore stores column one as the value and column two as the key.
    /// With `reverse: false`, column one is the key and column two is the value.
    /// In both cases an optional third column carries the score.
    var lmCore = LMCore(reverse: false, consolidate: false, defaultScore: -9.5, forceDefaultScore: false)
    var lmMisc = LMCore(reverse: true, consolidate: false, defaultScore: -1, forceDefaultScore: false)

    // The user language models.
    // Original note: multithreading in the user language models may cause problems;
    // to be investigated when time permits.
    var lmUserPhrases = LMLite(consolidate: true)
    var lmFiltered = LMLite(consolidate: true)
    var lmUserSymbols = LMLite(consolidate: true)
    var lmReplacements = LMReplacments()
    var lmAssociates = LMAssociates()

    // The initializer is kept for now.
    override init() {}

    // The function names below are kept as they are for now and will be revised
    // once the ongoing rewrite is finished.

    /// Whether the primary language model holds any data.
    public func isDataModelLoaded() -> Bool { lmCore.isLoaded() }

    /// Loads the primary language model from `path` if the file is readable.
    public func loadLanguageModel(path: String) {
      if FileManager.default.isReadableFile(atPath: path) {
        lmCore.open(path)
        IME.prtDebugIntel("lmCore: \(lmCore.count) entries of data loaded from: \(path)")
      } else {
        IME.prtDebugIntel("lmCore: File access failure: \(path)")
      }
    }

    /// Whether the shared CNS model holds any data.
    public func isCNSDataLoaded() -> Bool { lmCNS.isLoaded() }

    /// Loads the shared CNS model from `path` if the file is readable.
    public func loadCNSData(path: String) {
      if FileManager.default.isReadableFile(atPath: path) {
        lmCNS.open(path)
        IME.prtDebugIntel("lmCNS: \(lmCNS.count) entries of data loaded from: \(path)")
      } else {
        IME.prtDebugIntel("lmCNS: File access failure: \(path)")
      }
    }

    /// Whether the misc model holds any data.
    public func isMiscDataLoaded() -> Bool { lmMisc.isLoaded() }

    /// Loads the misc model from `path` if the file is readable.
    public func loadMiscData(path: String) {
      if FileManager.default.isReadableFile(atPath: path) {
        lmMisc.open(path)
        IME.prtDebugIntel("lmMisc: \(lmMisc.count) entries of data loaded from: \(path)")
      } else {
        IME.prtDebugIntel("lmMisc: File access failure: \(path)")
      }
    }

    /// Whether the shared symbol model holds any data.
    public func isSymbolDataLoaded() -> Bool { lmSymbols.isLoaded() }

    /// Loads the shared symbol model from `path` if the file is readable.
    public func loadSymbolData(path: String) {
      if FileManager.default.isReadableFile(atPath: path) {
        lmSymbols.open(path)
        IME.prtDebugIntel("lmSymbol: \(lmSymbols.count) entries of data loaded from: \(path)")
      } else {
        IME.prtDebugIntel("lmSymbols: File access failure: \(path)")
      }
    }

    /// Reloads the user phrases from `path` and the excluded phrases from `filterPath`.
    /// Each file is handled independently; an unreadable file leaves its model untouched.
    public func loadUserPhrases(path: String, filterPath: String) {
      if FileManager.default.isReadableFile(atPath: path) {
        lmUserPhrases.close()
        lmUserPhrases.open(path)
        IME.prtDebugIntel("lmUserPhrases: \(lmUserPhrases.count) entries of data loaded from: \(path)")
      } else {
        IME.prtDebugIntel("lmUserPhrases: File access failure: \(path)")
      }
      if FileManager.default.isReadableFile(atPath: filterPath) {
        lmFiltered.close()
        lmFiltered.open(filterPath)
        // Report the filter file itself rather than the user phrase file.
        IME.prtDebugIntel("lmFiltered: \(lmFiltered.count) entries of data loaded from: \(filterPath)")
      } else {
        IME.prtDebugIntel("lmFiltered: File access failure: \(filterPath)")
      }
    }

    /// Reloads the user symbol model from `path` if the file is readable.
    public func loadUserSymbolData(path: String) {
      if FileManager.default.isReadableFile(atPath: path) {
        lmUserSymbols.close()
        lmUserSymbols.open(path)
        IME.prtDebugIntel("lmUserSymbol: \(lmUserSymbols.count) entries of data loaded from: \(path)")
      } else {
        IME.prtDebugIntel("lmUserSymbol: File access failure: \(path)")
      }
    }

    /// Reloads the user associated phrases from `path` if the file is readable.
    public func loadUserAssociatedPhrases(path: String) {
      if FileManager.default.isReadableFile(atPath: path) {
        lmAssociates.close()
        lmAssociates.open(path)
        IME.prtDebugIntel("lmAssociates: \(lmAssociates.count) entries of data loaded from: \(path)")
      } else {
        IME.prtDebugIntel("lmAssociates: File access failure: \(path)")
      }
    }

    /// Reloads the phrase replacement map from `path` if the file is readable.
    public func loadPhraseReplacementMap(path: String) {
      if FileManager.default.isReadableFile(atPath: path) {
        lmReplacements.close()
        lmReplacements.open(path)
        IME.prtDebugIntel("lmReplacements: \(lmReplacements.count) entries of data loaded from: \(path)")
      } else {
        IME.prtDebugIntel("lmReplacements: File access failure: \(path)")
      }
    }

    // MARK: - Core Functions (Public)

    /// Not implemented since we do not have data to provide bigram function.
    // public func bigramsForKeys(preceedingKey: String, key: String) -> [Megrez.Bigram] { }

    /// Returns a list of available unigrams for the given key.
    /// - Parameter key: The BPMF reading or a symbol key.
    ///   For instance, if you pass "ㄉㄨㄟˇ", it returns "㨃" and other possible candidates.
    /// - Returns: The candidates left after exclusion filtering, phrase replacement
    ///   and de-duplication, in the order the models are consulted below.
    override open func unigramsFor(key: String) -> [Megrez.Unigram] {
      if key == " " {
        /// The space key gets a fixed output.
        let spaceUnigram = Megrez.Unigram(
          keyValue: Megrez.KeyValuePair(key: " ", value: " "),
          score: 0
        )
        return [spaceUnigram]
      }

      /// Collect the raw unigrams of every model into one array.
      var rawAllUnigrams: [Megrez.Unigram] = []

      // The user phrases are reversed so that entries located later in the user
      // phrase file get a higher priority. A phrase added on the fly can then fully
      // override the priority of the earlier ones.
      rawAllUnigrams += lmUserPhrases.unigramsFor(key: key, score: 0.0).reversed()
      if lmUserPhrases.unigramsFor(key: key).isEmpty {
        IME.prtDebugIntel("Not found in UserPhrasesUnigram(\(lmUserPhrases.count)): \(key)")
      }

      // Original note: the scores of LMMisc and LMCore fall within (-10.0, 0.0).
      rawAllUnigrams += lmMisc.unigramsFor(key: key)
      rawAllUnigrams += lmCore.unigramsFor(key: key)

      if isCNSEnabled {
        rawAllUnigrams += lmCNS.unigramsFor(key: key, score: -11)
      }

      if isSymbolEnabled {
        rawAllUnigrams += lmUserSymbols.unigramsFor(key: key, score: -12.0)
        if lmUserSymbols.unigramsFor(key: key).isEmpty {
          IME.prtDebugIntel("Not found in UserSymbolUnigram(\(lmUserSymbols.count)): \(key)")
        }

        rawAllUnigrams += lmSymbols.unigramsFor(key: key)
      }

      // The pairs already emitted (for de-duplication) and the pairs to exclude.
      var insertedPairs: Set<Megrez.KeyValuePair> = []
      let filteredPairs = Set(lmFiltered.unigramsFor(key: key).map { $0.keyValue })

      // Dump the raw candidates for debugging.
      let debugOutput =
        rawAllUnigrams.isEmpty
        ? "RAW: No match found in all unigrams."
        : "\n" + rawAllUnigrams.map { "RAW: \($0.keyValue.key) \($0.keyValue.value) \($0.score)\n" }.joined()
      IME.prtDebugIntel(debugOutput)

      return filterAndTransform(
        unigrams: rawAllUnigrams,
        filter: filteredPairs, inserted: &insertedPairs
      )
    }

    /// Whether the model has unigrams for the given key.
    ///
    /// The space key always counts as available. When nothing is excluded for the
    /// key, only the user phrases and the core model are consulted; otherwise the
    /// full lookup runs so that a key whose candidates are all excluded is reported
    /// as unavailable.
    /// - Parameter key: The key.
    override open func hasUnigramsFor(key: String) -> Bool {
      if key == " " { return true }

      if !lmFiltered.hasUnigramsFor(key: key) {
        return lmUserPhrases.hasUnigramsFor(key: key) || lmCore.hasUnigramsFor(key: key)
      }

      return !unigramsFor(key: key).isEmpty
    }

    /// Returns the associated phrases stored for `key`, or an empty array.
    public func associatedPhrasesForKey(_ key: String) -> [String] {
      lmAssociates.valuesFor(key: key) ?? []
    }

    /// Whether any associated phrase is stored for `key`.
    public func hasAssociatedPhrasesForKey(_ key: String) -> Bool {
      lmAssociates.hasValuesFor(key: key)
    }

    // MARK: - Core Functions (Private)

    /// Filters `unigrams` and applies phrase replacement.
    /// - Parameters:
    ///   - unigrams: The raw candidates, in priority order.
    ///   - filteredPairs: Pairs to drop, matched against the ORIGINAL key-value pair.
    ///   - insertedPairs: Pairs already emitted, matched against the (possibly replaced)
    ///     pair; every emitted pair is added to it.
    /// - Returns: The surviving candidates with their original scores.
    func filterAndTransform(
      unigrams: [Megrez.Unigram],
      filter filteredPairs: Set<Megrez.KeyValuePair>,
      inserted insertedPairs: inout Set<Megrez.KeyValuePair>
    ) -> [Megrez.Unigram] {
      var results: [Megrez.Unigram] = []

      for unigram in unigrams {
        var pair: Megrez.KeyValuePair = unigram.keyValue
        if filteredPairs.contains(pair) {
          continue
        }

        if isPhraseReplacementEnabled {
          // An empty result means there is no replacement for this value.
          let replacement = lmReplacements.valuesFor(key: pair.value)
          if !replacement.isEmpty {
            IME.prtDebugIntel("\(pair.value) -> \(replacement)")
            pair.value = replacement
          }
        }

        if !insertedPairs.contains(pair) {
          results.append(Megrez.Unigram(keyValue: pair, score: unigram.score))
          insertedPairs.insert(pair)
        }
      }
      return results
    }
  }
}
|
|
@ -34,7 +34,7 @@ namespace vChewing
|
|||
{
|
||||
|
||||
// About 20 generations.
|
||||
static const double DecayThreshould = 1.0 / 1048576.0;
|
||||
static const double DecayThreshold = 1.0 / 1048576.0;
|
||||
|
||||
static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda);
|
||||
static bool IsEndingPunctuation(const std::string &value);
|
||||
|
@ -126,7 +126,7 @@ void UserOverrideModel::Observation::update(const std::string &candidate, double
|
|||
static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda)
|
||||
{
|
||||
double decay = exp((timestamp - eventTimestamp) * lambda);
|
||||
if (decay < DecayThreshould)
|
||||
if (decay < DecayThreshold)
|
||||
{
|
||||
return 0.0;
|
||||
}
|
|
@ -0,0 +1,120 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Refactored from the ObjCpp-version of this class by:
|
||||
// (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import Foundation
|
||||
|
||||
extension vChewing {
  /// A dictionary-backed store of associated phrases, loaded from a text file
  /// whose lines hold a key and a value separated by a space or a tab.
  @frozen public struct LMAssociates {
    var keyValueMap: [String: [Megrez.KeyValuePair]] = [:]

    /// The number of distinct keys currently stored.
    public var count: Int { keyValueMap.count }

    public init() {
      keyValueMap = [:]
    }

    /// Whether any data is stored.
    public func isLoaded() -> Bool {
      keyValueMap.isEmpty == false
    }

    /// Loads the file at `path`.
    /// - Returns: `false` when data is already loaded or the file cannot be read,
    ///   `true` otherwise.
    @discardableResult public mutating func open(_ path: String) -> Bool {
      guard !isLoaded() else { return false }

      LMConsolidator.fixEOF(path: path)
      LMConsolidator.consolidate(path: path, pragma: true)

      let rawLines: [String]
      do {
        rawLines = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n")
      } catch {
        IME.prtDebugIntel("\(error)")
        IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.")
        return false
      }

      for (index, rawLine) in rawLines.enumerated() {
        // Lines starting with "#" are skipped.
        if rawLine.hasPrefix("#") { continue }

        // Tabs are treated as spaces before the line is split into cells.
        let normalizedLine = rawLine.replacingOccurrences(of: "\t", with: " ")
        let cells = normalizedLine.components(separatedBy: " ")
        guard cells.count >= 2 else {
          // Blank lines are ignored silently; anything else is reported.
          if normalizedLine != "", normalizedLine != " " {
            IME.prtDebugIntel("Line #\(index + 1) Wrecked: \(normalizedLine)")
          }
          continue
        }

        // Only the first two cells are used: the key and the value.
        let entry = Megrez.KeyValuePair(key: cells[0], value: cells[1])
        keyValueMap[entry.key, default: []].append(entry)
      }
      return true
    }

    /// Drops all stored data.
    public mutating func close() {
      guard isLoaded() else { return }
      keyValueMap.removeAll()
    }

    /// Sends every stored pair to the debug log, one "key value" line per pair.
    public func dump() {
      let strDump = keyValueMap.values
        .flatMap { $0 }
        .map { $0.key + " " + $0.value + "\n" }
        .joined()
      IME.prtDebugIntel(strDump)
    }

    /// Returns the values stored for `key`; the array is empty (not nil) when the
    /// key is unknown.
    public func valuesFor(key: String) -> [String]? {
      let matched = keyValueMap[key] ?? []
      return matched.map { $0.value }
    }

    /// Whether anything is stored for `key`.
    public func hasValuesFor(key: String) -> Bool {
      keyValueMap[key] != nil
    }
  }
}
|
|
@ -0,0 +1,155 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// 威注音重新設計原廠詞庫語言模組。不排序,但使用 Swift 內建的 String 處理。
|
||||
|
||||
import Foundation
|
||||
|
||||
extension vChewing {
|
||||
@frozen public struct LMCore {
|
||||
var keyValueScoreMap: [String: [Megrez.Unigram]] = [:]
|
||||
var shouldReverse: Bool = false
|
||||
var allowConsolidation: Bool = false
|
||||
var defaultScore: Double = 0
|
||||
var shouldForceDefaultScore: Bool = false
|
||||
|
||||
public var count: Int {
|
||||
keyValueScoreMap.count
|
||||
}
|
||||
|
||||
public init(
|
||||
reverse: Bool = false, consolidate: Bool = false, defaultScore scoreDefault: Double = 0,
|
||||
forceDefaultScore: Bool = false
|
||||
) {
|
||||
keyValueScoreMap = [:]
|
||||
allowConsolidation = consolidate
|
||||
shouldReverse = reverse
|
||||
defaultScore = scoreDefault
|
||||
shouldForceDefaultScore = forceDefaultScore
|
||||
}
|
||||
|
||||
public func isLoaded() -> Bool {
|
||||
!keyValueScoreMap.isEmpty
|
||||
}
|
||||
|
||||
@discardableResult public mutating func open(_ path: String) -> Bool {
|
||||
if isLoaded() {
|
||||
return false
|
||||
}
|
||||
|
||||
if allowConsolidation {
|
||||
LMConsolidator.fixEOF(path: path)
|
||||
LMConsolidator.consolidate(path: path, pragma: true)
|
||||
}
|
||||
|
||||
var arrData: [String] = []
|
||||
|
||||
do {
|
||||
arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n")
|
||||
} catch {
|
||||
IME.prtDebugIntel("\(error)")
|
||||
IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.")
|
||||
return false
|
||||
}
|
||||
|
||||
for (lineID, lineContent) in arrData.enumerated() {
|
||||
if !lineContent.hasPrefix("#") {
|
||||
let lineContent = lineContent.replacingOccurrences(of: "\t", with: " ")
|
||||
if lineContent.components(separatedBy: " ").count < 2 {
|
||||
if lineContent != "", lineContent != " " {
|
||||
IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)")
|
||||
}
|
||||
continue
|
||||
}
|
||||
var currentUnigram = Megrez.Unigram(keyValue: Megrez.KeyValuePair(), score: defaultScore)
|
||||
var columnOne = ""
|
||||
var columnTwo = ""
|
||||
for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() {
|
||||
switch unitID {
|
||||
case 0:
|
||||
columnOne = unitContent
|
||||
case 1:
|
||||
columnTwo = unitContent
|
||||
case 2:
|
||||
if !shouldForceDefaultScore {
|
||||
if let unitContentConverted = Double(unitContent) {
|
||||
currentUnigram.score = unitContentConverted
|
||||
} else {
|
||||
IME.prtDebugIntel("Line #\(lineID) Score Data Wrecked: \(lineContent)")
|
||||
}
|
||||
}
|
||||
default: break
|
||||
}
|
||||
}
|
||||
// 標點符號的頻率最好鎖定一下。
|
||||
if columnOne.contains("_punctuation_") {
|
||||
currentUnigram.score -= (Double(lineID) * 0.000001)
|
||||
}
|
||||
let kvPair =
|
||||
shouldReverse
|
||||
? Megrez.KeyValuePair(key: columnTwo, value: columnOne)
|
||||
: Megrez.KeyValuePair(key: columnOne, value: columnTwo)
|
||||
currentUnigram.keyValue = kvPair
|
||||
let key = shouldReverse ? columnTwo : columnOne
|
||||
keyValueScoreMap[key, default: []].append(currentUnigram)
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
public mutating func close() {
|
||||
if isLoaded() {
|
||||
keyValueScoreMap.removeAll()
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Advanced features
|
||||
|
||||
public func dump() {
|
||||
var strDump = ""
|
||||
for entry in keyValueScoreMap {
|
||||
let rows: [Megrez.Unigram] = entry.value
|
||||
for row in rows {
|
||||
let addline = row.keyValue.key + " " + row.keyValue.value + " " + String(row.score) + "\n"
|
||||
strDump += addline
|
||||
}
|
||||
}
|
||||
IME.prtDebugIntel(strDump)
|
||||
}
|
||||
|
||||
public func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
|
||||
// 這裡用了點廢話處理,不然函數構建體會被 Swift 格式整理工具給毀掉。
|
||||
// 其實只要一句「[Megrez.Bigram]()」就夠了。
|
||||
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
|
||||
}
|
||||
|
||||
/// Returns the unigrams stored under `key`, or an empty array when the key is absent.
public func unigramsFor(key: String) -> [Megrez.Unigram] {
  guard let matched = keyValueScoreMap[key] else { return [] }
  return matched
}
|
||||
|
||||
/// Reports whether `keyValueScoreMap` has an entry for `key`.
public func hasUnigramsFor(key: String) -> Bool {
  keyValueScoreMap.index(forKey: key) != nil
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,124 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Refactored from the ObjCpp-version of this class by:
|
||||
// (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import Foundation
|
||||
|
||||
extension vChewing {
  /// A score-less key-to-phrases table loaded from a plain-text file.
  ///
  /// Every data line carries at least two fields separated by spaces or tabs:
  /// the first field is stored as the value and the second as the key. Extra
  /// fields are ignored and lines starting with `#` are skipped.
  @frozen public struct LMLite {
    var keyValueMap: [String: [Megrez.KeyValuePair]] = [:]
    var allowConsolidation = false

    /// Number of distinct keys currently stored.
    public var count: Int {
      keyValueMap.count
    }

    /// - Parameter consolidate: When true, `open(_:)` runs `LMConsolidator` on the file before reading it.
    public init(consolidate: Bool = false) {
      keyValueMap = [:]
      allowConsolidation = consolidate
    }

    /// True when at least one key is stored.
    public func isLoaded() -> Bool {
      !keyValueMap.isEmpty
    }

    /// Reads the file at `path` into the table.
    ///
    /// - Returns: `false` when data is already loaded or the file cannot be read as UTF-8; `true` otherwise.
    @discardableResult public mutating func open(_ path: String) -> Bool {
      if isLoaded() {
        return false
      }

      if allowConsolidation {
        LMConsolidator.fixEOF(path: path)
        LMConsolidator.consolidate(path: path, pragma: true)
      }

      var arrData: [String] = []

      do {
        arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n")
      } catch {
        IME.prtDebugIntel("\(error)")
        IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.")
        return false
      }

      for (lineID, rawLine) in arrData.enumerated() where !rawLine.hasPrefix("#") {
        let lineContent = rawLine.replacingOccurrences(of: "\t", with: " ")
        // `split` drops empty fields, so doubled or trailing separators can no longer
        // produce entries with an empty key or value.
        let fields = lineContent.split(separator: " ")
        guard fields.count >= 2 else {
          // Blank lines are skipped silently; anything else is reported.
          if !fields.isEmpty {
            IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)")
          }
          continue
        }
        // Column order in this format is "value key".
        let currentKV = Megrez.KeyValuePair(key: String(fields[1]), value: String(fields[0]))
        keyValueMap[currentKV.key, default: []].append(currentKV)
      }
      return true
    }

    /// Drops all loaded entries.
    public mutating func close() {
      if isLoaded() {
        keyValueMap.removeAll()
      }
    }

    /// Sends every stored pair to the debug log as "key value" lines.
    public func dump() {
      var strDump = ""
      for rows in keyValueMap.values {
        for row in rows {
          strDump += row.key + " " + row.value + "\n"
        }
      }
      IME.prtDebugIntel(strDump)
    }

    /// Wraps every pair stored under `key` in a unigram carrying `givenScore`.
    ///
    /// - Returns: An empty array when the key is absent.
    public func unigramsFor(key: String, score givenScore: Double = 0.0) -> [Megrez.Unigram] {
      (keyValueMap[key] ?? []).map { Megrez.Unigram(keyValue: $0, score: givenScore) }
    }

    /// Reports whether any pair is stored under `key`.
    public func hasUnigramsFor(key: String) -> Bool {
      keyValueMap[key] != nil
    }
  }
}
|
|
@ -0,0 +1,107 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Refactored from the ObjCpp-version of this class by:
|
||||
// (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import Foundation
|
||||
|
||||
extension vChewing {
  /// A one-to-one string replacement table loaded from a plain-text file.
  ///
  /// Every data line carries at least two fields separated by spaces or tabs:
  /// the first field is the key and the second its replacement. Extra fields
  /// are ignored, lines starting with `#` are skipped, and a later line with
  /// the same key overwrites an earlier one.
  @frozen public struct LMReplacments {
    var keyValueMap: [String: String] = [:]

    /// Number of keys currently stored.
    public var count: Int {
      keyValueMap.count
    }

    public init() {
      keyValueMap = [:]
    }

    /// True when at least one key is stored.
    public func isLoaded() -> Bool {
      !keyValueMap.isEmpty
    }

    /// Runs `LMConsolidator` on the file at `path`, then reads it into the table.
    ///
    /// - Returns: `false` when data is already loaded or the file cannot be read as UTF-8; `true` otherwise.
    @discardableResult public mutating func open(_ path: String) -> Bool {
      if isLoaded() {
        return false
      }

      LMConsolidator.fixEOF(path: path)
      LMConsolidator.consolidate(path: path, pragma: true)

      var arrData: [String] = []

      do {
        arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n")
      } catch {
        IME.prtDebugIntel("\(error)")
        IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.")
        return false
      }

      for (lineID, rawLine) in arrData.enumerated() where !rawLine.hasPrefix("#") {
        let lineContent = rawLine.replacingOccurrences(of: "\t", with: " ")
        // `split` drops empty fields, so doubled or trailing separators can no longer
        // produce entries with an empty key or value.
        let fields = lineContent.split(separator: " ")
        guard fields.count >= 2 else {
          // Blank lines are skipped silently; anything else is reported.
          if !fields.isEmpty {
            IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)")
          }
          continue
        }
        keyValueMap[String(fields[0])] = String(fields[1])
      }
      return true
    }

    /// Drops all loaded entries.
    public mutating func close() {
      if isLoaded() {
        keyValueMap.removeAll()
      }
    }

    /// Sends every stored pair to the debug log as "key value" lines.
    public func dump() {
      var strDump = ""
      for (key, value) in keyValueMap {
        strDump += key + " " + value + "\n"
      }
      IME.prtDebugIntel(strDump)
    }

    /// Returns the replacement stored for `key`, or an empty string when there is none.
    public func valuesFor(key: String) -> String {
      keyValueMap[key] ?? ""
    }
  }
}
|
|
@ -0,0 +1,223 @@
|
|||
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Refactored from the ObjCpp-version of this class by:
|
||||
// (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import Foundation
|
||||
|
||||
extension vChewing {
  /// Remembers which candidate was picked in a given walked-node context and
  /// suggests it again later, weighting each remembered pick by its share of
  /// all picks in that context and by an exponential time decay.
  public class LMUserOverride {
    // MARK: - Private Structures

    /// How many times one candidate was picked and when it was picked last.
    struct Override {
      var count: Int = 0
      var timestamp: Double = 0.0
    }

    /// Every pick recorded for one context key.
    struct Observation {
      var count: Int = 0
      var overrides: [String: Override] = [:]

      /// Records one more pick of `candidate` at `timestamp`.
      mutating func update(candidate: String, timestamp: Double) {
        count += 1
        // Dictionary values are copies: the entry has to be stored back, and a
        // candidate seen for the first time needs a fresh entry.
        var neta = overrides[candidate, default: Override()]
        neta.timestamp = timestamp
        neta.count += 1
        overrides[candidate] = neta
      }
    }

    /// A context key with its observation; equality looks at the key only.
    struct KeyObservationPair: Equatable {
      var key: String
      var observation: Observation

      var hashValue: Int { key.hashValue }

      init(key: String, observation: Observation) {
        self.key = key
        self.observation = observation
      }

      static func == (lhs: KeyObservationPair, rhs: KeyObservationPair) -> Bool {
        lhs.key == rhs.key
      }
    }

    // MARK: - Main

    var mutCapacity: Int
    var mutDecayExponent: Double
    /// Most recently observed pair first.
    var mutLRUList = [KeyObservationPair]()
    var mutLRUMap: [String: KeyObservationPair] = [:]
    let kDecayThreshold: Double = 1.0 / 1_048_576.0

    /// - Parameters:
    ///   - capacity: Maximum number of context keys kept. Its magnitude is used, raised to at least 1.
    ///   - decayConstant: Time span after which a pick's weight is halved (`log(0.5) / decayConstant` is the exponent).
    public init(capacity: Int = 500, decayConstant: Double = 5400.0) {
      // `abs` alone would let 0 through and would trap on `Int.min`.
      mutCapacity = max(1, Int(clamping: capacity.magnitude))
      mutDecayExponent = log(0.5) / decayConstant
    }

    /// Records that `candidate` was picked in the context described by `walkedNodes` and `cursorIndex`.
    public func observe(
      walkedNodes: [Megrez.NodeAnchor],
      cursorIndex: Int,
      candidate: String,
      timestamp: Double
    ) {
      let key = getWalkedNodesToKey(walkedNodes: walkedNodes, cursorIndex: cursorIndex)
      guard !key.isEmpty else { return }

      guard var pair = mutLRUMap[key] else {
        var observation = Observation()
        observation.update(candidate: candidate, timestamp: timestamp)
        let newPair = KeyObservationPair(key: key, observation: observation)
        mutLRUMap[key] = newPair
        mutLRUList.insert(newPair, at: 0)

        // Evict the least recently used key once over capacity.
        if mutLRUList.count > mutCapacity, let evicted = mutLRUList.popLast() {
          mutLRUMap[evicted.key] = nil
        }
        return
      }

      pair.observation.update(candidate: candidate, timestamp: timestamp)
      // Keep map and list in step: store the updated observation and move the
      // key to the front instead of leaving a stale duplicate behind.
      mutLRUMap[key] = pair
      mutLRUList.removeAll { $0.key == key }
      mutLRUList.insert(pair, at: 0)
    }

    /// Returns the best-scoring remembered candidate for the given context,
    /// or an empty string when nothing usable is remembered.
    public func suggest(
      walkedNodes: [Megrez.NodeAnchor],
      cursorIndex: Int,
      timestamp: Double
    ) -> String {
      let key = getWalkedNodesToKey(walkedNodes: walkedNodes, cursorIndex: cursorIndex)
      guard !key.isEmpty, let keyValuePair = mutLRUMap[key] else {
        return ""
      }

      IME.prtDebugIntel("Suggest - A: \(key)")
      IME.prtDebugIntel("Suggest - B: \(keyValuePair.key)")

      let observation = keyValuePair.observation

      var candidate = ""
      var score = 0.0
      for (overrideCandidate, overrideNeta) in observation.overrides {
        let overrideScore = getScore(
          eventCount: overrideNeta.count,
          totalCount: observation.count,
          eventTimestamp: overrideNeta.timestamp,
          timestamp: timestamp,
          lambda: mutDecayExponent
        )
        // A zero score can never beat the initial 0.0, so fully decayed entries are skipped.
        if overrideScore > score {
          candidate = overrideCandidate
          score = overrideScore
        }
      }
      return candidate
    }

    /// Reports whether `value` is one of the listed closing punctuation marks.
    func isEndingPunctuation(value: String) -> Bool {
      [",", "。", "!", "?", "」", "』", "”", "’"].contains(value)
    }

    /// Scores one override as `(eventCount / totalCount) * exp((timestamp - eventTimestamp) * lambda)`.
    ///
    /// - Returns: `0.0` when `totalCount` is not positive or the decay factor falls below `kDecayThreshold`.
    public func getScore(
      eventCount: Int,
      totalCount: Int,
      eventTimestamp: Double,
      timestamp: Double,
      lambda: Double
    ) -> Double {
      // Avoid dividing by zero, which would yield an infinite or NaN score.
      guard totalCount > 0 else { return 0.0 }
      let decay = exp((timestamp - eventTimestamp) * lambda)
      if decay < kDecayThreshold {
        return 0.0
      }
      return Double(eventCount) / Double(totalCount) * decay
    }

    /// Builds the context key `"(anterior,prev,current)"` from the walked nodes up to the cursor.
    ///
    /// Nodes are collected until their accumulated `spanningLength` reaches `cursorIndex`.
    /// Scanning backwards, the first anchor that has a node becomes "current" and the
    /// two anchors before it become "prev" and "anterior". Each part is written as
    /// `"(key, value)"`, or `"()"` when that anchor or its node is missing.
    ///
    /// - Returns: An empty string when no anchor with a node is found.
    func getWalkedNodesToKey(
      walkedNodes: [Megrez.NodeAnchor], cursorIndex: Int
    ) -> String {
      var arrNodes: [Megrez.NodeAnchor] = []
      var intLength = 0
      for nodeNeta in walkedNodes {
        arrNodes.append(nodeNeta)
        intLength += nodeNeta.spanningLength
        if intLength >= cursorIndex {
          break
        }
      }

      // Copy the reversed view into a plain array so it can be indexed with Int.
      let arrNodesReversed = Array(arrNodes.reversed())

      // Only the first anchor that actually carries a node is used as "current".
      guard let currentIndex = arrNodesReversed.firstIndex(where: { $0.node != nil }) else {
        return ""
      }

      // Formats the anchor at `index`; out-of-range indices and node-less anchors give "()".
      func describe(_ index: Int) -> String {
        guard arrNodesReversed.indices.contains(index),
          let node = arrNodesReversed[index].node
        else {
          return "()"
        }
        let keyValue = node.currentKeyValue()
        return "(\(keyValue.key), \(keyValue.value))"
      }

      let strCurrent = describe(currentIndex)
      let strPrev = describe(currentIndex + 1)
      let strAnterior = describe(currentIndex + 2)

      let strOutput = "(\(strAnterior),\(strPrev),\(strCurrent))"
      return strOutput == "((),(),())" ? "" : strOutput
    }
  }
}
|
|
@ -1,69 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ASSOCIATEDPHRASES_H
|
||||
#define ASSOCIATEDPHRASES_H
|
||||
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
// Key-to-values lookup table backed by a file; `open` loads it and `close` releases it.
class AssociatedPhrases
{
  public:
    AssociatedPhrases();
    ~AssociatedPhrases();

    const bool isLoaded();
    bool open(const char *path);
    void close();
    const std::vector<std::string> valuesForKey(const std::string &key);
    const bool hasValuesForKey(const std::string &key);

  protected:
    // One key/value pair; both views point into storage owned elsewhere.
    struct Row
    {
        std::string_view key;
        std::string_view value;

        Row(std::string_view &k, std::string_view &v) : key(k), value(v)
        {
        }
    };

    // Rows grouped by their key.
    std::map<std::string_view, std::vector<Row>> keyRowMap;

    int fd;        // descriptor of the opened file
    void *data;    // start of the mapped file contents; null when nothing is loaded
    size_t length; // size of the mapped contents in bytes
};
|
||||
|
||||
} // namespace vChewing
|
||||
|
||||
#endif /* ASSOCIATEDPHRASES_H */
|
|
@ -1,146 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "AssociatedPhrases.h"
|
||||
#include "vChewing-Swift.h"
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "KeyValueBlobReader.h"
|
||||
#include "LMConsolidator.h"
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
// Starts with nothing loaded: invalid descriptor, null data pointer, zero length.
AssociatedPhrases::AssociatedPhrases()
    : fd(-1),
      data(nullptr),
      length(0)
{
}
|
||||
|
||||
// Releases the loaded data, if any.
AssociatedPhrases::~AssociatedPhrases()
{
    if (!data)
    {
        return;
    }
    close();
}
|
||||
|
||||
const bool AssociatedPhrases::isLoaded()
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AssociatedPhrases::open(const char *path)
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
LMConsolidator::FixEOF(path);
|
||||
LMConsolidator::ConsolidateContent(path, true);
|
||||
|
||||
fd = ::open(path, O_RDONLY);
|
||||
if (fd == -1)
|
||||
{
|
||||
printf("open:: file not exist");
|
||||
return false;
|
||||
}
|
||||
|
||||
struct stat sb;
|
||||
if (fstat(fd, &sb) == -1)
|
||||
{
|
||||
printf("open:: cannot open file");
|
||||
return false;
|
||||
}
|
||||
|
||||
length = (size_t)sb.st_size;
|
||||
|
||||
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (!data)
|
||||
{
|
||||
::close(fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
KeyValueBlobReader reader(static_cast<char *>(data), length);
|
||||
KeyValueBlobReader::KeyValue keyValue;
|
||||
KeyValueBlobReader::State state;
|
||||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
|
||||
{
|
||||
keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value);
|
||||
}
|
||||
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
||||
if (state == KeyValueBlobReader::State::ERROR)
|
||||
{
|
||||
// close();
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n");
|
||||
// return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void AssociatedPhrases::close()
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
munmap(data, length);
|
||||
::close(fd);
|
||||
data = 0;
|
||||
}
|
||||
|
||||
keyRowMap.clear();
|
||||
}
|
||||
|
||||
const std::vector<std::string> AssociatedPhrases::valuesForKey(const std::string &key)
|
||||
{
|
||||
std::vector<std::string> v;
|
||||
auto iter = keyRowMap.find(key);
|
||||
if (iter != keyRowMap.end())
|
||||
{
|
||||
const std::vector<Row> &rows = iter->second;
|
||||
for (const auto &row : rows)
|
||||
{
|
||||
std::string_view value = row.value;
|
||||
v.push_back({value.data(), value.size()});
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
const bool AssociatedPhrases::hasValuesForKey(const std::string &key)
|
||||
{
|
||||
return keyRowMap.find(key) != keyRowMap.end();
|
||||
}
|
||||
|
||||
}; // namespace vChewing
|
|
@ -1,85 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CoreLM_H
|
||||
#define CoreLM_H
|
||||
|
||||
#include "LanguageModel.h"
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// this class relies on the fact that we have a space-separated data
|
||||
// format, and we use mmap and zero-out the separators and line feeds
|
||||
// to avoid creating new string objects; the parser is a simple DFA
|
||||
|
||||
using namespace std;
|
||||
using namespace Gramambular;
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
class CoreLM : public Gramambular::LanguageModel
|
||||
{
|
||||
public:
|
||||
CoreLM();
|
||||
~CoreLM();
|
||||
|
||||
bool isLoaded();
|
||||
bool open(const char *path);
|
||||
void close();
|
||||
void dump();
|
||||
|
||||
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const string &preceedingKey, const string &key);
|
||||
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const string &key);
|
||||
virtual bool hasUnigramsForKey(const string &key);
|
||||
|
||||
protected:
|
||||
struct CStringCmp
|
||||
{
|
||||
bool operator()(const char *s1, const char *s2) const
|
||||
{
|
||||
return strcmp(s1, s2) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct Row
|
||||
{
|
||||
const char *key;
|
||||
const char *value;
|
||||
const char *logProbability;
|
||||
};
|
||||
|
||||
map<const char *, vector<Row>, CStringCmp> keyRowMap;
|
||||
int fd;
|
||||
void *data;
|
||||
size_t length;
|
||||
};
|
||||
|
||||
}; // namespace vChewing
|
||||
|
||||
#endif
|
|
@ -1,365 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "CoreLM.h"
|
||||
#include "vChewing-Swift.h"
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <syslog.h>
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace Gramambular;
|
||||
|
||||
// Starts with nothing loaded: invalid descriptor, null data pointer, zero length.
vChewing::CoreLM::CoreLM()
    : fd(-1),
      data(nullptr),
      length(0)
{
}
|
||||
|
||||
// Releases the loaded data, if any.
vChewing::CoreLM::~CoreLM()
{
    if (!data)
    {
        return;
    }
    close();
}
|
||||
|
||||
bool vChewing::CoreLM::isLoaded()
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool vChewing::CoreLM::open(const char *path)
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
fd = ::open(path, O_RDONLY);
|
||||
if (fd == -1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
struct stat sb;
|
||||
if (fstat(fd, &sb) == -1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
length = (size_t)sb.st_size;
|
||||
|
||||
data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0);
|
||||
if (!data)
|
||||
{
|
||||
::close(fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Regular expression for parsing:
|
||||
// (\n*\w\w*\s\w\w*\s\w\w*)*$
|
||||
//
|
||||
// Expanded as DFA (in Graphviz):
|
||||
//
|
||||
// digraph finite_state_machine {
|
||||
// rankdir = LR;
|
||||
// size = "10";
|
||||
//
|
||||
// node [shape = doublecircle]; End;
|
||||
// node [shape = circle];
|
||||
//
|
||||
// Start -> End [ label = "EOF"];
|
||||
// Start -> Error [ label = "\\s" ];
|
||||
// Start -> Start [ label = "\\n" ];
|
||||
// Start -> 1 [ label = "\\w" ];
|
||||
//
|
||||
// 1 -> Error [ label = "\\n, EOF" ];
|
||||
// 1 -> 2 [ label = "\\s" ];
|
||||
// 1 -> 1 [ label = "\\w" ];
|
||||
//
|
||||
// 2 -> Error [ label = "\\n, \\s, EOF" ];
|
||||
// 2 -> 3 [ label = "\\w" ];
|
||||
//
|
||||
// 3 -> Error [ label = "\\n, EOF "];
|
||||
// 3 -> 4 [ label = "\\s" ];
|
||||
// 3 -> 3 [ label = "\\w" ];
|
||||
//
|
||||
// 4 -> Error [ label = "\\n, \\s, EOF" ];
|
||||
// 4 -> 5 [ label = "\\w" ];
|
||||
//
|
||||
// 5 -> Error [ label = "\\s, EOF" ];
|
||||
// 5 -> Start [ label = "\\n" ];
|
||||
// 5 -> 5 [ label = "\\w" ];
|
||||
// }
|
||||
|
||||
char *head = (char *)data;
|
||||
char *end = (char *)data + length;
|
||||
char c;
|
||||
Row row;
|
||||
|
||||
start:
|
||||
// EOF -> end
|
||||
if (head == end)
|
||||
{
|
||||
goto end;
|
||||
}
|
||||
|
||||
c = *head;
|
||||
// \s -> error
|
||||
if (c == ' ')
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error");
|
||||
goto error;
|
||||
}
|
||||
// \n -> start
|
||||
else if (c == '\n')
|
||||
{
|
||||
head++;
|
||||
goto start;
|
||||
}
|
||||
|
||||
// \w -> record column star, state1
|
||||
row.value = head;
|
||||
head++;
|
||||
// fall through to state 1
|
||||
|
||||
state1:
|
||||
// EOF -> error
|
||||
if (head == end)
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error");
|
||||
goto error;
|
||||
}
|
||||
|
||||
c = *head;
|
||||
// \n -> error
|
||||
if (c == '\n')
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error");
|
||||
goto error;
|
||||
}
|
||||
// \s -> state2 + zero out ending + record column start
|
||||
else if (c == ' ')
|
||||
{
|
||||
*head = 0;
|
||||
head++;
|
||||
row.key = head;
|
||||
goto state2;
|
||||
}
|
||||
|
||||
// \w -> state1
|
||||
head++;
|
||||
goto state1;
|
||||
|
||||
state2:
|
||||
// eof -> error
|
||||
if (head == end)
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error");
|
||||
goto error;
|
||||
}
|
||||
|
||||
c = *head;
|
||||
// \n, \s -> error
|
||||
if (c == '\n' || c == ' ')
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error");
|
||||
goto error;
|
||||
}
|
||||
|
||||
// \w -> state3
|
||||
head++;
|
||||
|
||||
// fall through to state 3
|
||||
|
||||
state3:
|
||||
// eof -> error
|
||||
if (head == end)
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error");
|
||||
goto error;
|
||||
}
|
||||
|
||||
c = *head;
|
||||
|
||||
// \n -> error
|
||||
if (c == '\n')
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error");
|
||||
goto error;
|
||||
}
|
||||
// \s -> state4 + zero out ending + record column start
|
||||
else if (c == ' ')
|
||||
{
|
||||
*head = 0;
|
||||
head++;
|
||||
row.logProbability = head;
|
||||
goto state4;
|
||||
}
|
||||
|
||||
// \w -> state3
|
||||
head++;
|
||||
goto state3;
|
||||
|
||||
state4:
|
||||
// eof -> error
|
||||
if (head == end)
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error");
|
||||
goto error;
|
||||
}
|
||||
|
||||
c = *head;
|
||||
// \n, \s -> error
|
||||
if (c == '\n' || c == ' ')
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: \\n \\s -> error");
|
||||
goto error;
|
||||
}
|
||||
|
||||
// \w -> state5
|
||||
head++;
|
||||
|
||||
// fall through to state 5
|
||||
|
||||
state5:
|
||||
// eof -> error
|
||||
if (head == end)
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error");
|
||||
goto error;
|
||||
}
|
||||
|
||||
c = *head;
|
||||
// \s -> error
|
||||
if (c == ' ')
|
||||
{
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error");
|
||||
goto error;
|
||||
}
|
||||
// \n -> start
|
||||
else if (c == '\n')
|
||||
{
|
||||
*head = 0;
|
||||
head++;
|
||||
keyRowMap[row.key].push_back(row);
|
||||
goto start;
|
||||
}
|
||||
|
||||
// \w -> state 5
|
||||
head++;
|
||||
goto state5;
|
||||
|
||||
error:
|
||||
close();
|
||||
return false;
|
||||
|
||||
end:
|
||||
static const char *space = " ";
|
||||
static const char *zero = "0.0";
|
||||
Row emptyRow;
|
||||
emptyRow.key = space;
|
||||
emptyRow.value = space;
|
||||
emptyRow.logProbability = zero;
|
||||
keyRowMap[space].push_back(emptyRow);
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete.");
|
||||
return true;
|
||||
}
|
||||
|
||||
void vChewing::CoreLM::close()
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
munmap(data, length);
|
||||
::close(fd);
|
||||
data = 0;
|
||||
}
|
||||
|
||||
keyRowMap.clear();
|
||||
}
|
||||
|
||||
void vChewing::CoreLM::dump()
|
||||
{
|
||||
size_t rows = 0;
|
||||
for (map<const char *, vector<Row>>::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i)
|
||||
{
|
||||
const vector<Row> &r = (*i).second;
|
||||
for (vector<Row>::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri)
|
||||
{
|
||||
const Row &row = *ri;
|
||||
cerr << row.key << " " << row.value << " " << row.logProbability << "\n";
|
||||
rows++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This model carries no bigram data: the result is always an empty vector,
// whatever keys are passed in.
const std::vector<Gramambular::Bigram> vChewing::CoreLM::bigramsForKeys(const string &preceedingKey, const string &key)
{
    std::vector<Gramambular::Bigram> noBigrams;
    return noBigrams;
}
|
||||
|
||||
const std::vector<Gramambular::Unigram> vChewing::CoreLM::unigramsForKey(const string &key)
|
||||
{
|
||||
std::vector<Gramambular::Unigram> v;
|
||||
map<const char *, vector<Row>>::const_iterator i = keyRowMap.find(key.c_str());
|
||||
|
||||
if (i != keyRowMap.end())
|
||||
{
|
||||
for (vector<Row>::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri)
|
||||
{
|
||||
Unigram g;
|
||||
const Row &r = *ri;
|
||||
g.keyValue.key = r.key;
|
||||
g.keyValue.value = r.value;
|
||||
g.score = atof(r.logProbability);
|
||||
v.push_back(g);
|
||||
}
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
bool vChewing::CoreLM::hasUnigramsForKey(const string &key)
|
||||
{
|
||||
return keyRowMap.find(key.c_str()) != keyRowMap.end();
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef PHRASEREPLACEMENTMAP_H
|
||||
#define PHRASEREPLACEMENTMAP_H
|
||||
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
// Key -> replacement lookup table loaded from a memory-mapped text file.
class PhraseReplacementMap
{
  public:
    // Starts with no file open.
    PhraseReplacementMap();
    // Releases the mapping if one is still held.
    ~PhraseReplacementMap();

    // Maps the file at `path` and indexes its key/value pairs. Returns false
    // when a file is already open or the file cannot be opened.
    bool open(const char *path);

    // Unmaps the current file (if any) and empties the table.
    void close();

    // Returns the value stored for `key`, or an empty string when there is none.
    const std::string valueForKey(const std::string &key);

  protected:
    // NOTE(review): the views presumably point into the mapped file, so they
    // would only be valid while `data` stays mapped -- confirm against
    // KeyValueBlobReader.
    std::map<std::string_view, std::string_view> keyValueMap;

    // Descriptor of the opened file; the constructor sets it to -1.
    int fd;

    // Start of the mapping; null while nothing is open.
    void *data;

    // Size of the mapped file in bytes.
    size_t length;
};
|
||||
|
||||
} // namespace vChewing
|
||||
|
||||
#endif
|
|
@ -1,130 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "PhraseReplacementMap.h"
|
||||
#include "vChewing-Swift.h"
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <syslog.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "KeyValueBlobReader.h"
|
||||
#include "LMConsolidator.h"
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
using std::string;
|
||||
|
||||
PhraseReplacementMap::PhraseReplacementMap() : fd(-1), data(0), length(0)
|
||||
{
|
||||
}
|
||||
|
||||
PhraseReplacementMap::~PhraseReplacementMap()
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
bool PhraseReplacementMap::open(const char *path)
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
LMConsolidator::FixEOF(path);
|
||||
LMConsolidator::ConsolidateContent(path, true);
|
||||
|
||||
fd = ::open(path, O_RDONLY);
|
||||
if (fd == -1)
|
||||
{
|
||||
printf("open:: file not exist");
|
||||
return false;
|
||||
}
|
||||
|
||||
struct stat sb;
|
||||
if (fstat(fd, &sb) == -1)
|
||||
{
|
||||
printf("open:: cannot open file");
|
||||
return false;
|
||||
}
|
||||
|
||||
length = (size_t)sb.st_size;
|
||||
|
||||
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (!data)
|
||||
{
|
||||
::close(fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
KeyValueBlobReader reader(static_cast<char *>(data), length);
|
||||
KeyValueBlobReader::KeyValue keyValue;
|
||||
KeyValueBlobReader::State state;
|
||||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
|
||||
{
|
||||
keyValueMap[keyValue.key] = keyValue.value;
|
||||
}
|
||||
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
||||
if (state == KeyValueBlobReader::State::ERROR)
|
||||
{
|
||||
// close();
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. On Error Resume Next.\n");
|
||||
// return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void PhraseReplacementMap::close()
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
munmap(data, length);
|
||||
::close(fd);
|
||||
data = 0;
|
||||
}
|
||||
|
||||
keyValueMap.clear();
|
||||
}
|
||||
|
||||
const std::string PhraseReplacementMap::valueForKey(const std::string &key)
|
||||
{
|
||||
auto iter = keyValueMap.find(key);
|
||||
if (iter != keyValueMap.end())
|
||||
{
|
||||
const std::string_view v = iter->second;
|
||||
return {v.data(), v.size()};
|
||||
}
|
||||
return string("");
|
||||
}
|
||||
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef USERPHRASESLM_H
|
||||
#define USERPHRASESLM_H
|
||||
|
||||
#include "LanguageModel.h"
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
class UserPhrasesLM : public Gramambular::LanguageModel
|
||||
{
|
||||
public:
|
||||
UserPhrasesLM();
|
||||
~UserPhrasesLM();
|
||||
|
||||
bool isLoaded();
|
||||
bool open(const char *path);
|
||||
void close();
|
||||
void dump();
|
||||
|
||||
virtual bool allowConsolidation()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual float overridedValue()
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
virtual const std::vector<Gramambular::Bigram> bigramsForKeys(const std::string &preceedingKey,
|
||||
const std::string &key);
|
||||
virtual const std::vector<Gramambular::Unigram> unigramsForKey(const std::string &key);
|
||||
virtual bool hasUnigramsForKey(const std::string &key);
|
||||
|
||||
protected:
|
||||
struct Row
|
||||
{
|
||||
Row(std::string_view &k, std::string_view &v) : key(k), value(v)
|
||||
{
|
||||
}
|
||||
std::string_view key;
|
||||
std::string_view value;
|
||||
};
|
||||
|
||||
std::map<std::string_view, std::vector<Row>> keyRowMap;
|
||||
int fd;
|
||||
void *data;
|
||||
size_t length;
|
||||
};
|
||||
|
||||
} // namespace vChewing
|
||||
|
||||
#endif
|
|
@ -1,174 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "UserPhrasesLM.h"
|
||||
#include "vChewing-Swift.h"
|
||||
#include <fcntl.h>
|
||||
#include <fstream>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <syslog.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "KeyValueBlobReader.h"
|
||||
#include "LMConsolidator.h"
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
UserPhrasesLM::UserPhrasesLM() : fd(-1), data(0), length(0)
|
||||
{
|
||||
}
|
||||
|
||||
UserPhrasesLM::~UserPhrasesLM()
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
close();
|
||||
}
|
||||
}
|
||||
|
||||
bool UserPhrasesLM::isLoaded()
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool UserPhrasesLM::open(const char *path)
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (allowConsolidation())
|
||||
{
|
||||
LMConsolidator::FixEOF(path);
|
||||
LMConsolidator::ConsolidateContent(path, true);
|
||||
}
|
||||
|
||||
fd = ::open(path, O_RDONLY);
|
||||
if (fd == -1)
|
||||
{
|
||||
printf("open:: file not exist");
|
||||
return false;
|
||||
}
|
||||
|
||||
struct stat sb;
|
||||
if (fstat(fd, &sb) == -1)
|
||||
{
|
||||
printf("open:: cannot open file");
|
||||
return false;
|
||||
}
|
||||
|
||||
length = (size_t)sb.st_size;
|
||||
|
||||
data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
|
||||
if (!data)
|
||||
{
|
||||
::close(fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
KeyValueBlobReader reader(static_cast<char *>(data), length);
|
||||
KeyValueBlobReader::KeyValue keyValue;
|
||||
KeyValueBlobReader::State state;
|
||||
while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR)
|
||||
{
|
||||
// We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF
|
||||
// reading.
|
||||
keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key);
|
||||
}
|
||||
// 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行)
|
||||
if (state == KeyValueBlobReader::State::ERROR)
|
||||
{
|
||||
// close();
|
||||
if (mgrPrefs.isDebugModeEnabled)
|
||||
syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. On Error Resume Next.\n");
|
||||
// return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void UserPhrasesLM::close()
|
||||
{
|
||||
if (data)
|
||||
{
|
||||
munmap(data, length);
|
||||
::close(fd);
|
||||
data = 0;
|
||||
}
|
||||
|
||||
keyRowMap.clear();
|
||||
}
|
||||
|
||||
void UserPhrasesLM::dump()
|
||||
{
|
||||
for (const auto &entry : keyRowMap)
|
||||
{
|
||||
const std::vector<Row> &rows = entry.second;
|
||||
for (const auto &row : rows)
|
||||
{
|
||||
std::cerr << row.key << " " << row.value << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<Gramambular::Bigram> UserPhrasesLM::bigramsForKeys(const std::string &preceedingKey,
|
||||
const std::string &key)
|
||||
{
|
||||
return std::vector<Gramambular::Bigram>();
|
||||
}
|
||||
|
||||
const std::vector<Gramambular::Unigram> UserPhrasesLM::unigramsForKey(const std::string &key)
|
||||
{
|
||||
std::vector<Gramambular::Unigram> v;
|
||||
auto iter = keyRowMap.find(key);
|
||||
if (iter != keyRowMap.end())
|
||||
{
|
||||
const std::vector<Row> &rows = iter->second;
|
||||
for (const auto &row : rows)
|
||||
{
|
||||
Gramambular::Unigram g;
|
||||
g.keyValue.key = row.key;
|
||||
g.keyValue.value = row.value;
|
||||
g.score = overridedValue();
|
||||
v.push_back(g);
|
||||
}
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
bool UserPhrasesLM::hasUnigramsForKey(const std::string &key)
|
||||
{
|
||||
return keyRowMap.find(key) != keyRowMap.end();
|
||||
}
|
||||
|
||||
}; // namespace vChewing
|
|
@ -1,54 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#import "KeyHandler.h"
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// Static manager in front of the CHT / CHS language model instances.
@interface mgrLangModel : NSObject

/// Loads the bundled data sets still missing for the given input mode.
+ (void)loadDataModel:(InputMode)mode;

/// Reloads user phrases, excluded phrases and user symbol data for both modes.
+ (void)loadUserPhrases;

/// Reloads the user associated-phrase data for both modes.
+ (void)loadUserAssociatedPhrases;

/// Reloads the phrase replacement map for both modes.
+ (void)loadUserPhraseReplacement;

/// Returns YES when the model for `mode` yields a unigram under `key` whose
/// value equals `userPhrase`.
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase
                     inputMode:(InputMode)mode
                           key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:));

/// Runs the language-model consolidator on the file at `path`.
+ (void)consolidateGivenFile:(NSString *)path shouldCheckPragma:(BOOL)shouldCheckPragma;

/// Forwards the phrase-replacement switch to both language models.
+ (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled;

/// Forwards the CNS switch to both language models.
+ (void)setCNSEnabled:(BOOL)cnsEnabled;

/// Forwards the symbol switch to both language models.
+ (void)setSymbolEnabled:(BOOL)symbolEnabled;

@end
|
||||
|
||||
/// The following methods are merely for testing.
|
||||
@interface mgrLangModel ()

/// Loads the bundled data sets still missing for both the CHT and CHS models.
+ (void)loadDataModels;

@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
|
@ -1,195 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#import "mgrLangModel.h"
|
||||
#import "LMConsolidator.h"
|
||||
#import "mgrLangModel_Privates.h"
|
||||
#import "vChewing-Swift.h"
|
||||
|
||||
static const int kUserOverrideModelCapacity = 500;
|
||||
static const double kObservedOverrideHalflife = 5400.0;
|
||||
|
||||
static vChewing::LMInstantiator gLangModelCHT;
|
||||
static vChewing::LMInstantiator gLangModelCHS;
|
||||
static vChewing::UserOverrideModel gUserOverrideModelCHT(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||
static vChewing::UserOverrideModel gUserOverrideModelCHS(kUserOverrideModelCapacity, kObservedOverrideHalflife);
|
||||
|
||||
@implementation mgrLangModel

// NOTE: the original author marked every function in this implementation as
// one that cannot be migrated to Swift.

// Loads the core dictionary named `filenameWithoutExtension` from the bundle
// data path into `lm`.
static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing::LMInstantiator &lm)
{
    NSString *dataPath = [mgrLangModel getBundleDataPath:filenameWithoutExtension];
    lm.loadLanguageModel([dataPath UTF8String]);
}

// Loads whichever bundled data sets `lm` still lacks: the core dictionary
// named `coreDataName`, then the misc, symbol and CNS data. Data sets already
// loaded are left alone. Shared by +loadDataModels and +loadDataModel:.
static void LTLoadMissingBundledData(NSString *coreDataName, vChewing::LMInstantiator &lm)
{
    if (!lm.isDataModelLoaded())
        LTLoadLanguageModelFile(coreDataName, lm);
    if (!lm.isMiscDataLoaded())
        lm.loadMiscData([[mgrLangModel getBundleDataPath:@"data-zhuyinwen"] UTF8String]);
    if (!lm.isSymbolDataLoaded())
        lm.loadSymbolData([[mgrLangModel getBundleDataPath:@"data-symbols"] UTF8String]);
    if (!lm.isCNSDataLoaded())
        lm.loadCNSData([[mgrLangModel getBundleDataPath:@"char-kanji-cns"] UTF8String]);
}

// Loads the missing bundled data for both models, CHT first.
+ (void)loadDataModels
{
    LTLoadMissingBundledData(@"data-cht", gLangModelCHT);
    LTLoadMissingBundledData(@"data-chs", gLangModelCHS);
}

// Loads the missing bundled data for the model matching `mode`; any other
// mode value loads nothing.
+ (void)loadDataModel:(InputMode)mode
{
    if ([mode isEqualToString:imeModeCHT])
    {
        LTLoadMissingBundledData(@"data-cht", gLangModelCHT);
    }

    if ([mode isEqualToString:imeModeCHS])
    {
        LTLoadMissingBundledData(@"data-chs", gLangModelCHS);
    }
}

// Reloads user phrases (with their exclusion lists) and user symbol data for
// both models.
+ (void)loadUserPhrases
{
    gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String],
                                  [[self excludedPhrasesDataPath:imeModeCHT] UTF8String]);
    gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String],
                                  [[self excludedPhrasesDataPath:imeModeCHS] UTF8String]);
    gLangModelCHT.loadUserSymbolData([[self userSymbolDataPath:imeModeCHT] UTF8String]);
    gLangModelCHS.loadUserSymbolData([[self userSymbolDataPath:imeModeCHS] UTF8String]);
}

// Reloads the user associated-phrase data for both models.
+ (void)loadUserAssociatedPhrases
{
    gLangModelCHT.loadUserAssociatedPhrases([[self userAssociatedPhrasesDataPath:imeModeCHT] UTF8String]);
    gLangModelCHS.loadUserAssociatedPhrases([[self userAssociatedPhrasesDataPath:imeModeCHS] UTF8String]);
}

// Reloads the phrase replacement map for both models.
+ (void)loadUserPhraseReplacement
{
    gLangModelCHT.loadPhraseReplacementMap([[self phraseReplacementDataPath:imeModeCHT] UTF8String]);
    gLangModelCHS.loadPhraseReplacementMap([[self phraseReplacementDataPath:imeModeCHS] UTF8String]);
}

// Returns YES when the model selected by `mode` (CHT when it equals
// imeModeCHT, CHS otherwise) yields a unigram under `key` whose value equals
// `userPhrase`.
+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase
                     inputMode:(InputMode)mode
                           key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:))
{
    const std::string unigramKey(key.UTF8String);
    const std::string userPhraseString(userPhrase.UTF8String);

    vChewing::LMInstantiator &lm = [mode isEqualToString:imeModeCHT] ? gLangModelCHT : gLangModelCHS;

    // Iterate by const reference: copying each unigram only to compare one
    // string was pure overhead.
    for (const auto &unigram : lm.unigramsForKey(unigramKey))
    {
        if (unigram.keyValue.value == userPhraseString)
        {
            return YES;
        }
    }
    return NO;
}

// Runs the consolidator on the file at `path`.
+ (void)consolidateGivenFile:(NSString *)path shouldCheckPragma:(BOOL)shouldCheckPragma
{
    vChewing::LMConsolidator::ConsolidateContent([path UTF8String], shouldCheckPragma);
}

// Accessor for the CHT language model instance.
+ (vChewing::LMInstantiator *)lmCHT
{
    return &gLangModelCHT;
}

// Accessor for the CHS language model instance.
+ (vChewing::LMInstantiator *)lmCHS
{
    return &gLangModelCHS;
}

// Accessor for the CHT user override model instance.
+ (vChewing::UserOverrideModel *)userOverrideModelCHT
{
    return &gUserOverrideModelCHT;
}

// Accessor for the CHS user override model instance.
+ (vChewing::UserOverrideModel *)userOverrideModelCHS
{
    return &gUserOverrideModelCHS;
}

// Forwards the phrase-replacement switch to both models.
+ (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled
{
    gLangModelCHT.setPhraseReplacementEnabled(phraseReplacementEnabled);
    gLangModelCHS.setPhraseReplacementEnabled(phraseReplacementEnabled);
}

// Forwards the CNS switch to both models.
+ (void)setCNSEnabled:(BOOL)cnsEnabled
{
    gLangModelCHT.setCNSEnabled(cnsEnabled);
    gLangModelCHS.setCNSEnabled(cnsEnabled);
}

// Forwards the symbol switch to both models.
+ (void)setSymbolEnabled:(BOOL)symbolEnabled
{
    gLangModelCHT.setSymbolEnabled(symbolEnabled);
    gLangModelCHS.setSymbolEnabled(symbolEnabled);
}

@end
|
|
@ -26,7 +26,195 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
|
||||
import Cocoa
|
||||
|
||||
@objc extension mgrLangModel {
|
||||
/// 我們不能讓 mgrLangModel 這個靜態管理器來承載下面這些副本變數。
|
||||
/// 所以,這些副本變數只能放在 mgrLangModel 的外部。
|
||||
/// 同時,這些變數不對外開放任意存取權限。
|
||||
/// 我們只在 mgrLangModel 內部寫幾個回傳函數、供其餘控制模組來讀取。
|
||||
|
||||
private var gLangModelCHS = vChewing.LMInstantiator()
|
||||
private var gLangModelCHT = vChewing.LMInstantiator()
|
||||
private var gUserOverrideModelCHS = vChewing.LMUserOverride()
|
||||
private var gUserOverrideModelCHT = vChewing.LMUserOverride()
|
||||
|
||||
class mgrLangModel: NSObject {
|
||||
/// 寫幾個回傳函數、供其餘控制模組來讀取那些被設為 fileprivate 的器外變數。
|
||||
public static var lmCHS: vChewing.LMInstantiator { gLangModelCHS }
|
||||
public static var lmCHT: vChewing.LMInstantiator { gLangModelCHT }
|
||||
public static var uomCHS: vChewing.LMUserOverride { gUserOverrideModelCHS }
|
||||
public static var uomCHT: vChewing.LMUserOverride { gUserOverrideModelCHT }
|
||||
|
||||
// MARK: - Functions reacting directly with language models.
|
||||
|
||||
static func loadCoreLanguageModelFile(filenameSansExtension: String, langModel lm: inout vChewing.LMInstantiator) {
|
||||
let dataPath: String = mgrLangModel.getBundleDataPath(filenameSansExtension)
|
||||
lm.loadLanguageModel(path: dataPath)
|
||||
}
|
||||
|
||||
/// Loads the bundled data of both language models (CHT and CHS).
///
/// The CNS, misc ("data-zhuyinwen") and symbol tables of each model are
/// loaded inside a block dispatched to a global `.userInitiated` queue, each
/// only when its `is…Loaded()` check reports `false`. The core dictionaries
/// ("data-cht" / "data-chs") are then loaded on the calling thread, again only
/// when not yet loaded, bracketed by `NotifierController` notifications.
public static func loadDataModels() {
  DispatchQueue.global(qos: .userInitiated).async {
    // CHT: auxiliary tables.
    if !gLangModelCHT.isCNSDataLoaded() {
      let cnsPath = getBundleDataPath("char-kanji-cns")
      gLangModelCHT.loadCNSData(path: cnsPath)
    }
    if !gLangModelCHT.isMiscDataLoaded() {
      let miscPath = getBundleDataPath("data-zhuyinwen")
      gLangModelCHT.loadMiscData(path: miscPath)
    }
    if !gLangModelCHT.isSymbolDataLoaded() {
      let symbolPath = getBundleDataPath("data-symbols")
      gLangModelCHT.loadSymbolData(path: symbolPath)
    }
    // CHS: auxiliary tables.
    if !gLangModelCHS.isCNSDataLoaded() {
      let cnsPath = getBundleDataPath("char-kanji-cns")
      gLangModelCHS.loadCNSData(path: cnsPath)
    }
    if !gLangModelCHS.isMiscDataLoaded() {
      let miscPath = getBundleDataPath("data-zhuyinwen")
      gLangModelCHS.loadMiscData(path: miscPath)
    }
    if !gLangModelCHS.isSymbolDataLoaded() {
      let symbolPath = getBundleDataPath("data-symbols")
      gLangModelCHS.loadSymbolData(path: symbolPath)
    }
  }

  // CHT: core dictionary.
  if !gLangModelCHT.isDataModelLoaded() {
    let loadingNotice = String(format: "%@", NSLocalizedString("Loading CHT Core Dict...", comment: ""))
    NotifierController.notify(message: loadingNotice)
    loadCoreLanguageModelFile(filenameSansExtension: "data-cht", langModel: &gLangModelCHT)
    let finishedNotice = String(format: "%@", NSLocalizedString("Core Dict loading complete.", comment: ""))
    NotifierController.notify(message: finishedNotice)
  }

  // CHS: core dictionary.
  if !gLangModelCHS.isDataModelLoaded() {
    let loadingNotice = String(format: "%@", NSLocalizedString("Loading CHS Core Dict...", comment: ""))
    NotifierController.notify(message: loadingNotice)
    loadCoreLanguageModelFile(filenameSansExtension: "data-chs", langModel: &gLangModelCHS)
    let finishedNotice = String(format: "%@", NSLocalizedString("Core Dict loading complete.", comment: ""))
    NotifierController.notify(message: finishedNotice)
  }
}
|
||||
|
||||
/// Loads the bundled data of the language model that belongs to `mode`.
///
/// For `imeModeCHS` / `imeModeCHT` the misc, symbol and CNS tables are loaded
/// inside a block dispatched to a global `.userInitiated` queue, and the core
/// dictionary is loaded on the calling thread, each only when not yet loaded.
/// Any other mode does nothing.
/// - Parameter mode: The input mode whose language model should be loaded.
public static func loadDataModel(_ mode: InputMode) {
  switch mode {
  case InputMode.imeModeCHS:
    DispatchQueue.global(qos: .userInitiated).async {
      if !gLangModelCHS.isMiscDataLoaded() {
        let miscPath = getBundleDataPath("data-zhuyinwen")
        gLangModelCHS.loadMiscData(path: miscPath)
      }
      if !gLangModelCHS.isSymbolDataLoaded() {
        let symbolPath = getBundleDataPath("data-symbols")
        gLangModelCHS.loadSymbolData(path: symbolPath)
      }
      if !gLangModelCHS.isCNSDataLoaded() {
        let cnsPath = getBundleDataPath("char-kanji-cns")
        gLangModelCHS.loadCNSData(path: cnsPath)
      }
    }
    if !gLangModelCHS.isDataModelLoaded() {
      let loadingNotice = String(format: "%@", NSLocalizedString("Loading CHS Core Dict...", comment: ""))
      NotifierController.notify(message: loadingNotice)
      loadCoreLanguageModelFile(filenameSansExtension: "data-chs", langModel: &gLangModelCHS)
      let finishedNotice = String(format: "%@", NSLocalizedString("Core Dict loading complete.", comment: ""))
      NotifierController.notify(message: finishedNotice)
    }
  case InputMode.imeModeCHT:
    DispatchQueue.global(qos: .userInitiated).async {
      if !gLangModelCHT.isMiscDataLoaded() {
        let miscPath = getBundleDataPath("data-zhuyinwen")
        gLangModelCHT.loadMiscData(path: miscPath)
      }
      if !gLangModelCHT.isSymbolDataLoaded() {
        let symbolPath = getBundleDataPath("data-symbols")
        gLangModelCHT.loadSymbolData(path: symbolPath)
      }
      if !gLangModelCHT.isCNSDataLoaded() {
        let cnsPath = getBundleDataPath("char-kanji-cns")
        gLangModelCHT.loadCNSData(path: cnsPath)
      }
    }
    if !gLangModelCHT.isDataModelLoaded() {
      let loadingNotice = String(format: "%@", NSLocalizedString("Loading CHT Core Dict...", comment: ""))
      NotifierController.notify(message: loadingNotice)
      loadCoreLanguageModelFile(filenameSansExtension: "data-cht", langModel: &gLangModelCHT)
      let finishedNotice = String(format: "%@", NSLocalizedString("Core Dict loading complete.", comment: ""))
      NotifierController.notify(message: finishedNotice)
    }
  default:
    // Modes other than CHS / CHT have no language model to load here.
    break
  }
}
|
||||
|
||||
/// Reloads the user phrases (with their exclusion filters) and then the user
/// symbol data, for the CHT model first and the CHS model second.
public static func loadUserPhrases() {
  let phrasesPathCHT = userPhrasesDataPath(InputMode.imeModeCHT)
  let filterPathCHT = excludedPhrasesDataPath(InputMode.imeModeCHT)
  gLangModelCHT.loadUserPhrases(path: phrasesPathCHT, filterPath: filterPathCHT)

  let phrasesPathCHS = userPhrasesDataPath(InputMode.imeModeCHS)
  let filterPathCHS = excludedPhrasesDataPath(InputMode.imeModeCHS)
  gLangModelCHS.loadUserPhrases(path: phrasesPathCHS, filterPath: filterPathCHS)

  let symbolPathCHT = userSymbolDataPath(InputMode.imeModeCHT)
  gLangModelCHT.loadUserSymbolData(path: symbolPathCHT)

  let symbolPathCHS = userSymbolDataPath(InputMode.imeModeCHS)
  gLangModelCHS.loadUserSymbolData(path: symbolPathCHS)
}
|
||||
|
||||
/// Reloads the user associated-phrases data for the CHT model and then for
/// the CHS model.
public static func loadUserAssociatedPhrases() {
  let pathCHT = mgrLangModel.userAssociatedPhrasesDataPath(InputMode.imeModeCHT)
  gLangModelCHT.loadUserAssociatedPhrases(path: pathCHT)

  let pathCHS = mgrLangModel.userAssociatedPhrasesDataPath(InputMode.imeModeCHS)
  gLangModelCHS.loadUserAssociatedPhrases(path: pathCHS)
}
|
||||
|
||||
/// Reloads the phrase-replacement map for the CHT model and then for the
/// CHS model.
public static func loadUserPhraseReplacement() {
  let pathCHT = mgrLangModel.phraseReplacementDataPath(InputMode.imeModeCHT)
  gLangModelCHT.loadPhraseReplacementMap(path: pathCHT)

  let pathCHS = mgrLangModel.phraseReplacementDataPath(InputMode.imeModeCHS)
  gLangModelCHS.loadPhraseReplacementMap(path: pathCHS)
}
|
||||
|
||||
/// Reports whether the language model for `mode` already holds a unigram
/// under `unigramKey` whose value equals `userPhrase`.
/// - Parameters:
///   - userPhrase: The phrase value to look for.
///   - mode: `imeModeCHT` queries the CHT model; any other mode queries the CHS model.
///   - unigramKey: The key used to fetch unigrams from the model.
/// - Returns: `true` if a unigram with a matching value exists, otherwise `false`.
public static func checkIfUserPhraseExist(
  userPhrase: String,
  mode: InputMode,
  key unigramKey: String
) -> Bool {
  let candidates: [Megrez.Unigram]
  if mode == InputMode.imeModeCHT {
    candidates = gLangModelCHT.unigramsFor(key: unigramKey)
  } else {
    candidates = gLangModelCHS.unigramsFor(key: unigramKey)
  }
  return candidates.contains { $0.keyValue.value == userPhrase }
}
|
||||
|
||||
/// Writes `state` to `isPhraseReplacementEnabled` on both the CHT and the
/// CHS language model.
/// - Parameter state: The new value of the flag.
public static func setPhraseReplacementEnabled(_ state: Bool) {
  gLangModelCHT.isPhraseReplacementEnabled = state
  gLangModelCHS.isPhraseReplacementEnabled = state
}
|
||||
|
||||
/// Writes `state` to `isCNSEnabled` on both the CHT and the CHS language
/// model.
/// - Parameter state: The new value of the flag.
public static func setCNSEnabled(_ state: Bool) {
  gLangModelCHT.isCNSEnabled = state
  gLangModelCHS.isCNSEnabled = state
}
|
||||
|
||||
/// Writes `state` to `isSymbolEnabled` on both the CHT and the CHS language
/// model.
/// - Parameter state: The new value of the flag.
public static func setSymbolEnabled(_ state: Bool) {
  gLangModelCHT.isSymbolEnabled = state
  gLangModelCHS.isSymbolEnabled = state
}
|
||||
|
||||
// MARK: - 獲取當前輸入法封包內的原廠核心語彙檔案所在路徑
|
||||
|
||||
static func getBundleDataPath(_ filenameSansExt: String) -> String {
|
||||
|
@ -80,7 +268,7 @@ import Cocoa
|
|||
do {
|
||||
try templateData.write(to: URL(fileURLWithPath: filePath))
|
||||
} catch {
|
||||
IME.prtDebugIntel("Failed to write file")
|
||||
IME.prtDebugIntel("Failed to write template data to: \(filePath)")
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
@ -219,13 +407,15 @@ import Cocoa
|
|||
// module shipped in the vChewing Phrase Editor.
|
||||
currentMarkedPhrase += "\t#𝙾𝚟𝚎𝚛𝚛𝚒𝚍𝚎"
|
||||
}
|
||||
currentMarkedPhrase += "\n"
|
||||
|
||||
if let writeFile = FileHandle(forUpdatingAtPath: path),
|
||||
let data = currentMarkedPhrase.data(using: .utf8)
|
||||
let data = currentMarkedPhrase.data(using: .utf8),
|
||||
let endl = "\n".data(using: .utf8)
|
||||
{
|
||||
writeFile.seekToEndOfFile()
|
||||
writeFile.write(endl)
|
||||
writeFile.write(data)
|
||||
writeFile.write(endl)
|
||||
writeFile.closeFile()
|
||||
} else {
|
||||
return false
|
||||
|
@ -233,7 +423,9 @@ import Cocoa
|
|||
|
||||
// We enforce the format consolidation here, since the pragma header
|
||||
// will let the UserPhraseLM bypasses the consolidating process on load.
|
||||
consolidate(givenFile: path, shouldCheckPragma: false)
|
||||
if !vChewing.LMConsolidator.consolidate(path: path, pragma: false) {
|
||||
return false
|
||||
}
|
||||
|
||||
// We use FSEventStream to monitor possible changes of the user phrase folder, hence the
|
||||
// lack of the needs of manually load data here unless FSEventStream is disabled by user.
|
||||
|
|
|
@ -1,40 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#import "LMInstantiator.h"
|
||||
#import "UserOverrideModel.h"
|
||||
#import "mgrLangModel.h"
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
// Class extension of mgrLangModel declaring class-level, read-only pointer
// properties for the CHT / CHS LMInstantiator and UserOverrideModel objects.
@interface mgrLangModel ()
|
||||
@property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHT;
|
||||
@property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHS;
|
||||
@property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS;
|
||||
@property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT;
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
|
@ -1,110 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BIGRAM_H_
|
||||
#define BIGRAM_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "KeyValuePair.h"
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
class Bigram
|
||||
{
|
||||
public:
|
||||
Bigram();
|
||||
|
||||
KeyValuePair preceedingKeyValue;
|
||||
KeyValuePair keyValue;
|
||||
double score;
|
||||
|
||||
bool operator==(const Bigram &another) const;
|
||||
bool operator<(const Bigram &another) const;
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const Bigram &gram)
|
||||
{
|
||||
std::streamsize p = stream.precision();
|
||||
stream.precision(6);
|
||||
stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," << gram.score << ")";
|
||||
stream.precision(p);
|
||||
return stream;
|
||||
}
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const std::vector<Bigram> &grams)
|
||||
{
|
||||
stream << "[" << grams.size() << "]=>{";
|
||||
|
||||
size_t index = 0;
|
||||
|
||||
for (std::vector<Bigram>::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index)
|
||||
{
|
||||
stream << index << "=>";
|
||||
stream << *gi;
|
||||
if (gi + 1 != grams.end())
|
||||
{
|
||||
stream << ",";
|
||||
}
|
||||
}
|
||||
|
||||
stream << "}";
|
||||
return stream;
|
||||
}
|
||||
|
||||
inline Bigram::Bigram() : score(0.0)
|
||||
{
|
||||
}
|
||||
|
||||
inline bool Bigram::operator==(const Bigram &another) const
|
||||
{
|
||||
return preceedingKeyValue == another.preceedingKeyValue && keyValue == another.keyValue && score == another.score;
|
||||
}
|
||||
|
||||
inline bool Bigram::operator<(const Bigram &another) const
|
||||
{
|
||||
if (preceedingKeyValue < another.preceedingKeyValue)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else if (preceedingKeyValue == another.preceedingKeyValue)
|
||||
{
|
||||
if (keyValue < another.keyValue)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else if (keyValue == another.keyValue)
|
||||
{
|
||||
return score < another.score;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
} // namespace Gramambular
|
||||
|
||||
#endif
|
|
@ -1,242 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BLOCKREADINGBUILDER_H_
|
||||
#define BLOCKREADINGBUILDER_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Grid.h"
|
||||
#include "LanguageModel.h"
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
|
||||
class BlockReadingBuilder
|
||||
{
|
||||
public:
|
||||
explicit BlockReadingBuilder(LanguageModel *lm);
|
||||
void clear();
|
||||
|
||||
size_t length() const;
|
||||
size_t cursorIndex() const;
|
||||
void setCursorIndex(size_t newIndex);
|
||||
void insertReadingAtCursor(const std::string &reading);
|
||||
bool deleteReadingBeforeCursor(); // backspace
|
||||
bool deleteReadingAfterCursor(); // delete
|
||||
|
||||
bool removeHeadReadings(size_t count);
|
||||
|
||||
void setJoinSeparator(const std::string &separator);
|
||||
const std::string joinSeparator() const;
|
||||
|
||||
std::vector<std::string> readings() const;
|
||||
|
||||
Grid &grid();
|
||||
|
||||
protected:
|
||||
void build();
|
||||
|
||||
static const std::string Join(std::vector<std::string>::const_iterator begin,
|
||||
std::vector<std::string>::const_iterator end, const std::string &separator);
|
||||
|
||||
// 規定最多可以組成的詞的字數上限為 10
|
||||
static const size_t MaximumBuildSpanLength = 10;
|
||||
|
||||
size_t m_cursorIndex;
|
||||
std::vector<std::string> m_readings;
|
||||
|
||||
Grid m_grid;
|
||||
LanguageModel *m_LM;
|
||||
std::string m_joinSeparator;
|
||||
};
|
||||
|
||||
inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *lm) : m_LM(lm), m_cursorIndex(0)
|
||||
{
|
||||
}
|
||||
|
||||
inline void BlockReadingBuilder::clear()
|
||||
{
|
||||
m_cursorIndex = 0;
|
||||
m_readings.clear();
|
||||
m_grid.clear();
|
||||
}
|
||||
|
||||
inline size_t BlockReadingBuilder::length() const
|
||||
{
|
||||
return m_readings.size();
|
||||
}
|
||||
|
||||
inline size_t BlockReadingBuilder::cursorIndex() const
|
||||
{
|
||||
return m_cursorIndex;
|
||||
}
|
||||
|
||||
inline void BlockReadingBuilder::setCursorIndex(size_t newIndex)
|
||||
{
|
||||
m_cursorIndex = newIndex > m_readings.size() ? m_readings.size() : newIndex;
|
||||
}
|
||||
|
||||
inline void BlockReadingBuilder::insertReadingAtCursor(const std::string &reading)
|
||||
{
|
||||
m_readings.insert(m_readings.begin() + m_cursorIndex, reading);
|
||||
|
||||
m_grid.expandGridByOneAtLocation(m_cursorIndex);
|
||||
build();
|
||||
m_cursorIndex++;
|
||||
}
|
||||
|
||||
inline std::vector<std::string> BlockReadingBuilder::readings() const
|
||||
{
|
||||
return m_readings;
|
||||
}
|
||||
|
||||
inline bool BlockReadingBuilder::deleteReadingBeforeCursor()
|
||||
{
|
||||
if (!m_cursorIndex)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex);
|
||||
m_cursorIndex--;
|
||||
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
|
||||
build();
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool BlockReadingBuilder::deleteReadingAfterCursor()
|
||||
{
|
||||
if (m_cursorIndex == m_readings.size())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1);
|
||||
m_grid.shrinkGridByOneAtLocation(m_cursorIndex);
|
||||
build();
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool BlockReadingBuilder::removeHeadReadings(size_t count)
|
||||
{
|
||||
if (count > length())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
if (m_cursorIndex)
|
||||
{
|
||||
m_cursorIndex--;
|
||||
}
|
||||
m_readings.erase(m_readings.begin(), m_readings.begin() + 1);
|
||||
m_grid.shrinkGridByOneAtLocation(0);
|
||||
build();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
inline void BlockReadingBuilder::setJoinSeparator(const std::string &separator)
|
||||
{
|
||||
m_joinSeparator = separator;
|
||||
}
|
||||
|
||||
inline const std::string BlockReadingBuilder::joinSeparator() const
|
||||
{
|
||||
return m_joinSeparator;
|
||||
}
|
||||
|
||||
inline Grid &BlockReadingBuilder::grid()
|
||||
{
|
||||
return m_grid;
|
||||
}
|
||||
|
||||
inline void BlockReadingBuilder::build()
|
||||
{
|
||||
if (!m_LM)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
size_t begin = 0;
|
||||
size_t end = m_cursorIndex + MaximumBuildSpanLength;
|
||||
|
||||
if (m_cursorIndex < MaximumBuildSpanLength)
|
||||
{
|
||||
begin = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
begin = m_cursorIndex - MaximumBuildSpanLength;
|
||||
}
|
||||
|
||||
if (end > m_readings.size())
|
||||
{
|
||||
end = m_readings.size();
|
||||
}
|
||||
|
||||
for (size_t p = begin; p < end; p++)
|
||||
{
|
||||
for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++)
|
||||
{
|
||||
std::string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator);
|
||||
if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading))
|
||||
{
|
||||
std::vector<Unigram> unigrams = m_LM->unigramsForKey(combinedReading);
|
||||
|
||||
if (unigrams.size() > 0)
|
||||
{
|
||||
Node n(combinedReading, unigrams, std::vector<Bigram>());
|
||||
m_grid.insertNode(n, p, q);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline const std::string BlockReadingBuilder::Join(std::vector<std::string>::const_iterator begin,
|
||||
std::vector<std::string>::const_iterator end,
|
||||
const std::string &separator)
|
||||
{
|
||||
std::string result;
|
||||
for (std::vector<std::string>::const_iterator iter = begin; iter != end;)
|
||||
{
|
||||
result += *iter;
|
||||
++iter;
|
||||
if (iter != end)
|
||||
{
|
||||
result += separator;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
} // namespace Gramambular
|
||||
|
||||
#endif
|
|
@ -1,313 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef GRID_H_
|
||||
#define GRID_H_
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "NodeAnchor.h"
|
||||
#include "Span.h"
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
|
||||
class Grid
|
||||
{
|
||||
public:
|
||||
void clear();
|
||||
void insertNode(const Node &node, size_t location, size_t spanningLength);
|
||||
bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, const std::string &key);
|
||||
|
||||
void expandGridByOneAtLocation(size_t location);
|
||||
void shrinkGridByOneAtLocation(size_t location);
|
||||
|
||||
size_t width() const;
|
||||
std::vector<NodeAnchor> nodesEndingAt(size_t location);
|
||||
std::vector<NodeAnchor> nodesCrossingOrEndingAt(size_t location);
|
||||
|
||||
// "Freeze" the node with the unigram that represents the selected candidate
|
||||
// value. After this, the node that contains the unigram will always be
|
||||
// evaluated to that unigram, while all other overlapping nodes will be reset
|
||||
// to their initial state (that is, if any of those nodes were "frozen" or
|
||||
// fixed, they will be unfrozen.)
|
||||
NodeAnchor fixNodeSelectedCandidate(size_t location, const std::string &value);
|
||||
|
||||
// Similar to fixNodeSelectedCandidate, but instead of "freezing" the node,
|
||||
// only boost the unigram that represents the value with an overriding score.
|
||||
// This has the same side effect as fixNodeSelectedCandidate, which is that
|
||||
// all other overlapping nodes will be reset to their initial state.
|
||||
void overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, float overridingScore);
|
||||
|
||||
std::string dumpDOT()
|
||||
{
|
||||
std::stringstream sst;
|
||||
sst << "digraph {" << std::endl;
|
||||
sst << "graph [ rankdir=LR ];" << std::endl;
|
||||
sst << "BOS;" << std::endl;
|
||||
|
||||
for (size_t p = 0; p < m_spans.size(); p++)
|
||||
{
|
||||
Span &span = m_spans[p];
|
||||
for (size_t ni = 0; ni <= span.maximumLength(); ni++)
|
||||
{
|
||||
Node *np = span.nodeOfLength(ni);
|
||||
if (np)
|
||||
{
|
||||
if (!p)
|
||||
{
|
||||
sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl;
|
||||
}
|
||||
|
||||
sst << np->currentKeyValue().value << ";" << std::endl;
|
||||
|
||||
if (p + ni < m_spans.size())
|
||||
{
|
||||
Span &dstSpan = m_spans[p + ni];
|
||||
for (size_t q = 0; q <= dstSpan.maximumLength(); q++)
|
||||
{
|
||||
Node *dn = dstSpan.nodeOfLength(q);
|
||||
if (dn)
|
||||
{
|
||||
sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (p + ni == m_spans.size())
|
||||
{
|
||||
sst << np->currentKeyValue().value << " -> "
|
||||
<< "EOS;" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sst << "EOS;" << std::endl;
|
||||
sst << "}";
|
||||
return sst.str();
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<Span> m_spans;
|
||||
};
|
||||
|
||||
inline void Grid::clear()
|
||||
{
|
||||
m_spans.clear();
|
||||
}
|
||||
|
||||
inline void Grid::insertNode(const Node &node, size_t location, size_t spanningLength)
|
||||
{
|
||||
if (location >= m_spans.size())
|
||||
{
|
||||
size_t diff = location - m_spans.size() + 1;
|
||||
|
||||
for (size_t i = 0; i < diff; i++)
|
||||
{
|
||||
m_spans.push_back(Span());
|
||||
}
|
||||
}
|
||||
|
||||
m_spans[location].insertNodeOfLength(node, spanningLength);
|
||||
}
|
||||
|
||||
inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength,
|
||||
const std::string &key)
|
||||
{
|
||||
if (location > m_spans.size())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
const Node *n = m_spans[location].nodeOfLength(spanningLength);
|
||||
if (!n)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return key == n->key();
|
||||
}
|
||||
|
||||
inline void Grid::expandGridByOneAtLocation(size_t location)
|
||||
{
|
||||
if (!location || location == m_spans.size())
|
||||
{
|
||||
m_spans.insert(m_spans.begin() + location, Span());
|
||||
}
|
||||
else
|
||||
{
|
||||
m_spans.insert(m_spans.begin() + location, Span());
|
||||
for (size_t i = 0; i < location; i++)
|
||||
{
|
||||
// zaps overlapping spans
|
||||
m_spans[i].removeNodeOfLengthGreaterThan(location - i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void Grid::shrinkGridByOneAtLocation(size_t location)
|
||||
{
|
||||
if (location >= m_spans.size())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
m_spans.erase(m_spans.begin() + location);
|
||||
for (size_t i = 0; i < location; i++)
|
||||
{
|
||||
// zaps overlapping spans
|
||||
m_spans[i].removeNodeOfLengthGreaterThan(location - i);
|
||||
}
|
||||
}
|
||||
|
||||
inline size_t Grid::width() const
|
||||
{
|
||||
return m_spans.size();
|
||||
}
|
||||
|
||||
// macOS 10.6 開始的內建注音的游標前置選字風格
|
||||
inline std::vector<NodeAnchor> Grid::nodesEndingAt(size_t location)
|
||||
{
|
||||
std::vector<NodeAnchor> result;
|
||||
|
||||
if (m_spans.size() && location <= m_spans.size())
|
||||
{
|
||||
for (size_t i = 0; i < location; i++)
|
||||
{
|
||||
Span &span = m_spans[i];
|
||||
if (i + span.maximumLength() >= location)
|
||||
{
|
||||
Node *np = span.nodeOfLength(location - i);
|
||||
if (np)
|
||||
{
|
||||
NodeAnchor na;
|
||||
na.node = np;
|
||||
na.location = i;
|
||||
na.spanningLength = location - i;
|
||||
|
||||
result.push_back(na);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Windows 版奇摩注音輸入法的游標後置的選字風格。
|
||||
// 與微軟新注音相異的是,這個風格允許在詞的中間叫出候選字窗。
|
||||
inline std::vector<NodeAnchor> Grid::nodesCrossingOrEndingAt(size_t location)
|
||||
{
|
||||
std::vector<NodeAnchor> result;
|
||||
|
||||
if (m_spans.size() && location <= m_spans.size())
|
||||
{
|
||||
for (size_t i = 0; i < location; i++)
|
||||
{
|
||||
Span &span = m_spans[i];
|
||||
|
||||
if (i + span.maximumLength() >= location)
|
||||
{
|
||||
for (size_t j = 1, m = span.maximumLength(); j <= m; j++)
|
||||
{
|
||||
if (i + j < location)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
Node *np = span.nodeOfLength(j);
|
||||
if (np)
|
||||
{
|
||||
NodeAnchor na;
|
||||
na.node = np;
|
||||
na.location = i;
|
||||
na.spanningLength = location - i;
|
||||
|
||||
result.push_back(na);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// For nodes found at the location, fix their currently-selected candidate using
|
||||
// the supplied string value.
|
||||
inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const std::string &value)
|
||||
{
|
||||
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
||||
NodeAnchor node;
|
||||
for (auto nodeAnchor : nodes)
|
||||
{
|
||||
auto candidates = nodeAnchor.node->candidates();
|
||||
|
||||
// Reset the candidate-fixed state of every node at the location.
|
||||
const_cast<Node *>(nodeAnchor.node)->resetCandidate();
|
||||
|
||||
for (size_t i = 0, c = candidates.size(); i < c; ++i)
|
||||
{
|
||||
if (candidates[i].value == value)
|
||||
{
|
||||
const_cast<Node *>(nodeAnchor.node)->selectCandidateAtIndex(i);
|
||||
node = nodeAnchor;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value,
|
||||
float overridingScore)
|
||||
{
|
||||
std::vector<NodeAnchor> nodes = nodesCrossingOrEndingAt(location);
|
||||
for (auto nodeAnchor : nodes)
|
||||
{
|
||||
auto candidates = nodeAnchor.node->candidates();
|
||||
|
||||
// Reset the candidate-fixed state of every node at the location.
|
||||
const_cast<Node *>(nodeAnchor.node)->resetCandidate();
|
||||
|
||||
for (size_t i = 0, c = candidates.size(); i < c; ++i)
|
||||
{
|
||||
if (candidates[i].value == value)
|
||||
{
|
||||
const_cast<Node *>(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Gramambular
|
||||
|
||||
#endif
|
|
@ -1,71 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef KEYVALUEPAIR_H_
|
||||
#define KEYVALUEPAIR_H_
|
||||
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
|
||||
/// A plain pair of strings (`key`, `value`) with equality, ordering and stream output.
class KeyValuePair
{
  public:
    std::string key;
    std::string value;

    /// True when both the key and the value are equal.
    bool operator==(const KeyValuePair &another) const;
    /// Strict ordering: compares keys first, then values when the keys are equal.
    bool operator<(const KeyValuePair &another) const;
};

/// Writes the pair to `stream` in the form "(key,value)".
inline std::ostream &operator<<(std::ostream &stream, const KeyValuePair &pair)
{
    return stream << "(" << pair.key << "," << pair.value << ")";
}

inline bool KeyValuePair::operator==(const KeyValuePair &another) const
{
    if (key != another.key)
    {
        return false;
    }
    return value == another.value;
}

inline bool KeyValuePair::operator<(const KeyValuePair &another) const
{
    // A single three-way comparison of the keys decides everything except the tie case.
    const int keyOrder = key.compare(another.key);
    if (keyOrder != 0)
    {
        return keyOrder < 0;
    }
    return value < another.value;
}
|
||||
} // namespace Gramambular
|
||||
|
||||
#endif
|
|
@ -1,249 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NODE_H_
|
||||
#define NODE_H_
|
||||
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "LanguageModel.h"
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
|
||||
class Node
|
||||
{
|
||||
public:
|
||||
Node();
|
||||
Node(const std::string &key, const std::vector<Unigram> &unigrams, const std::vector<Bigram> &bigrams);
|
||||
|
||||
void primeNodeWithPreceedingKeyValues(const std::vector<KeyValuePair> &keyValues);
|
||||
|
||||
bool isCandidateFixed() const;
|
||||
const std::vector<KeyValuePair> &candidates() const;
|
||||
void selectCandidateAtIndex(size_t index = 0, bool fix = true);
|
||||
void resetCandidate();
|
||||
void selectFloatingCandidateAtIndex(size_t index, double score);
|
||||
|
||||
const std::string &key() const;
|
||||
double score() const;
|
||||
double scoreForCandidate(const std::string &candidate) const;
|
||||
const KeyValuePair currentKeyValue() const;
|
||||
double highestUnigramScore() const;
|
||||
|
||||
protected:
|
||||
const LanguageModel *m_LM;
|
||||
|
||||
std::string m_key;
|
||||
double m_score;
|
||||
|
||||
std::vector<Unigram> m_unigrams;
|
||||
std::vector<KeyValuePair> m_candidates;
|
||||
std::map<std::string, size_t> m_valueUnigramIndexMap;
|
||||
std::map<KeyValuePair, std::vector<Bigram>> m_preceedingGramBigramMap;
|
||||
|
||||
bool m_candidateFixed;
|
||||
size_t m_selectedUnigramIndex;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &stream, const Node &node);
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const Node &node)
|
||||
{
|
||||
stream << "(node,key:" << node.m_key << ",fixed:" << (node.m_candidateFixed ? "true" : "false")
|
||||
<< ",selected:" << node.m_selectedUnigramIndex << "," << node.m_unigrams << ")";
|
||||
return stream;
|
||||
}
|
||||
|
||||
/// Builds an empty node: no key, no unigrams, score 0, candidate 0 selected and not fixed.
/// Initializers are listed in member declaration order, and `m_LM` is explicitly nulled so
/// copying the node never reads an indeterminate pointer.
inline Node::Node() : m_LM(nullptr), m_score(0.0), m_candidateFixed(false), m_selectedUnigramIndex(0)
{
}
|
||||
|
||||
/// Builds a node for `key` from its unigrams and bigrams.
///
/// The unigrams are stable-sorted with Unigram::ScoreCompare, the node score starts at the
/// score of the first sorted unigram (0.0 when there are none), one candidate is recorded
/// per unigram, and the bigrams are grouped by their preceding key-value.
/// Initializers follow member declaration order and `m_LM` is explicitly nulled.
inline Node::Node(const std::string &key, const std::vector<Unigram> &unigrams, const std::vector<Bigram> &bigrams)
    : m_LM(nullptr), m_key(key), m_score(0.0), m_unigrams(unigrams), m_candidateFixed(false),
      m_selectedUnigramIndex(0)
{
    // NOTE(review): unqualified call kept as in the original; this header's own include
    // list has no <algorithm>, so the declaration presumably arrives transitively.
    stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare);

    if (!m_unigrams.empty())
    {
        m_score = m_unigrams[0].score;
    }

    m_candidates.reserve(m_unigrams.size());
    for (size_t i = 0; i < m_unigrams.size(); ++i)
    {
        const KeyValuePair &keyValue = m_unigrams[i].keyValue;
        // When several unigrams share a value, the last index wins (plain map assignment).
        m_valueUnigramIndexMap[keyValue.value] = i;
        m_candidates.push_back(keyValue);
    }

    for (const Bigram &bigram : bigrams)
    {
        m_preceedingGramBigramMap[bigram.preceedingKeyValue].push_back(bigram);
    }
}
|
||||
|
||||
inline void Node::primeNodeWithPreceedingKeyValues(const std::vector<KeyValuePair> &keyValues)
|
||||
{
|
||||
size_t newIndex = m_selectedUnigramIndex;
|
||||
double max = m_score;
|
||||
|
||||
if (!isCandidateFixed())
|
||||
{
|
||||
for (std::vector<KeyValuePair>::const_iterator kvi = keyValues.begin(); kvi != keyValues.end(); ++kvi)
|
||||
{
|
||||
std::map<KeyValuePair, std::vector<Bigram>>::const_iterator f = m_preceedingGramBigramMap.find(*kvi);
|
||||
if (f != m_preceedingGramBigramMap.end())
|
||||
{
|
||||
const std::vector<Bigram> &bigrams = (*f).second;
|
||||
|
||||
for (std::vector<Bigram>::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi)
|
||||
{
|
||||
const Bigram &bigram = *bi;
|
||||
if (bigram.score > max)
|
||||
{
|
||||
std::map<std::string, size_t>::const_iterator uf =
|
||||
m_valueUnigramIndexMap.find((*bi).keyValue.value);
|
||||
if (uf != m_valueUnigramIndexMap.end())
|
||||
{
|
||||
newIndex = (*uf).second;
|
||||
max = bigram.score;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (m_score != max)
|
||||
{
|
||||
m_score = max;
|
||||
}
|
||||
|
||||
if (newIndex != m_selectedUnigramIndex)
|
||||
{
|
||||
m_selectedUnigramIndex = newIndex;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool Node::isCandidateFixed() const
|
||||
{
|
||||
return m_candidateFixed;
|
||||
}
|
||||
|
||||
inline const std::vector<KeyValuePair> &Node::candidates() const
|
||||
{
|
||||
return m_candidates;
|
||||
}
|
||||
|
||||
inline void Node::selectCandidateAtIndex(size_t index, bool fix)
|
||||
{
|
||||
if (index >= m_unigrams.size())
|
||||
{
|
||||
m_selectedUnigramIndex = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_selectedUnigramIndex = index;
|
||||
}
|
||||
|
||||
m_candidateFixed = fix;
|
||||
m_score = 99;
|
||||
}
|
||||
|
||||
inline void Node::resetCandidate()
|
||||
{
|
||||
m_selectedUnigramIndex = 0;
|
||||
m_candidateFixed = 0;
|
||||
if (m_unigrams.size())
|
||||
{
|
||||
m_score = m_unigrams[0].score;
|
||||
}
|
||||
}
|
||||
|
||||
inline void Node::selectFloatingCandidateAtIndex(size_t index, double score)
|
||||
{
|
||||
if (index >= m_unigrams.size())
|
||||
{
|
||||
m_selectedUnigramIndex = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_selectedUnigramIndex = index;
|
||||
}
|
||||
m_candidateFixed = false;
|
||||
m_score = score;
|
||||
}
|
||||
|
||||
inline const std::string &Node::key() const
|
||||
{
|
||||
return m_key;
|
||||
}
|
||||
|
||||
inline double Node::score() const
|
||||
{
|
||||
return m_score;
|
||||
}
|
||||
|
||||
inline double Node::scoreForCandidate(const std::string &candidate) const
|
||||
{
|
||||
for (auto unigram : m_unigrams)
|
||||
{
|
||||
if (unigram.keyValue.value == candidate)
|
||||
{
|
||||
return unigram.score;
|
||||
}
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
inline double Node::highestUnigramScore() const
|
||||
{
|
||||
if (m_unigrams.empty())
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
return m_unigrams[0].score;
|
||||
}
|
||||
|
||||
/// The currently selected candidate, or a default-constructed KeyValuePair when the
/// selected index is not below the number of unigrams.
inline const KeyValuePair Node::currentKeyValue() const
{
    const bool hasSelection = m_selectedUnigramIndex < m_unigrams.size();
    if (hasSelection)
    {
        return m_candidates[m_selectedUnigramIndex];
    }
    return KeyValuePair();
}
|
||||
} // namespace Gramambular
|
||||
|
||||
#endif
|
|
@ -1,75 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NODEANCHOR_H_
|
||||
#define NODEANCHOR_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "Node.h"
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
|
||||
struct NodeAnchor
|
||||
{
|
||||
const Node *node = nullptr;
|
||||
size_t location = 0;
|
||||
size_t spanningLength = 0;
|
||||
double accumulatedScore = 0.0;
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const NodeAnchor &anchor)
|
||||
{
|
||||
stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),";
|
||||
if (anchor.node)
|
||||
{
|
||||
stream << *(anchor.node);
|
||||
}
|
||||
else
|
||||
{
|
||||
stream << "null";
|
||||
}
|
||||
stream << "}";
|
||||
return stream;
|
||||
}
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const std::vector<NodeAnchor> &anchor)
|
||||
{
|
||||
for (std::vector<NodeAnchor>::const_iterator i = anchor.begin(); i != anchor.end(); ++i)
|
||||
{
|
||||
stream << *i;
|
||||
if (i + 1 != anchor.end())
|
||||
{
|
||||
stream << "<-";
|
||||
}
|
||||
}
|
||||
|
||||
return stream;
|
||||
}
|
||||
} // namespace Gramambular
|
||||
|
||||
#endif
|
|
@ -1,112 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SPAN_H_
|
||||
#define SPAN_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
|
||||
#include "Node.h"
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
class Span
|
||||
{
|
||||
public:
|
||||
void clear();
|
||||
void insertNodeOfLength(const Node &node, size_t length);
|
||||
void removeNodeOfLengthGreaterThan(size_t length);
|
||||
|
||||
Node *nodeOfLength(size_t length);
|
||||
size_t maximumLength() const;
|
||||
|
||||
protected:
|
||||
std::map<size_t, Node> m_lengthNodeMap;
|
||||
size_t m_maximumLength = 0;
|
||||
};
|
||||
|
||||
inline void Span::clear()
|
||||
{
|
||||
m_lengthNodeMap.clear();
|
||||
m_maximumLength = 0;
|
||||
}
|
||||
|
||||
inline void Span::insertNodeOfLength(const Node &node, size_t length)
|
||||
{
|
||||
m_lengthNodeMap[length] = node;
|
||||
if (length > m_maximumLength)
|
||||
{
|
||||
m_maximumLength = length;
|
||||
}
|
||||
}
|
||||
|
||||
inline void Span::removeNodeOfLengthGreaterThan(size_t length)
|
||||
{
|
||||
if (length > m_maximumLength)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
size_t max = 0;
|
||||
std::set<size_t> removeSet;
|
||||
for (std::map<size_t, Node>::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end(); i != e; ++i)
|
||||
{
|
||||
if ((*i).first > length)
|
||||
{
|
||||
removeSet.insert((*i).first);
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((*i).first > max)
|
||||
{
|
||||
max = (*i).first;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (std::set<size_t>::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i)
|
||||
{
|
||||
m_lengthNodeMap.erase(*i);
|
||||
}
|
||||
|
||||
m_maximumLength = max;
|
||||
}
|
||||
|
||||
/// Looks up the node stored for `length`; returns a null pointer when there is none.
inline Node *Span::nodeOfLength(size_t length)
{
    const auto found = m_lengthNodeMap.find(length);
    if (found == m_lengthNodeMap.end())
    {
        return nullptr;
    }
    return &found->second;
}
|
||||
|
||||
inline size_t Span::maximumLength() const
|
||||
{
|
||||
return m_maximumLength;
|
||||
}
|
||||
} // namespace Gramambular
|
||||
|
||||
#endif
|
|
@ -1,108 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef UNIGRAM_H_
|
||||
#define UNIGRAM_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "KeyValuePair.h"
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
|
||||
class Unigram
|
||||
{
|
||||
public:
|
||||
Unigram();
|
||||
|
||||
KeyValuePair keyValue;
|
||||
double score;
|
||||
|
||||
bool operator==(const Unigram &another) const;
|
||||
bool operator<(const Unigram &another) const;
|
||||
|
||||
static bool ScoreCompare(const Unigram &a, const Unigram &b);
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const Unigram &gram)
|
||||
{
|
||||
std::streamsize p = stream.precision();
|
||||
stream.precision(6);
|
||||
stream << "(" << gram.keyValue << "," << gram.score << ")";
|
||||
stream.precision(p);
|
||||
return stream;
|
||||
}
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &stream, const std::vector<Unigram> &grams)
|
||||
{
|
||||
stream << "[" << grams.size() << "]=>{";
|
||||
|
||||
size_t index = 0;
|
||||
|
||||
for (std::vector<Unigram>::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index)
|
||||
{
|
||||
stream << index << "=>";
|
||||
stream << *gi;
|
||||
if (gi + 1 != grams.end())
|
||||
{
|
||||
stream << ",";
|
||||
}
|
||||
}
|
||||
|
||||
stream << "}";
|
||||
return stream;
|
||||
}
|
||||
|
||||
/// Default unigram: empty key-value, score 0.0.
inline Unigram::Unigram() : score(0.0) {}
|
||||
|
||||
inline bool Unigram::operator==(const Unigram &another) const
|
||||
{
|
||||
return keyValue == another.keyValue && score == another.score;
|
||||
}
|
||||
|
||||
inline bool Unigram::operator<(const Unigram &another) const
|
||||
{
|
||||
if (keyValue < another.keyValue)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else if (keyValue == another.keyValue)
|
||||
{
|
||||
return score < another.score;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool Unigram::ScoreCompare(const Unigram &a, const Unigram &b)
|
||||
{
|
||||
return a.score > b.score;
|
||||
}
|
||||
} // namespace Gramambular
|
||||
|
||||
#endif
|
|
@ -1,96 +0,0 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef WALKER_H_
|
||||
#define WALKER_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "Grid.h"
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
|
||||
class Walker
|
||||
{
|
||||
public:
|
||||
explicit Walker(Grid *inGrid);
|
||||
const std::vector<NodeAnchor> reverseWalk(size_t location, double accumulatedScore = 0.0);
|
||||
|
||||
protected:
|
||||
Grid *m_grid;
|
||||
};
|
||||
|
||||
/// Stores the grid pointer that reverseWalk() will query.
inline Walker::Walker(Grid *inGrid) : m_grid(inGrid) {}
|
||||
|
||||
inline const std::vector<NodeAnchor> Walker::reverseWalk(size_t location, double accumulatedScore)
|
||||
{
|
||||
if (!location || location > m_grid->width())
|
||||
{
|
||||
return std::vector<NodeAnchor>();
|
||||
}
|
||||
|
||||
std::vector<std::vector<NodeAnchor>> paths;
|
||||
|
||||
std::vector<NodeAnchor> nodes = m_grid->nodesEndingAt(location);
|
||||
|
||||
for (std::vector<NodeAnchor>::iterator ni = nodes.begin(); ni != nodes.end(); ++ni)
|
||||
{
|
||||
if (!(*ni).node)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
(*ni).accumulatedScore = accumulatedScore + (*ni).node->score();
|
||||
|
||||
std::vector<NodeAnchor> path = reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore);
|
||||
path.insert(path.begin(), *ni);
|
||||
|
||||
paths.push_back(path);
|
||||
}
|
||||
|
||||
if (!paths.size())
|
||||
{
|
||||
return std::vector<NodeAnchor>();
|
||||
}
|
||||
|
||||
std::vector<NodeAnchor> *result = &*(paths.begin());
|
||||
for (std::vector<std::vector<NodeAnchor>>::iterator pi = paths.begin(); pi != paths.end(); ++pi)
|
||||
{
|
||||
if ((*pi).back().accumulatedScore > result->back().accumulatedScore)
|
||||
{
|
||||
result = &*pi;
|
||||
}
|
||||
}
|
||||
|
||||
return *result;
|
||||
}
|
||||
} // namespace Gramambular
|
||||
|
||||
#endif
|
|
@ -1,6 +1,5 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
|
@ -24,31 +23,5 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef SYMBOLLM_H
|
||||
#define SYMBOLLM_H
|
||||
|
||||
#include "LanguageModel.h"
|
||||
#include "UserPhrasesLM.h"
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
class SymbolLM : public UserPhrasesLM
|
||||
{
|
||||
public:
|
||||
bool allowConsolidation() override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
float overridedValue() override
|
||||
{
|
||||
return -13.0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace vChewing
|
||||
|
||||
#endif
|
||||
/// The namespace for this package.
|
||||
public enum Megrez {}
|
|
@ -0,0 +1,146 @@
|
|||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
extension Megrez {
  /// Keeps an editable sequence of readings with a cursor, and rebuilds the node grid
  /// around the cursor from the language model after every edit.
  public class BlockReadingBuilder {
    /// Upper limit on how many readings may be combined into a single node key.
    let kMaximumBuildSpanLength = 10
    var mutCursorIndex: Int = 0
    var mutReadings: [String] = []
    var mutGrid: Grid = .init()
    var mutLM: LanguageModel
    var mutJoinSeparator: String = ""

    /// - Parameter lm: The language model queried for unigrams while building the grid.
    public init(lm: LanguageModel) {
      mutLM = lm
    }

    /// Resets the cursor, drops all readings and clears the grid.
    public func clear() {
      mutCursorIndex = 0
      mutReadings.removeAll()
      mutGrid.clear()
    }

    /// Number of readings currently held.
    public func length() -> Int { mutReadings.count }

    public func cursorIndex() -> Int { mutCursorIndex }

    /// Moves the cursor, clamping it into `0...length()`.
    ///
    /// The lower bound matters because the index is a signed `Int`: a negative cursor
    /// would make the next `insert(at:)` / `remove(at:)` trap.
    public func setCursorIndex(newIndex: Int) {
      mutCursorIndex = max(0, min(newIndex, mutReadings.count))
    }

    /// Inserts `reading` at the cursor, grows the grid at that location, rebuilds,
    /// and then advances the cursor past the new reading.
    public func insertReadingAtCursor(reading: String) {
      mutReadings.insert(reading, at: mutCursorIndex)
      mutGrid.expandGridByOneAt(location: mutCursorIndex)
      build()
      mutCursorIndex += 1
    }

    public func readings() -> [String] { mutReadings }

    /// Removes the reading just before the cursor.
    /// - Returns: `false` when the cursor is at position 0 and nothing was removed.
    @discardableResult public func deleteReadingBeforeCursor() -> Bool {
      guard mutCursorIndex != 0 else { return false }

      mutReadings.remove(at: mutCursorIndex - 1)
      mutCursorIndex -= 1
      mutGrid.shrinkGridByOneAt(location: mutCursorIndex)
      build()
      return true
    }

    /// Removes the reading just after the cursor.
    /// - Returns: `false` when the cursor is at the end and nothing was removed.
    @discardableResult public func deleteReadingAfterCursor() -> Bool {
      guard mutCursorIndex != mutReadings.count else { return false }

      mutReadings.remove(at: mutCursorIndex)
      mutGrid.shrinkGridByOneAt(location: mutCursorIndex)
      build()
      return true
    }

    /// Removes `count` readings from the head, rebuilding after each removal and pulling
    /// the cursor back one step per removal until it reaches 0.
    /// - Returns: `false` (without changing anything) when `count` exceeds `length()`.
    @discardableResult public func removeHeadReadings(count: Int) -> Bool {
      guard count <= length() else { return false }

      // `max(count, 0)` keeps a non-positive count a no-op, as with the original counter loop.
      for _ in 0..<max(count, 0) {
        if mutCursorIndex != 0 {
          mutCursorIndex -= 1
        }
        mutReadings.removeFirst()
        mutGrid.shrinkGridByOneAt(location: 0)
        build()
      }
      return true
    }

    public func setJoinSeparator(separator: String) {
      mutJoinSeparator = separator
    }

    public func joinSeparator() -> String { mutJoinSeparator }

    public func grid() -> Grid { mutGrid }

    /// Inserts missing nodes into the grid for every run of up to `kMaximumBuildSpanLength`
    /// readings within `kMaximumBuildSpanLength` positions on either side of the cursor.
    /// A node is inserted only when the grid has no matching node for that run and the
    /// language model returns at least one unigram for the joined reading.
    public func build() {
      let itrBegin = max(0, mutCursorIndex - kMaximumBuildSpanLength)
      let itrEnd = min(mutCursorIndex + kMaximumBuildSpanLength, mutReadings.count)
      // Forming `a..<b` traps when a > b, so bail out explicitly on an empty window.
      guard itrBegin < itrEnd else { return }

      for p in itrBegin..<itrEnd {
        // p < itrEnd here, so the longest run starting at p is at least 1.
        let longestRun = min(kMaximumBuildSpanLength, itrEnd - p)
        for q in 1...longestRun {
          let combinedReading = join(slice: mutReadings[p..<(p + q)], separator: mutJoinSeparator)
          if mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
            continue
          }
          let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
          if unigrams.isEmpty { continue }
          let n = Node(key: combinedReading, unigrams: unigrams)
          mutGrid.insertNode(node: n, location: p, spanningLength: q)
        }
      }
    }

    /// Joins the strings of `strSlice` with `separator` between consecutive elements.
    public func join(slice strSlice: ArraySlice<String>, separator: String) -> String {
      strSlice.joined(separator: separator)
    }
  }
}
|
|
@ -0,0 +1,74 @@
|
|||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
extension Megrez {
  /// Walks a `Grid` backwards from a location to find the best-scoring chain of anchors.
  public class Walker {
    var mutGrid: Grid

    /// - Parameter grid: The grid to walk; defaults to a fresh, empty grid.
    public init(grid: Megrez.Grid = Megrez.Grid()) {
      mutGrid = grid
    }

    /// Recursively explores every chain of anchors ending at `location` and returns the
    /// one whose last element has the highest accumulated score (the first path wins ties).
    ///
    /// - Parameters:
    ///   - location: End position of the walk; must be within `1...mutGrid.width()`.
    ///   - accumulatedScore: Score carried in from the caller's side of the recursion.
    /// - Returns: The best path, or an empty array when `location` is out of range or no
    ///   anchor with a node ends there.
    public func reverseWalk(at location: Int, score accumulatedScore: Double = 0.0) -> [NodeAnchor] {
      // `location` is a signed Int, so reject negatives too: a bare `== 0` test would let
      // them through to the grid lookup (and the recursion subtracts a spanning length).
      guard location > 0, location <= mutGrid.width() else { return [] }

      var paths: [[NodeAnchor]] = []
      for candidate in mutGrid.nodesEndingAt(location: location) {
        guard let node = candidate.node else { continue }

        var anchor = candidate
        anchor.accumulatedScore = accumulatedScore + node.score()

        var path = reverseWalk(
          at: location - anchor.spanningLength,
          score: anchor.accumulatedScore
        )
        path.insert(anchor, at: 0)
        paths.append(path)
      }

      guard var best = paths.first else { return [] }
      for path in paths {
        if let pathTail = path.last, let bestTail = best.last,
          pathTail.accumulatedScore > bestTail.accumulatedScore
        {
          best = path
        }
      }
      return best
    }
  }
}
|
|
@ -0,0 +1,180 @@
|
|||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
extension Megrez {
  /// A sequence of spans; each span holds the nodes that start at its index, keyed by length.
  public class Grid {
    /// Spans of the grid, one per position.
    var mutSpans: [Megrez.Span]

    /// Creates an empty grid.
    public init() {
      mutSpans = []
    }

    /// Removes every span from the grid.
    public func clear() {
      mutSpans.removeAll()
    }

    /// Inserts `node` into the span at `location`, growing the grid with empty spans if needed.
    ///
    /// - Parameters:
    ///   - node: The node to store.
    ///   - location: Index of the span the node starts at; must not be negative.
    ///   - spanningLength: The length under which the node is stored in that span.
    public func insertNode(node: Node, location: Int, spanningLength: Int) {
      if location >= mutSpans.count {
        let missing = location - mutSpans.count + 1
        mutSpans.append(contentsOf: Array(repeating: Span(), count: missing))
      }
      mutSpans[location].insert(node: node, length: spanningLength)
    }

    /// Tells whether the span at `location` holds a node of `spanningLength` whose key equals `key`.
    ///
    /// - Returns: `false` when `location` is not a valid span index, when no node of that
    ///   length exists there, or when the keys differ.
    public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool {
      // `location == mutSpans.count` used to slip through and crash on the subscript below.
      guard mutSpans.indices.contains(location) else { return false }
      guard let matched = mutSpans[location].node(length: spanningLength) else { return false }
      return matched.key() == key
    }

    /// Grows the grid by one empty span at `location`.
    ///
    /// The new span is inserted at `location`, mirroring `shrinkGridByOneAt(location:)`,
    /// which removes at `location`. Nodes in earlier spans that would reach across the
    /// insertion point are removed. A `location` outside `0...width()` appends the span
    /// at the end without touching existing nodes.
    public func expandGridByOneAt(location: Int) {
      guard location > 0, location <= mutSpans.count else {
        // Front insertion needs no zapping; anything out of range falls back to appending.
        mutSpans.insert(Span(), at: location == 0 ? 0 : mutSpans.count)
        return
      }
      mutSpans.insert(Span(), at: location)
      for i in 0..<location {
        // zaps overlapping spans
        mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
      }
    }

    /// Removes the span at `location` and drops nodes in earlier spans that reached across it.
    ///
    /// Does nothing when `location` is not a valid span index.
    public func shrinkGridByOneAt(location: Int) {
      guard mutSpans.indices.contains(location) else { return }
      mutSpans.remove(at: location)
      for i in 0..<location {
        // zaps overlapping spans
        mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
      }
    }

    /// Number of spans in the grid.
    public func width() -> Int { mutSpans.count }

    /// Collects anchors for every node that ends exactly at `location`.
    ///
    /// - Returns: One anchor per matching node, ordered by starting span; empty when
    ///   `location` is not within `1...width()`.
    public func nodesEndingAt(location: Int) -> [NodeAnchor] {
      guard location > 0, location <= mutSpans.count else { return [] }
      var results: [NodeAnchor] = []
      for (i, span) in mutSpans.prefix(location).enumerated() {
        let length = location - i
        guard i + span.maximumLength >= location, let np = span.node(length: length) else { continue }
        results.append(NodeAnchor(node: np, location: i, spanningLength: length))
      }
      return results
    }

    /// Collects anchors for every node that starts before `location` and ends at or after it.
    ///
    /// - Returns: One anchor per matching node, ordered by starting span and then by node
    ///   length; empty when `location` is not within `1...width()`.
    public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
      guard location > 0, location <= mutSpans.count else { return [] }
      var results: [NodeAnchor] = []
      for (i, span) in mutSpans.prefix(location).enumerated() {
        guard i + span.maximumLength >= location else { continue }
        // Lengths shorter than `location - i` end before `location`, so start from there.
        for j in (location - i)...span.maximumLength {
          guard let np = span.node(length: j) else { continue }
          // NOTE(review): the anchor records `location - i`, not the node's own length `j`;
          // kept exactly as before — confirm this is what callers expect.
          results.append(NodeAnchor(node: np, location: i, spanningLength: location - i))
        }
      }
      return results
    }

    /// Marks `value` as the fixed candidate of the nodes crossing or ending at `location`.
    ///
    /// Every such node first has its selection reset; nodes offering a candidate equal to
    /// `value` then get that candidate selected and fixed.
    ///
    /// - Returns: The anchor of the last node that offered `value`, or an empty anchor when none did.
    public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor {
      var selected = NodeAnchor()
      for anchor in nodesCrossingOrEndingAt(location: location) {
        guard let theNode = anchor.node else { continue }
        let candidates = theNode.candidates()
        // Reset the candidate-fixed state of every node at the location.
        theNode.resetCandidate()
        if let index = candidates.firstIndex(where: { $0.value == value }) {
          // `fix` defaults to false on the node, so it must be requested explicitly here.
          theNode.selectCandidateAt(index: index, fix: true)
          selected = anchor
        }
      }
      return selected
    }

    /// Selects `value` on the nodes crossing or ending at `location` without fixing it,
    /// and assigns `overridingScore` as the score of each node that offers the value.
    ///
    /// Every such node has its selection reset first.
    public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) {
      for anchor in nodesCrossingOrEndingAt(location: location) {
        guard let theNode = anchor.node else { continue }
        let candidates = theNode.candidates()
        // Reset the candidate-fixed state of every node at the location.
        theNode.resetCandidate()
        if let index = candidates.firstIndex(where: { $0.value == value }) {
          theNode.selectFloatingCandidateAt(index: index, score: overridingScore)
        }
      }
    }
  }
}
|
|
@ -1,6 +1,5 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
|
@ -24,31 +23,14 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CNSLM_H
|
||||
#define CNSLM_H
|
||||
|
||||
#include "LanguageModel.h"
|
||||
#include "UserPhrasesLM.h"
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace vChewing
|
||||
{
|
||||
|
||||
class CNSLM : public UserPhrasesLM
|
||||
{
|
||||
public:
|
||||
bool allowConsolidation() override
|
||||
{
|
||||
return false;
|
||||
extension Megrez {
|
||||
/// Pairs a node with its position information and a running score.
@frozen public struct NodeAnchor {
  /// The anchored node, if any.
  public var node: Node?
  /// Location value recorded for the node (0 by default).
  public var location: Int = 0
  /// Spanning length recorded for the node (0 by default).
  public var spanningLength: Int = 0
  /// Score accumulated up to and including this anchor (0 by default).
  public var accumulatedScore: Double = 0.0
  /// Character count of the node's key, or 0 when there is no node.
  public var keyLength: Int {
    guard let anchoredNode = node else { return 0 }
    return anchoredNode.key().count
  }
}
|
||||
float overridedValue() override
|
||||
{
|
||||
return -11.0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace vChewing
|
||||
|
||||
#endif
|
|
@ -0,0 +1,74 @@
|
|||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
extension Megrez {
  /// Holds the nodes stored for one position, keyed by their length.
  @frozen public struct Span {
    /// Nodes stored in this span, keyed by length.
    private var mutLengthNodeMap: [Int: Megrez.Node]
    /// Largest length currently tracked for this span (0 when empty).
    private var mutMaximumLength: Int
    /// Read-only view of the largest tracked length.
    var maximumLength: Int { mutMaximumLength }

    /// Creates an empty span.
    public init() {
      mutLengthNodeMap = [:]
      mutMaximumLength = 0
    }

    /// Drops every node and resets the maximum length to 0.
    mutating func clear() {
      mutLengthNodeMap = [:]
      mutMaximumLength = 0
    }

    /// Stores `node` under `length`, replacing any node already stored for that length.
    mutating func insert(node: Node, length: Int) {
      mutLengthNodeMap[length] = node
      mutMaximumLength = Swift.max(mutMaximumLength, length)
    }

    /// Removes every node whose length exceeds `length` and recomputes the maximum length.
    ///
    /// Nothing happens when `length` is already larger than the current maximum.
    mutating func removeNodeOfLengthGreaterThan(_ length: Int) {
      guard length <= mutMaximumLength else { return }
      mutLengthNodeMap = mutLengthNodeMap.filter { $0.key <= length }
      // Fold from 0 so an emptied span reports a maximum length of 0.
      mutMaximumLength = mutLengthNodeMap.keys.reduce(0) { Swift.max($0, $1) }
    }

    /// Returns the node stored for `length`, or `nil` when there is none.
    public func node(length: Int) -> Node? {
      mutLengthNodeMap[length]
    }
  }
}
|
|
@ -0,0 +1,161 @@
|
|||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
extension Megrez {
  /// A node: a key with its candidate unigrams, optional bigrams, and the current selection.
  public class Node {
    /// Language model instance created in `init`; not read anywhere else in this class.
    let mutLM: LanguageModel
    /// Key of this node.
    var mutKey: String
    /// Current score of the node.
    var mutScore: Double = 0
    /// Unigrams of this node, sorted by descending score.
    var mutUnigrams: [Unigram]
    /// Key-value pairs of `mutUnigrams`, in the same order.
    var mutCandidates: [KeyValuePair]
    /// Maps a candidate value to its index in `mutUnigrams`.
    var mutValueUnigramIndexMap: [String: Int]
    /// Bigrams grouped by their preceding key-value pair.
    var mutPrecedingBigramMap: [KeyValuePair: [Megrez.Bigram]]

    /// Whether the current selection is fixed.
    var mutCandidateFixed: Bool = false
    /// Index of the currently selected unigram.
    var mutSelectedUnigramIndex: Int = 0

    /// Creates a node for `key` from the given unigrams and (optionally) bigrams.
    public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
      mutLM = LanguageModel()

      mutKey = key
      mutUnigrams = unigrams
      mutCandidates = []
      mutValueUnigramIndexMap = [:]
      mutPrecedingBigramMap = [:]

      // The previous branching dropped the bigrams exactly when there were some.
      node(key: key, unigrams: unigrams, bigrams: bigrams)
    }

    /// (Re)populates the node from the given data.
    ///
    /// Unigrams are stored sorted by descending score, the candidate list and the
    /// value-to-index map are rebuilt to match that order, and bigrams are grouped by
    /// their preceding key-value pair. The score becomes the top unigram's score; it is
    /// left untouched when `unigrams` is empty. The current selection is not changed.
    public func node(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
      mutKey = key

      // Store the sorted list so that indices agree with the candidate list and the index map.
      mutUnigrams = unigrams.sorted { $0.score > $1.score }
      if let top = mutUnigrams.first {
        mutScore = top.score
      }

      mutCandidates = mutUnigrams.map { $0.keyValue }
      mutValueUnigramIndexMap = [:]
      for (i, theGram) in mutUnigrams.enumerated() {
        mutValueUnigramIndexMap[theGram.keyValue.value] = i
      }

      mutPrecedingBigramMap = [:]
      for gram in bigrams {
        // `?.append` on a missing key is a no-op; create the bucket on demand instead.
        mutPrecedingBigramMap[gram.precedingKeyValue, default: []].append(gram)
      }
    }

    /// Lets bigrams following any of `precedingKeyValues` override the selection.
    ///
    /// When the candidate is not fixed, a bigram scoring higher than the best score so far
    /// (starting from the node's current score), and whose value is one of this node's
    /// candidates, becomes the new selection and its score the node's score.
    public func primeNodeWith(precedingKeyValues: [KeyValuePair]) {
      guard !isCandidateFixed() else { return }

      var bestIndex = mutSelectedUnigramIndex
      var bestScore = mutScore
      for neta in precedingKeyValues {
        for bigram in mutPrecedingBigramMap[neta] ?? [] where bigram.score > bestScore {
          guard let index = mutValueUnigramIndexMap[bigram.keyValue.value] else { continue }
          bestIndex = index
          bestScore = bigram.score
        }
      }

      mutScore = bestScore
      mutSelectedUnigramIndex = bestIndex
    }

    /// Whether the current selection is fixed.
    public func isCandidateFixed() -> Bool { mutCandidateFixed }

    /// Candidate key-value pairs, in unigram order.
    public func candidates() -> [KeyValuePair] { mutCandidates }

    /// Selects the candidate at `index` (falling back to 0 when out of range) and sets the score to 99.
    ///
    /// - Parameters:
    ///   - index: Index of the candidate to select.
    ///   - fix: Whether the selection is marked as fixed.
    public func selectCandidateAt(index: Int = 0, fix: Bool = false) {
      mutSelectedUnigramIndex = mutUnigrams.indices.contains(index) ? index : 0
      mutCandidateFixed = fix
      // NOTE(review): 99 is presumably meant to outrank regular scores — confirm.
      mutScore = 99
    }

    /// Clears the selection: index 0, not fixed, score of the first unigram (if any).
    public func resetCandidate() {
      mutSelectedUnigramIndex = 0
      mutCandidateFixed = false
      if let top = mutUnigrams.first {
        mutScore = top.score
      }
    }

    /// Selects the candidate at `index` (falling back to 0 when out of range) without fixing it,
    /// and sets the node's score to `score`.
    public func selectFloatingCandidateAt(index: Int, score: Double) {
      mutSelectedUnigramIndex = mutUnigrams.indices.contains(index) ? index : 0
      mutCandidateFixed = false
      mutScore = score
    }

    /// Key of this node.
    public func key() -> String { mutKey }

    /// Current score of this node.
    public func score() -> Double { mutScore }

    /// Score of the first unigram whose value equals `candidate`, or 0 when there is none.
    public func scoreFor(candidate: String) -> Double {
      mutUnigrams.first { $0.keyValue.value == candidate }?.score ?? 0.0
    }

    /// Key-value pair of the current selection, or an empty pair when the index is out of range.
    public func currentKeyValue() -> KeyValuePair {
      mutCandidates.indices.contains(mutSelectedUnigramIndex)
        ? mutCandidates[mutSelectedUnigramIndex] : KeyValuePair()
    }

    /// Score of the first (highest-scored) unigram, or 0 when there are no unigrams.
    public func highestUnigramScore() -> Double {
      mutUnigrams.first?.score ?? 0.0
    }

    /// Compares unigrams, candidates, lookup maps and selection state.
    /// The key and the score are not part of the comparison.
    public static func == (lhs: Node, rhs: Node) -> Bool {
      lhs.mutUnigrams == rhs.mutUnigrams && lhs.mutCandidates == rhs.mutCandidates
        && lhs.mutValueUnigramIndexMap == rhs.mutValueUnigramIndexMap
        && lhs.mutPrecedingBigramMap == rhs.mutPrecedingBigramMap
        && lhs.mutCandidateFixed == rhs.mutCandidateFixed
        && lhs.mutSelectedUnigramIndex == rhs.mutSelectedUnigramIndex
    }
  }
}
|
|
@ -1,6 +1,5 @@
|
|||
// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
|
||||
// All possible vChewing-specific modifications are of:
|
||||
// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
|
@ -24,29 +23,22 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef LANGUAGEMODEL_H_
|
||||
#define LANGUAGEMODEL_H_
|
||||
extension Megrez {
|
||||
// 這裡充其量只是框架,回頭實際使用時需要派生一個型別、且重寫相關函數。
|
||||
// 這裡寫了一點假內容,不然有些 Swift 格式化工具會破壞掉函數的參數設計。
|
||||
open class LanguageModel {
|
||||
public init() {}
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Bigram.h"
|
||||
#include "Unigram.h"
|
||||
|
||||
namespace Gramambular
|
||||
{
|
||||
|
||||
class LanguageModel
|
||||
{
|
||||
public:
|
||||
virtual ~LanguageModel()
|
||||
{
|
||||
/// Returns the unigrams for `key`. This base implementation always returns an empty array.
open func unigramsFor(key: String) -> [Megrez.Unigram] {
  // Both outcomes are deliberately empty; the check only keeps the parameter in use.
  if key.isEmpty {
    return [Megrez.Unigram]()
  }
  return [Megrez.Unigram]()
}
|
||||
|
||||
virtual const std::vector<Bigram> bigramsForKeys(const std::string &preceedingKey, const std::string &key) = 0;
|
||||
virtual const std::vector<Unigram> unigramsForKey(const std::string &key) = 0;
|
||||
virtual bool hasUnigramsForKey(const std::string &key) = 0;
|
||||
};
|
||||
} // namespace Gramambular
|
||||
/// Returns the bigrams for `key` preceded by `precedingKey`.
/// This base implementation always returns an empty array.
open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
  // Both outcomes are deliberately empty; the comparison only keeps the parameters in use.
  if precedingKey == key {
    return [Megrez.Bigram]()
  }
  return [Megrez.Bigram]()
}
|
||||
|
||||
#endif
|
||||
/// Tells whether unigrams exist for `key`. This base implementation returns `true`
/// for any non-empty key.
open func hasUnigramsFor(key: String) -> Bool {
  !key.isEmpty
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
extension Megrez {
  /// A key-value pair together with the key-value pair preceding it and a score.
  @frozen public struct Bigram: Equatable {
    /// The key-value pair this bigram is about.
    public var keyValue: KeyValuePair
    /// The key-value pair preceding `keyValue`.
    public var precedingKeyValue: KeyValuePair
    /// Score of this bigram.
    public var score: Double
    // var paired: String

    /// Creates a bigram from its preceding pair, its own pair and a score.
    public init(precedingKeyValue: KeyValuePair, keyValue: KeyValuePair, score: Double) {
      self.keyValue = keyValue
      self.precedingKeyValue = precedingKeyValue
      self.score = score
      // paired = "(" + keyValue.paired + "|" + precedingKeyValue.paired + "," + String(score) + ")"
    }

    /// Feeds both key-value pairs and the score into `hasher`.
    public func hash(into hasher: inout Hasher) {
      hasher.combine(keyValue)
      hasher.combine(precedingKeyValue)
      hasher.combine(score)
      // hasher.combine(paired)
    }

    // static func getPairedBigrams(grams: [Bigram]) -> String {
    //   var arrOutputContent = [""]
    //   var index = 0
    //   for gram in grams {
    //     arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.paired])
    //     index += 1
    //   }
    //   return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}"
    // }

    /// Two bigrams are equal when both key-value pairs and the score are equal.
    public static func == (lhs: Bigram, rhs: Bigram) -> Bool {
      lhs.precedingKeyValue == rhs.precedingKeyValue && lhs.keyValue == rhs.keyValue && lhs.score == rhs.score
    }

    /// Orders bigrams by preceding pair, then by own pair, then by score.
    ///
    /// Each later criterion is consulted only when the earlier ones are equal, so the
    /// result agrees with `==` (the former version compared `keyValue` regardless of
    /// `precedingKeyValue` and never looked at the score).
    public static func < (lhs: Bigram, rhs: Bigram) -> Bool {
      if lhs.precedingKeyValue != rhs.precedingKeyValue {
        return lhs.precedingKeyValue < rhs.precedingKeyValue
      }
      if lhs.keyValue != rhs.keyValue {
        return lhs.keyValue < rhs.keyValue
      }
      return lhs.score < rhs.score
    }

    /// Short textual form: the key-value pair followed by the score.
    var description: String {
      "\(keyValue):\(score)"
    }

    /// Verbose textual form for debugging.
    var debugDescription: String {
      "Bigram(keyValue: \(keyValue), score: \(score))"
    }
  }
}
|
|
@ -0,0 +1,75 @@
|
|||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
extension Megrez {
  /// A key-value pair with a score.
  @frozen public struct Unigram: Equatable {
    /// The key-value pair this unigram is about.
    public var keyValue: KeyValuePair
    /// Score of this unigram.
    public var score: Double
    // var paired: String

    /// Creates a unigram from a key-value pair and a score.
    public init(keyValue: KeyValuePair, score: Double) {
      self.keyValue = keyValue
      self.score = score
      // paired = "(" + keyValue.paired + "," + String(score) + ")"
    }

    /// Feeds the key-value pair and the score into `hasher`.
    public func hash(into hasher: inout Hasher) {
      hasher.combine(keyValue)
      hasher.combine(score)
      // hasher.combine(paired)
    }

    // This function is no longer needed.
    /// Returns `true` when `a` scores strictly higher than `b`.
    public static func compareScore(a: Unigram, b: Unigram) -> Bool {
      a.score > b.score
    }

    // static func getPairedUnigrams(grams: [Unigram]) -> String {
    //   var arrOutputContent = [""]
    //   var index = 0
    //   for gram in grams {
    //     arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.paired])
    //     index += 1
    //   }
    //   return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}"
    // }

    /// Two unigrams are equal when both the key-value pair and the score are equal.
    public static func == (lhs: Unigram, rhs: Unigram) -> Bool {
      lhs.keyValue == rhs.keyValue && lhs.score == rhs.score
    }

    /// Orders unigrams by key-value pair, then by score when the pairs are equal.
    ///
    /// The tie-break used to repeat the key-value comparison, which can never hold
    /// for equal pairs; it now compares the scores.
    public static func < (lhs: Unigram, rhs: Unigram) -> Bool {
      lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.score < rhs.score)
    }

    /// Short textual form: the key-value pair followed by the score.
    var description: String {
      "\(keyValue):\(score)"
    }

    /// Verbose textual form for debugging.
    var debugDescription: String {
      "Unigram(keyValue: \(keyValue), score: \(score))"
    }
  }
}
|
|
@ -0,0 +1,72 @@
|
|||
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
|
||||
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
|
||||
/*
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
1. The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
2. No trademark license is granted to use the trade names, trademarks, service
|
||||
marks, or product names of Contributor, except as required to fulfill notice
|
||||
requirements above.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
extension Megrez {
  /// A key together with a value.
  @frozen public struct KeyValuePair: Equatable, Hashable, Comparable {
    /// The key.
    public var key: String
    /// The value.
    public var value: String
    // public var paired: String

    /// Creates a pair; both parts default to the empty string.
    public init(key: String = "", value: String = "") {
      self.key = key
      self.value = value
      // paired = "(" + key + "," + value + ")"
    }

    /// Feeds the key and the value into `hasher`.
    public func hash(into hasher: inout Hasher) {
      hasher.combine(key)
      hasher.combine(value)
      // hasher.combine(paired)
    }

    /// Two pairs are equal when both the key and the value are equal.
    ///
    /// The key itself is compared (not just its length) so that equality agrees with
    /// `hash(into:)`, which hashes the whole key.
    public static func == (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
      lhs.key == rhs.key && lhs.value == rhs.value
    }

    /// Orders pairs by key length first, then by value; the key text breaks remaining
    /// ties so that the ordering agrees with `==`.
    public static func < (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
      if lhs.key.count != rhs.key.count {
        return lhs.key.count < rhs.key.count
      }
      if lhs.value != rhs.value {
        return lhs.value < rhs.value
      }
      return lhs.key < rhs.key
    }

    /// `true` when `rhs` is ordered before `lhs`.
    public static func > (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
      rhs < lhs
    }

    /// `true` unless `rhs` is ordered before `lhs`.
    ///
    /// The former version returned `true` for any two keys of equal length.
    public static func <= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
      !(rhs < lhs)
    }

    /// `true` unless `lhs` is ordered before `rhs`.
    ///
    /// The former version returned `true` for any two keys of equal length.
    public static func >= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
      !(lhs < rhs)
    }

    /// Short textual form: `(key, value)`.
    public var description: String {
      "(\(key), \(value))"
    }

    /// Verbose textual form for debugging.
    public var debugDescription: String {
      "KeyValuePair(key: \(key), value: \(value))"
    }
  }
}
|
|
@ -59,11 +59,11 @@ public class clsSFX: NSObject, NSSoundDelegate {
|
|||
currentBeep = beep
|
||||
}
|
||||
|
||||
@objc public func sound(_: NSSound, didFinishPlaying _: Bool) {
|
||||
public func sound(_: NSSound, didFinishPlaying _: Bool) {
|
||||
currentBeep = nil
|
||||
}
|
||||
|
||||
@objc static func beep() {
|
||||
static func beep() {
|
||||
shared.beep()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,6 +57,9 @@
|
|||
"Edit User Symbol & Emoji Data…" = "Edit User Symbol & Emoji Data…";
|
||||
"Choose your desired user data folder." = "Choose your desired user data folder.";
|
||||
"Cursor is between \"%@\" and \"%@\"." = "Cursor is between \"%@\" and \"%@\".";
|
||||
"Loading CHS Core Dict..." = "Loading CHS Core Dict...";
|
||||
"Loading CHT Core Dict..." = "Loading CHT Core Dict...";
|
||||
"Core Dict loading complete." = "Core Dict loading complete.";
|
||||
|
||||
// The followings are the category names used in the Symbol menu.
|
||||
"catCommonSymbols" = "CommonSymbols";
|
||||
|
|
|
@ -57,6 +57,9 @@
|
|||
"Edit User Symbol & Emoji Data…" = "Edit User Symbol & Emoji Data…";
|
||||
"Choose your desired user data folder." = "Choose your desired user data folder.";
|
||||
"Cursor is between \"%@\" and \"%@\"." = "Cursor is between \"%@\" and \"%@\".";
|
||||
"Loading CHS Core Dict..." = "Loading CHS Core Dict...";
|
||||
"Loading CHT Core Dict..." = "Loading CHT Core Dict...";
|
||||
"Core Dict loading complete." = "Core Dict loading complete.";
|
||||
|
||||
// The followings are the category names used in the Symbol menu.
|
||||
"catCommonSymbols" = "CommonSymbols";
|
||||
|
|
|
@ -57,6 +57,9 @@
|
|||
"Edit User Symbol & Emoji Data…" = "ユーザー符号&絵文字辞書を編集…";
|
||||
"Choose your desired user data folder." = "欲しがるユーザー辞書フォルダをお選びください。";
|
||||
"Cursor is between \"%@\" and \"%@\"." = "カーソルは「%@」と「%@」に間れ。";
|
||||
"Loading CHS Core Dict..." = "簡体中国語核心辞書読込中…";
|
||||
"Loading CHT Core Dict..." = "繁体中国語核心辞書読込中…";
|
||||
"Core Dict loading complete." = "核心辞書読込完了";
|
||||
|
||||
// The followings are the category names used in the Symbol menu.
|
||||
"catCommonSymbols" = "常用";
|
||||
|
|
|
@ -57,6 +57,9 @@
|
|||
"Edit User Symbol & Emoji Data…" = "编辑自订符号&绘文字资料…";
|
||||
"Choose your desired user data folder." = "请选择您想指定的使用者语汇档案目录。";
|
||||
"Cursor is between \"%@\" and \"%@\"." = "游标介于「%@」与「%@」之间。";
|
||||
"Loading CHS Core Dict..." = "载入简体中文核心辞典…";
|
||||
"Loading CHT Core Dict..." = "载入繁体中文核心辞典…";
|
||||
"Core Dict loading complete." = "核心辞典载入完毕";
|
||||
|
||||
// The followings are the category names used in the Symbol menu.
|
||||
"catCommonSymbols" = "常用";
|
||||
|
|
|
@ -57,6 +57,9 @@
|
|||
"Edit User Symbol & Emoji Data…" = "編輯自訂符號&繪文字資料…";
|
||||
"Choose your desired user data folder." = "請選擇您想指定的使用者語彙檔案目錄。";
|
||||
"Cursor is between \"%@\" and \"%@\"." = "游標介於「%@」與「%@」之間。";
|
||||
"Loading CHS Core Dict..." = "載入簡體中文核心辭典…";
|
||||
"Loading CHT Core Dict..." = "載入繁體中文核心辭典…";
|
||||
"Core Dict loading complete." = "核心辭典載入完畢";
|
||||
|
||||
// The followings are the category names used in the Symbol menu.
|
||||
"catCommonSymbols" = "常用";
|
||||
|
|
|
@ -26,10 +26,9 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
|
||||
import Cocoa
|
||||
|
||||
@objc(VTCandidateKeyLabel)
|
||||
public class CandidateKeyLabel: NSObject {
|
||||
@objc public private(set) var key: String
|
||||
@objc public private(set) var displayedText: String
|
||||
public private(set) var key: String
|
||||
public private(set) var displayedText: String
|
||||
|
||||
public init(key: String, displayedText: String) {
|
||||
self.key = key
|
||||
|
@ -38,7 +37,6 @@ public class CandidateKeyLabel: NSObject {
|
|||
}
|
||||
}
|
||||
|
||||
@objc(ctlCandidateDelegate)
|
||||
public protocol ctlCandidateDelegate: AnyObject {
|
||||
func candidateCountForController(_ controller: ctlCandidate) -> UInt
|
||||
func ctlCandidate(_ controller: ctlCandidate, candidateAtIndex index: UInt)
|
||||
|
@ -48,16 +46,15 @@ public protocol ctlCandidateDelegate: AnyObject {
|
|||
)
|
||||
}
|
||||
|
||||
@objc(ctlCandidate)
|
||||
public class ctlCandidate: NSWindowController {
|
||||
@objc public weak var delegate: ctlCandidateDelegate? {
|
||||
public weak var delegate: ctlCandidateDelegate? {
|
||||
didSet {
|
||||
reloadData()
|
||||
}
|
||||
}
|
||||
|
||||
@objc public var selectedCandidateIndex: UInt = .max
|
||||
@objc public var visible: Bool = false {
|
||||
public var selectedCandidateIndex: UInt = .max
|
||||
public var visible: Bool = false {
|
||||
didSet {
|
||||
NSObject.cancelPreviousPerformRequests(withTarget: self)
|
||||
if visible {
|
||||
|
@ -68,7 +65,7 @@ public class ctlCandidate: NSWindowController {
|
|||
}
|
||||
}
|
||||
|
||||
@objc public var windowTopLeftPoint: NSPoint {
|
||||
public var windowTopLeftPoint: NSPoint {
|
||||
get {
|
||||
guard let frameRect = window?.frame else {
|
||||
return NSPoint.zero
|
||||
|
@ -82,36 +79,36 @@ public class ctlCandidate: NSWindowController {
|
|||
}
|
||||
}
|
||||
|
||||
@objc public var keyLabels: [CandidateKeyLabel] = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
|
||||
/// The key labels used by this controller.
///
/// Defaults to nine labels built from the strings "1" through "9"; each label
/// uses the same string for both its `key` and its `displayedText`.
public var keyLabels: [CandidateKeyLabel] = ["1", "2", "3", "4", "5", "6", "7", "8", "9"]
|
||||
.map {
|
||||
CandidateKeyLabel(key: $0, displayedText: $0)
|
||||
}
|
||||
|
||||
@objc public var keyLabelFont: NSFont = NSFont.monospacedDigitSystemFont(
|
||||
/// The font for key labels. Defaults to the monospaced-digit system font at
/// size 14 with medium weight.
public var keyLabelFont: NSFont = NSFont.monospacedDigitSystemFont(
|
||||
ofSize: 14, weight: .medium
|
||||
)
|
||||
@objc public var candidateFont: NSFont = NSFont.systemFont(ofSize: 18)
|
||||
@objc public var tooltip: String = ""
|
||||
/// The font for candidate text. Defaults to the system font at size 18.
public var candidateFont: NSFont = NSFont.systemFont(ofSize: 18)
|
||||
/// The tooltip string. Defaults to an empty string.
public var tooltip: String = ""
|
||||
|
||||
@objc public func reloadData() {}
|
||||
/// Reloads the candidate data. This implementation has an empty body and does
/// nothing.
// NOTE(review): presumably overridden by concrete candidate controllers — confirm.
public func reloadData() {}
|
||||
|
||||
@objc public func showNextPage() -> Bool {
|
||||
/// Shows the next page of candidates.
///
/// - Returns: Always `false` in this implementation.
// NOTE(review): presumably a placeholder that subclasses override — confirm.
public func showNextPage() -> Bool {
|
||||
false
|
||||
}
|
||||
|
||||
@objc public func showPreviousPage() -> Bool {
|
||||
/// Shows the previous page of candidates.
///
/// - Returns: Always `false` in this implementation.
// NOTE(review): presumably a placeholder that subclasses override — confirm.
public func showPreviousPage() -> Bool {
|
||||
false
|
||||
}
|
||||
|
||||
@objc public func highlightNextCandidate() -> Bool {
|
||||
/// Highlights the next candidate.
///
/// - Returns: Always `false` in this implementation.
// NOTE(review): presumably a placeholder that subclasses override — confirm.
public func highlightNextCandidate() -> Bool {
|
||||
false
|
||||
}
|
||||
|
||||
@objc public func highlightPreviousCandidate() -> Bool {
|
||||
/// Highlights the previous candidate.
///
/// - Returns: Always `false` in this implementation.
// NOTE(review): presumably a placeholder that subclasses override — confirm.
public func highlightPreviousCandidate() -> Bool {
|
||||
false
|
||||
}
|
||||
|
||||
@objc public func candidateIndexAtKeyLabelIndex(_: UInt) -> UInt {
|
||||
/// Maps a key-label index to a candidate index.
///
/// The argument is ignored in this implementation.
///
/// - Returns: Always `UInt.max` in this implementation.
// NOTE(review): `UInt.max` looks like a "no candidate" sentinel — confirm against callers.
public func candidateIndexAtKeyLabelIndex(_: UInt) -> UInt {
|
||||
UInt.max
|
||||
}
|
||||
|
||||
|
@ -125,7 +122,6 @@ public class ctlCandidate: NSWindowController {
|
|||
/// - windowTopLeftPoint: The given location.
|
||||
/// - height: The height that helps the window not to be out of the bottom
|
||||
/// of a screen.
|
||||
@objc(setWindowTopLeftPoint:bottomOutOfScreenAdjustmentHeight:)
|
||||
public func set(windowTopLeftPoint: NSPoint, bottomOutOfScreenAdjustmentHeight height: CGFloat) {
|
||||
DispatchQueue.main.asyncAfter(deadline: DispatchTime.now()) {
|
||||
self.doSet(
|
||||
|
|
|
@ -87,7 +87,7 @@ public class NotifierController: NSWindowController, NotifierWindowDelegate {
|
|||
private static var instanceCount = 0
|
||||
private static var lastLocation = NSPoint.zero
|
||||
|
||||
@objc public static func notify(message: String, stay: Bool = false) {
|
||||
public static func notify(message: String, stay: Bool = false) {
|
||||
let controller = NotifierController()
|
||||
controller.message = message
|
||||
controller.shouldStay = stay
|
||||
|
|
|
@ -79,6 +79,7 @@ struct suiPrefPaneDictionary: View {
|
|||
mgrPrefs.userDataFolderSpecified = newPath
|
||||
tbxUserDataPathSpecified = mgrPrefs.userDataFolderSpecified
|
||||
IME.initLangModels(userOnly: true)
|
||||
(NSApplication.shared.delegate as! AppDelegate).updateStreamHelperPath()
|
||||
} else {
|
||||
clsSFX.beep()
|
||||
if !bolPreviousFolderValidity {
|
||||
|
|
|
@ -64,7 +64,6 @@ public class TooltipController: NSWindowController {
|
|||
fatalError("init(coder:) has not been implemented")
|
||||
}
|
||||
|
||||
@objc(showTooltip:atPoint:)
|
||||
public func show(tooltip: String, at point: NSPoint) {
|
||||
messageTextField.textColor = TooltipController.textColor
|
||||
messageTextField.backgroundColor = TooltipController.backgroundColor
|
||||
|
|
|
@ -26,7 +26,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
|
||||
import Cocoa
|
||||
|
||||
@objc(AboutWindow) class ctlAboutWindow: NSWindowController {
|
||||
class ctlAboutWindow: NSWindowController {
|
||||
@IBOutlet var appVersionLabel: NSTextField!
|
||||
@IBOutlet var appCopyrightLabel: NSTextField!
|
||||
@IBOutlet var appEULAContent: NSTextView!
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue