diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3c39d821..e8606dcc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,14 +1,14 @@ # 威注音輸入法研發參與相關說明 -威注音輸入法歡迎有人參與。但為了不讓參與者們浪費各自的熱情,特設此文以說明該專案目前最需要協助的地方。 +威注音輸入法歡迎有熱心的志願者們參與。 -1. 有人能用 Swift 將該專案內這兩個源自 LibFormosa 的組件套件重寫: +威注音目前的 codebase 更能代表一個先進的 macOS 輸入法雛形專案的形態。目前的 dev 分支除了 Mandarin 模組(以及其與 KeyHandler 的對接的部分)以外被威注音使用的部分全都是清一色的 Swift codebase,一目了然,方便他人參與,比某些其它開源品牌旗下的專案更具程式方面的生命力。為什麼這樣講呢?那些傳統開源品牌的專案主要使用 C++ 這門不太友好的語言(Mandarin 模組現在對我而言仍舊是天書,一大堆針對記憶體指針的操作完全看不懂。搞不清楚在這一層之上的功能邏輯的話,就無法制定 Swift 版的 coding 策略),這也是我這次用 Swift 重寫了語言模型引擎的原因(也是為後來者行方便)。 + +為了不讓參與者們浪費各自的熱情,特設此文以說明該專案目前最需要協助的地方。 + +1. 有人能用 Swift 將該專案內的這個源自 LibFormosa 的組件套件重寫: - Mandarin 組件,用以分析普通話音韻數據、創建且控制 Syllable Composer 注音拼識組件。 - - Gramambular 套裝,這包括了 Source 資料夾下的其餘全部的 (Obj)C(++) 檔案(LMConsolidator 除外)。 - - LMConsolidator 有 Swift 版本,已經用於威注音語彙編輯器內。給主程式用 C++ 版本僅為了與 Gramambular 協作方便。 - - 這也包括了所有與 Language Model 有關的實現,因為都是 Gramambular 內的某個語言模組 Protocol 衍生出來的東西。 - - LMInstantiator 是用來將語言模組副本化的組件,原本不屬於 Gramambular,但與其衍生的各類語言模組高度耦合。 - - KeyValueBlobReader 不屬於 Gramambular,但與其衍生的各類語言模組高度耦合、也與 KeyHandler 高度耦合。 + - 一堆記憶體指針操作,實在看不懂這個組件的處理邏輯是什麼,無能為力。 2. 讓 Alt+波浪鍵選單能夠在諸如 MS Word 以及終端機內正常工作(可以用方向鍵控制高亮候選內容,等)。 - 原理上而言恐怕得欺騙當前正在接受輸入的應用、使其誤以為當前有組字區。這只是推測。 3. SQLite 實現。 @@ -39,4 +39,4 @@ 至於對 Swift 檔案改採 1-Tab 縮進,則是為了在尊重所有用戶的需求的同時、最大程度上節約檔案體積。使用者可自行修改 Xcode 的預設 Tab 縮進尺寸。 -$ EOF. \ No newline at end of file +$ EOF. 
diff --git a/Makefile b/Makefile index 92a7d187..5a9608c9 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,7 @@ clang-format: clang-format-swift clang-format-cpp clang-format-swift: @git ls-files --exclude-standard | grep -E '\.swift$$' | xargs swift-format format --in-place --configuration ./.clang-format-swift.json --parallel + @git ls-files --exclude-standard | grep -E '\.swift$$' | xargs swift-format lint --configuration ./.clang-format-swift.json --parallel clang-format-cpp: @git ls-files --exclude-standard | grep -E '\.(cpp|hpp|c|cc|cxx|hxx|ixx|h|m|mm|hh)$$' | xargs clang-format -i diff --git a/Packages/SwiftyOpenCC/Package.swift b/Packages/SwiftyOpenCC/Package.swift index 470d63c1..22020d49 100644 --- a/Packages/SwiftyOpenCC/Package.swift +++ b/Packages/SwiftyOpenCC/Package.swift @@ -48,10 +48,6 @@ let package = Package( "src/UTF8StringSliceTest.cpp", "src/UTF8UtilTest.cpp", "deps/google-benchmark", - "deps/gtest-1.11.0", - "deps/pybind11-2.5.0", - "deps/rapidjson-1.1.0", - "deps/tclap-1.2.2", "src/CmdLineOutput.hpp", "src/Config.hpp", diff --git a/Source/3rdParty/LineReader/LineReader.swift b/Source/3rdParty/LineReader/LineReader.swift new file mode 100644 index 00000000..a5cc4b83 --- /dev/null +++ b/Source/3rdParty/LineReader/LineReader.swift @@ -0,0 +1,69 @@ +// Copyright (c) 2019 and onwards Robert Muckle-Jones (Apache 2.0 License). + +import Foundation + +public class LineReader { + let encoding: String.Encoding + let chunkSize: Int + var fileHandle: FileHandle + let delimData: Data + var buffer: Data + var atEof: Bool + + public init( + file: FileHandle, encoding: String.Encoding = .utf8, + chunkSize: Int = 4096 + ) throws { + let fileHandle = file + self.encoding = encoding + self.chunkSize = chunkSize + self.fileHandle = fileHandle + delimData = "\n".data(using: encoding)! + buffer = Data(capacity: chunkSize) + atEof = false + } + + /// Return next line, or nil on EOF. + public func nextLine() -> String? 
{ + // Read data chunks from file until a line delimiter is found: + while !atEof { + // get a data from the buffer up to the next delimiter + if let range = buffer.range(of: delimData) { + // convert data to a string + let line = String(data: buffer.subdata(in: 0.. AnyIterator { + AnyIterator { + self.nextLine() + } + } +} diff --git a/Source/Headers/vChewing-Bridging-Header.h b/Source/Headers/vChewing-Bridging-Header.h index 3f79ac94..fd510fc9 100644 --- a/Source/Headers/vChewing-Bridging-Header.h +++ b/Source/Headers/vChewing-Bridging-Header.h @@ -31,4 +31,3 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @import Foundation; #import "KeyHandler.h" -#import "mgrLangModel.h" diff --git a/Source/Modules/ControllerModules/KeyHandler.h b/Source/Modules/ControllerModules/KeyHandler.h index 020bad09..9ab4eb47 100644 --- a/Source/Modules/ControllerModules/KeyHandler.h +++ b/Source/Modules/ControllerModules/KeyHandler.h @@ -28,22 +28,10 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
@class InputHandler; @class InputState; +@class KeyHandlerSputnik; NS_ASSUME_NONNULL_BEGIN -typedef NSString *const InputMode NS_TYPED_ENUM; -extern InputMode imeModeCHT; -extern InputMode imeModeCHS; -extern InputMode imeModeNULL; - -struct BufferStatePackage -{ - NSString *composedText; - NSInteger cursorIndex; - NSString *resultOfRear; - NSString *resultOfFront; -}; - @class KeyHandler; @protocol KeyHandlerDelegate @@ -54,48 +42,20 @@ struct BufferStatePackage @interface KeyHandler : NSObject -- (BOOL)isBuilderEmpty; - -- (void)fixNodeWithValue:(NSString *)value NS_SWIFT_NAME(fixNode(value:)); -- (void)clear; - -@property(strong, nonatomic) InputMode inputMode; @property(weak, nonatomic) id delegate; // The following items need to be exposed to Swift: -- (void)_walk; -- (NSString *)_popOverflowComposingTextAndWalk; -- (NSArray *)_currentReadings; - (BOOL)checkWhetherToneMarkerConfirmsPhoneticReadingBuffer; - (BOOL)chkKeyValidity:(UniChar)value; -- (BOOL)ifLangModelHasUnigramsForKey:(NSString *)reading; - (BOOL)isPhoneticReadingBufferEmpty; - (BOOL)isPrintable:(UniChar)charCode; -- (NSArray *)buildAssociatePhraseArrayWithKey:(NSString *)key; -- (NSArray *)getCandidatesArray; -- (NSInteger)getKeyLengthAtIndexZero; -- (NSInteger)getBuilderCursorIndex; -- (NSInteger)getBuilderLength; -- (NSInteger)getPackagedCursorIndex; -- (NSString *)getComposedText; - (NSString *)getCompositionFromPhoneticReadingBuffer; -- (NSString *)getStrLocationResult:(BOOL)isFront NS_SWIFT_NAME(getStrLocationResult(isFront:)); - (NSString *)getSyllableCompositionFromPhoneticReadingBuffer; - (void)clearPhoneticReadingBuffer; - (void)combinePhoneticReadingBufferKey:(UniChar)charCode; -- (void)createNewBuilder; -- (void)dealWithOverrideModelSuggestions; -- (void)deleteBuilderReadingAfterCursor; -- (void)deleteBuilderReadingInFrontOfCursor; - (void)doBackSpaceToPhoneticReadingBuffer; - (void)ensurePhoneticParser; -- (void)insertReadingToBuilderAtCursor:(NSString *)reading; -- 
(void)packageBufferStateMaterials; -- (void)removeBuilderAndReset:(BOOL)shouldReset; -- (void)setBuilderCursorIndex:(NSInteger)value; -- (void)setInputModesToLM:(BOOL)isCHS; -- (void)syncBaseLMPrefs; @end diff --git a/Source/Modules/ControllerModules/KeyHandler.mm b/Source/Modules/ControllerModules/KeyHandler.mm index 256d4168..c50efb22 100644 --- a/Source/Modules/ControllerModules/KeyHandler.mm +++ b/Source/Modules/ControllerModules/KeyHandler.mm @@ -25,134 +25,23 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #import "KeyHandler.h" -#import "Gramambular.h" -#import "LMInstantiator.h" #import "Mandarin.h" -#import "UserOverrideModel.h" -#import "mgrLangModel_Privates.h" #import "vChewing-Swift.h" #import -InputMode imeModeCHS = ctlInputMethod.kIMEModeCHS; -InputMode imeModeCHT = ctlInputMethod.kIMEModeCHT; -InputMode imeModeNULL = ctlInputMethod.kIMEModeNULL; - -typedef vChewing::LMInstantiator BaseLM; -typedef vChewing::UserOverrideModel UserOverrideLM; -typedef Gramambular::BlockReadingBuilder BlockBuilder; typedef Mandarin::BopomofoReadingBuffer PhoneticBuffer; -static const double kEpsilon = 0.000001; - -NSString *packagedComposedText; -NSInteger packagedCursorIndex; -NSString *packagedResultOfRear; -NSString *packagedResultOfFront; - -// NON-SWIFTIFIABLE -static double FindHighestScore(const std::vector &nodes, double epsilon) -{ - double highestScore = 0.0; - for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) - { - double score = ni->node->highestUnigramScore(); - if (score > highestScore) - highestScore = score; - } - return highestScore + epsilon; -} - -// NON-SWIFTIFIABLE -class NodeAnchorDescendingSorter -{ - public: - bool operator()(const Gramambular::NodeAnchor &a, const Gramambular::NodeAnchor &b) const - { - return a.node->key().length() > b.node->key().length(); - } -}; - -// if DEBUG is defined, a DOT file (GraphViz format) will be written to the -// specified path every time the grid is walked -#if 
DEBUG -static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; -#endif - // NON-SWIFTIFIABLE @implementation KeyHandler { // the reading buffer that takes user input PhoneticBuffer *_bpmfReadingBuffer; - - // language model - BaseLM *_languageModel; - - // user override model - UserOverrideLM *_userOverrideModel; - - // the grid (lattice) builder for the unigrams (and bigrams) - BlockBuilder *_builder; - - // latest walked path (trellis) using the Viterbi algorithm - std::vector _walkedNodes; - - NSString *_inputMode; } @synthesize delegate = _delegate; -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -// VARIABLE: "_inputMode" -- (NSString *)inputMode -{ - return _inputMode; -} - -// NON-SWIFTIFIABLE -- (BOOL)isBuilderEmpty -{ - return (_builder->grid().width() == 0); -} - -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -// VARIABLE: "_inputMode" -- (void)setInputMode:(NSString *)value -{ - // 下面這句的「isKindOfClass」是做類型檢查, - // 為了應對出現輸入法 plist 被改壞掉這樣的極端情況。 - BOOL isCHS = [value isKindOfClass:[NSString class]] && [value isEqual:imeModeCHS]; - - // 緊接著將新的簡繁輸入模式提報給 ctlInputMethod: - ctlInputMethod.currentInputMode = isCHS ? imeModeCHS : imeModeCHT; - mgrPrefs.mostRecentInputMode = ctlInputMethod.currentInputMode; - - // 拿當前的 _inputMode 與 ctlInputMethod 的提報結果對比,不同的話則套用新設定: - if (![_inputMode isEqualToString:ctlInputMethod.currentInputMode]) - { - // Reinitiate language models if necessary - [self setInputModesToLM:isCHS]; - - // Synchronize the sub-languageModel state settings to the new LM. - [self syncBaseLMPrefs]; - - [self removeBuilderAndReset:YES]; - - if (![self isPhoneticReadingBufferEmpty]) - [self clearPhoneticReadingBuffer]; - } - _inputMode = ctlInputMethod.currentInputMode; -} - -// NON-SWIFTIFIABLE: Required by an ObjC(pp)-based class. 
-- (void)dealloc -{ // clean up everything - if (_bpmfReadingBuffer) - delete _bpmfReadingBuffer; - if (_builder) - [self removeBuilderAndReset:NO]; -} - -// NON-SWIFTIFIABLE: Not placeable in swift extensions. +// Not migrable as long as there's still ObjC++ components needed. +// Will deprecate this once Mandarin gets Swiftified. - (instancetype)init { self = [super init]; @@ -164,262 +53,14 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return self; } -// NON-SWIFTIFIABLE -- (void)fixNodeWithValue:(NSString *)value -{ - NSInteger cursorIndex = [self getActualCandidateCursorIndex]; - std::string stringValue(value.UTF8String); - Gramambular::NodeAnchor selectedNode = _builder->grid().fixNodeSelectedCandidate(cursorIndex, stringValue); - if (!mgrPrefs.useSCPCTypingMode) - { // 不要針對逐字選字模式啟用臨時半衰記憶模型。 - // If the length of the readings and the characters do not match, - // it often means it is a special symbol and it should not be stored - // in the user override model. 
- BOOL addToOverrideModel = YES; - if (selectedNode.spanningLength != [value count]) - addToOverrideModel = NO; - - if (addToOverrideModel) - { - double score = selectedNode.node->scoreForCandidate(stringValue); - if (score <= -12) // 威注音的 SymbolLM 的 Score 是 -12。 - addToOverrideModel = NO; - } - if (addToOverrideModel) - _userOverrideModel->observe(_walkedNodes, cursorIndex, stringValue, [[NSDate date] timeIntervalSince1970]); - } - [self _walk]; - - if (mgrPrefs.moveCursorAfterSelectingCandidate) - { - size_t nextPosition = 0; - for (auto node : _walkedNodes) - { - if (nextPosition >= cursorIndex) - break; - nextPosition += node.spanningLength; - } - if (nextPosition <= [self getBuilderLength]) - [self setBuilderCursorIndex:nextPosition]; - } +// NON-SWIFTIFIABLE: Mandarin +- (void)dealloc +{ // clean up everything + if (_bpmfReadingBuffer) + delete _bpmfReadingBuffer; } -// NON-SWIFTIFIABLE -- (void)clear -{ - [self clearPhoneticReadingBuffer]; - _builder->clear(); - _walkedNodes.clear(); -} - -#pragma mark - States Building - -// NON-SWIFTIFIABLE -- (void)packageBufferStateMaterials -{ - // We gather the data through this function, package it, - // and sent it to our Swift extension to build the InputState.Inputting there. - // Otherwise, ObjC++ always bugs for "expecting a type". - - // "updating the composing buffer" means to request the client to "refresh" the text input buffer - // with our "composing text" - NSMutableString *composingBuffer = [[NSMutableString alloc] init]; - NSInteger composedStringCursorIndex = 0; - - // we must do some Unicode codepoint counting to find the actual cursor location for the client - // i.e. 
we need to take UTF-16 into consideration, for which a surrogate pair takes 2 UniChars - // locations - - size_t readingCursorIndex = 0; - size_t builderCursorIndex = [self getBuilderCursorIndex]; - - NSString *resultOfRear = @""; - NSString *resultOfFront = @""; - - for (std::vector::iterator wi = _walkedNodes.begin(), we = _walkedNodes.end(); wi != we; - ++wi) - { - if ((*wi).node) - { - std::string nodeStr = (*wi).node->currentKeyValue().value; - NSString *valueString = [NSString stringWithUTF8String:nodeStr.c_str()]; - [composingBuffer appendString:valueString]; - - NSArray *splited = [valueString split]; - NSInteger codepointCount = splited.count; - - // this re-aligns the cursor index in the composed string - // (the actual cursor on the screen) with the builder's logical - // cursor (reading) cursor; each built node has a "spanning length" - // (e.g. two reading blocks has a spanning length of 2), and we - // accumulate those lengths to calculate the displayed cursor - // index - size_t spanningLength = (*wi).spanningLength; - if (readingCursorIndex + spanningLength <= builderCursorIndex) - { - composedStringCursorIndex += [valueString length]; - readingCursorIndex += spanningLength; - } - else - { - if (codepointCount == spanningLength) - { - for (size_t i = 0; i < codepointCount && readingCursorIndex < builderCursorIndex; i++) - { - composedStringCursorIndex += [splited[i] length]; - readingCursorIndex++; - } - } - else - { - if (readingCursorIndex < builderCursorIndex) - { - composedStringCursorIndex += [valueString length]; - readingCursorIndex += spanningLength; - if (readingCursorIndex > builderCursorIndex) - { - readingCursorIndex = builderCursorIndex; - } - if (builderCursorIndex == 0) - { - resultOfFront = - [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex].c_str()]; - } - else if (builderCursorIndex >= _builder->readings().size()) - { - resultOfRear = [NSString - 
stringWithUTF8String:_builder->readings()[_builder->readings().size() - 1].c_str()]; - } - else - { - resultOfFront = - [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex].c_str()]; - resultOfRear = - [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex - 1].c_str()]; - } - } - } - } - } - } - - // now we gather all the info, we separate the composing buffer to two parts, head and tail, - // and insert the reading text (the Mandarin syllable) in between them; - // the reading text is what the user is typing - NSString *head = [composingBuffer substringToIndex:composedStringCursorIndex]; - NSString *reading = [self getCompositionFromPhoneticReadingBuffer]; - NSString *tail = [composingBuffer substringFromIndex:composedStringCursorIndex]; - NSString *composedText = [head stringByAppendingString:[reading stringByAppendingString:tail]]; - NSInteger cursorIndex = composedStringCursorIndex + [reading length]; - - packagedComposedText = composedText; - packagedCursorIndex = cursorIndex; - packagedResultOfRear = resultOfRear; - packagedResultOfFront = resultOfFront; -} - -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -- (NSString *)getStrLocationResult:(BOOL)isFront -{ - if (isFront) - return packagedResultOfFront; - else - return packagedResultOfRear; -} - -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -- (NSString *)getComposedText -{ - return packagedComposedText; -} - -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -- (NSInteger)getPackagedCursorIndex -{ - return packagedCursorIndex; -} - -// NON-SWIFTIFIABLE -- (void)_walk -{ - // retrieve the most likely trellis, i.e. 
a Maximum Likelihood Estimation - // of the best possible Mandarin characters given the input syllables, - // using the Viterbi algorithm implemented in the Gramambular library - Gramambular::Walker walker(&_builder->grid()); - - // the reverse walk traces the trellis from the end - _walkedNodes = walker.reverseWalk(_builder->grid().width()); - - // then we reverse the nodes so that we get the forward-walked nodes - reverse(_walkedNodes.begin(), _walkedNodes.end()); - - // if DEBUG is defined, a GraphViz file is written to kGraphVizOutputfile -#if DEBUG - std::string dotDump = _builder->grid().dumpDOT(); - NSString *dotStr = [NSString stringWithUTF8String:dotDump.c_str()]; - NSError *error = nil; - - BOOL __unused success = [dotStr writeToFile:kGraphVizOutputfile - atomically:YES - encoding:NSUTF8StringEncoding - error:&error]; -#endif -} - -// NON-SWIFTIFIABLE -- (NSString *)_popOverflowComposingTextAndWalk -{ - // in an ideal world, we can as well let the user type forever, - // but because the Viterbi algorithm has a complexity of O(N^2), - // the walk will become slower as the number of nodes increase, - // therefore we need to auto-commit overflown texts which usually - // lose their influence over the whole MLE anyway -- so that when - // the user type along, the already composed text in the rear side - // of the buffer will be committed (i.e. "popped out"). 
- - NSString *poppedText = @""; - NSInteger composingBufferSize = mgrPrefs.composingBufferSize; - - if (_builder->grid().width() > (size_t)composingBufferSize) - { - if (_walkedNodes.size() > 0) - { - Gramambular::NodeAnchor &anchor = _walkedNodes[0]; - poppedText = [NSString stringWithUTF8String:anchor.node->currentKeyValue().value.c_str()]; - _builder->removeHeadReadings(anchor.spanningLength); - } - } - - [self _walk]; - return poppedText; -} - -// NON-SWIFTIFIABLE -- (NSArray *)_currentReadings -{ - NSMutableArray *readingsArray = [[NSMutableArray alloc] init]; - std::vector v = _builder->readings(); - for (std::vector::iterator it_i = v.begin(); it_i != v.end(); ++it_i) - [readingsArray addObject:[NSString stringWithUTF8String:it_i->c_str()]]; - return readingsArray; -} - -// NON-SWIFTIFIABLE -- (NSArray *)buildAssociatePhraseArrayWithKey:(NSString *)key -{ - NSMutableArray *array = [NSMutableArray array]; - std::string cppKey = std::string(key.UTF8String); - if (_languageModel->hasAssociatedPhrasesForKey(cppKey)) - { - std::vector phrases = _languageModel->associatedPhrasesForKey(cppKey); - for (auto phrase : phrases) - { - NSString *item = [[NSString alloc] initWithUTF8String:phrase.c_str()]; - [array addObject:item]; - } - } - return array; -} +// MARK: - 目前到這裡了 #pragma mark - 必須用 ObjCpp 處理的部分: Mandarin @@ -504,129 +145,6 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; } } -#pragma mark - 必須用 ObjCpp 處理的部分: Gramambular 等 - -- (void)removeBuilderAndReset:(BOOL)shouldReset -{ - if (_builder) - { - delete _builder; - if (shouldReset) - [self createNewBuilder]; - } - else if (shouldReset) - [self createNewBuilder]; -} - -- (void)createNewBuilder -{ - _builder = new Gramambular::BlockReadingBuilder(_languageModel); - // Each Mandarin syllable is separated by a hyphen. - _builder->setJoinSeparator("-"); -} - -- (void)setInputModesToLM:(BOOL)isCHS -{ - _languageModel = isCHS ? 
[mgrLangModel lmCHS] : [mgrLangModel lmCHT]; - _userOverrideModel = isCHS ? [mgrLangModel userOverrideModelCHS] : [mgrLangModel userOverrideModelCHT]; -} - -- (void)syncBaseLMPrefs -{ - if (_languageModel) - { - _languageModel->setPhraseReplacementEnabled(mgrPrefs.phraseReplacementEnabled); - _languageModel->setSymbolEnabled(mgrPrefs.symbolInputEnabled); - _languageModel->setCNSEnabled(mgrPrefs.cns11643Enabled); - } -} - -// ---- - -- (BOOL)ifLangModelHasUnigramsForKey:(NSString *)reading -{ - return _languageModel->hasUnigramsForKey((std::string)[reading UTF8String]); -} - -- (void)insertReadingToBuilderAtCursor:(NSString *)reading -{ - _builder->insertReadingAtCursor((std::string)[reading UTF8String]); -} - -- (void)dealWithOverrideModelSuggestions -{ - // 這一整段都太 C++ 且只出現一次,就整個端過來了。 - // 拆開封裝的話,只會把問題搞得更麻煩而已。 - std::string overrideValue = (mgrPrefs.useSCPCTypingMode) - ? "" - : _userOverrideModel->suggest(_walkedNodes, [self getBuilderCursorIndex], - [[NSDate date] timeIntervalSince1970]); - - if (!overrideValue.empty()) - { - NSInteger cursorIndex = [self getActualCandidateCursorIndex]; - std::vector nodes = mgrPrefs.setRearCursorMode - ? 
_builder->grid().nodesCrossingOrEndingAt(cursorIndex) - : _builder->grid().nodesEndingAt(cursorIndex); - double highestScore = FindHighestScore(nodes, kEpsilon); - _builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, - static_cast(highestScore)); - } -} - -- (void)setBuilderCursorIndex:(NSInteger)value -{ - _builder->setCursorIndex(value); -} - -- (NSInteger)getBuilderCursorIndex -{ - return _builder->cursorIndex(); -} - -- (NSInteger)getBuilderLength -{ - return _builder->length(); -} - -- (void)deleteBuilderReadingInFrontOfCursor -{ - _builder->deleteReadingBeforeCursor(); -} - -- (void)deleteBuilderReadingAfterCursor -{ - _builder->deleteReadingAfterCursor(); -} - -- (NSArray *)getCandidatesArray -{ - NSMutableArray *candidatesArray = [[NSMutableArray alloc] init]; - - NSInteger cursorIndex = [self getActualCandidateCursorIndex]; - std::vector nodes = mgrPrefs.setRearCursorMode - ? _builder->grid().nodesCrossingOrEndingAt(cursorIndex) - : _builder->grid().nodesEndingAt(cursorIndex); - - // sort the nodes, so that longer nodes (representing longer phrases) are placed at the top of the candidate list - stable_sort(nodes.begin(), nodes.end(), NodeAnchorDescendingSorter()); - - // then use the C++ trick to retrieve the candidates for each node at/crossing the cursor - for (std::vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) - { - const std::vector &candidates = (*ni).node->candidates(); - for (std::vector::const_iterator ci = candidates.begin(), ce = candidates.end(); - ci != ce; ++ci) - [candidatesArray addObject:[NSString stringWithUTF8String:(*ci).value.c_str()]]; - } - return candidatesArray; -} - -- (NSInteger)getKeyLengthAtIndexZero -{ - return [NSString stringWithUTF8String:_walkedNodes[0].node->currentKeyValue().value.c_str()].length; -} - #pragma mark - 威注音認為有必要單獨拿出來處理的部分,交給 Swift 則有些困難。 - (BOOL)isPrintable:(UniChar)charCode diff --git a/Source/Modules/FileHandlers/LMConsolidator.h 
b/Source/Modules/ControllerModules/KeyHandlerSputnik.swift similarity index 61% rename from Source/Modules/FileHandlers/LMConsolidator.h rename to Source/Modules/ControllerModules/KeyHandlerSputnik.swift index 9bda0d9e..fc100936 100644 --- a/Source/Modules/FileHandlers/LMConsolidator.h +++ b/Source/Modules/ControllerModules/KeyHandlerSputnik.swift @@ -1,4 +1,6 @@ // Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Refactored from the ObjCpp-version of this class by: +// (c) 2011 and onwards The OpenVanilla Project (MIT License). /* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -22,30 +24,19 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef LMConsolidator_hpp -#define LMConsolidator_hpp +import Cocoa -#include -#include -#include -#include -#include -#include -#include -#include -#include +// MARK: - KeyHandler Sputnik. 
-using namespace std; -namespace vChewing -{ +// Swift Extension 不允許直接存放這些變數,所以就寫了這個衛星型別。 +// 一旦 Mandarin 模組被 Swift 化,整個 KeyHandler 就可以都用 Swift。 +// 屆時會考慮將該衛星型別內的變數與常數都挪回 KeyHandler_Kernel 內。 -class LMConsolidator -{ - public: - static bool CheckPragma(const char *path); - static bool FixEOF(const char *path); - static bool ConsolidateContent(const char *path, bool shouldCheckPragma); -}; - -} // namespace vChewing -#endif /* LMConsolidator_hpp */ +class KeyHandlerSputnik: NSObject { + static let kEpsilon: Double = 0.000001 + static var inputMode: String = "" + static var languageModel: vChewing.LMInstantiator = .init() + static var userOverrideModel: vChewing.LMUserOverride = .init() + static var builder: Megrez.BlockReadingBuilder = .init(lm: languageModel) + static var walkedNodes: [Megrez.NodeAnchor] = [] +} diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift index 39dac754..4f23b995 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift @@ -160,17 +160,17 @@ import Cocoa let reading = getSyllableCompositionFromPhoneticReadingBuffer() if !ifLangModelHasUnigrams(forKey: reading) { - IME.prtDebugIntel("B49C0979") + IME.prtDebugIntel("B49C0979:語彙庫內無「\(reading)」的匹配記錄。") errorCallback() stateCallback(buildInputtingState()) return true } // ... and insert it into the lattice grid... - insertReadingToBuilder(atCursor: reading) + insertReadingToBuilderAtCursor(reading: reading) // ... then walk the lattice grid... - let poppedText = _popOverflowComposingTextAndWalk() + let poppedText = popOverflowComposingTextAndWalk() // ... get and tweak override model suggestion if possible... 
dealWithOverrideModelSuggestions() @@ -233,8 +233,8 @@ import Cocoa stateCallback(InputState.Committing(poppedText: " ")) stateCallback(InputState.Empty()) } else if ifLangModelHasUnigrams(forKey: " ") { - insertReadingToBuilder(atCursor: " ") - let poppedText = _popOverflowComposingTextAndWalk() + insertReadingToBuilderAtCursor(reading: " ") + let poppedText = popOverflowComposingTextAndWalk() let inputting = buildInputtingState() inputting.poppedText = poppedText stateCallback(inputting) @@ -330,8 +330,8 @@ import Cocoa if !input.isOptionHold { if ifLangModelHasUnigrams(forKey: "_punctuation_list") { if isPhoneticReadingBufferEmpty() { - insertReadingToBuilder(atCursor: "_punctuation_list") - let poppedText: String! = _popOverflowComposingTextAndWalk() + insertReadingToBuilderAtCursor(reading: "_punctuation_list") + let poppedText: String! = popOverflowComposingTextAndWalk() let inputting = buildInputtingState() inputting.poppedText = poppedText stateCallback(inputting) @@ -354,7 +354,7 @@ import Cocoa // MARK: Punctuation - // if nothing is matched, see if it's a punctuation key for current layout. + // If nothing is matched, see if it's a punctuation key for current layout. var punctuationNamePrefix = "" diff --git a/Source/Modules/ControllerModules/KeyHandler_Kernel.swift b/Source/Modules/ControllerModules/KeyHandler_Kernel.swift new file mode 100644 index 00000000..a04cf18d --- /dev/null +++ b/Source/Modules/ControllerModules/KeyHandler_Kernel.swift @@ -0,0 +1,309 @@ +// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Refactored from the ObjCpp-version of this class by: +// (c) 2011 and onwards The OpenVanilla Project (MIT License). 
+/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +import Cocoa + +public enum InputMode: String { + case imeModeCHS = "org.atelierInmu.inputmethod.vChewing.IMECHS" + case imeModeCHT = "org.atelierInmu.inputmethod.vChewing.IMECHT" + case imeModeNULL = "" +} + +// MARK: - Delegate. + +// MARK: - Kernel. + +extension KeyHandler { + var kEpsilon: Double { + KeyHandlerSputnik.kEpsilon + } + + var inputMode: InputMode { + get { + switch KeyHandlerSputnik.inputMode { + case "org.atelierInmu.inputmethod.vChewing.IMECHS": + return InputMode.imeModeCHS + case "org.atelierInmu.inputmethod.vChewing.IMECHT": + return InputMode.imeModeCHT + default: + return InputMode.imeModeNULL + } + } + set { setInputMode(newValue.rawValue) } + } + + // TODO: Will reenable this once Mandarin gets Swiftified. 
+ // override public init() { + // self.ensurePhoneticParser() + // self.setInputMode(ctlInputMethod.currentInputMode) + // super.init() + // } + + func clear() { + clearPhoneticReadingBuffer() + KeyHandlerSputnik.builder.clear() + KeyHandlerSputnik.walkedNodes.removeAll() + } + + // 這個函數得獨立出來給 ObjC 使用。 + @objc func setInputMode(_ value: String) { + // 只有當傳入值與簡體中文模式的識別字串完全一致時才視為簡體模式; + // 其餘任何值(包括輸入法 plist 被改壞掉這樣的極端情況)一律當作繁體模式處理。 + let isCHS: Bool = (value == InputMode.imeModeCHS.rawValue) + + // 緊接著將新的簡繁輸入模式提報給 ctlInputMethod: + ctlInputMethod.currentInputMode = isCHS ? InputMode.imeModeCHS.rawValue : InputMode.imeModeCHT.rawValue + mgrPrefs.mostRecentInputMode = ctlInputMethod.currentInputMode + + // 拿 KeyHandlerSputnik.inputMode 當前的值與 ctlInputMethod 的提報結果對比,不同的話則套用新設定: + if KeyHandlerSputnik.inputMode != ctlInputMethod.currentInputMode { + // Reinitiate language models if necessary + setInputModesToLM(isCHS: isCHS) + + // Synchronize the sub-languageModel state settings to the new LM. + syncBaseLMPrefs() + + // Create new grid builder. + createNewBuilder() + + if !isPhoneticReadingBufferEmpty() { + clearPhoneticReadingBuffer() + } + } + // 直接寫到衛星模組內,省得類型轉換 + KeyHandlerSputnik.inputMode = ctlInputMethod.currentInputMode + } + + // MARK: - Functions dealing with Megrez. + + func walk() { + // Retrieve the most likely trellis, i.e. 
a Maximum Likelihood Estimation + // of the best possible Mandarin characters given the input syllables, + // using the Viterbi algorithm as implemented by Megrez.Walker + let walker = Megrez.Walker(grid: KeyHandlerSputnik.builder.grid()) + + // the reverse walk traces the trellis from the end + let walked: [Megrez.NodeAnchor] = walker.reverseWalk(at: KeyHandlerSputnik.builder.grid().width()) + + // then we use ".reversed()" to reverse the nodes so that we get the forward-walked nodes + KeyHandlerSputnik.walkedNodes.removeAll() + KeyHandlerSputnik.walkedNodes.append(contentsOf: walked.reversed()) + } + + func popOverflowComposingTextAndWalk() -> String { + // In ideal situations we can allow users to type infinitely in a buffer. + // However, the Viterbi algorithm has a complexity of O(N^2), so the walk + // becomes slower as the number of nodes increases. Therefore, we need to + // auto-commit overflown texts which usually lose their influence over + // the whole MLE anyway -- so that when the user types along, the already + // composed text in the rear side of the buffer will be committed out. + // (i.e. 
+ + var poppedText = "" + if KeyHandlerSputnik.builder.grid().width() > mgrPrefs.composingBufferSize { + if KeyHandlerSputnik.walkedNodes.count > 0 { + let anchor: Megrez.NodeAnchor = KeyHandlerSputnik.walkedNodes[0] + if let theNode = anchor.node { + poppedText = theNode.currentKeyValue().value + } + KeyHandlerSputnik.builder.removeHeadReadings(count: anchor.spanningLength) + } + } + walk() + return poppedText + } + + func buildAssociatePhraseArray(withKey key: String) -> [String] { + var arrResult: [String] = [] + if KeyHandlerSputnik.languageModel.hasAssociatedPhrasesForKey(key) { + arrResult.append(contentsOf: KeyHandlerSputnik.languageModel.associatedPhrasesForKey(key)) + } + return arrResult + } + + func fixNode(value: String) { + let cursorIndex: Int = getActualCandidateCursorIndex() + let selectedNode: Megrez.NodeAnchor = KeyHandlerSputnik.builder.grid().fixNodeSelectedCandidate( + location: cursorIndex, value: value + ) + // 不要針對逐字選字模式啟用臨時半衰記憶模型。 + if !mgrPrefs.useSCPCTypingMode { + // If the length of the readings and the characters do not match, + // it often means it is a special symbol and it should not be stored + // in the user override model. 
+ var addToUserOverrideModel = true + if selectedNode.spanningLength != value.count { + addToUserOverrideModel = false + } + if addToUserOverrideModel { + if let theNode = selectedNode.node { + // 威注音的 SymbolLM 的 Score 是 -12。 + if theNode.scoreFor(candidate: value) <= -12 { + addToUserOverrideModel = false + } + } + } + if addToUserOverrideModel { + KeyHandlerSputnik.userOverrideModel.observe( + walkedNodes: KeyHandlerSputnik.walkedNodes, cursorIndex: cursorIndex, candidate: value, + timestamp: NSDate().timeIntervalSince1970 + ) + } + } + walk() + + if mgrPrefs.moveCursorAfterSelectingCandidate { + var nextPosition = 0 + for node in KeyHandlerSputnik.walkedNodes { + if nextPosition >= cursorIndex { break } + nextPosition += node.spanningLength + } + if nextPosition <= getBuilderLength() { + setBuilderCursorIndex(value: nextPosition) + } + } + } + + func getCandidatesArray() -> [String] { + var arrCandidates: [String] = [] + var arrNodes: [Megrez.NodeAnchor] = [] + arrNodes.append(contentsOf: getRawNodes()) + + /// 原理:nodes 這個回饋結果包含一堆子陣列,分別對應不同詞長的候選字。 + /// 這裡先對陣列排序、讓最長候選字的子陣列的優先權最高。 + /// 這個過程不會傷到子陣列內部的排序。 + if !arrNodes.isEmpty { + // sort the nodes, so that longer nodes (representing longer phrases) + // are placed at the top of the candidate list + arrNodes.sort { $0.keyLength > $1.keyLength } + + // then use the Swift trick to retrieve the candidates for each node at/crossing the cursor + for currentNodeAnchor in arrNodes { + if let currentNode = currentNodeAnchor.node { + for currentCandidate in currentNode.candidates() { + arrCandidates.append(currentCandidate.value) + } + } + } + } + return arrCandidates + } + + func dealWithOverrideModelSuggestions() { + var overrideValue = + mgrPrefs.useSCPCTypingMode + ? 
"" + : KeyHandlerSputnik.userOverrideModel.suggest( + walkedNodes: KeyHandlerSputnik.walkedNodes, cursorIndex: getBuilderCursorIndex(), + timestamp: NSDate().timeIntervalSince1970 + ) + + if !overrideValue.isEmpty { + KeyHandlerSputnik.builder.grid().overrideNodeScoreForSelectedCandidate( + location: getActualCandidateCursorIndex(), + value: &overrideValue, + overridingScore: findHighestScore(nodes: getRawNodes(), epsilon: kEpsilon) + ) + } + } + + func findHighestScore(nodes: [Megrez.NodeAnchor], epsilon: Double) -> Double { + var highestScore: Double = 0 + for currentAnchor in nodes { + if let theNode = currentAnchor.node { + let score = theNode.highestUnigramScore() + if score > highestScore { + highestScore = score + } + } + } + return highestScore + epsilon + } + + // MARK: - Extracted methods and functions. + + func isBuilderEmpty() -> Bool { KeyHandlerSputnik.builder.grid().width() == 0 } + + func getRawNodes() -> [Megrez.NodeAnchor] { + /// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。 + /// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的,但目前 Megrez 暫時缺乏對該特性的支援。 + /// 所以暫時只能將威注音的游標後置風格描述成「跟 Windows 版雅虎奇摩注音一致」。 + mgrPrefs.setRearCursorMode + ? KeyHandlerSputnik.builder.grid().nodesCrossingOrEndingAt(location: getActualCandidateCursorIndex()) + : KeyHandlerSputnik.builder.grid().nodesEndingAt(location: getActualCandidateCursorIndex()) + } + + func setInputModesToLM(isCHS: Bool) { + KeyHandlerSputnik.languageModel = isCHS ? mgrLangModel.lmCHS : mgrLangModel.lmCHT + KeyHandlerSputnik.userOverrideModel = isCHS ? 
mgrLangModel.uomCHS : mgrLangModel.uomCHT + } + + func syncBaseLMPrefs() { + KeyHandlerSputnik.languageModel.isPhraseReplacementEnabled = mgrPrefs.phraseReplacementEnabled + KeyHandlerSputnik.languageModel.isCNSEnabled = mgrPrefs.cns11643Enabled + KeyHandlerSputnik.languageModel.isSymbolEnabled = mgrPrefs.symbolInputEnabled + } + + func createNewBuilder() { + KeyHandlerSputnik.builder = Megrez.BlockReadingBuilder(lm: KeyHandlerSputnik.languageModel) + // Each Mandarin syllable is separated by a hyphen. + KeyHandlerSputnik.builder.setJoinSeparator(separator: "-") + } + + func currentReadings() -> [String] { KeyHandlerSputnik.builder.readings() } + + func ifLangModelHasUnigrams(forKey reading: String) -> Bool { + KeyHandlerSputnik.languageModel.hasUnigramsFor(key: reading) + } + + func insertReadingToBuilderAtCursor(reading: String) { + KeyHandlerSputnik.builder.insertReadingAtCursor(reading: reading) + } + + func setBuilderCursorIndex(value: Int) { + KeyHandlerSputnik.builder.setCursorIndex(newIndex: value) + } + + func getBuilderCursorIndex() -> Int { + KeyHandlerSputnik.builder.cursorIndex() + } + + func getBuilderLength() -> Int { + KeyHandlerSputnik.builder.length() + } + + func deleteBuilderReadingInFrontOfCursor() { + KeyHandlerSputnik.builder.deleteReadingBeforeCursor() + } + + func deleteBuilderReadingAfterCursor() { + KeyHandlerSputnik.builder.deleteReadingAfterCursor() + } + + func getKeyLengthAtIndexZero() -> Int { + KeyHandlerSputnik.walkedNodes[0].node?.currentKeyValue().value.count ?? 
0 + } +} diff --git a/Source/Modules/ControllerModules/KeyHandler_Misc.swift b/Source/Modules/ControllerModules/KeyHandler_Misc.swift index 5b79ed19..ffa6ec7c 100644 --- a/Source/Modules/ControllerModules/KeyHandler_Misc.swift +++ b/Source/Modules/ControllerModules/KeyHandler_Misc.swift @@ -43,7 +43,7 @@ import Cocoa && (cursorIndex < getBuilderLength())) || cursorIndex == 0 { - if cursorIndex == 0 && !mgrPrefs.setRearCursorMode { + if cursorIndex == 0, !mgrPrefs.setRearCursorMode { cursorIndex += getKeyLengthAtIndexZero() } else { cursorIndex += 1 diff --git a/Source/Modules/ControllerModules/KeyHandler_States.swift b/Source/Modules/ControllerModules/KeyHandler_States.swift index d531b770..e2d0a645 100644 --- a/Source/Modules/ControllerModules/KeyHandler_States.swift +++ b/Source/Modules/ControllerModules/KeyHandler_States.swift @@ -32,45 +32,67 @@ import Cocoa // MARK: - 構築狀態(State Building) func buildInputtingState() -> InputState.Inputting { - // 觸發資料封裝更新,否則下文拿到的資料會是過期的。 - packageBufferStateMaterials() - // 獲取封裝好的資料 - let composedText = getComposedText() - let packagedCursorIndex = UInt(getPackagedCursorIndex()) - let resultOfRear = getStrLocationResult(isFront: false) - let resultOfFront = getStrLocationResult(isFront: true) + // "Updating the composing buffer" means to request the client + // to "refresh" the text input buffer with our "composing text" + var composingBuffer = "" + var composedStringCursorIndex = 0 - // 初期化狀態 - let newState = InputState.Inputting(composingBuffer: composedText, cursorIndex: packagedCursorIndex) + var readingCursorIndex: size_t = 0 + let builderCursorIndex: size_t = getBuilderCursorIndex() - // 組建提示文本 - var tooltip = "" + // We must do some Unicode codepoint counting to find the actual cursor location for the client + // i.e. we need to take UTF-16 into consideration, for which a surrogate pair takes 2 UniChars + // locations. 
These processes are inherited from the ObjC++ version of this class and might be + // unnecessary in Swift, but this deduction requires further experiments. + for walkedNode in KeyHandlerSputnik.walkedNodes { + if let theNode = walkedNode.node { + let strNodeValue = theNode.currentKeyValue().value + composingBuffer += strNodeValue - // 如果在用特定的模式的話,則始終顯示對應的提示。 - // TODO: 該功能無法正常運作,暫時註釋掉。 - // if ctlInputMethod.currentKeyHandler.inputMode == InputMode.imeModeCHT { - // if mgrPrefs.chineseConversionEnabled && !mgrPrefs.shiftJISShinjitaiOutputEnabled { - // tooltip = String( - // format: "%@%@%@", NSLocalizedString("Force KangXi Writing", comment: ""), "\n", - // NSLocalizedString("NotificationSwitchON", comment: "")) - // } else if mgrPrefs.shiftJISShinjitaiOutputEnabled { - // tooltip = String( - // format: "%@%@%@", NSLocalizedString("JIS Shinjitai Output", comment: ""), "\n", - // NSLocalizedString("NotificationSwitchON", comment: "")) - // } - // } + let arrSplit: [NSString] = (strNodeValue as NSString).split() + let codepointCount = arrSplit.count - // 備註:因為目前的輸入法已經有了 NSString Emoji 支援,所以這個工具提示可能不會出現了。 - // 姑且留下來用作萬一時的偵錯用途。 - if resultOfRear != "" || resultOfFront != "" { - tooltip = String( - format: NSLocalizedString("Cursor is between \"%@\" and \"%@\".", comment: ""), - resultOfFront, resultOfRear - ) + // This re-aligns the cursor index in the composed string + // (the actual cursor on the screen) with the builder's logical + // (reading) cursor; each built node has a "spanning length" + // (e.g. two reading blocks have a spanning length of 2), and we + // accumulate those lengths to calculate the displayed cursor + // index.
+ let spanningLength: Int = walkedNode.spanningLength + if readingCursorIndex + spanningLength <= builderCursorIndex { + composedStringCursorIndex += (strNodeValue as NSString).length + readingCursorIndex += spanningLength + } else { + if codepointCount == spanningLength { + var i = 0 + while i < codepointCount, readingCursorIndex < builderCursorIndex { + composedStringCursorIndex += arrSplit[i].length + readingCursorIndex += 1 + i += 1 + } + } else { + if readingCursorIndex < builderCursorIndex { + composedStringCursorIndex += (strNodeValue as NSString).length + readingCursorIndex += spanningLength + if readingCursorIndex > builderCursorIndex { + readingCursorIndex = builderCursorIndex + } + } + } + } + } } + // Now, we gather all the intel, separate the composing buffer to two parts (head and tail), + // and insert the reading text (the Mandarin syllable) in between them. + // The reading text is what the user is typing. - newState.tooltip = tooltip - return newState + let head = String((composingBuffer as NSString).substring(to: composedStringCursorIndex)) + let reading = getCompositionFromPhoneticReadingBuffer() + let tail = String((composingBuffer as NSString).substring(from: composedStringCursorIndex)) + let composedText = head + reading + tail + let cursorIndex = composedStringCursorIndex + reading.count + + return InputState.Inputting(composingBuffer: composedText, cursorIndex: UInt(cursorIndex)) } // MARK: - 用以生成候選詞陣列及狀態 @@ -102,7 +124,8 @@ import Cocoa ) -> InputState.AssociatedPhrases! 
{ // 上一行必須要用驚嘆號,否則 Xcode 會誤導你砍掉某些實際上必需的語句。 InputState.AssociatedPhrases( - candidates: buildAssociatePhraseArray(withKey: key), useVerticalMode: useVerticalMode) + candidates: buildAssociatePhraseArray(withKey: key), useVerticalMode: useVerticalMode + ) } // MARK: - 用以處理就地新增自訂語彙時的行為 @@ -191,8 +214,8 @@ import Cocoa } if isPhoneticReadingBufferEmpty() { - insertReadingToBuilder(atCursor: customPunctuation) - let poppedText = _popOverflowComposingTextAndWalk() + insertReadingToBuilderAtCursor(reading: customPunctuation) + let poppedText = popOverflowComposingTextAndWalk() let inputting = buildInputtingState() inputting.poppedText = poppedText stateCallback(inputting) @@ -256,7 +279,7 @@ import Cocoa return false } - let readings: [String] = _currentReadings() + let readings: [String] = currentReadings() let composingBuffer = (IME.areWeUsingOurOwnPhraseEditor) ? readings.joined(separator: "-") @@ -283,7 +306,7 @@ import Cocoa if isPhoneticReadingBufferEmpty() { if getBuilderCursorIndex() >= 0 { deleteBuilderReadingInFrontOfCursor() - _walk() + walk() } else { IME.prtDebugIntel("9D69908D") errorCallback() @@ -316,7 +339,7 @@ import Cocoa if isPhoneticReadingBufferEmpty() { if getBuilderCursorIndex() != getBuilderLength() { deleteBuilderReadingAfterCursor() - _walk() + walk() let inputting = buildInputtingState() // 這裡不用「count > 0」,因為該整數變數只要「!isEmpty」那就必定滿足這個條件。 if !inputting.composingBuffer.isEmpty { @@ -375,7 +398,7 @@ import Cocoa } if getBuilderCursorIndex() != 0 { - setBuilderCursorIndex(0) + setBuilderCursorIndex(value: 0) stateCallback(buildInputtingState()) } else { IME.prtDebugIntel("66D97F90") @@ -405,7 +428,7 @@ import Cocoa } if getBuilderCursorIndex() != getBuilderLength() { - setBuilderCursorIndex(getBuilderLength()) + setBuilderCursorIndex(value: getBuilderLength()) stateCallback(buildInputtingState()) } else { IME.prtDebugIntel("9B69908E") @@ -475,7 +498,7 @@ import Cocoa composingBuffer: currentState.composingBuffer, cursorIndex: 
currentState.cursorIndex, markerIndex: UInt(nextPosition), - readings: _currentReadings() + readings: currentReadings() ) marking.tooltipForInputting = currentState.tooltip stateCallback(marking) @@ -486,7 +509,7 @@ import Cocoa } } else { if getBuilderCursorIndex() < getBuilderLength() { - setBuilderCursorIndex(getBuilderCursorIndex() + 1) + setBuilderCursorIndex(value: getBuilderCursorIndex() + 1) stateCallback(buildInputtingState()) } else { IME.prtDebugIntel("A96AAD58") @@ -526,7 +549,7 @@ import Cocoa composingBuffer: currentState.composingBuffer, cursorIndex: currentState.cursorIndex, markerIndex: UInt(previousPosition), - readings: _currentReadings() + readings: currentReadings() ) marking.tooltipForInputting = currentState.tooltip stateCallback(marking) @@ -537,7 +560,7 @@ import Cocoa } } else { if getBuilderCursorIndex() > 0 { - setBuilderCursorIndex(getBuilderCursorIndex() - 1) + setBuilderCursorIndex(value: getBuilderCursorIndex() - 1) stateCallback(buildInputtingState()) } else { IME.prtDebugIntel("7045E6F3") diff --git a/Source/Modules/ControllerModules/KeyValueBlobReader.cpp b/Source/Modules/ControllerModules/KeyValueBlobReader.cpp deleted file mode 100644 index eee32bbf..00000000 --- a/Source/Modules/ControllerModules/KeyValueBlobReader.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. 
The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "KeyValueBlobReader.h" - -namespace vChewing -{ - -KeyValueBlobReader::State KeyValueBlobReader::Next(KeyValue *out) -{ - static auto new_line = [](char c) { return c == '\n' || c == '\r'; }; - static auto blank = [](char c) { return c == ' ' || c == '\t'; }; - static auto blank_or_newline = [](char c) { return blank(c) || new_line(c); }; - static auto content_char = [](char c) { return !blank(c) && !new_line(c); }; - - if (state_ == State::ERROR) - { - return state_; - } - - const char *key_begin = nullptr; - size_t key_length = 0; - const char *value_begin = nullptr; - size_t value_length = 0; - - while (true) - { - state_ = SkipUntilNot(blank_or_newline); - if (state_ != State::CAN_CONTINUE) - { - return state_; - } - - // Check if it's a comment line; if so, read until end of line. - if (*current_ != '#') - { - break; - } - state_ = SkipUntil(new_line); - if (state_ != State::CAN_CONTINUE) - { - return state_; - } - } - - // No need to check whether* current_ is a content_char, since content_char - // is defined as not blank and not new_line. 
- - key_begin = current_; - state_ = SkipUntilNot(content_char); - if (state_ != State::CAN_CONTINUE) - { - goto error; - } - key_length = current_ - key_begin; - - // There should be at least one blank character after the key string. - if (!blank(*current_)) - { - goto error; - } - - state_ = SkipUntilNot(blank); - if (state_ != State::CAN_CONTINUE) - { - goto error; - } - - if (!content_char(*current_)) - { - goto error; - } - - value_begin = current_; - // value must only contain content characters, blanks not are allowed. - // also, there's no need to check the state after this, since we will always - // emit the value. This also avoids the situation where trailing spaces in a - // line would become part of the value. - SkipUntilNot(content_char); - value_length = current_ - value_begin; - - // Unconditionally skip until the end of the line. This prevents the case - // like "foo bar baz\n" where baz should not be treated as the Next key. - SkipUntil(new_line); - - if (out != nullptr) - { - *out = KeyValue{std::string_view{key_begin, key_length}, std::string_view{value_begin, value_length}}; - } - state_ = State::HAS_PAIR; - return state_; - -error: - state_ = State::ERROR; - return state_; -} - -KeyValueBlobReader::State KeyValueBlobReader::SkipUntilNot(const std::function &f) -{ - while (current_ != end_ && *current_) - { - if (!f(*current_)) - { - return State::CAN_CONTINUE; - } - ++current_; - } - - return State::END; -} - -KeyValueBlobReader::State KeyValueBlobReader::SkipUntil(const std::function &f) -{ - while (current_ != end_ && *current_) - { - if (f(*current_)) - { - return State::CAN_CONTINUE; - } - ++current_; - } - - return State::END; -} - -std::ostream &operator<<(std::ostream &os, const KeyValueBlobReader::KeyValue &kv) -{ - os << "(key: " << kv.key << ", value: " << kv.value << ")"; - return os; -} - -} // namespace vChewing diff --git a/Source/Modules/ControllerModules/KeyValueBlobReader.h 
b/Source/Modules/ControllerModules/KeyValueBlobReader.h deleted file mode 100644 index 8ca313be..00000000 --- a/Source/Modules/ControllerModules/KeyValueBlobReader.h +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef SOURCE_ENGINE_KEYVALUEBLOBREADER_H_ -#define SOURCE_ENGINE_KEYVALUEBLOBREADER_H_ - -#include -#include -#include -#include - -// A reader for text-based, blank-separated key-value pairs in a binary blob. -// -// This reader is suitable for reading language model files that entirely -// consist of key-value pairs. Leading or trailing spaces are ignored. 
-// Lines that start with "#" are treated as comments. Values cannot contain -// spaces. Any space after the value string is parsed is ignored. This implies -// that after a blank, anything that comes after the value can be used as -// comment. Both ' ' and '\t' are treated as blank characters, and the parser -// is agnostic to how lines are ended, and so LF, CR LF, and CR are all valid -// line endings. -// -// std::string_view is used to allow returning results efficiently. As a result, -// the blob is a const char* and will never be mutated. This implies, for -// example, read-only mmap can be used to parse large files. -namespace vChewing -{ - -class KeyValueBlobReader -{ - public: - enum class State : int - { - // There are no more key-value pairs in this blob. - END = 0, - // The reader has produced a new key-value pair. - HAS_PAIR = 1, - // An error is encountered and the parsing stopped. - ERROR = -1, - // Internal-only state: the parser can continue parsing. - CAN_CONTINUE = 2 - }; - - struct KeyValue - { - constexpr KeyValue() : key(""), value("") - { - } - constexpr KeyValue(std::string_view k, std::string_view v) : key(k), value(v) - { - } - - bool operator==(const KeyValue &another) const - { - return key == another.key && value == another.value; - } - - std::string_view key; - std::string_view value; - }; - - KeyValueBlobReader(const char *blob, size_t size) : current_(blob), end_(blob + size) - { - } - - // Parse the next key-value pair and return the state of the reader. If - // `out` is passed, out will be set to the produced key-value pair if there - // is one. 
- State Next(KeyValue *out = nullptr); - - private: - State SkipUntil(const std::function &f); - State SkipUntilNot(const std::function &f); - - const char *current_; - const char *end_; - State state_ = State::CAN_CONTINUE; -}; - -std::ostream &operator<<(std::ostream &, const KeyValueBlobReader::KeyValue &); - -} // namespace vChewing - -#endif // SOURCE_ENGINE_KEYVALUEBLOBREADER_H_ diff --git a/Source/Modules/FileHandlers/LMConsolidator.mm b/Source/Modules/FileHandlers/LMConsolidator.mm deleted file mode 100644 index 0843e93d..00000000 --- a/Source/Modules/FileHandlers/LMConsolidator.mm +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#include "LMConsolidator.h" -#include "vChewing-Swift.h" - -namespace vChewing -{ - -constexpr std::string_view FORMATTED_PRAGMA_HEADER = - "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍"; - -// HEADER VERIFIER. CREDIT: Shiki Suen -bool LMConsolidator::CheckPragma(const char *path) -{ - ifstream zfdCheckPragma(path); - if (zfdCheckPragma.good()) - { - string firstLine; - getline(zfdCheckPragma, firstLine); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "HEADER SEEN ||%s", firstLine.c_str()); - if (firstLine != FORMATTED_PRAGMA_HEADER) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "HEADER VERIFICATION FAILED. START IN-PLACE CONSOLIDATING PROCESS."); - return false; - } - } - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "HEADER VERIFICATION SUCCESSFUL."); - return true; -} - -// EOF FIXER. CREDIT: Shiki Suen. -bool LMConsolidator::FixEOF(const char *path) -{ - std::fstream zfdEOFFixerIncomingStream(path); - zfdEOFFixerIncomingStream.seekg(-1, std::ios_base::end); - char z; - zfdEOFFixerIncomingStream.get(z); - if (z != '\n') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// REPORT: Data File not ended with a new line.\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// PROCEDURE: Trying to insert a new line as EOF before per-line check process.\n"); - std::ofstream zfdEOFFixerOutput(path, std::ios_base::app); - zfdEOFFixerOutput << std::endl; - zfdEOFFixerOutput.close(); - if (zfdEOFFixerOutput.fail()) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// REPORT: Failed to append a newline to the data file. 
Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - return false; - } - } - zfdEOFFixerIncomingStream.close(); - if (zfdEOFFixerIncomingStream.fail()) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, - "// REPORT: Failed to read lines through the data file for EOF check. Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - return false; - } - return true; -} // END: EOF FIXER. - -// CONTENT CONSOLIDATOR. CREDIT: Shiki Suen. -bool LMConsolidator::ConsolidateContent(const char *path, bool shouldCheckPragma) -{ - bool pragmaCheckResult = LMConsolidator::CheckPragma(path); - if (pragmaCheckResult && shouldCheckPragma) - { - return true; - } - - ifstream zfdContentConsolidatorIncomingStream(path); - vector vecEntry; - while (!zfdContentConsolidatorIncomingStream.eof()) - { // Xcode 13 能用的 ObjCpp 與 Cpp 並無原生支援「\h」這個 Regex 參數的能力,只能逐行處理。 - string zfdBuffer; - getline(zfdContentConsolidatorIncomingStream, zfdBuffer); - vecEntry.push_back(zfdBuffer); - } - // 第一遍 for 用來統整每行內的內容。 - // regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"), - // sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp / - // objCpp 可能對某些 Regex 寫法有相容性問題。 regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "), - // sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。 - regex sedToConsolidate("( +| +| +|\t+)+"), sedToTrim("(^\\s|\\s$)"); - for (int i = 0; i < vecEntry.size(); i++) - { // 第一遍 for 用來統整每行內的內容。 - if (vecEntry[i].size() != 0) - { // 不要理會空行,否則給空行加上 endl 等於再加空行。 - // RegEx 處理順序:先將全形空格換成西文空格,然後合併任何意義上的連續空格(包括 tab - // 等),最後去除每行首尾空格。 vecEntry[i] = regex_replace(vecEntry[i], sedCJKWhiteSpace, " ").c_str(); // - // 中日韓全形空格轉為 ASCII 空格。 vecEntry[i] = regex_replace(vecEntry[i], sedNonBreakWhiteSpace, " - // ").c_str(); // Non-Break 型空格轉為 ASCII 空格。 vecEntry[i] = 
regex_replace(vecEntry[i], sedWhiteSpace, - // " ").c_str(); // 所有意義上的連續的 \s 型空格都轉為單個 ASCII 空格。 vecEntry[i] = - // regex_replace(vecEntry[i], sedLeadingSpace, "").c_str(); // 去掉行首空格。 vecEntry[i] = - // regex_replace(vecEntry[i], sedTrailingSpace, "").c_str(); // 去掉行尾空格。 - // 上述命令分步驟執行容易產生效能問題,故濃縮為下述兩句。 - vecEntry[i] = regex_replace(vecEntry[i], sedToConsolidate, " ").c_str(); - vecEntry[i] = regex_replace(vecEntry[i], sedToTrim, "").c_str(); - } - } - // 在第二遍 for 運算之前,針對 vecEntry 去除重複條目。 - std::reverse(vecEntry.begin(), vecEntry.end()); // 先首尾顛倒,免得破壞最新的 override 資訊。 - vecEntry.erase(unique(vecEntry.begin(), vecEntry.end()), vecEntry.end()); // 去重複。 - std::reverse(vecEntry.begin(), vecEntry.end()); // 再顛倒回來。 - // 統整完畢。開始將統整過的內容寫入檔案。 - ofstream zfdContentConsolidatorOutput(path); // 這裡是要從頭開始重寫檔案內容,所以不需要「 ios_base::app 」。 - if (!pragmaCheckResult) - { - zfdContentConsolidatorOutput << FORMATTED_PRAGMA_HEADER << endl; // 寫入經過整理處理的 HEADER。 - } - for (int i = 0; i < vecEntry.size(); i++) - { // 第二遍 for 用來寫入統整過的內容。 - if (vecEntry[i].size() != 0) - { // 這句很重要,不然還是會把經過 RegEx 處理後出現的空行搞到檔案裡。 - zfdContentConsolidatorOutput << vecEntry[i] - << endl; // 這裡是必須得加上 endl 的,不然所有行都變成一個整合行。 - } - } - zfdContentConsolidatorOutput.close(); - if (zfdContentConsolidatorOutput.fail()) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, - "// REPORT: Failed to write content-consolidated data to the file. Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - return false; - } - zfdContentConsolidatorIncomingStream.close(); - if (zfdContentConsolidatorIncomingStream.fail()) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// REPORT: Failed to read lines through the data file for content-consolidation. " - "Insufficient Privileges?\n"); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "// DATA FILE: %s", path); - return false; - } - return true; -} // END: CONTENT CONSOLIDATOR. 
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

import Foundation

extension vChewing {
	/// In-place normalizer ("consolidator") for language-model text files.
	///
	/// A consolidated file starts with `kPragmaHeader`, separates columns with a
	/// single ASCII space, uses LF line endings, and contains neither blank lines
	/// nor duplicated lines.
	public enum LMConsolidator {
		/// The marker line written at the top of every consolidated file.
		public static let kPragmaHeader = "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍"

		/// Tells whether the first line of the file at `path` equals `kPragmaHeader`.
		/// - Parameter path: File system path of the file to inspect.
		/// - Returns: `true` only if the file exists, can be opened, and its first
		///   line is the pragma header; `false` in every other case.
		public static func checkPragma(path: String) -> Bool {
			guard FileManager.default.fileExists(atPath: path) else {
				IME.prtDebugIntel("Header Verification Failed: File Missing.")
				return false
			}
			// The file may exist and still be unreadable (permissions, directory, ...),
			// so the handle must not be force-unwrapped.
			guard let fileHandle = FileHandle(forReadingAtPath: path) else {
				IME.prtDebugIntel("Header Verification Failed: File Access Error.")
				return false
			}
			// Release the descriptor on every exit path.
			// NOTE(review): LineReader's own cleanup is not visible from here; calling
			// closeFile() on an already-closed handle is expected to be a no-op — confirm.
			defer { fileHandle.closeFile() }

			do {
				let lineReader = try LineReader(file: fileHandle)
				// Only the first line matters, hence the unconditional return in the loop.
				for strLine in lineReader {
					IME.prtDebugIntel("Header Seen ||\(strLine)")
					guard strLine == kPragmaHeader else {
						IME.prtDebugIntel("Header Mismatch, Starting In-Place Consolidation.")
						return false
					}
					IME.prtDebugIntel("Header Verification Successful.")
					return true
				}
			} catch {
				IME.prtDebugIntel("Header Verification Failed: File Access Error.")
				return false
			}
			// The reader produced no line at all.
			IME.prtDebugIntel("Header Verification Failed: File Is Empty.")
			return false
		}

		/// Makes sure the file at `path` ends with a line feed, appending one if needed.
		/// - Parameter path: File system path of the file to fix.
		/// - Returns: `true` if the file ends with `\n` afterwards; `false` if the
		///   file is missing or cannot be read / written as UTF-8.
		public static func fixEOF(path: String) -> Bool {
			guard FileManager.default.fileExists(atPath: path) else {
				IME.prtDebugIntel("EOF Fix Failed: File Missing at \(path).")
				return false
			}
			let urlPath = URL(fileURLWithPath: path)
			do {
				var strIncoming = try String(contentsOf: urlPath, encoding: .utf8)
				if !strIncoming.hasSuffix("\n") {
					IME.prtDebugIntel("EOF Fix Necessity Confirmed, Start Fixing.")
					strIncoming += "\n"
					try strIncoming.write(to: urlPath, atomically: false, encoding: .utf8)
				}
			} catch {
				IME.prtDebugIntel("EOF Fix Failed w/ File: \(path)")
				IME.prtDebugIntel("EOF Fix Failed w/ Error: \(error).")
				return false
			}
			IME.prtDebugIntel("EOF Successfully Ensured (with possible autofixes performed).")
			return true
		}

		/// Normalizes the file at `path` in place.
		///
		/// Whitespace runs are collapsed, line endings unified, duplicated lines
		/// removed (the last occurrence wins so that newer overrides survive), and
		/// the pragma header is placed on the first line.
		/// - Parameters:
		///   - path: File system path of the file to consolidate.
		///   - shouldCheckPragma: When `true`, a file that already starts with the
		///     pragma header is considered consolidated and left untouched.
		/// - Returns: `true` if the file is consolidated afterwards (or needed no
		///   work); `false` if it is missing or cannot be read / written as UTF-8.
		public static func consolidate(path: String, pragma shouldCheckPragma: Bool) -> Bool {
			if shouldCheckPragma, checkPragma(path: path) {
				return true
			}
			guard FileManager.default.fileExists(atPath: path) else {
				IME.prtDebugIntel("Consolidation Failed: File Missing at \(path).")
				return false
			}

			let urlPath = URL(fileURLWithPath: path)
			do {
				var strProcessed = try String(contentsOf: urlPath, encoding: .utf8)

				// Step 1: Consolidating formats per line.
				// -------
				// Any run of CJK white space (U+3000), non-breaking space (U+00A0),
				// ASCII space or tab becomes one ASCII space. The code points are
				// written as ICU escapes so they cannot be lost when the source is edited.
				strProcessed.regReplace(pattern: #"[\x{3000}\x{A0} \t]+"#, replaceWith: " ")
				// Drop the space at the beginning / end of every line.
				strProcessed.regReplace(pattern: #"(^ | $)"#, replaceWith: "")
				// CR & FF to LF, collapsing consecutive line breaks (removes blank lines).
				strProcessed.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n")
				if strProcessed.first == " " {  // Space at the very start of the file.
					strProcessed.removeFirst()
				}
				if strProcessed.last == " " {  // Space at the very end of the file.
					strProcessed.removeLast()
				}

				// Step 2: Deduplication.
				// Existing header lines are dropped here and the header is re-added on
				// top afterwards; otherwise "last occurrence wins" could leave the header
				// somewhere in the middle of the file instead of on the first line.
				let arrData = strProcessed.components(separatedBy: "\n").filter { $0 != kPragmaHeader }
				// Deduplicate on the reversed array so the LAST occurrence of a line is
				// the one that survives, keeping the newest override information.
				let arrDeduplicatedReversed = NSOrderedSet(array: arrData.reversed()).array.compactMap { $0 as? String }

				// Step 3: Rebuild the contents with the pragma header on the first line.
				strProcessed = kPragmaHeader + "\n"
				for lineData in arrDeduplicatedReversed.reversed() {
					strProcessed += lineData
					strProcessed += "\n"
				}

				// Step 4: Remove duplicated newlines (left over from empty entries).
				strProcessed.regReplace(pattern: "\\n+", replaceWith: "\n")

				// Step 5: Write consolidated file contents.
				try strProcessed.write(to: urlPath, atomically: false, encoding: .utf8)
			} catch {
				IME.prtDebugIntel("Consolidation Failed w/ File: \(path)")
				IME.prtDebugIntel("Consolidation Failed w/ Error: \(error).")
				return false
			}
			IME.prtDebugIntel("Either Consolidation Successful Or No-Need-To-Consolidate.")
			return true
		}
	}
}

// MARK: - String Extension

extension String {
	/// Replaces every match of the regular expression `pattern` in place.
	///
	/// `^` and `$` match at line boundaries (`.anchorsMatchLines`). If `pattern`
	/// cannot be compiled the string is left unchanged and the error is logged.
	/// - Parameters:
	///   - pattern: An `NSRegularExpression` pattern.
	///   - replaceWith: The replacement template; defaults to the empty string.
	fileprivate mutating func regReplace(pattern: String, replaceWith: String = "") {
		// Ref: https://stackoverflow.com/a/40993403/4162914 && https://stackoverflow.com/a/71291137/4162914
		do {
			let regex = try NSRegularExpression(
				pattern: pattern, options: [.caseInsensitive, .anchorsMatchLines]
			)
			let range = NSRange(startIndex..., in: self)
			self = regex.stringByReplacingMatches(
				in: self, options: [], range: range, withTemplate: replaceWith
			)
		} catch {
			// Patterns are compile-time constants, so this indicates a programming error.
			IME.prtDebugIntel("regReplace Failed w/ Pattern: \(pattern), Error: \(error).")
		}
	}
}
+public enum vChewing {} + public class IME: NSObject { static let arrSupportedLocales = ["en", "zh-Hant", "zh-Hans", "ja"] static let dlgOpenPath = NSOpenPanel() diff --git a/Source/Modules/IMEModules/ctlInputMethod.swift b/Source/Modules/IMEModules/ctlInputMethod.swift index 8bf699bf..ad7fc055 100644 --- a/Source/Modules/IMEModules/ctlInputMethod.swift +++ b/Source/Modules/IMEModules/ctlInputMethod.swift @@ -38,10 +38,6 @@ extension ctlCandidate { @objc(ctlInputMethod) class ctlInputMethod: IMKInputController { - @objc static let kIMEModeCHS = "org.atelierInmu.inputmethod.vChewing.IMECHS" - @objc static let kIMEModeCHT = "org.atelierInmu.inputmethod.vChewing.IMECHT" - @objc static let kIMEModeNULL = "org.atelierInmu.inputmethod.vChewing.IMENULL" - @objc static var areWeDeleting = false private static let tooltipController = TooltipController() @@ -116,7 +112,7 @@ class ctlInputMethod: IMKInputController { } override func setValue(_ value: Any!, forTag _: Int, client: Any!) { - var newInputMode = InputMode(rawValue: value as? String ?? InputMode.imeModeNULL.rawValue) + var newInputMode: InputMode = InputMode(rawValue: value as? String ?? "") ?? InputMode.imeModeNULL switch newInputMode { case InputMode.imeModeCHS: newInputMode = InputMode.imeModeCHS diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h b/Source/Modules/LangModelRelated/KeyValueStructs.swift similarity index 55% rename from Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h rename to Source/Modules/LangModelRelated/KeyValueStructs.swift index 43263923..6897b960 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.h +++ b/Source/Modules/LangModelRelated/KeyValueStructs.swift @@ -1,6 +1,4 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

import Foundation

extension vChewing {
	/// A plain key/value pair of strings.
	///
	/// Equality is member-wise (`key` and `value`), provided by the synthesized
	/// `Equatable` conformance.
	@frozen public struct KeyValue: Equatable {
		var key: String
		var value: String

		/// Creates a pair; both members default to the empty string.
		public init(key: String = "", value: String = "") {
			self.key = key
			self.value = value
		}
	}

	/// A key/value pair of strings together with a numeric rate.
	///
	/// Equality is member-wise (`key`, `value` and `rate`), provided by the
	/// synthesized `Equatable` conformance.
	@frozen public struct KeyValueRate: Equatable {
		var key: String
		var value: String
		var rate: Double

		/// Creates an instance from its individual members; every member has a default.
		public init(key: String = "", value: String = "", rate: Double = 0.0) {
			self.key = key
			self.value = value
			self.rate = rate
		}

		/// Creates an instance from an existing `KeyValue` and a rate.
		///
		/// `keyValue` intentionally has no default value: if it had one, calls such
		/// as `KeyValueRate()` or `KeyValueRate(rate:)` would match both initializers
		/// and could not be resolved unambiguously.
		public init(keyValue: KeyValue, rate: Double = 0.0) {
			key = keyValue.key
			value = keyValue.value
			self.rate = rate
		}
	}
}
2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef LMInstantiator_H -#define LMInstantiator_H - -#include "AssociatedPhrases.h" -#include "CNSLM.h" -#include "CoreLM.h" -#include "ParselessLM.h" -#include "PhraseReplacementMap.h" -#include "SymbolLM.h" -#include "UserPhrasesLM.h" -#include "UserSymbolLM.h" -#include -#include - -namespace vChewing -{ - -using namespace Gramambular; - -/// LMInstantiator is a facade for managing a set of models including -/// the input method language model, user phrases and excluded phrases. 
-/// -/// It is the primary model class that the input controller and grammar builder -/// of vChewing talks to. When the grammar builder starts to build a sentence -/// from a series of BPMF readings, it passes the readings to the model to see -/// if there are valid unigrams, and use returned unigrams to produce the final -/// results. -/// -/// LMInstantiator combine and transform the unigrams from the primary language -/// model and user phrases. The process is -/// -/// 1) Get the original unigrams. -/// 2) Drop the unigrams whose value is contained in the exclusion map. -/// 3) Replace the values of the unigrams using the phrase replacement map. -/// 4) Replace the values of the unigrams using an external converter lambda. -/// 5) Drop the duplicated phrases. -/// -/// The controller can ask the model to load the primary input method language -/// model while launching and to load the user phrases anytime if the custom -/// files are modified. It does not keep the reference of the data pathes but -/// you have to pass the paths when you ask it to do loading. -class LMInstantiator : public Gramambular::LanguageModel -{ - public: - LMInstantiator(); - ~LMInstantiator(); - - /// Asks to load the primary language model at the given path. - /// @param languageModelPath The path of the language model. - void loadLanguageModel(const char *languageModelPath); - /// If the data model is already loaded. - bool isDataModelLoaded(); - - /// Asks to load the primary language model at the given path. - /// @param miscDataPath The path of the misc data model. - void loadMiscData(const char *miscDataPath); - /// If the data model is already loaded. - bool isMiscDataLoaded(); - - /// Asks to load the primary language model at the given path. - /// @param symbolDataPath The path of the symbol data model. - void loadSymbolData(const char *symbolDataPath); - /// If the data model is already loaded. 
- bool isSymbolDataLoaded(); - - /// Asks to load the primary language model at the given path. - /// @param cnsDataPath The path of the CNS data model. - void loadCNSData(const char *cnsDataPath); - /// If the data model is already loaded. - bool isCNSDataLoaded(); - - /// Asks to load the user phrases and excluded phrases at the given path. - /// @param userPhrasesPath The path of user phrases. - /// @param excludedPhrasesPath The path of excluded phrases. - void loadUserPhrases(const char *userPhrasesPath, const char *excludedPhrasesPath); - /// Asks to load the user symbol data at the given path. - /// @param userSymbolDataPath The path of user symbol data. - void loadUserSymbolData(const char *userPhrasesPath); - /// Asks to load the user associated phrases at the given path. - /// @param userAssociatedPhrasesPath The path of the user associated phrases. - void loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath); - /// Asks to load the phrase replacement table at the given path. - /// @param phraseReplacementPath The path of the phrase replacement table. - void loadPhraseReplacementMap(const char *phraseReplacementPath); - - /// Not implemented since we do not have data to provide bigram function. - const std::vector bigramsForKeys(const std::string &preceedingKey, const std::string &key); - /// Returns a list of available unigram for the given key. - /// @param key A std::string represents the BPMF reading or a symbol key. For - /// example, it you pass "ㄇㄚ", it returns "嗎", "媽", and so on. - const std::vector unigramsForKey(const std::string &key); - /// If the model has unigrams for the given key. - /// @param key The key. - bool hasUnigramsForKey(const std::string &key); - - /// Enables or disables phrase replacement. - void setPhraseReplacementEnabled(bool enabled); - /// If phrase replacement is enabled or not. - bool phraseReplacementEnabled(); - - /// Enables or disables symbol input. 
- void setSymbolEnabled(bool enabled); - /// If symbol input is enabled or not. - bool symbolEnabled(); - - /// Enables or disables CNS11643 input. - void setCNSEnabled(bool enabled); - /// If CNS11643 input is enabled or not. - bool cnsEnabled(); - - const std::vector associatedPhrasesForKey(const std::string &key); - bool hasAssociatedPhrasesForKey(const std::string &key); - - protected: - /// Filters and converts the input unigrams and return a new list of unigrams. - /// - /// @param unigrams The unigrams to be processed. - /// @param excludedValues The values to excluded unigrams. - /// @param insertedValues The values for unigrams already in the results. - /// It helps to prevent duplicated unigrams. Please note that the method - /// has a side effect that it inserts values to `insertedValues`. - const std::vector filterAndTransformUnigrams( - const std::vector unigrams, const std::unordered_set &excludedValues, - std::unordered_set &insertedValues); - - ParselessLM m_languageModel; - CoreLM m_miscModel; - SymbolLM m_symbolModel; - CNSLM m_cnsModel; - UserPhrasesLM m_userPhrases; - UserPhrasesLM m_excludedPhrases; - UserSymbolLM m_userSymbolModel; - PhraseReplacementMap m_phraseReplacement; - AssociatedPhrases m_associatedPhrases; - bool m_phraseReplacementEnabled; - bool m_cnsEnabled; - bool m_symbolEnabled; -}; -}; // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/LMInstantiator.mm b/Source/Modules/LangModelRelated/LMInstantiator.mm deleted file mode 100644 index 2873cbf2..00000000 --- a/Source/Modules/LangModelRelated/LMInstantiator.mm +++ /dev/null @@ -1,323 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
-/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#include "LMInstantiator.h" -#include -#include - -namespace vChewing -{ - -LMInstantiator::LMInstantiator() -{ -} - -LMInstantiator::~LMInstantiator() -{ - m_languageModel.close(); - m_miscModel.close(); - m_userPhrases.close(); - m_userSymbolModel.close(); - m_cnsModel.close(); - m_excludedPhrases.close(); - m_phraseReplacement.close(); - m_associatedPhrases.close(); -} - -void LMInstantiator::loadLanguageModel(const char *languageModelDataPath) -{ - if (languageModelDataPath) - { - m_languageModel.close(); - m_languageModel.open(languageModelDataPath); - } -} - -bool LMInstantiator::isDataModelLoaded() -{ - return m_languageModel.isLoaded(); -} - -void LMInstantiator::loadCNSData(const char *cnsDataPath) -{ - if (cnsDataPath) - { - m_cnsModel.close(); - m_cnsModel.open(cnsDataPath); - } -} - -bool LMInstantiator::isCNSDataLoaded() -{ - return m_cnsModel.isLoaded(); -} - -void LMInstantiator::loadMiscData(const char *miscDataPath) -{ - if (miscDataPath) - { - m_miscModel.close(); - m_miscModel.open(miscDataPath); - } -} - -bool LMInstantiator::isMiscDataLoaded() -{ - return m_miscModel.isLoaded(); -} - -void LMInstantiator::loadSymbolData(const char *symbolDataPath) -{ - if (symbolDataPath) - { - m_symbolModel.close(); - m_symbolModel.open(symbolDataPath); - } -} - -bool LMInstantiator::isSymbolDataLoaded() -{ - return m_symbolModel.isLoaded(); -} - -void LMInstantiator::loadUserPhrases(const char *userPhrasesDataPath, const char *excludedPhrasesDataPath) -{ - if (userPhrasesDataPath) - { - m_userPhrases.close(); - m_userPhrases.open(userPhrasesDataPath); - } - if (excludedPhrasesDataPath) - { - m_excludedPhrases.close(); - m_excludedPhrases.open(excludedPhrasesDataPath); - } -} - -void LMInstantiator::loadUserSymbolData(const char *userSymbolDataPath) -{ - if (userSymbolDataPath) - { - m_userSymbolModel.close(); - m_userSymbolModel.open(userSymbolDataPath); - } -} - -void LMInstantiator::loadUserAssociatedPhrases(const char *userAssociatedPhrasesPath) -{ 
- if (userAssociatedPhrasesPath) - { - m_associatedPhrases.close(); - m_associatedPhrases.open(userAssociatedPhrasesPath); - } -} - -void LMInstantiator::loadPhraseReplacementMap(const char *phraseReplacementPath) -{ - if (phraseReplacementPath) - { - m_phraseReplacement.close(); - m_phraseReplacement.open(phraseReplacementPath); - } -} - -const std::vector LMInstantiator::bigramsForKeys(const std::string &preceedingKey, - const std::string &key) -{ - return std::vector(); -} - -const std::vector LMInstantiator::unigramsForKey(const std::string &key) -{ - if (key == " ") - { - std::vector spaceUnigrams; - Gramambular::Unigram g; - g.keyValue.key = " "; - g.keyValue.value = " "; - g.score = 0; - spaceUnigrams.push_back(g); - return spaceUnigrams; - } - - std::vector allUnigrams; - std::vector miscUnigrams; - std::vector symbolUnigrams; - std::vector userUnigrams; - std::vector userSymbolUnigrams; - std::vector cnsUnigrams; - - std::unordered_set excludedValues; - std::unordered_set insertedValues; - - if (m_excludedPhrases.hasUnigramsForKey(key)) - { - std::vector excludedUnigrams = m_excludedPhrases.unigramsForKey(key); - transform(excludedUnigrams.begin(), excludedUnigrams.end(), inserter(excludedValues, excludedValues.end()), - [](const Gramambular::Unigram &u) { return u.keyValue.value; }); - } - - if (m_userPhrases.hasUnigramsForKey(key)) - { - std::vector rawUserUnigrams = m_userPhrases.unigramsForKey(key); - // 用這句指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。 - // 這樣一來就可以在就地新增語彙時徹底複寫優先權。 - std::reverse(rawUserUnigrams.begin(), rawUserUnigrams.end()); - userUnigrams = filterAndTransformUnigrams(rawUserUnigrams, excludedValues, insertedValues); - } - - if (m_languageModel.hasUnigramsForKey(key)) - { - std::vector rawGlobalUnigrams = m_languageModel.unigramsForKey(key); - allUnigrams = filterAndTransformUnigrams(rawGlobalUnigrams, excludedValues, insertedValues); - } - - if (m_miscModel.hasUnigramsForKey(key)) - { - std::vector rawMiscUnigrams = 
m_miscModel.unigramsForKey(key); - miscUnigrams = filterAndTransformUnigrams(rawMiscUnigrams, excludedValues, insertedValues); - } - - if (m_symbolModel.hasUnigramsForKey(key) && m_symbolEnabled) - { - std::vector rawSymbolUnigrams = m_symbolModel.unigramsForKey(key); - symbolUnigrams = filterAndTransformUnigrams(rawSymbolUnigrams, excludedValues, insertedValues); - } - - if (m_userSymbolModel.hasUnigramsForKey(key) && m_symbolEnabled) - { - std::vector rawUserSymbolUnigrams = m_userSymbolModel.unigramsForKey(key); - userSymbolUnigrams = filterAndTransformUnigrams(rawUserSymbolUnigrams, excludedValues, insertedValues); - } - - if (m_cnsModel.hasUnigramsForKey(key) && m_cnsEnabled) - { - std::vector rawCNSUnigrams = m_cnsModel.unigramsForKey(key); - cnsUnigrams = filterAndTransformUnigrams(rawCNSUnigrams, excludedValues, insertedValues); - } - - allUnigrams.insert(allUnigrams.begin(), userUnigrams.begin(), userUnigrams.end()); - allUnigrams.insert(allUnigrams.end(), cnsUnigrams.begin(), cnsUnigrams.end()); - allUnigrams.insert(allUnigrams.begin(), miscUnigrams.begin(), miscUnigrams.end()); - allUnigrams.insert(allUnigrams.end(), userSymbolUnigrams.begin(), userSymbolUnigrams.end()); - allUnigrams.insert(allUnigrams.end(), symbolUnigrams.begin(), symbolUnigrams.end()); - return allUnigrams; -} - -bool LMInstantiator::hasUnigramsForKey(const std::string &key) -{ - if (key == " ") - { - return true; - } - - if (!m_excludedPhrases.hasUnigramsForKey(key)) - { - return m_userPhrases.hasUnigramsForKey(key) || m_languageModel.hasUnigramsForKey(key); - } - - return unigramsForKey(key).size() > 0; -} - -void LMInstantiator::setPhraseReplacementEnabled(bool enabled) -{ - m_phraseReplacementEnabled = enabled; -} - -bool LMInstantiator::phraseReplacementEnabled() -{ - return m_phraseReplacementEnabled; -} - -void LMInstantiator::setCNSEnabled(bool enabled) -{ - m_cnsEnabled = enabled; -} - -bool LMInstantiator::cnsEnabled() -{ - return m_cnsEnabled; -} - -void 
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// Refactored from the ObjCpp-version of this class by:
// (c) 2011 and onwards The OpenVanilla Project (MIT License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

// NOTE: We still keep some of the comments left by Zonble,
// regardless that he is not in charge of this Swift module.

import Foundation

extension vChewing {
	/// LMInstantiator is a facade for managing a set of models including
	/// the input method language model, user phrases and excluded phrases.
	///
	/// It is the primary model class that the input controller and grammar builder
	/// of vChewing talks to. When the grammar builder starts to build a sentence
	/// from a series of BPMF readings, it passes the readings to the model to see
	/// if there are valid unigrams, and use returned unigrams to produce the final
	/// results.
	///
	/// LMInstantiator combine and transform the unigrams from the primary language
	/// model and user phrases. The process is
	///
	/// 1) Get the original unigrams.
	/// 2) Drop the unigrams whose value is contained in the exclusion map.
	/// 3) Replace the values of the unigrams using the phrase replacement map.
	/// 4) Replace the values of the unigrams using an external converter lambda.
	/// 5) Drop the duplicated phrases.
	///
	/// The controller can ask the model to load the primary input method language
	/// model while launching and to load the user phrases anytime if the custom
	/// files are modified. It does not keep the reference of the data pathes but
	/// you have to pass the paths when you ask it to do loading.
	public class LMInstantiator: Megrez.LanguageModel {
		// Switches recording which optional features are currently enabled.
		public var isPhraseReplacementEnabled = false
		public var isCNSEnabled = false
		public var isSymbolEnabled = false

		// Factory (built-in) language models.
		/// With `reverse`, column 1 is the reading, column 2 the corresponding Han
		/// characters and column 3 the optional weight.
		/// Without `reverse`, column 1 is the Han characters, column 2 the
		/// corresponding reading and column 3 the optional weight.
		let lmCore = LMCore(reverse: false, consolidate: false, defaultScore: -9.5, forceDefaultScore: false)
		let lmMisc = LMCore(reverse: true, consolidate: false, defaultScore: -1, forceDefaultScore: false)
		let lmSymbols = LMLite(defaultScore: -13.0, consolidate: true)
		let lmCNS = LMLite(defaultScore: -11.0, consolidate: true)

		// User language models.
		let lmUserPhrases = LMLite(defaultScore: 0.0, consolidate: true)
		let lmFiltered = LMLite(defaultScore: 0.0, consolidate: true)
		let lmUserSymbols = LMLite(defaultScore: -12.0, consolidate: true)
		let lmReplacements = LMReplacments()
		let lmAssociates = LMAssociates()

		// The initializer is kept as a placeholder for now.
		override init() {}

		// Every sub language model is closed before this instance goes away.
		deinit {
			lmCore.close()
			lmMisc.close()
			lmSymbols.close()
			lmCNS.close()
			lmUserPhrases.close()
			lmFiltered.close()
			lmUserSymbols.close()
			lmReplacements.close()
			lmAssociates.close()
		}

		// MARK: - Data Loading

		// The function names below are kept unchanged for the time being; the
		// original note says they will be revisited after the legacy code is gone.
		// Each loader reopens its model only if the given path is readable;
		// otherwise the currently loaded data is left untouched.

		/// Whether the primary (core) language model has been loaded.
		public func isDataModelLoaded() -> Bool { lmCore.isLoaded() }
		/// Loads the primary (core) language model from `path`.
		public func loadLanguageModel(path: String) {
			if FileManager.default.isReadableFile(atPath: path) {
				lmCore.close()
				lmCore.open(path)
			}
		}

		/// Whether the CNS data model has been loaded.
		public func isCNSDataLoaded() -> Bool { lmCNS.isLoaded() }
		/// Loads the CNS data model from `path`.
		public func loadCNSData(path: String) {
			if FileManager.default.isReadableFile(atPath: path) {
				lmCNS.close()
				lmCNS.open(path)
			}
		}

		/// Whether the misc data model has been loaded.
		public func isMiscDataLoaded() -> Bool { lmMisc.isLoaded() }
		/// Loads the misc data model from `path`.
		public func loadMiscData(path: String) {
			if FileManager.default.isReadableFile(atPath: path) {
				lmMisc.close()
				lmMisc.open(path)
			}
		}

		/// Whether the built-in symbol data model has been loaded.
		public func isSymbolDataLoaded() -> Bool { lmSymbols.isLoaded() }
		/// Loads the built-in symbol data model from `path`.
		public func loadSymbolData(path: String) {
			if FileManager.default.isReadableFile(atPath: path) {
				lmSymbols.close()
				lmSymbols.open(path)
			}
		}

		/// Loads the user phrases from `path` and the excluded phrases from
		/// `filterPath`. The two files are handled independently of each other.
		public func loadUserPhrases(path: String, filterPath: String) {
			if FileManager.default.isReadableFile(atPath: path) {
				lmUserPhrases.close()
				lmUserPhrases.open(path)
			}
			if FileManager.default.isReadableFile(atPath: filterPath) {
				lmFiltered.close()
				lmFiltered.open(filterPath)
			}
		}

		/// Loads the user symbol data from `path`.
		public func loadUserSymbolData(path: String) {
			if FileManager.default.isReadableFile(atPath: path) {
				lmUserSymbols.close()
				lmUserSymbols.open(path)
			}
		}

		/// Loads the user associated phrases from `path`.
		public func loadUserAssociatedPhrases(path: String) {
			if FileManager.default.isReadableFile(atPath: path) {
				lmAssociates.close()
				lmAssociates.open(path)
			}
		}

		/// Loads the phrase replacement table from `path`.
		public func loadPhraseReplacementMap(path: String) {
			if FileManager.default.isReadableFile(atPath: path) {
				lmReplacements.close()
				lmReplacements.open(path)
			}
		}

		// MARK: - Core Functions (Public)

		/// Not implemented since we do not have data to provide bigram function.
		// public func bigramsForKeys(preceedingKey: String, key: String) -> [Megrez.Bigram] { }

		/// Returns a list of available unigram for the given key.
		/// - Parameter key: Represents the BPMF reading or a symbol key.
		///   For instance, if you pass "ㄉㄨㄟˇ", it returns "㨃" and other possible candidates.
		/// - Returns: The filtered, de-duplicated unigrams in the order: user phrases,
		///   misc, core, CNS, user symbols, built-in symbols.
		override open func unigramsFor(key: String) -> [Megrez.Unigram] {
			if key == " " {
				// The space key always yields exactly one fixed unigram.
				let spaceUnigram = Megrez.Unigram(
					keyValue: Megrez.KeyValuePair(key: " ", value: " "),
					score: 0
				)
				return [spaceUnigram]
			}

			// One container per language model.
			var coreUnigrams: [Megrez.Unigram] = []
			var miscUnigrams: [Megrez.Unigram] = []
			var symbolUnigrams: [Megrez.Unigram] = []
			var userUnigrams: [Megrez.Unigram] = []
			var userSymbolUnigrams: [Megrez.Unigram] = []
			var cnsUnigrams: [Megrez.Unigram] = []

			// Pairs already emitted (shared by all models below to prevent duplicates)
			// and pairs the user asked to exclude.
			var insertedPairs: Set<Megrez.KeyValuePair> = []
			var filteredPairs: Set<Megrez.KeyValuePair> = []

			if lmFiltered.hasUnigramsFor(key: key) {
				for unigram in lmFiltered.unigramsFor(key: key) {
					filteredPairs.insert(unigram.keyValue)
				}
			}

			// User phrases are processed exactly once. Running this step a second time
			// would find every pair already present in `insertedPairs` and replace
			// `userUnigrams` with an empty array.
			if lmUserPhrases.hasUnigramsFor(key: key) {
				// Reversed so that entries on later lines of the user phrase file get
				// higher priority; this lets a newly appended phrase override older ones.
				let rawUserUnigrams: [Megrez.Unigram] = Array(lmUserPhrases.unigramsFor(key: key).reversed())
				userUnigrams = filterAndTransform(
					unigrams: rawUserUnigrams, filter: filteredPairs, inserted: &insertedPairs
				)
			}

			if lmMisc.hasUnigramsFor(key: key) {
				let rawMiscUnigrams: [Megrez.Unigram] = lmMisc.unigramsFor(key: key)
				miscUnigrams = filterAndTransform(
					unigrams: rawMiscUnigrams, filter: filteredPairs, inserted: &insertedPairs
				)
			}

			if lmCore.hasUnigramsFor(key: key) {
				let rawCoreUnigrams: [Megrez.Unigram] = lmCore.unigramsFor(key: key)
				coreUnigrams = filterAndTransform(
					unigrams: rawCoreUnigrams, filter: filteredPairs, inserted: &insertedPairs
				)
			}

			if isSymbolEnabled {
				if lmUserSymbols.hasUnigramsFor(key: key) {
					let rawUserSymbolUnigrams: [Megrez.Unigram] = lmUserSymbols.unigramsFor(key: key)
					userSymbolUnigrams = filterAndTransform(
						unigrams: rawUserSymbolUnigrams, filter: filteredPairs, inserted: &insertedPairs
					)
				} else {
					IME.prtDebugIntel("Not found in UserSymbolUnigram: \(key)")
				}

				if lmSymbols.hasUnigramsFor(key: key) {
					let rawSymbolUnigrams: [Megrez.Unigram] = lmSymbols.unigramsFor(key: key)
					symbolUnigrams = filterAndTransform(
						unigrams: rawSymbolUnigrams, filter: filteredPairs, inserted: &insertedPairs
					)
				} else {
					// This branch is about the built-in symbol model, not the user phrases.
					IME.prtDebugIntel("Not found in SymbolUnigram: \(key)")
				}
			}

			if lmCNS.hasUnigramsFor(key: key), isCNSEnabled {
				let rawCNSUnigrams: [Megrez.Unigram] = lmCNS.unigramsFor(key: key)
				cnsUnigrams = filterAndTransform(
					unigrams: rawCNSUnigrams, filter: filteredPairs, inserted: &insertedPairs
				)
			}

			return userUnigrams + miscUnigrams + coreUnigrams + cnsUnigrams + userSymbolUnigrams + symbolUnigrams
		}

		/// If the model has unigrams for the given key.
		/// - Parameter key: The key.
		override open func hasUnigramsFor(key: String) -> Bool {
			if key == " " { return true }

			// Without exclusions for this key a cheap existence check is enough.
			if !lmFiltered.hasUnigramsFor(key: key) {
				return lmUserPhrases.hasUnigramsFor(key: key) || lmCore.hasUnigramsFor(key: key)
			}

			// Otherwise the exclusions might remove everything, so do the full lookup.
			return !unigramsFor(key: key).isEmpty
		}

		/// Returns the associated phrases for `key`, or an empty array if there are none.
		public func associatedPhrasesForKey(_ key: String) -> [String] {
			lmAssociates.valuesFor(key: key) ?? []
		}

		/// Whether there are associated phrases for `key`.
		public func hasAssociatedPhrasesForKey(_ key: String) -> Bool {
			lmAssociates.hasValuesFor(key: key)
		}

		// MARK: - Core Functions (Private)

		/// Filters and converts the input unigrams and returns a new list of unigrams.
		/// - Parameters:
		///   - unigrams: The unigrams to be processed.
		///   - filteredPairs: Pairs to drop; matched against the ORIGINAL key/value
		///     pair, i.e. before phrase replacement.
		///   - insertedPairs: Pairs already present in the results; matched against
		///     the pair AFTER phrase replacement. It helps to prevent duplicated
		///     unigrams. Please note that the method has a side effect that it
		///     inserts pairs into `insertedPairs`.
		/// - Returns: The surviving unigrams in input order, with their scores kept.
		func filterAndTransform(
			unigrams: [Megrez.Unigram],
			filter filteredPairs: Set<Megrez.KeyValuePair>,
			inserted insertedPairs: inout Set<Megrez.KeyValuePair>
		) -> [Megrez.Unigram] {
			var results: [Megrez.Unigram] = []

			for unigram in unigrams {
				let pairToDealWith: Megrez.KeyValuePair = unigram.keyValue
				if filteredPairs.contains(pairToDealWith) {
					continue
				}

				var pair: Megrez.KeyValuePair = pairToDealWith
				if isPhraseReplacementEnabled {
					// The replacement table maps a phrase to its replacement, so it is
					// looked up by the pair's value (the phrase), not by its key (the
					// reading) — the same lookup the former C++ implementation performed.
					let replacement = lmReplacements.valuesFor(key: pair.value)
					if !replacement.isEmpty {
						IME.prtDebugIntel(replacement)
						pair.value = replacement
					}
				}

				if !insertedPairs.contains(pair) {
					results.append(Megrez.Unigram(keyValue: pair, score: unigram.score))
					insertedPairs.insert(pair)
				}
			}
			return results
		}
	}
}
b/Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.h similarity index 100% rename from Source/Modules/LangModelRelated/SubLanguageModels/ParselessPhraseDB.h rename to Source/Modules/LangModelRelated/OldFileReferences/ParselessPhraseDB.h diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.cpp b/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.cpp similarity index 98% rename from Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.cpp rename to Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.cpp index 4ae8443f..8b4fb8ac 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.cpp +++ b/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.cpp @@ -34,7 +34,7 @@ namespace vChewing { // About 20 generations. -static const double DecayThreshould = 1.0 / 1048576.0; +static const double DecayThreshold = 1.0 / 1048576.0; static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda); static bool IsEndingPunctuation(const std::string &value); @@ -126,7 +126,7 @@ void UserOverrideModel::Observation::update(const std::string &candidate, double static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda) { double decay = exp((timestamp - eventTimestamp) * lambda); - if (decay < DecayThreshould) + if (decay < DecayThreshold) { return 0.0; } diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.h b/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.h similarity index 100% rename from Source/Modules/LangModelRelated/SubLanguageModels/UserOverrideModel.h rename to Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.h diff --git a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift new file mode 100644 index 
00000000..2e894159 --- /dev/null +++ b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift @@ -0,0 +1,124 @@ +// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Refactored from the ObjCpp-version of this class by: +// (c) 2011 and onwards The OpenVanilla Project (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/
+
+import Foundation
+
+extension vChewing {
+  /// Associated-phrase table loaded from a text file of space-separated `key value` rows.
+  public class LMAssociates {
+    /// Parsed rows, grouped by key in file order.
+    var keyValueMap: [String: [Megrez.KeyValuePair]] = [:]
+    /// Raw file contents; filled by `open(_:)` and cleared again once parsing finishes.
+    var theData: String = ""
+
+    public init() {
+      keyValueMap = [:]
+      theData = ""
+    }
+
+    deinit {
+      if isLoaded() {
+        close()
+      }
+    }
+
+    /// Whether at least one row is currently loaded.
+    public func isLoaded() -> Bool {
+      !keyValueMap.isEmpty
+    }
+
+    /// Loads the table from the file at `path`.
+    ///
+    /// The file is first passed through `LMConsolidator.fixEOF` and
+    /// `LMConsolidator.consolidate`; loading is abandoned if either reports failure.
+    /// Lines starting with `#` are ignored. Columns after the second are ignored.
+    /// - Parameter path: Path of the data file.
+    /// - Returns: `false` when data is already loaded, when a consolidation step fails,
+    ///   when the file cannot be read as UTF-8, or when it is empty; `true` otherwise.
+    @discardableResult public func open(_ path: String) -> Bool {
+      if isLoaded() {
+        return false
+      }
+
+      if !LMConsolidator.fixEOF(path: path) {
+        return false
+      }
+      if !LMConsolidator.consolidate(path: path, pragma: true) {
+        return false
+      }
+
+      do {
+        theData = try String(contentsOfFile: path, encoding: .utf8)
+      } catch {
+        IME.prtDebugIntel("\(error)")
+        IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.")
+        return false
+      }
+
+      guard !theData.isEmpty else {
+        return false
+      }
+
+      let arrData = theData.components(separatedBy: "\n")
+      for (lineID, lineContent) in arrData.enumerated() where !lineContent.hasPrefix("#") {
+        // Split once and reuse the columns.
+        let columns = lineContent.components(separatedBy: " ")
+        guard columns.count >= 2 else {
+          // Blank lines (such as the one produced by a trailing newline) are skipped
+          // silently; any other line lacking a second column is reported.
+          if !lineContent.isEmpty {
+            IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)")
+          }
+          continue
+        }
+        let currentKV = Megrez.KeyValuePair(key: columns[0], value: columns[1])
+        keyValueMap[currentKV.key, default: []].append(currentKV)
+      }
+      IME.prtDebugIntel("\(keyValueMap.count) entries of data loaded from: \(path)")
+      theData = ""
+      return true
+    }
+
+    /// Discards all loaded rows.
+    public func close() {
+      if isLoaded() {
+        keyValueMap.removeAll()
+      }
+    }
+
+    /// Returns the values stored for `key`, in load order.
+    /// The result is never `nil`; an unknown key yields an empty array.
+    public func valuesFor(key: String) -> [String]? {
+      (keyValueMap[key] ?? []).map { $0.value }
+    }
+
+    /// Whether at least one value is stored for `key`.
+    public func hasValuesFor(key: String) -> Bool {
+      !(keyValueMap[key]?.isEmpty ?? true)
+    }
+  }
+}
diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCore.swift b/Source/Modules/LangModelRelated/SubLMs/lmCore.swift
new file mode 100644
index 00000000..0ee1cf8a
--- /dev/null
+++ b/Source/Modules/LangModelRelated/SubLMs/lmCore.swift
@@ -0,0 +1,168 @@
+// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
+/*
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+1. The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+2. No trademark license is granted to use the trade names, trademarks, service
+marks, or product names of Contributor, except as required to fulfill notice
+requirements above.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+// vChewing's redesigned language model for the factory dictionary data.
+// Entries are not sorted; parsing relies on Swift's built-in String handling.
+
+import Foundation
+
+extension vChewing {
+  /// Language model loaded from a text file of `column1 column2 [score]` rows.
+  public class LMCore {
+    /// Parsed unigrams, grouped by lookup key in file order.
+    var keyValueScoreMap: [String: [Megrez.Unigram]] = [:]
+    /// Raw file contents; filled by `open(_:)` and cleared again once parsing finishes.
+    var theData: String = ""
+    /// When true, column 2 is used as the key and column 1 as the value.
+    var shouldReverse: Bool = false
+    /// When true, `open(_:)` runs the LMConsolidator steps on the file before reading it.
+    var allowConsolidation: Bool = false
+    /// Score given to rows without a usable third column.
+    var defaultScore: Double = 0
+    /// When true, the third column is ignored and every row gets `defaultScore`.
+    var shouldForceDefaultScore: Bool = false
+
+    public init(
+      reverse: Bool = false, consolidate: Bool = false, defaultScore scoreDefault: Double = 0,
+      forceDefaultScore: Bool = false
+    ) {
+      keyValueScoreMap = [:]
+      theData = ""
+      allowConsolidation = consolidate
+      shouldReverse = reverse
+      defaultScore = scoreDefault
+      shouldForceDefaultScore = forceDefaultScore
+    }
+
+    deinit {
+      if isLoaded() {
+        close()
+      }
+    }
+
+    /// Whether at least one row is currently loaded.
+    public func isLoaded() -> Bool {
+      !keyValueScoreMap.isEmpty
+    }
+
+    /// Loads the model from the file at `path`.
+    ///
+    /// Lines starting with `#` are ignored. Tabs are treated as spaces. Columns after the
+    /// third are ignored.
+    /// - Parameter path: Path of the data file.
+    /// - Returns: `false` when data is already loaded, when an enabled consolidation step
+    ///   fails, when the file cannot be read as UTF-8, or when it is empty; `true` otherwise.
+    @discardableResult public func open(_ path: String) -> Bool {
+      if isLoaded() {
+        return false
+      }
+
+      if allowConsolidation {
+        if !LMConsolidator.fixEOF(path: path) {
+          return false
+        }
+        if !LMConsolidator.consolidate(path: path, pragma: true) {
+          return false
+        }
+      }
+
+      do {
+        theData = try String(contentsOfFile: path, encoding: .utf8)
+      } catch {
+        IME.prtDebugIntel("\(error)")
+        // Name the file that failed instead of an unrelated data set.
+        IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).")
+        return false
+      }
+
+      guard !theData.isEmpty else {
+        return false
+      }
+
+      let arrData = theData.components(separatedBy: "\n")
+      for (lineID, rawLine) in arrData.enumerated() where !rawLine.hasPrefix("#") {
+        let lineContent = rawLine.replacingOccurrences(of: "\t", with: " ")
+        // Split once and reuse the columns.
+        let columns = lineContent.components(separatedBy: " ")
+        guard columns.count >= 2 else {
+          // Blank lines (such as the one produced by a trailing newline) are skipped
+          // silently; any other line lacking a second column is reported.
+          if !lineContent.isEmpty {
+            IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)")
+          }
+          continue
+        }
+
+        var score = defaultScore
+        if columns.count >= 3, !shouldForceDefaultScore {
+          if let parsedScore = Double(columns[2]) {
+            score = parsedScore
+          } else {
+            // 1-based line number, matching the "Wrecked" message above.
+            IME.prtDebugIntel("Line #\(lineID + 1) Score Data Wrecked: \(lineContent)")
+          }
+        }
+
+        let key = shouldReverse ? columns[1] : columns[0]
+        let value = shouldReverse ? columns[0] : columns[1]
+        let currentUnigram = Megrez.Unigram(
+          keyValue: Megrez.KeyValuePair(key: key, value: value), score: score
+        )
+        keyValueScoreMap[key, default: []].append(currentUnigram)
+      }
+      IME.prtDebugIntel("\(keyValueScoreMap.count) entries of data loaded from: \(path)")
+      theData = ""
+      return true
+    }
+
+    /// Discards all loaded rows.
+    public func close() {
+      if isLoaded() {
+        keyValueScoreMap.removeAll()
+      }
+    }
+
+    // MARK: - Advanced features
+
+    /// Sends every loaded row, formatted as `key value score`, to the debug log.
+    public func dump() {
+      var strDump = ""
+      for entry in keyValueScoreMap {
+        let rows: [Megrez.Unigram] = entry.1
+        for row in rows {
+          let addline = row.keyValue.key + " " + row.keyValue.value + " " + String(row.score) + "\n"
+          strDump += addline
+        }
+      }
+      IME.prtDebugIntel(strDump)
+    }
+
+    /// Always returns an empty array.
+    open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
+      // The redundant ternary is deliberate: without it the Swift formatting tool mangles
+      // this function body. A plain `[Megrez.Bigram]()` would otherwise be enough.
+      precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
+    }
+
+    /// Returns the unigrams stored for `key`, or an empty array when there are none.
+    open func unigramsFor(key: String) -> [Megrez.Unigram] {
+      keyValueScoreMap[key] ?? [Megrez.Unigram]()
+    }
+
+    /// Whether at least one unigram is stored for `key`.
+    open func hasUnigramsFor(key: String) -> Bool {
+      !(keyValueScoreMap[key]?.isEmpty ?? true)
+    }
+  }
+}
diff --git a/Source/Modules/LangModelRelated/SubLMs/lmLite.swift b/Source/Modules/LangModelRelated/SubLMs/lmLite.swift
new file mode 100644
index 00000000..e06e02eb
--- /dev/null
+++ b/Source/Modules/LangModelRelated/SubLMs/lmLite.swift
@@ -0,0 +1,145 @@
+// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
+// Refactored from the ObjCpp-version of this class by: +// (c) 2011 and onwards The OpenVanilla Project (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/
+
+import Foundation
+
+extension vChewing {
+  /// Score-less language model loaded from a text file of space-separated `value key` rows.
+  /// Every unigram it returns carries `defaultScore`.
+  public class LMLite {
+    /// Parsed rows, grouped by key in file order.
+    var keyValueMap: [String: [Megrez.KeyValuePair]] = [:]
+    /// Score attached to every unigram returned by `unigramsFor(key:)`.
+    var defaultScore: Double = 0
+    /// Raw file contents; filled by `open(_:)` and cleared again once parsing finishes.
+    var theData: String = ""
+    /// When true, `open(_:)` runs the LMConsolidator steps on the file before reading it.
+    var allowConsolidation = false
+
+    public init(defaultScore scoreDefault: Double = 0, consolidate: Bool = false) {
+      keyValueMap = [:]
+      theData = ""
+      defaultScore = scoreDefault
+      allowConsolidation = consolidate
+    }
+
+    deinit {
+      if isLoaded() {
+        close()
+      }
+    }
+
+    /// Whether at least one row is currently loaded.
+    public func isLoaded() -> Bool {
+      !keyValueMap.isEmpty
+    }
+
+    /// Loads the model from the file at `path`.
+    ///
+    /// Lines starting with `#` are ignored. In each row the FIRST column is the value and
+    /// the SECOND column is the key; further columns are ignored.
+    /// - Parameter path: Path of the data file.
+    /// - Returns: `false` when data is already loaded, when an enabled consolidation step
+    ///   fails, when the file cannot be read as UTF-8, or when it is empty; `true` otherwise.
+    @discardableResult public func open(_ path: String) -> Bool {
+      if isLoaded() {
+        return false
+      }
+
+      if allowConsolidation {
+        if !LMConsolidator.fixEOF(path: path) {
+          return false
+        }
+        if !LMConsolidator.consolidate(path: path, pragma: true) {
+          return false
+        }
+      }
+
+      do {
+        theData = try String(contentsOfFile: path, encoding: .utf8)
+      } catch {
+        IME.prtDebugIntel("\(error)")
+        // Name the file that failed instead of an unrelated data set.
+        IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).")
+        return false
+      }
+
+      guard !theData.isEmpty else {
+        return false
+      }
+
+      let arrData = theData.components(separatedBy: "\n")
+      for (lineID, lineContent) in arrData.enumerated() where !lineContent.hasPrefix("#") {
+        // Split once and reuse the columns.
+        let columns = lineContent.components(separatedBy: " ")
+        guard columns.count >= 2 else {
+          // Blank lines (such as the one produced by a trailing newline) are skipped
+          // silently; any other line lacking a second column is reported.
+          if !lineContent.isEmpty {
+            IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)")
+          }
+          continue
+        }
+        // Column order is value first, key second.
+        let currentKV = Megrez.KeyValuePair(key: columns[1], value: columns[0])
+        keyValueMap[currentKV.key, default: []].append(currentKV)
+      }
+      IME.prtDebugIntel("\(keyValueMap.count) entries of data loaded from: \(path)")
+      theData = ""
+      // NOTE(review): dumps the whole table to the debug log for any path containing
+      // "vChewing/"; looks like a debugging aid — confirm it should stay.
+      if path.contains("vChewing/") {
+        dump()
+      }
+      return true
+    }
+
+    /// Discards all loaded rows.
+    public func close() {
+      if isLoaded() {
+        keyValueMap.removeAll()
+      }
+    }
+
+    /// Sends every loaded row, formatted as `key value`, to the debug log.
+    public func dump() {
+      var strDump = ""
+      for entry in keyValueMap {
+        let rows: [Megrez.KeyValuePair] = entry.1
+        for row in rows {
+          let addline = row.key + " " + row.value + "\n"
+          strDump += addline
+        }
+      }
+      IME.prtDebugIntel(strDump)
+    }
+
+    /// Returns one unigram per row stored for `key`, each scored with `defaultScore`.
+    /// An unknown key yields an empty array.
+    public func unigramsFor(key: String) -> [Megrez.Unigram] {
+      (keyValueMap[key] ?? []).map { Megrez.Unigram(keyValue: $0, score: defaultScore) }
+    }
+
+    /// Whether at least one row is stored for `key`.
+    public func hasUnigramsFor(key: String) -> Bool {
+      !(keyValueMap[key]?.isEmpty ?? true)
+    }
+  }
+}
diff --git a/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift
new file mode 100644
index 00000000..d18c21eb
--- /dev/null
+++ b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift
@@ -0,0 +1,115 @@
+// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
+// Refactored from the ObjCpp-version of this class by:
+// (c) 2011 and onwards The OpenVanilla Project (MIT License).
+/*
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+1. The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+2. No trademark license is granted to use the trade names, trademarks, service
+marks, or product names of Contributor, except as required to fulfill notice
+requirements above.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+import Foundation
+
+extension vChewing {
+  /// Replacement table loaded from a text file of space-separated `key value` rows.
+  /// Each key maps to a single value; a later row for the same key overwrites an earlier one.
+  public class LMReplacments {
+    /// Parsed rows: key -> replacement value.
+    var keyValueMap: [String: String] = [:]
+    /// Raw file contents; filled by `open(_:)` and cleared again once parsing finishes.
+    var theData: String = ""
+
+    public init() {
+      keyValueMap = [:]
+      theData = ""
+    }
+
+    deinit {
+      if isLoaded() {
+        close()
+      }
+    }
+
+    /// Whether at least one row is currently loaded.
+    public func isLoaded() -> Bool {
+      !keyValueMap.isEmpty
+    }
+
+    /// Loads the table from the file at `path`.
+    ///
+    /// The file is first passed through `LMConsolidator.fixEOF` and
+    /// `LMConsolidator.consolidate`; loading is abandoned if either reports failure.
+    /// Lines starting with `#` are ignored. Columns after the second are ignored.
+    /// - Parameter path: Path of the data file.
+    /// - Returns: `false` when data is already loaded, when a consolidation step fails,
+    ///   when the file cannot be read as UTF-8, or when it is empty; `true` otherwise.
+    @discardableResult public func open(_ path: String) -> Bool {
+      if isLoaded() {
+        return false
+      }
+
+      if !LMConsolidator.fixEOF(path: path) {
+        return false
+      }
+      if !LMConsolidator.consolidate(path: path, pragma: true) {
+        return false
+      }
+
+      do {
+        theData = try String(contentsOfFile: path, encoding: .utf8)
+      } catch {
+        IME.prtDebugIntel("\(error)")
+        // Name the file that failed instead of an unrelated data set.
+        IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).")
+        return false
+      }
+
+      guard !theData.isEmpty else {
+        return false
+      }
+
+      let arrData = theData.components(separatedBy: "\n")
+      for (lineID, lineContent) in arrData.enumerated() where !lineContent.hasPrefix("#") {
+        // Split once and reuse the columns.
+        let columns = lineContent.components(separatedBy: " ")
+        guard columns.count >= 2 else {
+          // Blank lines (such as the one produced by a trailing newline) are skipped
+          // silently; any other line lacking a second column is reported.
+          if !lineContent.isEmpty {
+            IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)")
+          }
+          continue
+        }
+        keyValueMap[columns[0]] = columns[1]
+      }
+      IME.prtDebugIntel("\(keyValueMap.count) entries of data loaded from: \(path)")
+      theData = ""
+      return true
+    }
+
+    /// Discards all loaded rows.
+    public func close() {
+      if isLoaded() {
+        keyValueMap.removeAll()
+      }
+    }
+
+    /// Returns the replacement stored for `key`, or an empty string when there is none.
+    public func valuesFor(key: String) -> String {
+      keyValueMap[key] ?? ""
+    }
+  }
+}
diff --git a/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift
new file mode 100644
index 00000000..28183aa8
--- /dev/null
+++ b/Source/Modules/LangModelRelated/SubLMs/lmUserOverride.swift
@@ -0,0 +1,222 @@
+// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
+// Refactored from the ObjCpp-version of this class by:
+// (c) 2011 and onwards The OpenVanilla Project (MIT License).
+/*
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+1. The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+2. No trademark license is granted to use the trade names, trademarks, service
+marks, or product names of Contributor, except as required to fulfill notice
+requirements above.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+import Foundation
+
+extension vChewing {
+  /// Remembers which candidate the user picked in a given context and suggests it again.
+  ///
+  /// Observations are keyed by a context string built from the walked nodes around the
+  /// cursor and kept in a least-recently-used list bounded by `mutCapacity`.
+  public class LMUserOverride {
+    /// Decay factors below this value make `getScore` return 0.
+    let kDecayThreshold: Double = 1.0 / 1_048_576.0
+
+    /// - Parameters:
+    ///   - capacity: Maximum number of context keys kept. With the default of 0 every
+    ///     observation is evicted immediately.
+    ///   - decayExponent: Passed to `getScore` as `lambda`.
+    public init(capacity: Int = 0, decayExponent: Double = 0) {
+      mutCapacity = capacity
+      mutDecayExponent = decayExponent
+    }
+
+    /// Records that `candidate` was chosen in the context described by the arguments.
+    /// - Parameters:
+    ///   - walkedNodes: Walked node anchors of the current composition.
+    ///   - cursorIndex: Cursor position, compared against accumulated spanning lengths.
+    ///   - candidate: The candidate that was chosen.
+    ///   - timestamp: Time of the event, in the same unit later passed to `suggest`.
+    public func observe(
+      walkedNodes: [Megrez.NodeAnchor],
+      cursorIndex: Int,
+      candidate: String,
+      timestamp: Double
+    ) {
+      let key = getWalkedNodesToKey(walkedNodes: walkedNodes, cursorIndex: cursorIndex)
+
+      // Observation is a value type: update a local copy, then store that copy in BOTH the
+      // list and the map so the two never disagree.
+      var observation = mutLRUMap[key]?.observation ?? Observation()
+      observation.update(candidate: candidate, timestamp: timestamp)
+      let pair = KeyObservationPair(key: key, observation: observation)
+
+      // Move-to-front: drop any stale entry for this key (pairs compare by key only) before
+      // inserting the fresh one at the head.
+      if let staleIndex = mutLRUList.firstIndex(of: pair) {
+        mutLRUList.remove(at: staleIndex)
+      }
+      mutLRUList.insert(pair, at: 0)
+      mutLRUMap[key] = pair
+
+      // The list grows by at most one entry per call, so evicting one tail entry is enough.
+      if mutLRUList.count > mutCapacity, let evicted = mutLRUList.popLast() {
+        mutLRUMap[evicted.key] = nil
+      }
+    }
+
+    /// Returns the best-scoring remembered candidate for the given context.
+    /// - Parameters:
+    ///   - walkedNodes: Walked node anchors of the current composition.
+    ///   - cursorIndex: Cursor position, compared against accumulated spanning lengths.
+    ///   - timestamp: Current time, in the same unit used when observing.
+    /// - Returns: The candidate with the highest positive score, or an empty string when the
+    ///   context is unknown or no candidate scores above zero.
+    public func suggest(
+      walkedNodes: [Megrez.NodeAnchor],
+      cursorIndex: Int,
+      timestamp: Double
+    ) -> String {
+      let key = getWalkedNodesToKey(walkedNodes: walkedNodes, cursorIndex: cursorIndex)
+      guard let keyValuePair = mutLRUMap[key] else {
+        return ""
+      }
+      let observation = keyValuePair.observation
+
+      var candidate = ""
+      var score = 0.0
+      for (overrideCandidate, overrideNeta) in observation.overrides {
+        let overrideScore = getScore(
+          eventCount: overrideNeta.count,
+          totalCount: observation.count,
+          eventTimestamp: overrideNeta.timestamp,
+          timestamp: timestamp,
+          lambda: mutDecayExponent
+        )
+
+        // The running best starts at 0.0, so a zero score can never win.
+        if overrideScore > score {
+          candidate = overrideCandidate
+          score = overrideScore
+        }
+      }
+      return candidate
+    }
+
+    /// Whether `value` is one of the punctuation marks that end a context.
+    func isEndingPunctuation(value: String) -> Bool {
+      [",", "。", "!", "?", "」", "』", "”", "’"].contains(value)
+    }
+
+    /// Scores one override as (relative frequency) x (exponential time decay).
+    /// - Parameters:
+    ///   - eventCount: How often this candidate was chosen.
+    ///   - totalCount: How often any candidate was chosen in this context.
+    ///   - eventTimestamp: When this candidate was last chosen.
+    ///   - timestamp: Current time.
+    ///   - lambda: Decay exponent; the decay factor is `exp((timestamp - eventTimestamp) * lambda)`.
+    /// - Returns: 0 when `totalCount` is not positive or the decay factor is below
+    ///   `kDecayThreshold`; otherwise `eventCount / totalCount * decay`.
+    public func getScore(
+      eventCount: Int,
+      totalCount: Int,
+      eventTimestamp: Double,
+      timestamp: Double,
+      lambda: Double
+    ) -> Double {
+      // Avoid dividing by zero when nothing has been observed.
+      guard totalCount > 0 else {
+        return 0.0
+      }
+
+      let decay = exp((timestamp - eventTimestamp) * lambda)
+      if decay < kDecayThreshold {
+        return 0.0
+      }
+
+      let prob = Double(eventCount) / Double(totalCount)
+      return prob * decay
+    }
+
+    /// Builds the context key `(anterior,prev,current)` for the node under the cursor.
+    ///
+    /// `current` is the key of the last walked node reaching the cursor. `prev` and
+    /// `anterior` describe the one and two nodes before it as `(key,value)`, or `()` when
+    /// there is no such node or an ending punctuation mark cuts the context off.
+    /// - Returns: The context key, or an empty string when there is no usable current node.
+    func getWalkedNodesToKey(
+      walkedNodes: [Megrez.NodeAnchor], cursorIndex: Int
+    ) -> String {
+      // Collect anchors from the start until their spanning lengths reach the cursor.
+      var coveredAnchors: [Megrez.NodeAnchor] = []
+      var spannedLength = 0
+      for anchor in walkedNodes {
+        coveredAnchors.append(anchor)
+        spannedLength += anchor.spanningLength
+        if spannedLength >= cursorIndex {
+          break
+        }
+      }
+
+      // Walk backwards from the node under the cursor.
+      var remaining: [Megrez.NodeAnchor] = Array(coveredAnchors.reversed())
+      guard let currentNode = remaining.first?.node else {
+        return ""
+      }
+      let current = currentNode.currentKeyValue().key
+      remaining.removeFirst()
+
+      // Consumes the next preceding anchor and renders it as one context slot.
+      // The slot is read from that anchor's own node, never from the current node.
+      func consumeContextSlot() -> String {
+        guard let anchor = remaining.first else {
+          return "()"
+        }
+        // An anchor without a node, or an ending punctuation mark, terminates the context:
+        // this slot and any slot before it become "()".
+        guard let node = anchor.node else {
+          remaining = []
+          return "()"
+        }
+        let keyValue = node.currentKeyValue()
+        if isEndingPunctuation(value: keyValue.value) {
+          remaining = []
+          return "()"
+        }
+        remaining.removeFirst()
+        return "(\(keyValue.key),\(keyValue.value))"
+      }
+
+      let prev = consumeContextSlot()
+      let anterior = consumeContextSlot()
+
+      return "(\(anterior),\(prev),\(current))"
+    }
+
+    // MARK: - Private Structures
+
+    /// Maximum number of context keys kept in the LRU list.
+    var mutCapacity: Int
+    /// Decay exponent handed to `getScore` by `suggest`.
+    var mutDecayExponent: Double
+    /// Context entries, most recently observed first.
+    var mutLRUList = [KeyObservationPair]()
+    /// Context key -> entry; mirrors `mutLRUList`.
+    var mutLRUMap: [String: KeyObservationPair] = [:]
+
+    /// Usage record of one candidate within one context.
+    struct Override {
+      var count: Int = 0
+      var timestamp: Double = 0.0
+    }
+
+    /// All candidates chosen in one context.
+    struct Observation {
+      /// Total number of choices recorded in this context.
+      var count: Int = 0
+      /// Candidate -> usage record.
+      var overrides: [String: Override] = [:]
+
+      /// Records one more choice of `candidate` at `timestamp`, creating its record if needed.
+      mutating func update(candidate: String, timestamp: Double) {
+        count += 1
+        // Override is a value type: modify a copy and store it back.
+        var neta = overrides[candidate] ?? Override()
+        neta.timestamp = timestamp
+        neta.count += 1
+        overrides[candidate] = neta
+      }
+    }
+
+    /// LRU entry; two entries are equal when their keys are equal.
+    struct KeyObservationPair: Equatable {
+      var key: String
+      var observation: Observation
+
+      var hashValue: Int { key.hashValue }
+
+      init(key: String, observation: Observation) {
+        self.key = key
+        self.observation = observation
+      }
+
+      static func == (lhs: KeyObservationPair, rhs: KeyObservationPair) -> Bool {
+        lhs.key == rhs.key
+      }
+    }
+  }
+}
diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h b/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h
deleted file mode 100644
index 63f6aca1..00000000
--- a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License).
-// All possible vChewing-specific modifications are of:
-// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
-/*
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions:
-
-1. The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-2. No trademark license is granted to use the trade names, trademarks, service
-marks, or product names of Contributor, except as required to fulfill notice
-requirements above.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef ASSOCIATEDPHRASES_H -#define ASSOCIATEDPHRASES_H - -#include -#include -#include -#include - -namespace vChewing -{ - -class AssociatedPhrases -{ - public: - AssociatedPhrases(); - ~AssociatedPhrases(); - - const bool isLoaded(); - bool open(const char *path); - void close(); - const std::vector valuesForKey(const std::string &key); - const bool hasValuesForKey(const std::string &key); - - protected: - struct Row - { - Row(std::string_view &k, std::string_view &v) : key(k), value(v) - { - } - std::string_view key; - std::string_view value; - }; - - std::map> keyRowMap; - - int fd; - void *data; - size_t length; -}; - -} // namespace vChewing - -#endif /* AssociatedPhrases_hpp */ diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm b/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm deleted file mode 100644 index ac0f223e..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/AssociatedPhrases.mm +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. 
The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "AssociatedPhrases.h" -#include "vChewing-Swift.h" -#include -#include -#include -#include -#include - -#include "KeyValueBlobReader.h" -#include "LMConsolidator.h" - -namespace vChewing -{ - -AssociatedPhrases::AssociatedPhrases() : fd(-1), data(0), length(0) -{ -} - -AssociatedPhrases::~AssociatedPhrases() -{ - if (data) - { - close(); - } -} - -const bool AssociatedPhrases::isLoaded() -{ - if (data) - { - return true; - } - return false; -} - -bool AssociatedPhrases::open(const char *path) -{ - if (data) - { - return false; - } - - LMConsolidator::FixEOF(path); - LMConsolidator::ConsolidateContent(path, true); - - fd = ::open(path, O_RDONLY); - if (fd == -1) - { - printf("open:: file not exist"); - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) - { - printf("open:: cannot open file"); - return false; - } - - length = (size_t)sb.st_size; - - data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) - { - ::close(fd); - return false; - } - - KeyValueBlobReader reader(static_cast(data), length); - KeyValueBlobReader::KeyValue keyValue; - KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) - { - 
keyRowMap[keyValue.key].emplace_back(keyValue.key, keyValue.value); - } - // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) - { - // close(); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "AssociatedPhrases: Failed at Open Step 5. On Error Resume Next.\n"); - // return false; - } - return true; -} - -void AssociatedPhrases::close() -{ - if (data) - { - munmap(data, length); - ::close(fd); - data = 0; - } - - keyRowMap.clear(); -} - -const std::vector AssociatedPhrases::valuesForKey(const std::string &key) -{ - std::vector v; - auto iter = keyRowMap.find(key); - if (iter != keyRowMap.end()) - { - const std::vector &rows = iter->second; - for (const auto &row : rows) - { - std::string_view value = row.value; - v.push_back({value.data(), value.size()}); - } - } - return v; -} - -const bool AssociatedPhrases::hasValuesForKey(const std::string &key) -{ - return keyRowMap.find(key) != keyRowMap.end(); -} - -}; // namespace vChewing diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h deleted file mode 100644 index 46625e74..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. 
The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef CoreLM_H -#define CoreLM_H - -#include "LanguageModel.h" -#include -#include -#include -#include - -// this class relies on the fact that we have a space-separated data -// format, and we use mmap and zero-out the separators and line feeds -// to avoid creating new string objects; the parser is a simple DFA - -using namespace std; -using namespace Gramambular; - -namespace vChewing -{ - -class CoreLM : public Gramambular::LanguageModel -{ - public: - CoreLM(); - ~CoreLM(); - - bool isLoaded(); - bool open(const char *path); - void close(); - void dump(); - - virtual const std::vector bigramsForKeys(const string &preceedingKey, const string &key); - virtual const std::vector unigramsForKey(const string &key); - virtual bool hasUnigramsForKey(const string &key); - - protected: - struct CStringCmp - { - bool operator()(const char *s1, const char *s2) const - { - return strcmp(s1, s2) < 0; - } - }; - - struct Row - { - const char *key; - const char *value; - const char *logProbability; - }; - - map, CStringCmp> keyRowMap; - int fd; - void *data; - size_t length; -}; - -}; // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm 
b/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm deleted file mode 100644 index de24f821..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/CoreLM.mm +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#include "CoreLM.h" -#include "vChewing-Swift.h" -#include -#include -#include -#include -#include -#include - -using namespace Gramambular; - -vChewing::CoreLM::CoreLM() : fd(-1), data(0), length(0) -{ -} - -vChewing::CoreLM::~CoreLM() -{ - if (data) - { - close(); - } -} - -bool vChewing::CoreLM::isLoaded() -{ - if (data) - { - return true; - } - return false; -} - -bool vChewing::CoreLM::open(const char *path) -{ - if (data) - { - return false; - } - - fd = ::open(path, O_RDONLY); - if (fd == -1) - { - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) - { - return false; - } - - length = (size_t)sb.st_size; - - data = mmap(NULL, length, PROT_WRITE, MAP_PRIVATE, fd, 0); - if (!data) - { - ::close(fd); - return false; - } - - // Regular expression for parsing: - // (\n*\w\w*\s\w\w*\s\w\w*)*$ - // - // Expanded as DFA (in Graphviz): - // - // digraph finite_state_machine { - // rankdir = LR; - // size = "10"; - // - // node [shape = doublecircle]; End; - // node [shape = circle]; - // - // Start -> End [ label = "EOF"]; - // Start -> Error [ label = "\\s" ]; - // Start -> Start [ label = "\\n" ]; - // Start -> 1 [ label = "\\w" ]; - // - // 1 -> Error [ label = "\\n, EOF" ]; - // 1 -> 2 [ label = "\\s" ]; - // 1 -> 1 [ label = "\\w" ]; - // - // 2 -> Error [ label = "\\n, \\s, EOF" ]; - // 2 -> 3 [ label = "\\w" ]; - // - // 3 -> Error [ label = "\\n, EOF "]; - // 3 -> 4 [ label = "\\s" ]; - // 3 -> 3 [ label = "\\w" ]; - // - // 4 -> Error [ label = "\\n, \\s, EOF" ]; - // 4 -> 5 [ label = "\\w" ]; - // - // 5 -> Error [ label = "\\s, EOF" ]; - // 5 -> Start [ label = "\\n" ]; - // 5 -> 5 [ label = "\\w" ]; - // } - - char *head = (char *)data; - char *end = (char *)data + length; - char c; - Row row; - -start: - // EOF -> end - if (head == end) - { - goto end; - } - - c = *head; - // \s -> error - if (c == ' ') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // Start: \\s -> error"); - goto error; - } - // 
\n -> start - else if (c == '\n') - { - head++; - goto start; - } - - // \w -> record column star, state1 - row.value = head; - head++; - // fall through to state 1 - -state1: - // EOF -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: EOF -> error"); - goto error; - } - - c = *head; - // \n -> error - if (c == '\n') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 1: \\n -> error"); - goto error; - } - // \s -> state2 + zero out ending + record column start - else if (c == ' ') - { - *head = 0; - head++; - row.key = head; - goto state2; - } - - // \w -> state1 - head++; - goto state1; - -state2: - // eof -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: EOF -> error"); - goto error; - } - - c = *head; - // \n, \s -> error - if (c == '\n' || c == ' ') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 2: \\n \\s -> error"); - goto error; - } - - // \w -> state3 - head++; - - // fall through to state 3 - -state3: - // eof -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: EOF -> error"); - goto error; - } - - c = *head; - - // \n -> error - if (c == '\n') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 3: \\n -> error"); - goto error; - } - // \s -> state4 + zero out ending + record column start - else if (c == ' ') - { - *head = 0; - head++; - row.logProbability = head; - goto state4; - } - - // \w -> state3 - head++; - goto state3; - -state4: - // eof -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 4: EOF -> error"); - goto error; - } - - c = *head; - // \n, \s -> error - if (c == '\n' || c == ' ') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, 
"vChewingDebug: CoreLM // state 4: \\n \\s -> error"); - goto error; - } - - // \w -> state5 - head++; - - // fall through to state 5 - -state5: - // eof -> error - if (head == end) - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: EOF -> error"); - goto error; - } - - c = *head; - // \s -> error - if (c == ' ') - { - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // state 5: \\s -> error"); - goto error; - } - // \n -> start - else if (c == '\n') - { - *head = 0; - head++; - keyRowMap[row.key].push_back(row); - goto start; - } - - // \w -> state 5 - head++; - goto state5; - -error: - close(); - return false; - -end: - static const char *space = " "; - static const char *zero = "0.0"; - Row emptyRow; - emptyRow.key = space; - emptyRow.value = space; - emptyRow.logProbability = zero; - keyRowMap[space].push_back(emptyRow); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "vChewingDebug: CoreLM // File Load Complete."); - return true; -} - -void vChewing::CoreLM::close() -{ - if (data) - { - munmap(data, length); - ::close(fd); - data = 0; - } - - keyRowMap.clear(); -} - -void vChewing::CoreLM::dump() -{ - size_t rows = 0; - for (map>::const_iterator i = keyRowMap.begin(), e = keyRowMap.end(); i != e; ++i) - { - const vector &r = (*i).second; - for (vector::const_iterator ri = r.begin(), re = r.end(); ri != re; ++ri) - { - const Row &row = *ri; - cerr << row.key << " " << row.value << " " << row.logProbability << "\n"; - rows++; - } - } -} - -const std::vector vChewing::CoreLM::bigramsForKeys(const string &preceedingKey, const string &key) -{ - return std::vector(); -} - -const std::vector vChewing::CoreLM::unigramsForKey(const string &key) -{ - std::vector v; - map>::const_iterator i = keyRowMap.find(key.c_str()); - - if (i != keyRowMap.end()) - { - for (vector::const_iterator ri = (*i).second.begin(), re = (*i).second.end(); ri != re; ++ri) - { - Unigram g; - const Row &r = *ri; - 
g.keyValue.key = r.key; - g.keyValue.value = r.value; - g.score = atof(r.logProbability); - v.push_back(g); - } - } - - return v; -} - -bool vChewing::CoreLM::hasUnigramsForKey(const string &key) -{ - return keyRowMap.find(key.c_str()) != keyRowMap.end(); -} diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h deleted file mode 100644 index 7c385307..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/SymbolLM.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef SYMBOLLM_H -#define SYMBOLLM_H - -#include "LanguageModel.h" -#include "UserPhrasesLM.h" -#include -#include -#include - -namespace vChewing -{ - -class SymbolLM : public UserPhrasesLM -{ - public: - bool allowConsolidation() override - { - return false; - } - float overridedValue() override - { - return -13.0; - } -}; - -} // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm b/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm deleted file mode 100644 index 7fde339b..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/PhraseReplacementMap.mm +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "PhraseReplacementMap.h" -#include "vChewing-Swift.h" -#include -#include -#include -#include -#include -#include - -#include "KeyValueBlobReader.h" -#include "LMConsolidator.h" - -namespace vChewing -{ - -using std::string; - -PhraseReplacementMap::PhraseReplacementMap() : fd(-1), data(0), length(0) -{ -} - -PhraseReplacementMap::~PhraseReplacementMap() -{ - if (data) - { - close(); - } -} - -bool PhraseReplacementMap::open(const char *path) -{ - if (data) - { - return false; - } - - LMConsolidator::FixEOF(path); - LMConsolidator::ConsolidateContent(path, true); - - fd = ::open(path, O_RDONLY); - if (fd == -1) - { - printf("open:: file not exist"); - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) - { - printf("open:: cannot open file"); - return false; - } - - length = (size_t)sb.st_size; - - data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) - { - ::close(fd); - return false; - } - - KeyValueBlobReader reader(static_cast(data), length); - KeyValueBlobReader::KeyValue keyValue; - KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) - { - keyValueMap[keyValue.key] = keyValue.value; - } - // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) - { - // close(); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "PhraseReplacementMap: Failed at Open Step 5. 
On Error Resume Next.\n"); - // return false; - } - return true; -} - -void PhraseReplacementMap::close() -{ - if (data) - { - munmap(data, length); - ::close(fd); - data = 0; - } - - keyValueMap.clear(); -} - -const std::string PhraseReplacementMap::valueForKey(const std::string &key) -{ - auto iter = keyValueMap.find(key); - if (iter != keyValueMap.end()) - { - const std::string_view v = iter->second; - return {v.data(), v.size()}; - } - return string(""); -} - -} diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h deleted file mode 100644 index 4c27d748..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef USERPHRASESLM_H -#define USERPHRASESLM_H - -#include "LanguageModel.h" -#include -#include -#include - -namespace vChewing -{ - -class UserPhrasesLM : public Gramambular::LanguageModel -{ - public: - UserPhrasesLM(); - ~UserPhrasesLM(); - - bool isLoaded(); - bool open(const char *path); - void close(); - void dump(); - - virtual bool allowConsolidation() - { - return true; - } - - virtual float overridedValue() - { - return 0.0; - } - - virtual const std::vector bigramsForKeys(const std::string &preceedingKey, - const std::string &key); - virtual const std::vector unigramsForKey(const std::string &key); - virtual bool hasUnigramsForKey(const std::string &key); - - protected: - struct Row - { - Row(std::string_view &k, std::string_view &v) : key(k), value(v) - { - } - std::string_view key; - std::string_view value; - }; - - std::map> keyRowMap; - int fd; - void *data; - size_t length; -}; - -} // namespace vChewing - -#endif diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm b/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm deleted file mode 100644 index e3565d0e..00000000 --- a/Source/Modules/LangModelRelated/SubLanguageModels/UserPhrasesLM.mm +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
-/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#include "UserPhrasesLM.h" -#include "vChewing-Swift.h" -#include -#include -#include -#include -#include -#include - -#include "KeyValueBlobReader.h" -#include "LMConsolidator.h" - -namespace vChewing -{ - -UserPhrasesLM::UserPhrasesLM() : fd(-1), data(0), length(0) -{ -} - -UserPhrasesLM::~UserPhrasesLM() -{ - if (data) - { - close(); - } -} - -bool UserPhrasesLM::isLoaded() -{ - if (data) - { - return true; - } - return false; -} - -bool UserPhrasesLM::open(const char *path) -{ - if (data) - { - return false; - } - - if (allowConsolidation()) - { - LMConsolidator::FixEOF(path); - LMConsolidator::ConsolidateContent(path, true); - } - - fd = ::open(path, O_RDONLY); - if (fd == -1) - { - printf("open:: file not exist"); - return false; - } - - struct stat sb; - if (fstat(fd, &sb) == -1) - { - printf("open:: cannot open file"); - return false; - } - - length = (size_t)sb.st_size; - - data = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0); - if (!data) - { - ::close(fd); - return false; - } - - KeyValueBlobReader reader(static_cast(data), length); - KeyValueBlobReader::KeyValue keyValue; - KeyValueBlobReader::State state; - while ((state = reader.Next(&keyValue)) == KeyValueBlobReader::State::HAS_PAIR) - { - // We invert the key and value, since in user phrases, "key" is the phrase value, and "value" is the BPMF - // reading. - keyRowMap[keyValue.value].emplace_back(keyValue.value, keyValue.key); - } - // 下面這一段或許可以做成開關、來詢問是否對使用者語彙採取寬鬆策略(哪怕有行內容寫錯也會放行) - if (state == KeyValueBlobReader::State::ERROR) - { - // close(); - if (mgrPrefs.isDebugModeEnabled) - syslog(LOG_CONS, "UserPhrasesLM: Failed at Open Step 5. 
On Error Resume Next.\n"); - // return false; - } - return true; -} - -void UserPhrasesLM::close() -{ - if (data) - { - munmap(data, length); - ::close(fd); - data = 0; - } - - keyRowMap.clear(); -} - -void UserPhrasesLM::dump() -{ - for (const auto &entry : keyRowMap) - { - const std::vector &rows = entry.second; - for (const auto &row : rows) - { - std::cerr << row.key << " " << row.value << "\n"; - } - } -} - -const std::vector UserPhrasesLM::bigramsForKeys(const std::string &preceedingKey, - const std::string &key) -{ - return std::vector(); -} - -const std::vector UserPhrasesLM::unigramsForKey(const std::string &key) -{ - std::vector v; - auto iter = keyRowMap.find(key); - if (iter != keyRowMap.end()) - { - const std::vector &rows = iter->second; - for (const auto &row : rows) - { - Gramambular::Unigram g; - g.keyValue.key = row.key; - g.keyValue.value = row.value; - g.score = overridedValue(); - v.push_back(g); - } - } - - return v; -} - -bool UserPhrasesLM::hasUnigramsForKey(const std::string &key) -{ - return keyRowMap.find(key) != keyRowMap.end(); -} - -}; // namespace vChewing diff --git a/Source/Modules/LangModelRelated/mgrLangModel.h b/Source/Modules/LangModelRelated/mgrLangModel.h deleted file mode 100644 index b7bfbae1..00000000 --- a/Source/Modules/LangModelRelated/mgrLangModel.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. 
The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#import "KeyHandler.h" -#import - -NS_ASSUME_NONNULL_BEGIN - -@interface mgrLangModel : NSObject - -+ (void)loadDataModel:(InputMode)mode; -+ (void)loadUserPhrases; -+ (void)loadUserAssociatedPhrases; -+ (void)loadUserPhraseReplacement; - -+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase - inputMode:(InputMode)mode - key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:)); -+ (void)consolidateGivenFile:(NSString *)path shouldCheckPragma:(BOOL)shouldCheckPragma; -+ (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled; -+ (void)setCNSEnabled:(BOOL)cnsEnabled; -+ (void)setSymbolEnabled:(BOOL)symbolEnabled; - -@end - -/// The following methods are merely for testing. -@interface mgrLangModel () -+ (void)loadDataModels; -@end - -NS_ASSUME_NONNULL_END diff --git a/Source/Modules/LangModelRelated/mgrLangModel.mm b/Source/Modules/LangModelRelated/mgrLangModel.mm deleted file mode 100644 index 065a5afb..00000000 --- a/Source/Modules/LangModelRelated/mgrLangModel.mm +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). 
-// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#import "mgrLangModel.h" -#import "LMConsolidator.h" -#import "mgrLangModel_Privates.h" -#import "vChewing-Swift.h" - -static const int kUserOverrideModelCapacity = 500; -static const double kObservedOverrideHalflife = 5400.0; - -static vChewing::LMInstantiator gLangModelCHT; -static vChewing::LMInstantiator gLangModelCHS; -static vChewing::UserOverrideModel gUserOverrideModelCHT(kUserOverrideModelCapacity, kObservedOverrideHalflife); -static vChewing::UserOverrideModel gUserOverrideModelCHS(kUserOverrideModelCapacity, kObservedOverrideHalflife); - -@implementation mgrLangModel - -// 這個函數無法遷移至 Swift -static void LTLoadLanguageModelFile(NSString *filenameWithoutExtension, vChewing::LMInstantiator &lm) -{ - NSString *dataPath = [mgrLangModel getBundleDataPath:filenameWithoutExtension]; - lm.loadLanguageModel([dataPath UTF8String]); -} - -// 這個函數無法遷移至 Swift -+ (void)loadDataModels -{ - if (!gLangModelCHT.isDataModelLoaded()) - LTLoadLanguageModelFile(@"data-cht", gLangModelCHT); - if (!gLangModelCHT.isMiscDataLoaded()) - gLangModelCHT.loadMiscData([[self getBundleDataPath:@"data-zhuyinwen"] UTF8String]); - if (!gLangModelCHT.isSymbolDataLoaded()) - gLangModelCHT.loadSymbolData([[self getBundleDataPath:@"data-symbols"] UTF8String]); - if (!gLangModelCHT.isCNSDataLoaded()) - gLangModelCHT.loadCNSData([[self getBundleDataPath:@"char-kanji-cns"] UTF8String]); - - // ----------------- - if (!gLangModelCHS.isDataModelLoaded()) - LTLoadLanguageModelFile(@"data-chs", gLangModelCHS); - if (!gLangModelCHS.isMiscDataLoaded()) - gLangModelCHS.loadMiscData([[self getBundleDataPath:@"data-zhuyinwen"] UTF8String]); - if (!gLangModelCHS.isSymbolDataLoaded()) - gLangModelCHS.loadSymbolData([[self getBundleDataPath:@"data-symbols"] UTF8String]); - if (!gLangModelCHS.isCNSDataLoaded()) - gLangModelCHS.loadCNSData([[self getBundleDataPath:@"char-kanji-cns"] UTF8String]); -} - -// 這個函數無法遷移至 Swift -+ (void)loadDataModel:(InputMode)mode -{ - if ([mode isEqualToString:imeModeCHT]) - { 
- if (!gLangModelCHT.isDataModelLoaded()) - LTLoadLanguageModelFile(@"data-cht", gLangModelCHT); - if (!gLangModelCHT.isMiscDataLoaded()) - gLangModelCHT.loadMiscData([[self getBundleDataPath:@"data-zhuyinwen"] UTF8String]); - if (!gLangModelCHT.isSymbolDataLoaded()) - gLangModelCHT.loadSymbolData([[self getBundleDataPath:@"data-symbols"] UTF8String]); - if (!gLangModelCHT.isCNSDataLoaded()) - gLangModelCHT.loadCNSData([[self getBundleDataPath:@"char-kanji-cns"] UTF8String]); - } - - if ([mode isEqualToString:imeModeCHS]) - { - if (!gLangModelCHS.isDataModelLoaded()) - LTLoadLanguageModelFile(@"data-chs", gLangModelCHS); - if (!gLangModelCHS.isMiscDataLoaded()) - gLangModelCHS.loadMiscData([[self getBundleDataPath:@"data-zhuyinwen"] UTF8String]); - if (!gLangModelCHS.isSymbolDataLoaded()) - gLangModelCHS.loadSymbolData([[self getBundleDataPath:@"data-symbols"] UTF8String]); - if (!gLangModelCHS.isCNSDataLoaded()) - gLangModelCHS.loadCNSData([[self getBundleDataPath:@"char-kanji-cns"] UTF8String]); - } -} - -// 這個函數無法遷移至 Swift -+ (void)loadUserPhrases -{ - gLangModelCHT.loadUserPhrases([[self userPhrasesDataPath:imeModeCHT] UTF8String], - [[self excludedPhrasesDataPath:imeModeCHT] UTF8String]); - gLangModelCHS.loadUserPhrases([[self userPhrasesDataPath:imeModeCHS] UTF8String], - [[self excludedPhrasesDataPath:imeModeCHS] UTF8String]); - gLangModelCHT.loadUserSymbolData([[self userSymbolDataPath:imeModeCHT] UTF8String]); - gLangModelCHS.loadUserSymbolData([[self userSymbolDataPath:imeModeCHS] UTF8String]); -} - -// 這個函數無法遷移至 Swift -+ (void)loadUserAssociatedPhrases -{ - gLangModelCHT.loadUserAssociatedPhrases([[self userAssociatedPhrasesDataPath:imeModeCHT] UTF8String]); - gLangModelCHS.loadUserAssociatedPhrases([[self userAssociatedPhrasesDataPath:imeModeCHS] UTF8String]); -} - -// 這個函數無法遷移至 Swift -+ (void)loadUserPhraseReplacement -{ - gLangModelCHT.loadPhraseReplacementMap([[self phraseReplacementDataPath:imeModeCHT] UTF8String]); - 
gLangModelCHS.loadPhraseReplacementMap([[self phraseReplacementDataPath:imeModeCHS] UTF8String]); -} - -// 這個函數無法遷移至 Swift -+ (BOOL)checkIfUserPhraseExist:(NSString *)userPhrase - inputMode:(InputMode)mode - key:(NSString *)key NS_SWIFT_NAME(checkIfUserPhraseExist(userPhrase:mode:key:)) -{ - string unigramKey = string(key.UTF8String); - vector unigrams = [mode isEqualToString:imeModeCHT] ? gLangModelCHT.unigramsForKey(unigramKey) - : gLangModelCHS.unigramsForKey(unigramKey); - string userPhraseString = string(userPhrase.UTF8String); - for (auto unigram : unigrams) - { - if (unigram.keyValue.value == userPhraseString) - { - return YES; - } - } - return NO; -} - -// 這個函數無法遷移至 Swift -+ (void)consolidateGivenFile:(NSString *)path shouldCheckPragma:(BOOL)shouldCheckPragma -{ - vChewing::LMConsolidator::ConsolidateContent([path UTF8String], shouldCheckPragma); -} - -// 這個函數無法遷移至 Swift -+ (vChewing::LMInstantiator *)lmCHT -{ - return &gLangModelCHT; -} - -// 這個函數無法遷移至 Swift -+ (vChewing::LMInstantiator *)lmCHS -{ - return &gLangModelCHS; -} - -// 這個函數無法遷移至 Swift -+ (vChewing::UserOverrideModel *)userOverrideModelCHT -{ - return &gUserOverrideModelCHT; -} - -// 這個函數無法遷移至 Swift -+ (vChewing::UserOverrideModel *)userOverrideModelCHS -{ - return &gUserOverrideModelCHS; -} - -// 這個函數無法遷移至 Swift -+ (void)setPhraseReplacementEnabled:(BOOL)phraseReplacementEnabled -{ - gLangModelCHT.setPhraseReplacementEnabled(phraseReplacementEnabled); - gLangModelCHS.setPhraseReplacementEnabled(phraseReplacementEnabled); -} - -// 這個函數無法遷移至 Swift -+ (void)setCNSEnabled:(BOOL)cnsEnabled -{ - gLangModelCHT.setCNSEnabled(cnsEnabled); - gLangModelCHS.setCNSEnabled(cnsEnabled); -} - -// 這個函數無法遷移至 Swift -+ (void)setSymbolEnabled:(BOOL)symbolEnabled -{ - gLangModelCHT.setSymbolEnabled(symbolEnabled); - gLangModelCHS.setSymbolEnabled(symbolEnabled); -} - -@end diff --git a/Source/Modules/LangModelRelated/mgrLangModel.swift b/Source/Modules/LangModelRelated/mgrLangModel.swift index 7e4df687..d12ee66c 
100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.swift +++ b/Source/Modules/LangModelRelated/mgrLangModel.swift @@ -26,7 +26,158 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import Cocoa -@objc extension mgrLangModel { +/// The static manager mgrLangModel cannot hold the model instances below itself. +/// Hence these instances have to live outside of mgrLangModel. +/// They are also not exposed for arbitrary access from outside this file. +/// mgrLangModel only provides a few getters through which other controller modules read them. + +private let kUserOverrideModelCapacity: Int = 500 +private let kObservedOverrideHalflife: Double = 5400.0 + +private var gLangModelCHS = vChewing.LMInstantiator() +private var gLangModelCHT = vChewing.LMInstantiator() +private var gUserOverrideModelCHS = vChewing.LMUserOverride( + capacity: kUserOverrideModelCapacity, decayExponent: kObservedOverrideHalflife +) +private var gUserOverrideModelCHT = vChewing.LMUserOverride( + capacity: kUserOverrideModelCapacity, decayExponent: kObservedOverrideHalflife +) + +@objc class mgrLangModel: NSObject { + /// Getters letting other controller modules read the file-private variables declared outside this class. + public static var lmCHS: vChewing.LMInstantiator { gLangModelCHS } + public static var lmCHT: vChewing.LMInstantiator { gLangModelCHT } + public static var uomCHS: vChewing.LMUserOverride { gUserOverrideModelCHS } + public static var uomCHT: vChewing.LMUserOverride { gUserOverrideModelCHT } + + // MARK: - Functions reacting directly with language models. 
+ + static func loadCoreLanguageModelFile(filenameSansExtension: String, langModel lm: inout vChewing.LMInstantiator) { + let dataPath: String = mgrLangModel.getBundleDataPath(filenameSansExtension) + lm.loadLanguageModel(path: dataPath) + } + + public static func loadDataModels() { + if !gLangModelCHT.isDataModelLoaded() { + loadCoreLanguageModelFile(filenameSansExtension: "data-cht", langModel: &gLangModelCHT) + } + if !gLangModelCHT.isMiscDataLoaded() { + gLangModelCHT.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) + } + if !gLangModelCHT.isSymbolDataLoaded() { + gLangModelCHT.loadSymbolData(path: getBundleDataPath("data-symbols")) + } + if !gLangModelCHT.isCNSDataLoaded() { + gLangModelCHT.loadCNSData(path: getBundleDataPath("char-kanji-cns")) + } + + // ----------------- + if !gLangModelCHS.isDataModelLoaded() { + loadCoreLanguageModelFile(filenameSansExtension: "data-chs", langModel: &gLangModelCHS) + } + if !gLangModelCHS.isMiscDataLoaded() { + gLangModelCHS.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) + } + if !gLangModelCHS.isSymbolDataLoaded() { + gLangModelCHS.loadSymbolData(path: getBundleDataPath("data-symbols")) + } + if !gLangModelCHS.isCNSDataLoaded() { + gLangModelCHS.loadCNSData(path: getBundleDataPath("char-kanji-cns")) + } + } + + public static func loadDataModel(_ mode: InputMode) { + if mode == InputMode.imeModeCHS { + if !gLangModelCHS.isDataModelLoaded() { + loadCoreLanguageModelFile(filenameSansExtension: "data-chs", langModel: &gLangModelCHS) + } + if !gLangModelCHS.isMiscDataLoaded() { + gLangModelCHS.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) + } + if !gLangModelCHS.isSymbolDataLoaded() { + gLangModelCHS.loadSymbolData(path: getBundleDataPath("data-symbols")) + } + if !gLangModelCHS.isCNSDataLoaded() { + gLangModelCHS.loadCNSData(path: getBundleDataPath("char-kanji-cns")) + } + } else if mode == InputMode.imeModeCHT { + if !gLangModelCHT.isDataModelLoaded() { + 
loadCoreLanguageModelFile(filenameSansExtension: "data-cht", langModel: &gLangModelCHT) + } + if !gLangModelCHT.isMiscDataLoaded() { + gLangModelCHT.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) + } + if !gLangModelCHT.isSymbolDataLoaded() { + gLangModelCHT.loadSymbolData(path: getBundleDataPath("data-symbols")) + } + if !gLangModelCHT.isCNSDataLoaded() { + gLangModelCHT.loadCNSData(path: getBundleDataPath("char-kanji-cns")) + } + } + } + + public static func loadUserPhrases() { + gLangModelCHT.loadUserPhrases( + path: userPhrasesDataPath(InputMode.imeModeCHT), // user phrases; the user symbol file is loaded separately below + filterPath: excludedPhrasesDataPath(InputMode.imeModeCHT) + ) + gLangModelCHS.loadUserPhrases( + path: userPhrasesDataPath(InputMode.imeModeCHS), + filterPath: excludedPhrasesDataPath(InputMode.imeModeCHS) + ) + gLangModelCHT.loadUserSymbolData(path: userSymbolDataPath(InputMode.imeModeCHT)) + gLangModelCHS.loadUserSymbolData(path: userSymbolDataPath(InputMode.imeModeCHS)) + } + + public static func loadUserAssociatedPhrases() { + gLangModelCHT.loadUserAssociatedPhrases( + path: mgrLangModel.userAssociatedPhrasesDataPath(InputMode.imeModeCHT) + ) + gLangModelCHS.loadUserAssociatedPhrases( + path: mgrLangModel.userAssociatedPhrasesDataPath(InputMode.imeModeCHS) // each mode's file goes into its own model + ) + } + + public static func loadUserPhraseReplacement() { + gLangModelCHT.loadPhraseReplacementMap( + path: mgrLangModel.phraseReplacementDataPath(InputMode.imeModeCHT) + ) + gLangModelCHS.loadPhraseReplacementMap( + path: mgrLangModel.phraseReplacementDataPath(InputMode.imeModeCHS) // each mode's file goes into its own model + ) + } + + public static func checkIfUserPhraseExist( + userPhrase: String, + mode: InputMode, + key unigramKey: String + ) -> Bool { + let unigrams: [Megrez.Unigram] = + (mode == InputMode.imeModeCHT) + ? 
gLangModelCHT.unigramsFor(key: unigramKey) : gLangModelCHS.unigramsFor(key: unigramKey) + for unigram in unigrams { + if unigram.keyValue.value == userPhrase { + return true + } + } + return false + } + + public static func setPhraseReplacementEnabled(_ state: Bool) { + gLangModelCHT.isPhraseReplacementEnabled = state + gLangModelCHS.isPhraseReplacementEnabled = state + } + + public static func setCNSEnabled(_ state: Bool) { + gLangModelCHT.isCNSEnabled = state + gLangModelCHS.isCNSEnabled = state + } + + public static func setSymbolEnabled(_ state: Bool) { + gLangModelCHT.isSymbolEnabled = state + gLangModelCHS.isSymbolEnabled = state + } + // MARK: - 獲取當前輸入法封包內的原廠核心語彙檔案所在路徑 static func getBundleDataPath(_ filenameSansExt: String) -> String { @@ -233,7 +384,9 @@ import Cocoa // We enforce the format consolidation here, since the pragma header // will let the UserPhraseLM bypasses the consolidating process on load. - consolidate(givenFile: path, shouldCheckPragma: false) + if !vChewing.LMConsolidator.consolidate(path: path, pragma: false) { + return false + } // We use FSEventStream to monitor possible changes of the user phrase folder, hence the // lack of the needs of manually load data here unless FSEventStream is disabled by user. diff --git a/Source/Modules/LangModelRelated/mgrLangModel_Privates.h b/Source/Modules/LangModelRelated/mgrLangModel_Privates.h deleted file mode 100644 index cc42ca2a..00000000 --- a/Source/Modules/LangModelRelated/mgrLangModel_Privates.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
-/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#import "LMInstantiator.h" -#import "UserOverrideModel.h" -#import "mgrLangModel.h" - -NS_ASSUME_NONNULL_BEGIN - -@interface mgrLangModel () -@property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHT; -@property(class, readonly, nonatomic) vChewing::LMInstantiator *lmCHS; -@property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHS; -@property(class, readonly, nonatomic) vChewing::UserOverrideModel *userOverrideModelCHT; -@end - -NS_ASSUME_NONNULL_END diff --git a/Source/Modules/LanguageParsers/Gramambular/Bigram.h b/Source/Modules/LanguageParsers/Gramambular/Bigram.h deleted file mode 100644 index a4b8c8b2..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Bigram.h +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef BIGRAM_H_ -#define BIGRAM_H_ - -#include - -#include "KeyValuePair.h" - -namespace Gramambular -{ -class Bigram -{ - public: - Bigram(); - - KeyValuePair preceedingKeyValue; - KeyValuePair keyValue; - double score; - - bool operator==(const Bigram &another) const; - bool operator<(const Bigram &another) const; -}; - -inline std::ostream &operator<<(std::ostream &stream, const Bigram &gram) -{ - std::streamsize p = stream.precision(); - stream.precision(6); - stream << "(" << gram.keyValue << "|" << gram.preceedingKeyValue << "," << gram.score << ")"; - stream.precision(p); - return stream; -} - -inline std::ostream &operator<<(std::ostream &stream, const std::vector &grams) -{ - stream << "[" << grams.size() << "]=>{"; - - size_t index = 0; - - for (std::vector::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index) - { - stream << index << "=>"; - stream << *gi; - if (gi + 1 != grams.end()) - { - stream << ","; - } - } - - stream << "}"; - return stream; -} - -inline Bigram::Bigram() : score(0.0) -{ -} - -inline bool Bigram::operator==(const Bigram &another) const -{ - return preceedingKeyValue == another.preceedingKeyValue && keyValue == another.keyValue && score == another.score; -} - -inline bool Bigram::operator<(const Bigram &another) const -{ - if (preceedingKeyValue < another.preceedingKeyValue) - { - return true; - } - else if (preceedingKeyValue == another.preceedingKeyValue) - { - if (keyValue < another.keyValue) - { - return true; - } - else if (keyValue == another.keyValue) - { - return score < another.score; - } - return false; - } - - return false; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h 
b/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h deleted file mode 100644 index 12046b15..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/BlockReadingBuilder.h +++ /dev/null @@ -1,242 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef BLOCKREADINGBUILDER_H_ -#define BLOCKREADINGBUILDER_H_ - -#include -#include - -#include "Grid.h" -#include "LanguageModel.h" - -namespace Gramambular -{ - -class BlockReadingBuilder -{ - public: - explicit BlockReadingBuilder(LanguageModel *lm); - void clear(); - - size_t length() const; - size_t cursorIndex() const; - void setCursorIndex(size_t newIndex); - void insertReadingAtCursor(const std::string &reading); - bool deleteReadingBeforeCursor(); // backspace - bool deleteReadingAfterCursor(); // delete - - bool removeHeadReadings(size_t count); - - void setJoinSeparator(const std::string &separator); - const std::string joinSeparator() const; - - std::vector readings() const; - - Grid &grid(); - - protected: - void build(); - - static const std::string Join(std::vector::const_iterator begin, - std::vector::const_iterator end, const std::string &separator); - - // 規定最多可以組成的詞的字數上限為 10 - static const size_t MaximumBuildSpanLength = 10; - - size_t m_cursorIndex; - std::vector m_readings; - - Grid m_grid; - LanguageModel *m_LM; - std::string m_joinSeparator; -}; - -inline BlockReadingBuilder::BlockReadingBuilder(LanguageModel *lm) : m_LM(lm), m_cursorIndex(0) -{ -} - -inline void BlockReadingBuilder::clear() -{ - m_cursorIndex = 0; - m_readings.clear(); - m_grid.clear(); -} - -inline size_t BlockReadingBuilder::length() const -{ - return m_readings.size(); -} - -inline size_t BlockReadingBuilder::cursorIndex() const -{ - return m_cursorIndex; -} - -inline void BlockReadingBuilder::setCursorIndex(size_t newIndex) -{ - m_cursorIndex = newIndex > m_readings.size() ? 
m_readings.size() : newIndex; -} - -inline void BlockReadingBuilder::insertReadingAtCursor(const std::string &reading) -{ - m_readings.insert(m_readings.begin() + m_cursorIndex, reading); - - m_grid.expandGridByOneAtLocation(m_cursorIndex); - build(); - m_cursorIndex++; -} - -inline std::vector BlockReadingBuilder::readings() const -{ - return m_readings; -} - -inline bool BlockReadingBuilder::deleteReadingBeforeCursor() -{ - if (!m_cursorIndex) - { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex - 1, m_readings.begin() + m_cursorIndex); - m_cursorIndex--; - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; -} - -inline bool BlockReadingBuilder::deleteReadingAfterCursor() -{ - if (m_cursorIndex == m_readings.size()) - { - return false; - } - - m_readings.erase(m_readings.begin() + m_cursorIndex, m_readings.begin() + m_cursorIndex + 1); - m_grid.shrinkGridByOneAtLocation(m_cursorIndex); - build(); - return true; -} - -inline bool BlockReadingBuilder::removeHeadReadings(size_t count) -{ - if (count > length()) - { - return false; - } - - for (size_t i = 0; i < count; i++) - { - if (m_cursorIndex) - { - m_cursorIndex--; - } - m_readings.erase(m_readings.begin(), m_readings.begin() + 1); - m_grid.shrinkGridByOneAtLocation(0); - build(); - } - - return true; -} - -inline void BlockReadingBuilder::setJoinSeparator(const std::string &separator) -{ - m_joinSeparator = separator; -} - -inline const std::string BlockReadingBuilder::joinSeparator() const -{ - return m_joinSeparator; -} - -inline Grid &BlockReadingBuilder::grid() -{ - return m_grid; -} - -inline void BlockReadingBuilder::build() -{ - if (!m_LM) - { - return; - } - - size_t begin = 0; - size_t end = m_cursorIndex + MaximumBuildSpanLength; - - if (m_cursorIndex < MaximumBuildSpanLength) - { - begin = 0; - } - else - { - begin = m_cursorIndex - MaximumBuildSpanLength; - } - - if (end > m_readings.size()) - { - end = m_readings.size(); - } - - for (size_t p = 
begin; p < end; p++) - { - for (size_t q = 1; q <= MaximumBuildSpanLength && p + q <= end; q++) - { - std::string combinedReading = Join(m_readings.begin() + p, m_readings.begin() + p + q, m_joinSeparator); - if (!m_grid.hasNodeAtLocationSpanningLengthMatchingKey(p, q, combinedReading)) - { - std::vector unigrams = m_LM->unigramsForKey(combinedReading); - - if (unigrams.size() > 0) - { - Node n(combinedReading, unigrams, std::vector()); - m_grid.insertNode(n, p, q); - } - } - } - } -} - -inline const std::string BlockReadingBuilder::Join(std::vector::const_iterator begin, - std::vector::const_iterator end, - const std::string &separator) -{ - std::string result; - for (std::vector::const_iterator iter = begin; iter != end;) - { - result += *iter; - ++iter; - if (iter != end) - { - result += separator; - } - } - return result; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Grid.h b/Source/Modules/LanguageParsers/Gramambular/Grid.h deleted file mode 100644 index 5a39fe7a..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Grid.h +++ /dev/null @@ -1,313 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. 
No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef GRID_H_ -#define GRID_H_ - -#include -#include -#include - -#include "NodeAnchor.h" -#include "Span.h" - -namespace Gramambular -{ - -class Grid -{ - public: - void clear(); - void insertNode(const Node &node, size_t location, size_t spanningLength); - bool hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, const std::string &key); - - void expandGridByOneAtLocation(size_t location); - void shrinkGridByOneAtLocation(size_t location); - - size_t width() const; - std::vector nodesEndingAt(size_t location); - std::vector nodesCrossingOrEndingAt(size_t location); - - // "Freeze" the node with the unigram that represents the selected candidate - // value. After this, the node that contains the unigram will always be - // evaluated to that unigram, while all other overlapping nodes will be reset - // to their initial state (that is, if any of those nodes were "frozen" or - // fixed, they will be unfrozen.) - NodeAnchor fixNodeSelectedCandidate(size_t location, const std::string &value); - - // Similar to fixNodeSelectedCandidate, but instead of "freezing" the node, - // only boost the unigram that represents the value with an overriding score. - // This has the same side effect as fixNodeSelectedCandidate, which is that - // all other overlapping nodes will be reset to their initial state. 
- void overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, float overridingScore); - - std::string dumpDOT() - { - std::stringstream sst; - sst << "digraph {" << std::endl; - sst << "graph [ rankdir=LR ];" << std::endl; - sst << "BOS;" << std::endl; - - for (size_t p = 0; p < m_spans.size(); p++) - { - Span &span = m_spans[p]; - for (size_t ni = 0; ni <= span.maximumLength(); ni++) - { - Node *np = span.nodeOfLength(ni); - if (np) - { - if (!p) - { - sst << "BOS -> " << np->currentKeyValue().value << ";" << std::endl; - } - - sst << np->currentKeyValue().value << ";" << std::endl; - - if (p + ni < m_spans.size()) - { - Span &dstSpan = m_spans[p + ni]; - for (size_t q = 0; q <= dstSpan.maximumLength(); q++) - { - Node *dn = dstSpan.nodeOfLength(q); - if (dn) - { - sst << np->currentKeyValue().value << " -> " << dn->currentKeyValue().value << ";" - << std::endl; - } - } - } - - if (p + ni == m_spans.size()) - { - sst << np->currentKeyValue().value << " -> " - << "EOS;" << std::endl; - } - } - } - } - - sst << "EOS;" << std::endl; - sst << "}"; - return sst.str(); - } - - protected: - std::vector m_spans; -}; - -inline void Grid::clear() -{ - m_spans.clear(); -} - -inline void Grid::insertNode(const Node &node, size_t location, size_t spanningLength) -{ - if (location >= m_spans.size()) - { - size_t diff = location - m_spans.size() + 1; - - for (size_t i = 0; i < diff; i++) - { - m_spans.push_back(Span()); - } - } - - m_spans[location].insertNodeOfLength(node, spanningLength); -} - -inline bool Grid::hasNodeAtLocationSpanningLengthMatchingKey(size_t location, size_t spanningLength, - const std::string &key) -{ - if (location > m_spans.size()) - { - return false; - } - - const Node *n = m_spans[location].nodeOfLength(spanningLength); - if (!n) - { - return false; - } - - return key == n->key(); -} - -inline void Grid::expandGridByOneAtLocation(size_t location) -{ - if (!location || location == m_spans.size()) - { - 
m_spans.insert(m_spans.begin() + location, Span()); - } - else - { - m_spans.insert(m_spans.begin() + location, Span()); - for (size_t i = 0; i < location; i++) - { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(location - i); - } - } -} - -inline void Grid::shrinkGridByOneAtLocation(size_t location) -{ - if (location >= m_spans.size()) - { - return; - } - - m_spans.erase(m_spans.begin() + location); - for (size_t i = 0; i < location; i++) - { - // zaps overlapping spans - m_spans[i].removeNodeOfLengthGreaterThan(location - i); - } -} - -inline size_t Grid::width() const -{ - return m_spans.size(); -} - -// macOS 10.6 開始的內建注音的游標前置選字風格 -inline std::vector Grid::nodesEndingAt(size_t location) -{ - std::vector result; - - if (m_spans.size() && location <= m_spans.size()) - { - for (size_t i = 0; i < location; i++) - { - Span &span = m_spans[i]; - if (i + span.maximumLength() >= location) - { - Node *np = span.nodeOfLength(location - i); - if (np) - { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = location - i; - - result.push_back(na); - } - } - } - } - - return result; -} - -// Windows 版奇摩注音輸入法的游標後置的選字風格。 -// 與微軟新注音相異的是,這個風格允許在詞的中間叫出候選字窗。 -inline std::vector Grid::nodesCrossingOrEndingAt(size_t location) -{ - std::vector result; - - if (m_spans.size() && location <= m_spans.size()) - { - for (size_t i = 0; i < location; i++) - { - Span &span = m_spans[i]; - - if (i + span.maximumLength() >= location) - { - for (size_t j = 1, m = span.maximumLength(); j <= m; j++) - { - if (i + j < location) - { - continue; - } - - Node *np = span.nodeOfLength(j); - if (np) - { - NodeAnchor na; - na.node = np; - na.location = i; - na.spanningLength = location - i; - - result.push_back(na); - } - } - } - } - } - - return result; -} - -// For nodes found at the location, fix their currently-selected candidate using -// the supplied string value. 
-inline NodeAnchor Grid::fixNodeSelectedCandidate(size_t location, const std::string &value) -{ - std::vector nodes = nodesCrossingOrEndingAt(location); - NodeAnchor node; - for (auto nodeAnchor : nodes) - { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) - { - if (candidates[i].value == value) - { - const_cast(nodeAnchor.node)->selectCandidateAtIndex(i); - node = nodeAnchor; - break; - } - } - } - return node; -} - -inline void Grid::overrideNodeScoreForSelectedCandidate(size_t location, const std::string &value, - float overridingScore) -{ - std::vector nodes = nodesCrossingOrEndingAt(location); - for (auto nodeAnchor : nodes) - { - auto candidates = nodeAnchor.node->candidates(); - - // Reset the candidate-fixed state of every node at the location. - const_cast(nodeAnchor.node)->resetCandidate(); - - for (size_t i = 0, c = candidates.size(); i < c; ++i) - { - if (candidates[i].value == value) - { - const_cast(nodeAnchor.node)->selectFloatingCandidateAtIndex(i, overridingScore); - break; - } - } - } -} - -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h b/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h deleted file mode 100644 index 1049c011..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/LanguageModel.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
-/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef LANGUAGEMODEL_H_ -#define LANGUAGEMODEL_H_ - -#include -#include - -#include "Bigram.h" -#include "Unigram.h" - -namespace Gramambular -{ - -class LanguageModel -{ - public: - virtual ~LanguageModel() - { - } - - virtual const std::vector bigramsForKeys(const std::string &preceedingKey, const std::string &key) = 0; - virtual const std::vector unigramsForKey(const std::string &key) = 0; - virtual bool hasUnigramsForKey(const std::string &key) = 0; -}; -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Node.h b/Source/Modules/LanguageParsers/Gramambular/Node.h deleted file mode 100644 index 16b69fdf..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Node.h +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef NODE_H_ -#define NODE_H_ - -#include -#include -#include -#include - -#include "LanguageModel.h" - -namespace Gramambular -{ - -class Node -{ - public: - Node(); - Node(const std::string &key, const std::vector &unigrams, const std::vector &bigrams); - - void primeNodeWithPreceedingKeyValues(const std::vector &keyValues); - - bool isCandidateFixed() const; - const std::vector &candidates() const; - void selectCandidateAtIndex(size_t index = 0, bool fix = true); - void resetCandidate(); - void selectFloatingCandidateAtIndex(size_t index, double score); - - const std::string &key() const; - double score() const; - double scoreForCandidate(const std::string &candidate) const; - const KeyValuePair currentKeyValue() const; - double highestUnigramScore() const; - - protected: - const LanguageModel *m_LM; - - std::string m_key; - double m_score; - - std::vector m_unigrams; - std::vector m_candidates; - std::map m_valueUnigramIndexMap; - std::map> m_preceedingGramBigramMap; - - bool m_candidateFixed; - size_t m_selectedUnigramIndex; - - friend std::ostream &operator<<(std::ostream &stream, const Node &node); -}; - -inline std::ostream &operator<<(std::ostream &stream, const Node &node) -{ - stream << "(node,key:" << node.m_key << ",fixed:" << (node.m_candidateFixed ? 
"true" : "false") - << ",selected:" << node.m_selectedUnigramIndex << "," << node.m_unigrams << ")"; - return stream; -} - -inline Node::Node() : m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) -{ -} - -inline Node::Node(const std::string &key, const std::vector &unigrams, const std::vector &bigrams) - : m_key(key), m_unigrams(unigrams), m_candidateFixed(false), m_selectedUnigramIndex(0), m_score(0.0) -{ - stable_sort(m_unigrams.begin(), m_unigrams.end(), Unigram::ScoreCompare); - - if (m_unigrams.size()) - { - m_score = m_unigrams[0].score; - } - - size_t i = 0; - for (std::vector::const_iterator ui = m_unigrams.begin(); ui != m_unigrams.end(); ++ui) - { - m_valueUnigramIndexMap[(*ui).keyValue.value] = i; - i++; - - m_candidates.push_back((*ui).keyValue); - } - - for (std::vector::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi) - { - m_preceedingGramBigramMap[(*bi).preceedingKeyValue].push_back(*bi); - } -} - -inline void Node::primeNodeWithPreceedingKeyValues(const std::vector &keyValues) -{ - size_t newIndex = m_selectedUnigramIndex; - double max = m_score; - - if (!isCandidateFixed()) - { - for (std::vector::const_iterator kvi = keyValues.begin(); kvi != keyValues.end(); ++kvi) - { - std::map>::const_iterator f = m_preceedingGramBigramMap.find(*kvi); - if (f != m_preceedingGramBigramMap.end()) - { - const std::vector &bigrams = (*f).second; - - for (std::vector::const_iterator bi = bigrams.begin(); bi != bigrams.end(); ++bi) - { - const Bigram &bigram = *bi; - if (bigram.score > max) - { - std::map::const_iterator uf = - m_valueUnigramIndexMap.find((*bi).keyValue.value); - if (uf != m_valueUnigramIndexMap.end()) - { - newIndex = (*uf).second; - max = bigram.score; - } - } - } - } - } - } - - if (m_score != max) - { - m_score = max; - } - - if (newIndex != m_selectedUnigramIndex) - { - m_selectedUnigramIndex = newIndex; - } -} - -inline bool Node::isCandidateFixed() const -{ - return m_candidateFixed; -} - -inline const 
std::vector &Node::candidates() const -{ - return m_candidates; -} - -inline void Node::selectCandidateAtIndex(size_t index, bool fix) -{ - if (index >= m_unigrams.size()) - { - m_selectedUnigramIndex = 0; - } - else - { - m_selectedUnigramIndex = index; - } - - m_candidateFixed = fix; - m_score = 99; -} - -inline void Node::resetCandidate() -{ - m_selectedUnigramIndex = 0; - m_candidateFixed = 0; - if (m_unigrams.size()) - { - m_score = m_unigrams[0].score; - } -} - -inline void Node::selectFloatingCandidateAtIndex(size_t index, double score) -{ - if (index >= m_unigrams.size()) - { - m_selectedUnigramIndex = 0; - } - else - { - m_selectedUnigramIndex = index; - } - m_candidateFixed = false; - m_score = score; -} - -inline const std::string &Node::key() const -{ - return m_key; -} - -inline double Node::score() const -{ - return m_score; -} - -inline double Node::scoreForCandidate(const std::string &candidate) const -{ - for (auto unigram : m_unigrams) - { - if (unigram.keyValue.value == candidate) - { - return unigram.score; - } - } - return 0.0; -} - -inline double Node::highestUnigramScore() const -{ - if (m_unigrams.empty()) - { - return 0.0; - } - return m_unigrams[0].score; -} - -inline const KeyValuePair Node::currentKeyValue() const -{ - if (m_selectedUnigramIndex >= m_unigrams.size()) - { - return KeyValuePair(); - } - else - { - return m_candidates[m_selectedUnigramIndex]; - } -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Span.h b/Source/Modules/LanguageParsers/Gramambular/Span.h deleted file mode 100644 index 57c9a64c..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Span.h +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
-/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef SPAN_H_ -#define SPAN_H_ - -#include -#include -#include - -#include "Node.h" - -namespace Gramambular -{ -class Span -{ - public: - void clear(); - void insertNodeOfLength(const Node &node, size_t length); - void removeNodeOfLengthGreaterThan(size_t length); - - Node *nodeOfLength(size_t length); - size_t maximumLength() const; - - protected: - std::map m_lengthNodeMap; - size_t m_maximumLength = 0; -}; - -inline void Span::clear() -{ - m_lengthNodeMap.clear(); - m_maximumLength = 0; -} - -inline void Span::insertNodeOfLength(const Node &node, size_t length) -{ - m_lengthNodeMap[length] = node; - if (length > m_maximumLength) - { - m_maximumLength = length; - } -} - -inline void Span::removeNodeOfLengthGreaterThan(size_t length) -{ - if (length > m_maximumLength) - { - return; - } - - size_t max = 0; - std::set removeSet; - for (std::map::iterator i = m_lengthNodeMap.begin(), e = m_lengthNodeMap.end(); i != e; ++i) - { - if ((*i).first > length) - { - removeSet.insert((*i).first); - } - else - { - if ((*i).first > max) - { - max = (*i).first; - } - } - } - - for (std::set::iterator i = removeSet.begin(), e = removeSet.end(); i != e; ++i) - { - m_lengthNodeMap.erase(*i); - } - - m_maximumLength = max; -} - -inline Node *Span::nodeOfLength(size_t length) -{ - std::map::iterator f = m_lengthNodeMap.find(length); - return f == m_lengthNodeMap.end() ? 0 : &(*f).second; -} - -inline size_t Span::maximumLength() const -{ - return m_maximumLength; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Unigram.h b/Source/Modules/LanguageParsers/Gramambular/Unigram.h deleted file mode 100644 index 7faac48d..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Unigram.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
-/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef UNIGRAM_H_ -#define UNIGRAM_H_ - -#include - -#include "KeyValuePair.h" - -namespace Gramambular -{ - -class Unigram -{ - public: - Unigram(); - - KeyValuePair keyValue; - double score; - - bool operator==(const Unigram &another) const; - bool operator<(const Unigram &another) const; - - static bool ScoreCompare(const Unigram &a, const Unigram &b); -}; - -inline std::ostream &operator<<(std::ostream &stream, const Unigram &gram) -{ - std::streamsize p = stream.precision(); - stream.precision(6); - stream << "(" << gram.keyValue << "," << gram.score << ")"; - stream.precision(p); - return stream; -} - -inline std::ostream &operator<<(std::ostream &stream, const std::vector &grams) -{ - stream << "[" << grams.size() << "]=>{"; - - size_t index = 0; - - for (std::vector::const_iterator gi = grams.begin(); gi != grams.end(); ++gi, ++index) - { - stream << index << "=>"; - stream << *gi; - if (gi + 1 != grams.end()) - { - stream << ","; - } - } - - stream << "}"; - return stream; -} - -inline Unigram::Unigram() : score(0.0) -{ -} - -inline bool Unigram::operator==(const Unigram &another) const -{ - return keyValue == another.keyValue && score == another.score; -} - -inline bool Unigram::operator<(const Unigram &another) const -{ - if (keyValue < another.keyValue) - { - return true; - } - else if (keyValue == another.keyValue) - { - return score < another.score; - } - return false; -} - -inline bool Unigram::ScoreCompare(const Unigram &a, const Unigram &b) -{ - return a.score > b.score; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Walker.h b/Source/Modules/LanguageParsers/Gramambular/Walker.h deleted file mode 100644 index c5ef2e3d..00000000 --- a/Source/Modules/LanguageParsers/Gramambular/Walker.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). 
-// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -#ifndef WALKER_H_ -#define WALKER_H_ - -#include -#include - -#include "Grid.h" - -namespace Gramambular -{ - -class Walker -{ - public: - explicit Walker(Grid *inGrid); - const std::vector reverseWalk(size_t location, double accumulatedScore = 0.0); - - protected: - Grid *m_grid; -}; - -inline Walker::Walker(Grid *inGrid) : m_grid(inGrid) -{ -} - -inline const std::vector Walker::reverseWalk(size_t location, double accumulatedScore) -{ - if (!location || location > m_grid->width()) - { - return std::vector(); - } - - std::vector> paths; - - std::vector nodes = m_grid->nodesEndingAt(location); - - for (std::vector::iterator ni = nodes.begin(); ni != nodes.end(); ++ni) - { - if (!(*ni).node) - { - continue; - } - - (*ni).accumulatedScore = accumulatedScore + (*ni).node->score(); - - std::vector path = reverseWalk(location - (*ni).spanningLength, (*ni).accumulatedScore); - path.insert(path.begin(), *ni); - - paths.push_back(path); - } - - if (!paths.size()) - { - return std::vector(); - } - - std::vector *result = &*(paths.begin()); - for (std::vector>::iterator pi = paths.begin(); pi != paths.end(); ++pi) - { - if ((*pi).back().accumulatedScore > result->back().accumulatedScore) - { - result = &*pi; - } - } - - return *result; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Gramambular/Gramambular.h b/Source/Modules/LanguageParsers/Megrez/0_Megrez.swift similarity index 71% rename from Source/Modules/LanguageParsers/Gramambular/Gramambular.h rename to Source/Modules/LanguageParsers/Megrez/0_Megrez.swift index d33a298b..cc4b4804 100644 --- a/Source/Modules/LanguageParsers/Gramambular/Gramambular.h +++ b/Source/Modules/LanguageParsers/Megrez/0_Megrez.swift @@ -1,6 +1,5 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). 
+// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). /* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -24,18 +23,5 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef GRAMAMBULAR_H_ -#define GRAMAMBULAR_H_ - -#include "Bigram.h" -#include "BlockReadingBuilder.h" -#include "Grid.h" -#include "KeyValuePair.h" -#include "LanguageModel.h" -#include "Node.h" -#include "NodeAnchor.h" -#include "Span.h" -#include "Unigram.h" -#include "Walker.h" - -#endif +/// The namespace for this package. +public enum Megrez {} diff --git a/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift b/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift new file mode 100644 index 00000000..64578605 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift @@ -0,0 +1,146 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. 
No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +extension Megrez { + public class BlockReadingBuilder { + let kMaximumBuildSpanLength = 10 // 規定最多可以組成的詞的字數上限為 10 + var mutCursorIndex: Int = 0 + var mutReadings: [String] = [] + var mutGrid: Grid = .init() + var mutLM: LanguageModel + var mutJoinSeparator: String = "" + + public init(lm: LanguageModel) { + mutLM = lm + } + + public func clear() { + mutCursorIndex = 0 + mutReadings.removeAll() + mutGrid.clear() + } + + public func length() -> Int { mutReadings.count } + + public func cursorIndex() -> Int { mutCursorIndex } + + public func setCursorIndex(newIndex: Int) { + mutCursorIndex = min(newIndex, mutReadings.count) + } + + public func insertReadingAtCursor(reading: String) { + mutReadings.insert(reading, at: mutCursorIndex) + mutGrid.expandGridByOneAt(location: mutCursorIndex) + build() + mutCursorIndex += 1 + } + + public func readings() -> [String] { mutReadings } + + @discardableResult public func deleteReadingBeforeCursor() -> Bool { + if mutCursorIndex == 0 { + return false + } + + mutReadings.remove(at: mutCursorIndex - 1) + mutCursorIndex -= 1 + mutGrid.shrinkGridByOneAt(location: mutCursorIndex) + build() + return true + } + + @discardableResult public func deleteReadingAfterCursor() -> Bool { + if mutCursorIndex == mutReadings.count { + return false + } + + mutReadings.remove(at: mutCursorIndex) + 
mutGrid.shrinkGridByOneAt(location: mutCursorIndex) + build() + return true + } + + @discardableResult public func removeHeadReadings(count: Int) -> Bool { + if count > length() { + return false + } + + var i = 0 + while i < count { + if mutCursorIndex != 0 { + mutCursorIndex -= 1 + } + mutReadings.removeFirst() + mutGrid.shrinkGridByOneAt(location: 0) + build() + i += 1 + } + + return true + } + + public func setJoinSeparator(separator: String) { + mutJoinSeparator = separator + } + + public func joinSeparator() -> String { mutJoinSeparator } + + public func grid() -> Grid { mutGrid } + + public func build() { + // if (mutLM == nil) { return } // 這個出不了 nil,所以註釋掉。 + + let itrBegin: Int = + (mutCursorIndex < kMaximumBuildSpanLength) ? 0 : mutCursorIndex - kMaximumBuildSpanLength + let itrEnd: Int = min(mutCursorIndex + kMaximumBuildSpanLength, mutReadings.count) + + var p = itrBegin + while p < itrEnd { + var q = 1 + while q <= kMaximumBuildSpanLength, p + q <= itrEnd { + let strSlice = mutReadings[p..<(p + q)] + let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator) + if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { + let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading) + if !unigrams.isEmpty { + let n = Node(key: combinedReading, unigrams: unigrams) + mutGrid.insertNode(node: n, location: p, spanningLength: q) + } + } + q += 1 + } + p += 1 + } + } + + public func join(slice strSlice: ArraySlice, separator: String) -> String { + var arrResult: [String] = [] + for value in strSlice { + arrResult.append(value) + } + return arrResult.joined(separator: separator) + } + } +} diff --git a/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h b/Source/Modules/LanguageParsers/Megrez/1_Walker.swift similarity index 50% rename from Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h rename to Source/Modules/LanguageParsers/Megrez/1_Walker.swift index 432566a0..d6590be8 100644 --- 
a/Source/Modules/LanguageParsers/Gramambular/NodeAnchor.h +++ b/Source/Modules/LanguageParsers/Megrez/1_Walker.swift @@ -1,6 +1,5 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). /* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -24,52 +23,52 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef NODEANCHOR_H_ -#define NODEANCHOR_H_ +extension Megrez { + public class Walker { + var mutGrid: Grid -#include + public init(grid: Megrez.Grid = Megrez.Grid()) { + mutGrid = grid + } -#include "Node.h" + public func reverseWalk(at location: Int, score accumulatedScore: Double = 0.0) -> [NodeAnchor] { + if location == 0 || location > mutGrid.width() { + return [] as [NodeAnchor] + } -namespace Gramambular -{ + var paths: [[NodeAnchor]] = [] + let nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location) -struct NodeAnchor -{ - const Node *node = nullptr; - size_t location = 0; - size_t spanningLength = 0; - double accumulatedScore = 0.0; -}; + for n in nodes { + var n = n + if n.node == nil { + continue + } -inline std::ostream &operator<<(std::ostream &stream, const NodeAnchor &anchor) -{ - stream << "{@(" << anchor.location << "," << anchor.spanningLength << "),"; - if (anchor.node) - { - stream << *(anchor.node); - } - else - { - stream << "null"; - } - stream << "}"; - return stream; + n.accumulatedScore = accumulatedScore + n.node!.score() + + var path: [NodeAnchor] = reverseWalk( + at: location - n.spanningLength, + score: n.accumulatedScore + 
) + path.insert(n, at: 0) + + paths.append(path) + } + + if !paths.isEmpty { + if var result = paths.first { + for value in paths { + if let vLast = value.last, let rLast = result.last { + if vLast.accumulatedScore > rLast.accumulatedScore { + result = value + } + } + } + return result + } + } + return [] as [NodeAnchor] + } + } } - -inline std::ostream &operator<<(std::ostream &stream, const std::vector &anchor) -{ - for (std::vector::const_iterator i = anchor.begin(); i != anchor.end(); ++i) - { - stream << *i; - if (i + 1 != anchor.end()) - { - stream << "<-"; - } - } - - return stream; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Megrez/2_Grid.swift b/Source/Modules/LanguageParsers/Megrez/2_Grid.swift new file mode 100644 index 00000000..db4ac907 --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/2_Grid.swift @@ -0,0 +1,180 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +extension Megrez { + public class Grid { + var mutSpans: [Megrez.Span] + + public init() { + mutSpans = [Megrez.Span]() + } + + public func clear() { + mutSpans = [Megrez.Span]() + } + + public func insertNode(node: Node, location: Int, spanningLength: Int) { + if location >= mutSpans.count { + let diff = location - mutSpans.count + 1 + var i = 0 + while i < diff { + mutSpans.append(Span()) + i += 1 + } + } + mutSpans[location].insert(node: node, length: spanningLength) + } + + public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool { + if location > mutSpans.count { + return false + } + + let n = mutSpans[location].node(length: spanningLength) + return n == nil ? 
false : key == n?.key() + } + + public func expandGridByOneAt(location: Int) { + mutSpans.append(Span()) + if location > 0, location < mutSpans.count { + var i = 0 + while i < location { + // zaps overlapping spans + mutSpans[i].removeNodeOfLengthGreaterThan(location - i) + i += 1 + } + } + } + + public func shrinkGridByOneAt(location: Int) { + if location >= mutSpans.count { + return + } + + mutSpans.remove(at: location) + var i = 0 + while i < location { + // zaps overlapping spans + mutSpans[i].removeNodeOfLengthGreaterThan(location - i) + i += 1 + } + } + + public func width() -> Int { mutSpans.count } + + public func nodesEndingAt(location: Int) -> [NodeAnchor] { + var results: [NodeAnchor] = [] + if !mutSpans.isEmpty, location <= mutSpans.count { + var i = 0 + while i < location { + let span = mutSpans[i] + if i + span.maximumLength >= location { + if let np = span.node(length: location - i) { + results.append( + NodeAnchor( + node: np, + location: i, + spanningLength: location - i + ) + ) + } + } + i += 1 + } + } + return results + } + + public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] { + var results: [NodeAnchor] = [] + if !mutSpans.isEmpty, location <= mutSpans.count { + var i = 0 + while i < location { + let span = mutSpans[i] + if i + span.maximumLength >= location { + var j = 1 + while j <= span.maximumLength { + if i + j < location { + j += 1 + continue + } + if let np = span.node(length: j) { + results.append( + NodeAnchor( + node: np, + location: i, + spanningLength: location - i + ) + ) + } + j += 1 + } + } + i += 1 + } + } + return results + } + + public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor { + var node = NodeAnchor() + let nodes = nodesCrossingOrEndingAt(location: location) + for nodeAnchor in nodes { + // Reset the candidate-fixed state of every node at the location. + let candidates = nodeAnchor.node?.candidates() ?? 
[] + nodeAnchor.node?.resetCandidate() + + for (i, candidate) in candidates.enumerated() { + if candidate.value == value { + nodeAnchor.node?.selectCandidateAt(index: i) + node = nodeAnchor + break + } + } + } + return node + } + + public func overrideNodeScoreForSelectedCandidate(location: Int, value: inout String, overridingScore: Double) { + for nodeAnchor in nodesCrossingOrEndingAt(location: location) { + var nodeAnchor = nodeAnchor + if let theNode = nodeAnchor.node { + let candidates = theNode.candidates() + // Reset the candidate-fixed state of every node at the location. + theNode.resetCandidate() + nodeAnchor.node = theNode + + for (i, candidate) in candidates.enumerated() { + if candidate.value == value { + theNode.selectFloatingCandidateAt(index: i, score: overridingScore) + nodeAnchor.node = theNode + break + } + } + } + } + } + } +} diff --git a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h b/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift similarity index 67% rename from Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h rename to Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift index f464255f..48bc364d 100644 --- a/Source/Modules/LangModelRelated/SubLanguageModels/InstantiatedModels/CNSLM.h +++ b/Source/Modules/LanguageParsers/Megrez/3_NodeAnchor.swift @@ -1,6 +1,5 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). 
/* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -24,31 +23,14 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef CNSLM_H -#define CNSLM_H - -#include "LanguageModel.h" -#include "UserPhrasesLM.h" -#include -#include -#include - -namespace vChewing -{ - -class CNSLM : public UserPhrasesLM -{ - public: - bool allowConsolidation() override - { - return false; - } - float overridedValue() override - { - return -11.0; - } -}; - -} // namespace vChewing - -#endif +extension Megrez { + @frozen public struct NodeAnchor { + public var node: Node? + public var location: Int = 0 + public var spanningLength: Int = 0 + public var accumulatedScore: Double = 0.0 + public var keyLength: Int { + node?.key().count ?? 0 + } + } +} diff --git a/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h b/Source/Modules/LanguageParsers/Megrez/3_Span.swift similarity index 50% rename from Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h rename to Source/Modules/LanguageParsers/Megrez/3_Span.swift index 231d6342..0db3a889 100644 --- a/Source/Modules/LanguageParsers/Gramambular/KeyValuePair.h +++ b/Source/Modules/LanguageParsers/Megrez/3_Span.swift @@ -1,6 +1,5 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). 
/* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -24,48 +23,52 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef KEYVALUEPAIR_H_ -#define KEYVALUEPAIR_H_ +extension Megrez { + @frozen public struct Span { + private var mutLengthNodeMap: [Int: Megrez.Node] + private var mutMaximumLength: Int + var maximumLength: Int { + mutMaximumLength + } -#include -#include + public init() { + mutLengthNodeMap = [:] + mutMaximumLength = 0 + } -namespace Gramambular -{ + mutating func clear() { + mutLengthNodeMap.removeAll() + mutMaximumLength = 0 + } -class KeyValuePair -{ - public: - std::string key; - std::string value; + mutating func insert(node: Node, length: Int) { + mutLengthNodeMap[length] = node + if length > mutMaximumLength { + mutMaximumLength = length + } + } - bool operator==(const KeyValuePair &another) const; - bool operator<(const KeyValuePair &another) const; -}; + mutating func removeNodeOfLengthGreaterThan(_ length: Int) { + if length > mutMaximumLength { return } + var max = 0 + var removalList: [Int: Megrez.Node] = [:] + for key in mutLengthNodeMap.keys { + if key > length { + removalList[key] = mutLengthNodeMap[key] + } else { + if key > max { + max = key + } + } + } + for key in removalList.keys { + mutLengthNodeMap.removeValue(forKey: key) + } + mutMaximumLength = max + } -inline std::ostream &operator<<(std::ostream &stream, const KeyValuePair &pair) -{ - stream << "(" << pair.key << "," << pair.value << ")"; - return stream; + public func node(length: Int) -> Node? 
{ + mutLengthNodeMap[length] + } + } } - -inline bool KeyValuePair::operator==(const KeyValuePair &another) const -{ - return key == another.key && value == another.value; -} - -inline bool KeyValuePair::operator<(const KeyValuePair &another) const -{ - if (key < another.key) - { - return true; - } - else if (key == another.key) - { - return value < another.value; - } - return false; -} -} // namespace Gramambular - -#endif diff --git a/Source/Modules/LanguageParsers/Megrez/4_Node.swift b/Source/Modules/LanguageParsers/Megrez/4_Node.swift new file mode 100644 index 00000000..9744086a --- /dev/null +++ b/Source/Modules/LanguageParsers/Megrez/4_Node.swift @@ -0,0 +1,161 @@ +// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License). +// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
extension Megrez {
	/// Holds one key together with its unigrams (kept sorted by descending
	/// score), the matching candidate list, a bigram lookup table keyed by the
	/// preceding key-value pair, and the currently selected candidate.
	public class Node {
		/// Score given to a candidate chosen through `selectCandidateAt(index:fix:)`.
		/// NOTE(review): presumably chosen to outrank any language-model score — confirm.
		private static let kSelectedCandidateScore: Double = 99

		let mutLM: LanguageModel
		var mutKey: String
		var mutScore: Double = 0
		/// Unigrams sorted by descending score.
		var mutUnigrams: [Unigram]
		/// Key-value pairs of `mutUnigrams`, in the same order.
		var mutCandidates: [KeyValuePair]
		/// Maps a candidate value to its index in `mutUnigrams` / `mutCandidates`.
		var mutValueUnigramIndexMap: [String: Int]
		/// Bigrams grouped by their preceding key-value pair.
		var mutPrecedingBigramMap: [KeyValuePair: [Megrez.Bigram]]

		var mutCandidateFixed: Bool = false
		var mutSelectedUnigramIndex: Int = 0

		/// Creates a node for `key`.
		/// - Parameters:
		///   - key: The key this node represents.
		///   - unigrams: Unigrams for the key, in any order.
		///   - bigrams: Bigrams used by `primeNodeWith(precedingKeyValues:)`.
		public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
			mutLM = LanguageModel()
			mutKey = key
			mutUnigrams = unigrams
			mutCandidates = []
			mutValueUnigramIndexMap = [:]
			mutPrecedingBigramMap = [:]

			// Always forward the bigrams. The previous branch only forwarded them
			// when the array was empty, so real bigrams were silently discarded.
			node(key: key, unigrams: unigrams, bigrams: bigrams)
		}

		/// (Re)builds the node's contents from the given data.
		///
		/// The unigrams are stored sorted by descending score, and the candidate
		/// list, value-to-index map and bigram table are rebuilt from scratch so
		/// that all of them stay index-aligned.
		/// - Parameters:
		///   - key: The key this node represents.
		///   - unigrams: Unigrams for the key, in any order.
		///   - bigrams: Bigrams to group by their preceding key-value pair.
		public func node(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
			mutKey = key

			// Keep the sorted order: the index map below refers to positions in it.
			mutUnigrams = unigrams.sorted { $0.score > $1.score }
			if let best = mutUnigrams.first {
				mutScore = best.score
			}

			mutCandidates = mutUnigrams.map { $0.keyValue }
			mutValueUnigramIndexMap = [:]
			for (index, gram) in mutUnigrams.enumerated() {
				// When two unigrams share a value, the later (lower-scored) index wins.
				mutValueUnigramIndexMap[gram.keyValue.value] = index
			}

			mutPrecedingBigramMap = [:]
			for gram in bigrams {
				// `default:` creates the bucket on first use; optional chaining on
				// a missing key would drop the bigram.
				mutPrecedingBigramMap[gram.precedingKeyValue, default: []].append(gram)
			}
		}

		/// Picks the best-scoring bigram continuation of `precedingKeyValues`,
		/// unless a candidate has been fixed.
		///
		/// A bigram only wins when its score exceeds the current score and its
		/// value is one of this node's candidates.
		/// - Parameter precedingKeyValues: Key-value pairs preceding this node.
		public func primeNodeWith(precedingKeyValues: [KeyValuePair]) {
			var bestIndex = mutSelectedUnigramIndex
			var bestScore = mutScore

			if !isCandidateFixed() {
				for preceding in precedingKeyValues {
					for bigram in mutPrecedingBigramMap[preceding] ?? [] where bigram.score > bestScore {
						if let index = mutValueUnigramIndexMap[bigram.keyValue.value] {
							bestIndex = index
							bestScore = bigram.score
						}
					}
				}
			}

			mutScore = bestScore
			mutSelectedUnigramIndex = bestIndex
		}

		/// Whether the selected candidate has been fixed.
		public func isCandidateFixed() -> Bool { mutCandidateFixed }

		/// All candidates, ordered by descending unigram score.
		public func candidates() -> [KeyValuePair] { mutCandidates }

		/// Selects the candidate at `index`, optionally fixing it.
		/// - Parameters:
		///   - index: Candidate index; any out-of-range value (including negative) selects 0.
		///   - fix: Whether the selection should be marked as fixed.
		public func selectCandidateAt(index: Int = 0, fix: Bool = false) {
			mutSelectedUnigramIndex = mutUnigrams.indices.contains(index) ? index : 0
			mutCandidateFixed = fix
			mutScore = Node.kSelectedCandidateScore
		}

		/// Clears any selection: index 0, not fixed, score of the top unigram (if any).
		public func resetCandidate() {
			mutSelectedUnigramIndex = 0
			mutCandidateFixed = false
			if let best = mutUnigrams.first {
				mutScore = best.score
			}
		}

		/// Selects the candidate at `index` without fixing it and assigns `score`.
		/// - Parameters:
		///   - index: Candidate index; any out-of-range value (including negative) selects 0.
		///   - score: The score to assign to the node.
		public func selectFloatingCandidateAt(index: Int, score: Double) {
			mutSelectedUnigramIndex = mutUnigrams.indices.contains(index) ? index : 0
			mutCandidateFixed = false
			mutScore = score
		}

		/// The key this node represents.
		public func key() -> String { mutKey }

		/// The node's current score.
		public func score() -> Double { mutScore }

		/// Returns the score of the first unigram whose value equals `candidate`,
		/// or 0.0 when there is none.
		public func scoreFor(candidate: String) -> Double {
			mutUnigrams.first { $0.keyValue.value == candidate }?.score ?? 0.0
		}

		/// The currently selected candidate, or an empty pair when the selection
		/// index does not address a candidate.
		public func currentKeyValue() -> KeyValuePair {
			// Bounds-check the array that is actually being indexed.
			mutCandidates.indices.contains(mutSelectedUnigramIndex)
				? mutCandidates[mutSelectedUnigramIndex]
				: KeyValuePair()
		}

		/// Score of the top unigram, or 0.0 when the node has no unigrams.
		public func highestUnigramScore() -> Double {
			mutUnigrams.first?.score ?? 0.0
		}

		/// Compares unigrams, candidates, both lookup tables, the fixed flag and
		/// the selected index. Key and score are not compared.
		public static func == (lhs: Node, rhs: Node) -> Bool {
			lhs.mutUnigrams == rhs.mutUnigrams && lhs.mutCandidates == rhs.mutCandidates
				&& lhs.mutValueUnigramIndexMap == rhs.mutValueUnigramIndexMap
				&& lhs.mutPrecedingBigramMap == rhs.mutPrecedingBigramMap
				&& lhs.mutCandidateFixed == rhs.mutCandidateFixed
				&& lhs.mutSelectedUnigramIndex == rhs.mutSelectedUnigramIndex
		}
	}
}
extension Megrez {
	/// Skeleton language model. This is only a framework: for real use, derive
	/// a subclass and override the functions below.
	///
	/// The bodies contain dummy logic that touches every parameter; without it,
	/// some Swift formatting tools would mangle the parameter design.
	open class LanguageModel {
		public init() {}

		/// Placeholder: returns an empty list for every key.
		open func unigramsFor(key: String) -> [Megrez.Unigram] {
			let nothing: [Megrez.Unigram] = []
			// Both branches are deliberately identical; the test only references `key`.
			return key.isEmpty ? nothing : nothing
		}

		/// Placeholder: returns an empty list for every pair of keys.
		open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
			let nothing: [Megrez.Bigram] = []
			// Both branches are deliberately identical; the test only references the parameters.
			return precedingKey == key ? nothing : nothing
		}

		/// Placeholder: reports `true` for any non-empty key.
		open func hasUnigramsFor(key: String) -> Bool {
			!key.isEmpty
		}
	}
}
extension Megrez {
	/// A scored key-value pair together with the key-value pair that precedes it.
	@frozen public struct Bigram: Equatable {
		public var keyValue: KeyValuePair
		public var precedingKeyValue: KeyValuePair
		public var score: Double

		/// - Parameters:
		///   - precedingKeyValue: The pair that comes before `keyValue`.
		///   - keyValue: The pair this bigram scores.
		///   - score: The score of the bigram.
		public init(precedingKeyValue: KeyValuePair, keyValue: KeyValuePair, score: Double) {
			self.keyValue = keyValue
			self.precedingKeyValue = precedingKeyValue
			self.score = score
		}

		/// Feeds the same three fields that `==` compares into `hasher`.
		public func hash(into hasher: inout Hasher) {
			hasher.combine(keyValue)
			hasher.combine(precedingKeyValue)
			hasher.combine(score)
		}

		public static func == (lhs: Bigram, rhs: Bigram) -> Bool {
			lhs.precedingKeyValue == rhs.precedingKeyValue && lhs.keyValue == rhs.keyValue && lhs.score == rhs.score
		}

		/// Orders by `precedingKeyValue`, then `keyValue`, then `score`.
		///
		/// Each later field is consulted only when all earlier fields are equal,
		/// which keeps the relation a strict ordering. The previous version
		/// contained a clause that could never be true and never looked at `score`.
		public static func < (lhs: Bigram, rhs: Bigram) -> Bool {
			if lhs.precedingKeyValue != rhs.precedingKeyValue {
				return lhs.precedingKeyValue < rhs.precedingKeyValue
			}
			if lhs.keyValue != rhs.keyValue {
				return lhs.keyValue < rhs.keyValue
			}
			return lhs.score < rhs.score
		}

		var description: String {
			"\(keyValue):\(score)"
		}

		var debugDescription: String {
			"Bigram(keyValue: \(keyValue), score: \(score))"
		}
	}
}
extension Megrez {
	/// A key-value pair with a score.
	@frozen public struct Unigram: Equatable {
		public var keyValue: KeyValuePair
		public var score: Double

		/// - Parameters:
		///   - keyValue: The pair this unigram scores.
		///   - score: The score of the unigram.
		public init(keyValue: KeyValuePair, score: Double) {
			self.keyValue = keyValue
			self.score = score
		}

		/// Feeds the same two fields that `==` compares into `hasher`.
		public func hash(into hasher: inout Hasher) {
			hasher.combine(keyValue)
			hasher.combine(score)
		}

		/// Returns `true` when `a` scores higher than `b` (descending-score predicate).
		/// Marked by the original author as no longer needed; kept so existing
		/// callers keep compiling.
		public static func compareScore(a: Unigram, b: Unigram) -> Bool {
			a.score > b.score
		}

		public static func == (lhs: Unigram, rhs: Unigram) -> Bool {
			lhs.keyValue == rhs.keyValue && lhs.score == rhs.score
		}

		/// Orders by `keyValue`, then by `score` when the key-value pairs are equal.
		///
		/// The previous tie-break re-tested `keyValue < keyValue` under
		/// `keyValue == keyValue`, which can never hold, so `score` was ignored.
		public static func < (lhs: Unigram, rhs: Unigram) -> Bool {
			if lhs.keyValue != rhs.keyValue {
				return lhs.keyValue < rhs.keyValue
			}
			return lhs.score < rhs.score
		}

		var description: String {
			"\(keyValue):\(score)"
		}

		var debugDescription: String {
			"Unigram(keyValue: \(keyValue), score: \(score))"
		}
	}
}
+/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
extension Megrez {
	/// A pair of strings (`key`, `value`) that can be compared, ordered and hashed.
	///
	/// Equality, ordering and hashing all use the same two fields, so two pairs
	/// that compare equal always hash alike. The previous `==` and `<` compared
	/// only the key's length while `hash(into:)` hashed the whole key, which
	/// breaks the `Hashable` contract when the pair is used as a dictionary key.
	@frozen public struct KeyValuePair: Equatable, Hashable, Comparable {
		public var key: String
		public var value: String

		/// - Parameters:
		///   - key: The key; defaults to the empty string.
		///   - value: The value; defaults to the empty string.
		public init(key: String = "", value: String = "") {
			self.key = key
			self.value = value
		}

		public func hash(into hasher: inout Hasher) {
			hasher.combine(key)
			hasher.combine(value)
		}

		/// Two pairs are equal when both their keys and their values are equal.
		public static func == (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
			lhs.key == rhs.key && lhs.value == rhs.value
		}

		/// Orders by `key`, then by `value` when the keys are equal.
		public static func < (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
			(lhs.key, lhs.value) < (rhs.key, rhs.value)
		}

		// The remaining operators are derived from `<` so that they can never
		// disagree with it. The previous `<=` / `>=` returned true whenever the
		// key lengths matched, regardless of `value`.

		public static func > (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
			rhs < lhs
		}

		public static func <= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
			!(rhs < lhs)
		}

		public static func >= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
			!(lhs < rhs)
		}

		public var description: String {
			"(\(key), \(value))"
		}

		public var debugDescription: String {
			"KeyValuePair(key: \(key), value: \(value))"
		}
	}
}
- */ shouldCascadeWindows = true } } diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index f97b5b51..5eec2d76 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -14,10 +14,23 @@ 5B2DB16F27AF6891006D874E /* data-chs.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B2DB16D27AF6891006D874E /* data-chs.txt */; }; 5B2DB17027AF6891006D874E /* data-cht.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B2DB16E27AF6891006D874E /* data-cht.txt */; }; 5B3133BF280B229700A4A505 /* KeyHandler_States.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B3133BE280B229700A4A505 /* KeyHandler_States.swift */; }; + 5B38F59A281E2E49007D5F5D /* 6_Unigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */; }; + 5B38F59B281E2E49007D5F5D /* 7_KeyValuePair.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */; }; + 5B38F59C281E2E49007D5F5D /* 2_Grid.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */; }; + 5B38F59D281E2E49007D5F5D /* 4_Node.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */; }; + 5B38F59E281E2E49007D5F5D /* 6_Bigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */; }; + 5B38F59F281E2E49007D5F5D /* 3_NodeAnchor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */; }; + 5B38F5A0281E2E49007D5F5D /* 1_Walker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */; }; + 5B38F5A1281E2E49007D5F5D /* 1_BlockReadingBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */; }; + 5B38F5A2281E2E49007D5F5D /* 0_Megrez.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1615FC0EB100ABF4B3 /* 
0_Megrez.swift */; }; + 5B38F5A3281E2E49007D5F5D /* 3_Span.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */; }; + 5B38F5A4281E2E49007D5F5D /* 5_LanguageModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */; }; + 5B40730C281672610023DFFF /* lmAssociates.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B407309281672610023DFFF /* lmAssociates.swift */; }; + 5B40730D281672610023DFFF /* lmReplacements.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B40730A281672610023DFFF /* lmReplacements.swift */; }; + 5B5D28AC281EA1E900523D4D /* lmLite.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B5D28AB281EA1E800523D4D /* lmLite.swift */; }; 5B5E535227EF261400C6AA1E /* IME.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B5E535127EF261400C6AA1E /* IME.swift */; }; 5B61B0CA280BEFD4002E3CFA /* KeyHandler_Misc.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B61B0C9280BEFD4002E3CFA /* KeyHandler_Misc.swift */; }; 5B62A32927AE77D100A19448 /* FSEventStreamHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */; }; - 5B62A32F27AE78B000A19448 /* CoreLM.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32D27AE78B000A19448 /* CoreLM.mm */; }; 5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A33127AE792F00A19448 /* InputSourceHelper.swift */; }; 5B62A33627AE795800A19448 /* mgrPrefs.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A33527AE795800A19448 /* mgrPrefs.swift */; }; 5B62A33827AE79CD00A19448 /* NSStringUtils.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A33727AE79CD00A19448 /* NSStringUtils.swift */; }; @@ -34,6 +47,10 @@ 5B782EC4280C243C007276DE /* KeyHandler_HandleCandidate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B782EC3280C243C007276DE /* KeyHandler_HandleCandidate.swift */; }; 
5B7BC4B027AFFBE800F66C24 /* frmPrefWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 5B7BC4AE27AFFBE800F66C24 /* frmPrefWindow.xib */; }; 5B7F225D2808501000DDD3CB /* KeyHandler_HandleInput.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7F225C2808501000DDD3CB /* KeyHandler_HandleInput.swift */; }; + 5B949BD92816DC5400D87B5D /* LineReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B949BD82816DC5400D87B5D /* LineReader.swift */; }; + 5B949BDB2816DDBC00D87B5D /* LMConsolidator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */; }; + 5BA0DF312817857D009E73BB /* lmUserOverride.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */; }; + 5BA0DF322817857D009E73BB /* lmCore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA0DF2F2817857D009E73BB /* lmCore.swift */; }; 5BA9FD0F27FEDB6B002DE248 /* suiPrefPaneGeneral.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA9FD0A27FEDB6B002DE248 /* suiPrefPaneGeneral.swift */; }; 5BA9FD1027FEDB6B002DE248 /* suiPrefPaneKeyboard.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA9FD0B27FEDB6B002DE248 /* suiPrefPaneKeyboard.swift */; }; 5BA9FD1127FEDB6B002DE248 /* ctlPrefUI.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA9FD0C27FEDB6B002DE248 /* ctlPrefUI.swift */; }; @@ -66,6 +83,9 @@ 5BBBB77627AED70B0023B93A /* MenuIcon-TCVIM.png in Resources */ = {isa = PBXBuildFile; fileRef = 5BBBB77227AED70B0023B93A /* MenuIcon-TCVIM.png */; }; 5BBBB77A27AEDC690023B93A /* clsSFX.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BBBB77927AEDC690023B93A /* clsSFX.swift */; }; 5BC2652227E04B7E00700291 /* uninstall.sh in Resources */ = {isa = PBXBuildFile; fileRef = 5BC2652127E04B7B00700291 /* uninstall.sh */; }; + 5BC4F6382819FF4500A2514A /* KeyHandlerSputnik.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BC4F6372819FF4500A2514A /* KeyHandlerSputnik.swift */; }; + 
5BD0113B28180D6100609769 /* LMInstantiator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD0113A28180D6100609769 /* LMInstantiator.swift */; }; + 5BD0113D2818543900609769 /* KeyHandler_Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD0113C2818543900609769 /* KeyHandler_Kernel.swift */; }; 5BD05B8127B22F3C004C4F1D /* char-kanji-cns.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5BD05B8027B22F3C004C4F1D /* char-kanji-cns.txt */; }; 5BD05BCA27B2A43D004C4F1D /* Images.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 6A2E40F5253A69DA00D1AE1D /* Images.xcassets */; }; 5BD05C5D27B2BBA9004C4F1D /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 5BD05C5B27B2BBA9004C4F1D /* Main.storyboard */; }; @@ -76,9 +96,9 @@ 5BD05C6A27B2BBEF004C4F1D /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */; }; 5BDC1CFA27FDF1310052C2B9 /* apiUpdate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BDC1CF927FDF1310052C2B9 /* apiUpdate.swift */; }; 5BDCBB2E27B4E67A00D0CC59 /* vChewingPhraseEditor.app in Resources */ = {isa = PBXBuildFile; fileRef = 5BD05BB827B2A429004C4F1D /* vChewingPhraseEditor.app */; }; + 5BE33BED28169B5D00CE5BB0 /* KeyValueStructs.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BE33BEC28169B5D00CE5BB0 /* KeyValueStructs.swift */; }; 5BE78BD927B3775B005EA1BE /* ctlAboutWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BE78BD827B37750005EA1BE /* ctlAboutWindow.swift */; }; 5BE78BDD27B3776D005EA1BE /* frmAboutWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 5BE78BDA27B37764005EA1BE /* frmAboutWindow.xib */; }; - 5BE78BE027B38804005EA1BE /* LMConsolidator.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32727AE77BB00A19448 /* LMConsolidator.mm */; }; 5BF8423127BAA942008E7E4C /* vChewingKanjiConverter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BF8423027BAA942008E7E4C /* vChewingKanjiConverter.swift */; }; 
6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F2015FC0EB100ABF4B3 /* Mandarin.cpp */; }; 6A187E2616004C5900466B2E /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6A187E2816004C5900466B2E /* MainMenu.xib */; }; @@ -89,21 +109,12 @@ 6ACA41FC15FC1D9000935EF6 /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41EE15FC1D9000935EF6 /* Localizable.strings */; }; 6ACA41FD15FC1D9000935EF6 /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 6ACA41F015FC1D9000935EF6 /* MainMenu.xib */; }; 6ACA420215FC1E5200935EF6 /* vChewing.app in Resources */ = {isa = PBXBuildFile; fileRef = 6A0D4EA215FC0D2D00ABF4B3 /* vChewing.app */; }; - 6ACC3D3F27914F2400F1B140 /* KeyValueBlobReader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6ACC3D3E27914F2400F1B140 /* KeyValueBlobReader.cpp */; }; - 6ACC3D442793701600F1B140 /* ParselessPhraseDB.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */; }; - 6ACC3D452793701600F1B140 /* ParselessLM.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6ACC3D422793701600F1B140 /* ParselessLM.cpp */; }; - D41355D8278D74B5005E5CBD /* mgrLangModel.mm in Sources */ = {isa = PBXBuildFile; fileRef = D41355D7278D7409005E5CBD /* mgrLangModel.mm */; }; - D41355DB278E6D17005E5CBD /* LMInstantiator.mm in Sources */ = {isa = PBXBuildFile; fileRef = D41355D9278E6D17005E5CBD /* LMInstantiator.mm */; }; - D41355DE278EA3ED005E5CBD /* UserPhrasesLM.mm in Sources */ = {isa = PBXBuildFile; fileRef = D41355DC278EA3ED005E5CBD /* UserPhrasesLM.mm */; }; D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427F76B278CA1BA004A2160 /* AppDelegate.swift */; }; - D44FB74D2792189A003C80A6 /* PhraseReplacementMap.mm in Sources */ = {isa = PBXBuildFile; fileRef = D44FB74B2792189A003C80A6 /* PhraseReplacementMap.mm */; }; D456576E279E4F7B00DF6BC9 /* InputHandler.swift in Sources */ = {isa = 
PBXBuildFile; fileRef = D456576D279E4F7B00DF6BC9 /* InputHandler.swift */; }; D461B792279DAC010070E734 /* InputState.swift in Sources */ = {isa = PBXBuildFile; fileRef = D461B791279DAC010070E734 /* InputState.swift */; }; D47B92C027972AD100458394 /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = D47B92BF27972AC800458394 /* main.swift */; }; - D47D73AC27A6CAE600255A50 /* AssociatedPhrases.mm in Sources */ = {isa = PBXBuildFile; fileRef = D47D73AA27A6CAE600255A50 /* AssociatedPhrases.mm */; }; D47F7DCE278BFB57002F9DD7 /* ctlPrefWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = D47F7DCD278BFB57002F9DD7 /* ctlPrefWindow.swift */; }; D47F7DD0278C0897002F9DD7 /* ctlNonModalAlertWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = D47F7DCF278C0897002F9DD7 /* ctlNonModalAlertWindow.swift */; }; - D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */; }; D4A13D5A27A59F0B003BE359 /* ctlInputMethod.swift in Sources */ = {isa = PBXBuildFile; fileRef = D4A13D5927A59D5C003BE359 /* ctlInputMethod.swift */; }; D4E33D8A27A838CF006DB1CF /* Localizable.strings in Resources */ = {isa = PBXBuildFile; fileRef = D4E33D8827A838CF006DB1CF /* Localizable.strings */; }; D4E33D8F27A838F0006DB1CF /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = D4E33D8D27A838F0006DB1CF /* InfoPlist.strings */; }; @@ -174,9 +185,9 @@ 5B05A47B27AFF7CA00437698 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = ""; }; 5B05A47C27AFF7CF00437698 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = ""; }; 5B05A47F27AFF84200437698 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/frmAboutWindow.strings; sourceTree = ""; }; - 5B0AF8B427B2C8290096FE54 /* 
StringExtension.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = StringExtension.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B0C5EDF27C7D9870078037C /* dataCompiler.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = dataCompiler.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B11328827B94CFB00E58451 /* AppleKeyboardConverter.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = AppleKeyboardConverter.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B0AF8B427B2C8290096FE54 /* StringExtension.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = StringExtension.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B0C5EDF27C7D9870078037C /* dataCompiler.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = dataCompiler.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B11328827B94CFB00E58451 /* AppleKeyboardConverter.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = AppleKeyboardConverter.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B18BA6F27C7BD8B0056EB19 /* LICENSE-CHS.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "LICENSE-CHS.txt"; sourceTree = ""; }; 5B18BA7027C7BD8B0056EB19 /* Makefile */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.make; path = Makefile; sourceTree = ""; }; 5B18BA7127C7BD8B0056EB19 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; @@ -189,55 +200,55 @@ 5B2DB16E27AF6891006D874E /* data-cht.txt */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = "data-cht.txt"; path = "Data/data-cht.txt"; sourceTree = ""; }; 5B2DB17127AF8771006D874E /* Makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; name = Makefile; path = Data/Makefile; sourceTree = ""; }; 5B30F11227BA568800484E24 /* vChewingKeyLayout.bundle */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.plug-in"; path = vChewingKeyLayout.bundle; sourceTree = ""; }; - 5B3133BE280B229700A4A505 /* KeyHandler_States.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = KeyHandler_States.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B5E535127EF261400C6AA1E /* IME.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = IME.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B61B0C9280BEFD4002E3CFA /* KeyHandler_Misc.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = KeyHandler_Misc.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A32627AE77BB00A19448 /* LMConsolidator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = LMConsolidator.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 5B62A32727AE77BB00A19448 /* LMConsolidator.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = LMConsolidator.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = FSEventStreamHelper.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A32B27AE78B000A19448 /* CNSLM.h */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = CNSLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 5B62A32C27AE78B000A19448 /* CoreLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = CoreLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 5B62A32D27AE78B000A19448 /* CoreLM.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = CoreLM.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 5B62A33127AE792F00A19448 /* InputSourceHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = InputSourceHelper.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A33527AE795800A19448 /* mgrPrefs.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = mgrPrefs.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A33727AE79CD00A19448 /* NSStringUtils.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = NSStringUtils.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A33C27AE7CC100A19448 /* ctlAboutWindow.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ctlAboutWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A33F27AE7CD900A19448 /* ctlCandidateHorizontal.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ctlCandidateHorizontal.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A34027AE7CD900A19448 /* ctlCandidate.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ctlCandidate.swift; sourceTree 
= ""; tabWidth = 2; usesTabs = 1; }; - 5B62A34127AE7CD900A19448 /* ctlCandidateVertical.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ctlCandidateVertical.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A34327AE7CD900A19448 /* TooltipController.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = TooltipController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B62A34527AE7CD900A19448 /* NotifierController.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = NotifierController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B3133BE280B229700A4A505 /* KeyHandler_States.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_States.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B407309281672610023DFFF /* lmAssociates.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmAssociates.swift; sourceTree = ""; usesTabs = 1; }; + 5B40730A281672610023DFFF /* lmReplacements.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmReplacements.swift; sourceTree = ""; usesTabs = 1; }; + 5B5D28AB281EA1E800523D4D /* lmLite.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmLite.swift; sourceTree = ""; }; + 5B5E535127EF261400C6AA1E /* IME.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = IME.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B61B0C9280BEFD4002E3CFA /* KeyHandler_Misc.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; 
indentWidth = 2; lineEnding = 0; path = KeyHandler_Misc.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = FSEventStreamHelper.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A33127AE792F00A19448 /* InputSourceHelper.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputSourceHelper.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A33527AE795800A19448 /* mgrPrefs.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = mgrPrefs.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A33727AE79CD00A19448 /* NSStringUtils.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = NSStringUtils.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A33C27AE7CC100A19448 /* ctlAboutWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlAboutWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A33F27AE7CD900A19448 /* ctlCandidateHorizontal.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlCandidateHorizontal.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A34027AE7CD900A19448 /* ctlCandidate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlCandidate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A34127AE7CD900A19448 /* ctlCandidateVertical.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; 
indentWidth = 2; lineEnding = 0; path = ctlCandidateVertical.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A34327AE7CD900A19448 /* TooltipController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = TooltipController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B62A34527AE7CD900A19448 /* NotifierController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = NotifierController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B707CE527D9F3A10099EF99 /* SwiftyOpenCC */ = {isa = PBXFileReference; lastKnownFileType = wrapper; name = SwiftyOpenCC; path = Packages/SwiftyOpenCC; sourceTree = ""; }; - 5B707CE727D9F4590099EF99 /* OpenCCBridge.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = OpenCCBridge.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B7111C727DEF9FF00444310 /* UserSymbolLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserSymbolLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; + 5B707CE727D9F4590099EF99 /* OpenCCBridge.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = OpenCCBridge.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B73FB5427B2BD6900E9BF49 /* PhraseEditor-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = "PhraseEditor-Info.plist"; path = "UserPhraseEditor/PhraseEditor-Info.plist"; sourceTree = SOURCE_ROOT; }; 5B73FB5F27B2BE1300E9BF49 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = ""; }; - 5B782EC3280C243C007276DE /* KeyHandler_HandleCandidate.swift */ = {isa = PBXFileReference; fileEncoding 
= 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = KeyHandler_HandleCandidate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B782EC3280C243C007276DE /* KeyHandler_HandleCandidate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_HandleCandidate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B7BC4AF27AFFBE800F66C24 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/frmPrefWindow.xib; sourceTree = ""; }; 5B7BC4B227AFFC0B00F66C24 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/frmPrefWindow.strings; sourceTree = ""; }; - 5B7F225C2808501000DDD3CB /* KeyHandler_HandleInput.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = KeyHandler_HandleInput.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5B8F43ED27C9BC220069AC27 /* SymbolLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = SymbolLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 5BA9FD0A27FEDB6B002DE248 /* suiPrefPaneGeneral.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = suiPrefPaneGeneral.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD0B27FEDB6B002DE248 /* suiPrefPaneKeyboard.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = suiPrefPaneKeyboard.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD0C27FEDB6B002DE248 /* ctlPrefUI.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ctlPrefUI.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 
5BA9FD0D27FEDB6B002DE248 /* suiPrefPaneExperience.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = suiPrefPaneExperience.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD0E27FEDB6B002DE248 /* suiPrefPaneDictionary.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = suiPrefPaneDictionary.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3127FEF3C8002DE248 /* Utilities.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = Utilities.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3227FEF3C8002DE248 /* Pane.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = Pane.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3327FEF3C8002DE248 /* Localization.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = Localization.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3427FEF3C8002DE248 /* PreferencesStyle.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = PreferencesStyle.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3527FEF3C8002DE248 /* PreferencePane.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = PreferencePane.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5B7F225C2808501000DDD3CB /* KeyHandler_HandleInput.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_HandleInput.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 
5B949BD82816DC5400D87B5D /* LineReader.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LineReader.swift; sourceTree = ""; usesTabs = 1; }; + 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LMConsolidator.swift; sourceTree = ""; usesTabs = 1; }; + 5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmUserOverride.swift; sourceTree = ""; usesTabs = 1; }; + 5BA0DF2F2817857D009E73BB /* lmCore.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmCore.swift; sourceTree = ""; usesTabs = 1; }; + 5BA9FD0A27FEDB6B002DE248 /* suiPrefPaneGeneral.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = suiPrefPaneGeneral.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD0B27FEDB6B002DE248 /* suiPrefPaneKeyboard.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = suiPrefPaneKeyboard.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD0C27FEDB6B002DE248 /* ctlPrefUI.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlPrefUI.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD0D27FEDB6B002DE248 /* suiPrefPaneExperience.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = suiPrefPaneExperience.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD0E27FEDB6B002DE248 /* suiPrefPaneDictionary.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; 
lineEnding = 0; path = suiPrefPaneDictionary.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3127FEF3C8002DE248 /* Utilities.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = Utilities.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3227FEF3C8002DE248 /* Pane.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = Pane.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3327FEF3C8002DE248 /* Localization.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = Localization.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3427FEF3C8002DE248 /* PreferencesStyle.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = PreferencesStyle.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3527FEF3C8002DE248 /* PreferencePane.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = PreferencePane.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5BA9FD3627FEF3C8002DE248 /* Preferences.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = Preferences.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3727FEF3C8002DE248 /* SegmentedControlStyleViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = SegmentedControlStyleViewController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3827FEF3C8002DE248 /* ToolbarItemStyleViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; 
lineEnding = 0; path = ToolbarItemStyleViewController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3927FEF3C8002DE248 /* Container.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = Container.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3A27FEF3C8002DE248 /* PreferencesStyleController.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = PreferencesStyleController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3B27FEF3C8002DE248 /* PreferencesWindowController.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = PreferencesWindowController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3C27FEF3C8002DE248 /* Section.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = Section.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD3D27FEF3C8002DE248 /* PreferencesTabViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = PreferencesTabViewController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BA9FD8A28006B41002DE248 /* VDKComboBox.swift */ = {isa = PBXFileReference; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = VDKComboBox.swift; sourceTree = ""; tabWidth = 2; }; - 5BAEFACF28012565001F42C9 /* mgrLangModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = mgrLangModel.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BB802D927FABA8300CF1C19 /* ctlInputMethod_Menu.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = 
sourcecode.swift; lineEnding = 0; path = ctlInputMethod_Menu.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3727FEF3C8002DE248 /* SegmentedControlStyleViewController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = SegmentedControlStyleViewController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3827FEF3C8002DE248 /* ToolbarItemStyleViewController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ToolbarItemStyleViewController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3927FEF3C8002DE248 /* Container.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = Container.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3A27FEF3C8002DE248 /* PreferencesStyleController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = PreferencesStyleController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3B27FEF3C8002DE248 /* PreferencesWindowController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = PreferencesWindowController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3C27FEF3C8002DE248 /* Section.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = Section.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD3D27FEF3C8002DE248 /* PreferencesTabViewController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = PreferencesTabViewController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BA9FD8A28006B41002DE248 /* 
VDKComboBox.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = VDKComboBox.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BAEFACF28012565001F42C9 /* mgrLangModel.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = mgrLangModel.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BB802D927FABA8300CF1C19 /* ctlInputMethod_Menu.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlInputMethod_Menu.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5BBBB75D27AED54C0023B93A /* Beep.m4a */ = {isa = PBXFileReference; lastKnownFileType = file; path = Beep.m4a; sourceTree = ""; }; 5BBBB75E27AED54C0023B93A /* Fart.m4a */ = {isa = PBXFileReference; lastKnownFileType = file; path = Fart.m4a; sourceTree = ""; }; 5BBBB76627AED5DB0023B93A /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/frmNonModalAlertWindow.xib; sourceTree = ""; }; @@ -247,21 +258,24 @@ 5BBBB77127AED70B0023B93A /* MenuIcon-SCVIM.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "MenuIcon-SCVIM.png"; sourceTree = ""; }; 5BBBB77227AED70B0023B93A /* MenuIcon-TCVIM.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "MenuIcon-TCVIM.png"; sourceTree = ""; }; 5BBBB77727AEDB290023B93A /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/MainMenu.strings; sourceTree = ""; }; - 5BBBB77927AEDC690023B93A /* clsSFX.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = clsSFX.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BBBB77927AEDC690023B93A /* clsSFX.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 
4; indentWidth = 2; lineEnding = 0; path = clsSFX.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5BBD627827B6C4D900271480 /* Update-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "Update-Info.plist"; sourceTree = ""; }; 5BC0AAC927F58472002D33E9 /* pkgPreInstall.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = pkgPreInstall.sh; sourceTree = ""; }; 5BC0AACA27F58472002D33E9 /* pkgPostInstall.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = pkgPostInstall.sh; sourceTree = ""; }; 5BC2652127E04B7B00700291 /* uninstall.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; lineEnding = 0; path = uninstall.sh; sourceTree = ""; }; + 5BC4F6372819FF4500A2514A /* KeyHandlerSputnik.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = KeyHandlerSputnik.swift; sourceTree = ""; usesTabs = 1; }; + 5BD0113A28180D6100609769 /* LMInstantiator.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LMInstantiator.swift; sourceTree = ""; usesTabs = 1; }; + 5BD0113C2818543900609769 /* KeyHandler_Kernel.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = KeyHandler_Kernel.swift; sourceTree = ""; usesTabs = 1; }; 5BD05B8027B22F3C004C4F1D /* char-kanji-cns.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = "char-kanji-cns.txt"; path = "Data/components/common/char-kanji-cns.txt"; sourceTree = ""; }; 5BD05BB827B2A429004C4F1D /* vChewingPhraseEditor.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = vChewingPhraseEditor.app; sourceTree = BUILT_PRODUCTS_DIR; }; 5BD05BC627B2A42A004C4F1D /* vChewingPhraseEditor.entitlements */ = {isa = PBXFileReference; lastKnownFileType = 
text.plist.entitlements; path = vChewingPhraseEditor.entitlements; sourceTree = ""; }; 5BD05C5C27B2BBA9004C4F1D /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; - 5BD05C6127B2BBEF004C4F1D /* Document.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = Document.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BD05C6227B2BBEF004C4F1D /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = AppDelegate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BD05C6327B2BBEF004C4F1D /* Content.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = Content.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = WindowController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ViewController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 5BDC1CF927FDF1310052C2B9 /* apiUpdate.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = apiUpdate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BD05C6127B2BBEF004C4F1D /* Document.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = Document.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BD05C6227B2BBEF004C4F1D /* AppDelegate.swift */ = {isa = 
PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = AppDelegate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BD05C6327B2BBEF004C4F1D /* Content.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = Content.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BD05C6427B2BBEF004C4F1D /* WindowController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = WindowController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BD05C6527B2BBEF004C4F1D /* ViewController.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ViewController.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BDC1CF927FDF1310052C2B9 /* apiUpdate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = apiUpdate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5BDCBB4227B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/MainMenu.strings"; sourceTree = ""; }; 5BDCBB4327B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/frmAboutWindow.strings"; sourceTree = ""; }; 5BDCBB4527B4F6C600D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/frmPrefWindow.strings"; sourceTree = ""; }; @@ -271,25 +285,27 @@ 5BDCBB4A27B4F6C700D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/Localizable.strings"; sourceTree = ""; }; 5BDCBB4B27B4F6C700D0CC59 /* zh-Hant */ = {isa = PBXFileReference; 
lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/frmAboutWindow.strings"; sourceTree = ""; }; 5BDCBB4D27B4F6C700D0CC59 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/InfoPlist.strings"; sourceTree = ""; }; - 5BE78BD827B37750005EA1BE /* ctlAboutWindow.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ctlAboutWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BE33BEC28169B5D00CE5BB0 /* KeyValueStructs.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = KeyValueStructs.swift; sourceTree = ""; usesTabs = 1; }; + 5BE78BD827B37750005EA1BE /* ctlAboutWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlAboutWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5BE78BDB27B37764005EA1BE /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/frmAboutWindow.xib; sourceTree = ""; }; 5BE78BDF27B37968005EA1BE /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/frmAboutWindow.strings; sourceTree = ""; }; - 5BF8423027BAA942008E7E4C /* vChewingKanjiConverter.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = vChewingKanjiConverter.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 5BE8A8C4281EE65300197741 /* CONTRIBUTING.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = CONTRIBUTING.md; sourceTree = ""; }; + 5BF8423027BAA942008E7E4C /* vChewingKanjiConverter.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 
vChewingKanjiConverter.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5BFDF48C27B51867009523B6 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "zh-Hant.lproj/Main.strings"; sourceTree = ""; }; 6A0D4EA215FC0D2D00ABF4B3 /* vChewing.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = vChewing.app; sourceTree = BUILT_PRODUCTS_DIR; }; 6A0D4EF515FC0DA600ABF4B3 /* IME-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "IME-Info.plist"; sourceTree = ""; }; 6A0D4EF615FC0DA600ABF4B3 /* vChewing-Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "vChewing-Prefix.pch"; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1415FC0EB100ABF4B3 /* Bigram.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Bigram.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = BlockReadingBuilder.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Gramambular.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1715FC0EB100ABF4B3 /* Grid.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Grid.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = KeyValuePair.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = LanguageModel.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1A15FC0EB100ABF4B3 /* Node.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Node.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1B15FC0EB100ABF4B3 /* NodeAnchor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = NodeAnchor.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1C15FC0EB100ABF4B3 /* Span.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Span.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1D15FC0EB100ABF4B3 /* Unigram.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Unigram.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6A0D4F1E15FC0EB100ABF4B3 /* Walker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Walker.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; + 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 6_Bigram.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 1_BlockReadingBuilder.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 0_Megrez.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 
4; indentWidth = 2; lineEnding = 0; path = 2_Grid.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 7_KeyValuePair.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 5_LanguageModel.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 4_Node.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 3_NodeAnchor.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 3_Span.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 6_Unigram.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + 6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 1_Walker.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 6A0D4F2015FC0EB100ABF4B3 /* Mandarin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = Mandarin.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 
6A0D4F2115FC0EB100ABF4B3 /* Mandarin.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Mandarin.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6A15B32421A51F2300B92CD3 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = ""; }; @@ -302,39 +318,26 @@ 6ACA41EF15FC1D9000935EF6 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/Localizable.strings; sourceTree = ""; }; 6ACA41F215FC1D9000935EF6 /* Installer-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; name = "Installer-Info.plist"; path = "Installer/Installer-Info.plist"; sourceTree = SOURCE_ROOT; }; 6ACA41F315FC1D9000935EF6 /* Installer-Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "Installer-Prefix.pch"; path = "Installer/Installer-Prefix.pch"; sourceTree = SOURCE_ROOT; }; - 6ACC3D3C27914AAB00F1B140 /* KeyValueBlobReader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = KeyValueBlobReader.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - 6ACC3D3E27914F2400F1B140 /* KeyValueBlobReader.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = KeyValueBlobReader.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessPhraseDB.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessPhraseDB.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6ACC3D422793701600F1B140 /* ParselessLM.cpp */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = ParselessLM.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; 6ACC3D432793701600F1B140 /* ParselessLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = ParselessLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355D6278D7409005E5CBD /* mgrLangModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = mgrLangModel.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355D7278D7409005E5CBD /* mgrLangModel.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = mgrLangModel.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355D9278E6D17005E5CBD /* LMInstantiator.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = LMInstantiator.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355DA278E6D17005E5CBD /* LMInstantiator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = LMInstantiator.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355DC278EA3ED005E5CBD /* UserPhrasesLM.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = UserPhrasesLM.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D41355DD278EA3ED005E5CBD /* UserPhrasesLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserPhrasesLM.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D427A9BF25ED28CC005D43E0 /* vChewing-Bridging-Header.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = "vChewing-Bridging-Header.h"; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D427F76B278CA1BA004A2160 /* AppDelegate.swift 
*/ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = AppDelegate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - D44FB74B2792189A003C80A6 /* PhraseReplacementMap.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = PhraseReplacementMap.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D44FB74C2792189A003C80A6 /* PhraseReplacementMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = PhraseReplacementMap.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D456576D279E4F7B00DF6BC9 /* InputHandler.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = InputHandler.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - D461B791279DAC010070E734 /* InputState.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = InputState.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - D47B92BF27972AC800458394 /* main.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = main.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - D47D73AA27A6CAE600255A50 /* AssociatedPhrases.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = AssociatedPhrases.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D47D73AB27A6CAE600255A50 /* AssociatedPhrases.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = AssociatedPhrases.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D47F7DCD278BFB57002F9DD7 /* ctlPrefWindow.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding 
= 0; path = ctlPrefWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - D47F7DCF278C0897002F9DD7 /* ctlNonModalAlertWindow.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ctlNonModalAlertWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + D427F76B278CA1BA004A2160 /* AppDelegate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = AppDelegate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + D456576D279E4F7B00DF6BC9 /* InputHandler.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputHandler.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + D461B791279DAC010070E734 /* InputState.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = InputState.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + D47B92BF27972AC800458394 /* main.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = main.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + D47F7DCD278BFB57002F9DD7 /* ctlPrefWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlPrefWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + D47F7DCF278C0897002F9DD7 /* ctlNonModalAlertWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlNonModalAlertWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserOverrideModel.h; sourceTree = ""; tabWidth = 
4; usesTabs = 0; }; D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = UserOverrideModel.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D495583A27A5C6C4006ADE1C /* mgrLangModel_Privates.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = mgrLangModel_Privates.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D4A13D5927A59D5C003BE359 /* ctlInputMethod.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ctlInputMethod.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + D4A13D5927A59D5C003BE359 /* ctlInputMethod.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlInputMethod.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; D4E33D8927A838CF006DB1CF /* Base */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = Base; path = Base.lproj/Localizable.strings; sourceTree = ""; }; D4E33D8E27A838F0006DB1CF /* Base */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = Base; path = Base.lproj/InfoPlist.strings; sourceTree = ""; }; D4E569DA27A34CC100AC2CEF /* KeyHandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = KeyHandler.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D4E569DB27A34CC100AC2CEF /* KeyHandler.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; lineEnding = 0; path = KeyHandler.mm; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D4F0BBDE279AF1AF0071253C /* ArchiveUtil.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = ArchiveUtil.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; - 
D4F0BBE0279AF8B30071253C /* AppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = AppDelegate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + D4F0BBDE279AF1AF0071253C /* ArchiveUtil.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ArchiveUtil.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; + D4F0BBE0279AF8B30071253C /* AppDelegate.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = AppDelegate.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; D4F0BBE2279B08900071253C /* Chronosphere.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = Chronosphere.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D4F0BBE3279B08900071253C /* Chronosphere.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; lineEnding = 0; path = Chronosphere.m; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; /* End PBXFileReference section */ @@ -377,6 +380,7 @@ isa = PBXGroup; children = ( 5BC2652127E04B7B00700291 /* uninstall.sh */, + 5BE8A8C4281EE65300197741 /* CONTRIBUTING.md */, 5B18BA6F27C7BD8B0056EB19 /* LICENSE-CHS.txt */, 5B18BA7427C7BD8C0056EB19 /* LICENSE-CHT.txt */, 5B18BA7327C7BD8C0056EB19 /* LICENSE-JPN.txt */, @@ -386,19 +390,22 @@ name = MiscRootFiles; sourceTree = ""; }; - 5B4D47B627C9186900220DDC /* InstantiatedModels */ = { + 5B407308281672610023DFFF /* SubLMs */ = { isa = PBXGroup; children = ( - 5B62A32B27AE78B000A19448 /* CNSLM.h */, - 5B8F43ED27C9BC220069AC27 /* SymbolLM.h */, - 5B7111C727DEF9FF00444310 /* UserSymbolLM.h */, + 5B407309281672610023DFFF /* lmAssociates.swift */, + 5BA0DF2F2817857D009E73BB /* lmCore.swift */, + 5B40730A281672610023DFFF /* lmReplacements.swift */, + 5BA0DF2E2817857D009E73BB /* 
lmUserOverride.swift */, + 5B5D28AB281EA1E800523D4D /* lmLite.swift */, ); - path = InstantiatedModels; + path = SubLMs; sourceTree = ""; }; 5B62A30127AE732800A19448 /* 3rdParty */ = { isa = PBXGroup; children = ( + 5B949BD72816DC4400D87B5D /* LineReader */, 5B707CE627D9F43E0099EF99 /* OpenCCBridge */, 5B62A30227AE733500A19448 /* OVMandarin */, 5BA9FCEA27FED652002DE248 /* SindreSorhus */, @@ -432,12 +439,12 @@ D461B791279DAC010070E734 /* InputState.swift */, 5B782EC3280C243C007276DE /* KeyHandler_HandleCandidate.swift */, 5B7F225C2808501000DDD3CB /* KeyHandler_HandleInput.swift */, + 5BD0113C2818543900609769 /* KeyHandler_Kernel.swift */, 5B61B0C9280BEFD4002E3CFA /* KeyHandler_Misc.swift */, 5B3133BE280B229700A4A505 /* KeyHandler_States.swift */, + 5BC4F6372819FF4500A2514A /* KeyHandlerSputnik.swift */, D4E569DA27A34CC100AC2CEF /* KeyHandler.h */, D4E569DB27A34CC100AC2CEF /* KeyHandler.mm */, - 6ACC3D3E27914F2400F1B140 /* KeyValueBlobReader.cpp */, - 6ACC3D3C27914AAB00F1B140 /* KeyValueBlobReader.h */, 5B62A33727AE79CD00A19448 /* NSStringUtils.swift */, 5BF8423027BAA942008E7E4C /* vChewingKanjiConverter.swift */, ); @@ -457,8 +464,7 @@ isa = PBXGroup; children = ( 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */, - 5B62A32627AE77BB00A19448 /* LMConsolidator.h */, - 5B62A32727AE77BB00A19448 /* LMConsolidator.mm */, + 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */, ); path = FileHandlers; sourceTree = ""; @@ -479,7 +485,7 @@ 5B62A32327AE756800A19448 /* LanguageParsers */ = { isa = PBXGroup; children = ( - 6A0D4F1315FC0EB100ABF4B3 /* Gramambular */, + 6A0D4F1315FC0EB100ABF4B3 /* Megrez */, ); path = LanguageParsers; sourceTree = ""; @@ -487,37 +493,26 @@ 5B62A32427AE757300A19448 /* LangModelRelated */ = { isa = PBXGroup; children = ( - 5B62A32527AE758000A19448 /* SubLanguageModels */, - D41355DA278E6D17005E5CBD /* LMInstantiator.h */, - D41355D9278E6D17005E5CBD /* LMInstantiator.mm */, - D495583A27A5C6C4006ADE1C /* mgrLangModel_Privates.h */, - 
D41355D6278D7409005E5CBD /* mgrLangModel.h */, - D41355D7278D7409005E5CBD /* mgrLangModel.mm */, + 5B62A32527AE758000A19448 /* OldFileReferences */, + 5B407308281672610023DFFF /* SubLMs */, + 5BE33BEC28169B5D00CE5BB0 /* KeyValueStructs.swift */, + 5BD0113A28180D6100609769 /* LMInstantiator.swift */, 5BAEFACF28012565001F42C9 /* mgrLangModel.swift */, ); path = LangModelRelated; sourceTree = ""; }; - 5B62A32527AE758000A19448 /* SubLanguageModels */ = { + 5B62A32527AE758000A19448 /* OldFileReferences */ = { isa = PBXGroup; children = ( - 5B4D47B627C9186900220DDC /* InstantiatedModels */, - D47D73AB27A6CAE600255A50 /* AssociatedPhrases.h */, - D47D73AA27A6CAE600255A50 /* AssociatedPhrases.mm */, - 5B62A32C27AE78B000A19448 /* CoreLM.h */, - 5B62A32D27AE78B000A19448 /* CoreLM.mm */, 6ACC3D422793701600F1B140 /* ParselessLM.cpp */, 6ACC3D432793701600F1B140 /* ParselessLM.h */, 6ACC3D402793701600F1B140 /* ParselessPhraseDB.cpp */, 6ACC3D412793701600F1B140 /* ParselessPhraseDB.h */, - D44FB74C2792189A003C80A6 /* PhraseReplacementMap.h */, - D44FB74B2792189A003C80A6 /* PhraseReplacementMap.mm */, D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */, D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */, - D41355DD278EA3ED005E5CBD /* UserPhrasesLM.h */, - D41355DC278EA3ED005E5CBD /* UserPhrasesLM.mm */, ); - path = SubLanguageModels; + path = OldFileReferences; sourceTree = ""; }; 5B62A33027AE78E500A19448 /* Resources */ = { @@ -620,6 +615,14 @@ path = OpenCCBridge; sourceTree = ""; }; + 5B949BD72816DC4400D87B5D /* LineReader */ = { + isa = PBXGroup; + children = ( + 5B949BD82816DC5400D87B5D /* LineReader.swift */, + ); + path = LineReader; + sourceTree = ""; + }; 5BA9FCEA27FED652002DE248 /* SindreSorhus */ = { isa = PBXGroup; children = ( @@ -800,22 +803,22 @@ path = Modules; sourceTree = ""; }; - 6A0D4F1315FC0EB100ABF4B3 /* Gramambular */ = { + 6A0D4F1315FC0EB100ABF4B3 /* Megrez */ = { isa = PBXGroup; children = ( - 6A0D4F1415FC0EB100ABF4B3 /* Bigram.h */, - 
6A0D4F1515FC0EB100ABF4B3 /* BlockReadingBuilder.h */, - 6A0D4F1615FC0EB100ABF4B3 /* Gramambular.h */, - 6A0D4F1715FC0EB100ABF4B3 /* Grid.h */, - 6A0D4F1815FC0EB100ABF4B3 /* KeyValuePair.h */, - 6A0D4F1915FC0EB100ABF4B3 /* LanguageModel.h */, - 6A0D4F1A15FC0EB100ABF4B3 /* Node.h */, - 6A0D4F1B15FC0EB100ABF4B3 /* NodeAnchor.h */, - 6A0D4F1C15FC0EB100ABF4B3 /* Span.h */, - 6A0D4F1D15FC0EB100ABF4B3 /* Unigram.h */, - 6A0D4F1E15FC0EB100ABF4B3 /* Walker.h */, + 6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */, + 6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */, + 6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */, + 6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */, + 6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */, + 6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */, + 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */, + 6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */, + 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */, + 6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */, + 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */, ); - path = Gramambular; + path = Megrez; sourceTree = ""; }; 6ACA41E715FC1D9000935EF6 /* Installer */ = { @@ -1069,40 +1072,46 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 5B38F59D281E2E49007D5F5D /* 4_Node.swift in Sources */, + 5B38F5A3281E2E49007D5F5D /* 3_Span.swift in Sources */, + 5B40730C281672610023DFFF /* lmAssociates.swift in Sources */, 5B707CE827D9F4590099EF99 /* OpenCCBridge.swift in Sources */, D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */, + 5B5D28AC281EA1E900523D4D /* lmLite.swift in Sources */, 5BA9FD4527FEF3C9002DE248 /* ToolbarItemStyleViewController.swift in Sources */, + 5BA0DF322817857D009E73BB /* lmCore.swift in Sources */, 5BA9FD4127FEF3C8002DE248 /* PreferencesStyle.swift in Sources */, 5B7F225D2808501000DDD3CB /* KeyHandler_HandleInput.swift in Sources */, 5BA9FD1227FEDB6B002DE248 /* suiPrefPaneExperience.swift in Sources */, - 6ACC3D442793701600F1B140 /* ParselessPhraseDB.cpp in Sources */, 
D461B792279DAC010070E734 /* InputState.swift in Sources */, 5B62A33D27AE7CC100A19448 /* ctlAboutWindow.swift in Sources */, D47B92C027972AD100458394 /* main.swift in Sources */, - D44FB74D2792189A003C80A6 /* PhraseReplacementMap.mm in Sources */, D4A13D5A27A59F0B003BE359 /* ctlInputMethod.swift in Sources */, 5BA9FD4827FEF3C9002DE248 /* PreferencesWindowController.swift in Sources */, + 5BC4F6382819FF4500A2514A /* KeyHandlerSputnik.swift in Sources */, + 5BD0113B28180D6100609769 /* LMInstantiator.swift in Sources */, D4E569DC27A34D0E00AC2CEF /* KeyHandler.mm in Sources */, 5BA9FD4627FEF3C9002DE248 /* Container.swift in Sources */, D47F7DD0278C0897002F9DD7 /* ctlNonModalAlertWindow.swift in Sources */, - 5B62A32F27AE78B000A19448 /* CoreLM.mm in Sources */, - 5BE78BE027B38804005EA1BE /* LMConsolidator.mm in Sources */, + 5B38F5A2281E2E49007D5F5D /* 0_Megrez.swift in Sources */, + 5B949BD92816DC5400D87B5D /* LineReader.swift in Sources */, D456576E279E4F7B00DF6BC9 /* InputHandler.swift in Sources */, 5BA9FD1027FEDB6B002DE248 /* suiPrefPaneKeyboard.swift in Sources */, 5B3133BF280B229700A4A505 /* KeyHandler_States.swift in Sources */, 5BA9FD4327FEF3C8002DE248 /* Preferences.swift in Sources */, 5BA9FD4427FEF3C8002DE248 /* SegmentedControlStyleViewController.swift in Sources */, D47F7DCE278BFB57002F9DD7 /* ctlPrefWindow.swift in Sources */, + 5BD0113D2818543900609769 /* KeyHandler_Kernel.swift in Sources */, 5BA9FD4227FEF3C8002DE248 /* PreferencePane.swift in Sources */, + 5BA0DF312817857D009E73BB /* lmUserOverride.swift in Sources */, 5BA9FD8B28006B41002DE248 /* VDKComboBox.swift in Sources */, - D47D73AC27A6CAE600255A50 /* AssociatedPhrases.mm in Sources */, 5BA9FD4A27FEF3C9002DE248 /* PreferencesTabViewController.swift in Sources */, 5B62A34A27AE7CD900A19448 /* NotifierController.swift in Sources */, 5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */, - D41355DB278E6D17005E5CBD /* LMInstantiator.mm in Sources */, 5B62A32927AE77D100A19448 /* 
FSEventStreamHelper.swift in Sources */, - D47F7DD3278C1263002F9DD7 /* UserOverrideModel.cpp in Sources */, + 5B38F59B281E2E49007D5F5D /* 7_KeyValuePair.swift in Sources */, 5B62A33627AE795800A19448 /* mgrPrefs.swift in Sources */, + 5B38F5A4281E2E49007D5F5D /* 5_LanguageModel.swift in Sources */, 5BAEFAD028012565001F42C9 /* mgrLangModel.swift in Sources */, 5B782EC4280C243C007276DE /* KeyHandler_HandleCandidate.swift in Sources */, 5B62A33827AE79CD00A19448 /* NSStringUtils.swift in Sources */, @@ -1110,25 +1119,30 @@ 5BA9FD4927FEF3C9002DE248 /* Section.swift in Sources */, 5BA9FD3E27FEF3C8002DE248 /* Utilities.swift in Sources */, 5BA9FD1127FEDB6B002DE248 /* ctlPrefUI.swift in Sources */, + 5B38F59C281E2E49007D5F5D /* 2_Grid.swift in Sources */, + 5B40730D281672610023DFFF /* lmReplacements.swift in Sources */, + 5B38F59E281E2E49007D5F5D /* 6_Bigram.swift in Sources */, 5B62A33227AE792F00A19448 /* InputSourceHelper.swift in Sources */, + 5BE33BED28169B5D00CE5BB0 /* KeyValueStructs.swift in Sources */, 5B5E535227EF261400C6AA1E /* IME.swift in Sources */, 5B62A34927AE7CD900A19448 /* TooltipController.swift in Sources */, 6A0D4F4515FC0EB100ABF4B3 /* Mandarin.cpp in Sources */, 5B61B0CA280BEFD4002E3CFA /* KeyHandler_Misc.swift in Sources */, + 5B38F59A281E2E49007D5F5D /* 6_Unigram.swift in Sources */, + 5B38F5A0281E2E49007D5F5D /* 1_Walker.swift in Sources */, 5B62A34827AE7CD900A19448 /* ctlCandidateVertical.swift in Sources */, 5BA9FD4027FEF3C8002DE248 /* Localization.swift in Sources */, 5BA9FD1327FEDB6B002DE248 /* suiPrefPaneDictionary.swift in Sources */, - 6ACC3D452793701600F1B140 /* ParselessLM.cpp in Sources */, 5BBBB77A27AEDC690023B93A /* clsSFX.swift in Sources */, 5BA9FD4727FEF3C9002DE248 /* PreferencesStyleController.swift in Sources */, 5BF8423127BAA942008E7E4C /* vChewingKanjiConverter.swift in Sources */, + 5B949BDB2816DDBC00D87B5D /* LMConsolidator.swift in Sources */, + 5B38F59F281E2E49007D5F5D /* 3_NodeAnchor.swift in Sources */, 
5B62A34627AE7CD900A19448 /* ctlCandidateHorizontal.swift in Sources */, 5B62A34727AE7CD900A19448 /* ctlCandidate.swift in Sources */, 5BA9FD3F27FEF3C8002DE248 /* Pane.swift in Sources */, 5BB802DA27FABA8300CF1C19 /* ctlInputMethod_Menu.swift in Sources */, - D41355DE278EA3ED005E5CBD /* UserPhrasesLM.mm in Sources */, - 6ACC3D3F27914F2400F1B140 /* KeyValueBlobReader.cpp in Sources */, - D41355D8278D74B5005E5CBD /* mgrLangModel.mm in Sources */, + 5B38F5A1281E2E49007D5F5D /* 1_BlockReadingBuilder.swift in Sources */, 5BDC1CFA27FDF1310052C2B9 /* apiUpdate.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1490,6 +1504,7 @@ "$(OTHER_CFLAGS)", "-fcxx-modules", ); + SWIFT_COMPILATION_MODE = wholemodule; }; name = Release; };