diff --git a/Source/Modules/ControllerModules/KeyHandler.h b/Source/Modules/ControllerModules/KeyHandler.h index 020bad09..9ab4eb47 100644 --- a/Source/Modules/ControllerModules/KeyHandler.h +++ b/Source/Modules/ControllerModules/KeyHandler.h @@ -28,22 +28,10 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @class InputHandler; @class InputState; +@class KeyHandlerSputnik; NS_ASSUME_NONNULL_BEGIN -typedef NSString *const InputMode NS_TYPED_ENUM; -extern InputMode imeModeCHT; -extern InputMode imeModeCHS; -extern InputMode imeModeNULL; - -struct BufferStatePackage -{ - NSString *composedText; - NSInteger cursorIndex; - NSString *resultOfRear; - NSString *resultOfFront; -}; - @class KeyHandler; @protocol KeyHandlerDelegate @@ -54,48 +42,20 @@ struct BufferStatePackage @interface KeyHandler : NSObject -- (BOOL)isBuilderEmpty; - -- (void)fixNodeWithValue:(NSString *)value NS_SWIFT_NAME(fixNode(value:)); -- (void)clear; - -@property(strong, nonatomic) InputMode inputMode; @property(weak, nonatomic) id delegate; // The following items need to be exposed to Swift: -- (void)_walk; -- (NSString *)_popOverflowComposingTextAndWalk; -- (NSArray *)_currentReadings; - (BOOL)checkWhetherToneMarkerConfirmsPhoneticReadingBuffer; - (BOOL)chkKeyValidity:(UniChar)value; -- (BOOL)ifLangModelHasUnigramsForKey:(NSString *)reading; - (BOOL)isPhoneticReadingBufferEmpty; - (BOOL)isPrintable:(UniChar)charCode; -- (NSArray *)buildAssociatePhraseArrayWithKey:(NSString *)key; -- (NSArray *)getCandidatesArray; -- (NSInteger)getKeyLengthAtIndexZero; -- (NSInteger)getBuilderCursorIndex; -- (NSInteger)getBuilderLength; -- (NSInteger)getPackagedCursorIndex; -- (NSString *)getComposedText; - (NSString *)getCompositionFromPhoneticReadingBuffer; -- (NSString *)getStrLocationResult:(BOOL)isFront NS_SWIFT_NAME(getStrLocationResult(isFront:)); - (NSString *)getSyllableCompositionFromPhoneticReadingBuffer; - (void)clearPhoneticReadingBuffer; - 
(void)combinePhoneticReadingBufferKey:(UniChar)charCode; -- (void)createNewBuilder; -- (void)dealWithOverrideModelSuggestions; -- (void)deleteBuilderReadingAfterCursor; -- (void)deleteBuilderReadingInFrontOfCursor; - (void)doBackSpaceToPhoneticReadingBuffer; - (void)ensurePhoneticParser; -- (void)insertReadingToBuilderAtCursor:(NSString *)reading; -- (void)packageBufferStateMaterials; -- (void)removeBuilderAndReset:(BOOL)shouldReset; -- (void)setBuilderCursorIndex:(NSInteger)value; -- (void)setInputModesToLM:(BOOL)isCHS; -- (void)syncBaseLMPrefs; @end diff --git a/Source/Modules/ControllerModules/KeyHandler.mm b/Source/Modules/ControllerModules/KeyHandler.mm index 256d4168..c50efb22 100644 --- a/Source/Modules/ControllerModules/KeyHandler.mm +++ b/Source/Modules/ControllerModules/KeyHandler.mm @@ -25,134 +25,23 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #import "KeyHandler.h" -#import "Gramambular.h" -#import "LMInstantiator.h" #import "Mandarin.h" -#import "UserOverrideModel.h" -#import "mgrLangModel_Privates.h" #import "vChewing-Swift.h" #import -InputMode imeModeCHS = ctlInputMethod.kIMEModeCHS; -InputMode imeModeCHT = ctlInputMethod.kIMEModeCHT; -InputMode imeModeNULL = ctlInputMethod.kIMEModeNULL; - -typedef vChewing::LMInstantiator BaseLM; -typedef vChewing::UserOverrideModel UserOverrideLM; -typedef Gramambular::BlockReadingBuilder BlockBuilder; typedef Mandarin::BopomofoReadingBuffer PhoneticBuffer; -static const double kEpsilon = 0.000001; - -NSString *packagedComposedText; -NSInteger packagedCursorIndex; -NSString *packagedResultOfRear; -NSString *packagedResultOfFront; - -// NON-SWIFTIFIABLE -static double FindHighestScore(const std::vector &nodes, double epsilon) -{ - double highestScore = 0.0; - for (auto ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) - { - double score = ni->node->highestUnigramScore(); - if (score > highestScore) - highestScore = score; - } - return highestScore + epsilon; -} - -// 
NON-SWIFTIFIABLE -class NodeAnchorDescendingSorter -{ - public: - bool operator()(const Gramambular::NodeAnchor &a, const Gramambular::NodeAnchor &b) const - { - return a.node->key().length() > b.node->key().length(); - } -}; - -// if DEBUG is defined, a DOT file (GraphViz format) will be written to the -// specified path every time the grid is walked -#if DEBUG -static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; -#endif - // NON-SWIFTIFIABLE @implementation KeyHandler { // the reading buffer that takes user input PhoneticBuffer *_bpmfReadingBuffer; - - // language model - BaseLM *_languageModel; - - // user override model - UserOverrideLM *_userOverrideModel; - - // the grid (lattice) builder for the unigrams (and bigrams) - BlockBuilder *_builder; - - // latest walked path (trellis) using the Viterbi algorithm - std::vector _walkedNodes; - - NSString *_inputMode; } @synthesize delegate = _delegate; -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -// VARIABLE: "_inputMode" -- (NSString *)inputMode -{ - return _inputMode; -} - -// NON-SWIFTIFIABLE -- (BOOL)isBuilderEmpty -{ - return (_builder->grid().width() == 0); -} - -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -// VARIABLE: "_inputMode" -- (void)setInputMode:(NSString *)value -{ - // 下面這句的「isKindOfClass」是做類型檢查, - // 為了應對出現輸入法 plist 被改壞掉這樣的極端情況。 - BOOL isCHS = [value isKindOfClass:[NSString class]] && [value isEqual:imeModeCHS]; - - // 緊接著將新的簡繁輸入模式提報給 ctlInputMethod: - ctlInputMethod.currentInputMode = isCHS ? imeModeCHS : imeModeCHT; - mgrPrefs.mostRecentInputMode = ctlInputMethod.currentInputMode; - - // 拿當前的 _inputMode 與 ctlInputMethod 的提報結果對比,不同的話則套用新設定: - if (![_inputMode isEqualToString:ctlInputMethod.currentInputMode]) - { - // Reinitiate language models if necessary - [self setInputModesToLM:isCHS]; - - // Synchronize the sub-languageModel state settings to the new LM. 
- [self syncBaseLMPrefs]; - - [self removeBuilderAndReset:YES]; - - if (![self isPhoneticReadingBufferEmpty]) - [self clearPhoneticReadingBuffer]; - } - _inputMode = ctlInputMethod.currentInputMode; -} - -// NON-SWIFTIFIABLE: Required by an ObjC(pp)-based class. -- (void)dealloc -{ // clean up everything - if (_bpmfReadingBuffer) - delete _bpmfReadingBuffer; - if (_builder) - [self removeBuilderAndReset:NO]; -} - -// NON-SWIFTIFIABLE: Not placeable in swift extensions. +// Not migrable as long as there's still ObjC++ components needed. +// Will deprecate this once Mandarin gets Swiftified. - (instancetype)init { self = [super init]; @@ -164,262 +53,14 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; return self; } -// NON-SWIFTIFIABLE -- (void)fixNodeWithValue:(NSString *)value -{ - NSInteger cursorIndex = [self getActualCandidateCursorIndex]; - std::string stringValue(value.UTF8String); - Gramambular::NodeAnchor selectedNode = _builder->grid().fixNodeSelectedCandidate(cursorIndex, stringValue); - if (!mgrPrefs.useSCPCTypingMode) - { // 不要針對逐字選字模式啟用臨時半衰記憶模型。 - // If the length of the readings and the characters do not match, - // it often means it is a special symbol and it should not be stored - // in the user override model. 
- BOOL addToOverrideModel = YES; - if (selectedNode.spanningLength != [value count]) - addToOverrideModel = NO; - - if (addToOverrideModel) - { - double score = selectedNode.node->scoreForCandidate(stringValue); - if (score <= -12) // 威注音的 SymbolLM 的 Score 是 -12。 - addToOverrideModel = NO; - } - if (addToOverrideModel) - _userOverrideModel->observe(_walkedNodes, cursorIndex, stringValue, [[NSDate date] timeIntervalSince1970]); - } - [self _walk]; - - if (mgrPrefs.moveCursorAfterSelectingCandidate) - { - size_t nextPosition = 0; - for (auto node : _walkedNodes) - { - if (nextPosition >= cursorIndex) - break; - nextPosition += node.spanningLength; - } - if (nextPosition <= [self getBuilderLength]) - [self setBuilderCursorIndex:nextPosition]; - } +// NON-SWIFTIFIABLE: Mandarin +- (void)dealloc +{ // clean up everything + if (_bpmfReadingBuffer) + delete _bpmfReadingBuffer; } -// NON-SWIFTIFIABLE -- (void)clear -{ - [self clearPhoneticReadingBuffer]; - _builder->clear(); - _walkedNodes.clear(); -} - -#pragma mark - States Building - -// NON-SWIFTIFIABLE -- (void)packageBufferStateMaterials -{ - // We gather the data through this function, package it, - // and sent it to our Swift extension to build the InputState.Inputting there. - // Otherwise, ObjC++ always bugs for "expecting a type". - - // "updating the composing buffer" means to request the client to "refresh" the text input buffer - // with our "composing text" - NSMutableString *composingBuffer = [[NSMutableString alloc] init]; - NSInteger composedStringCursorIndex = 0; - - // we must do some Unicode codepoint counting to find the actual cursor location for the client - // i.e. 
we need to take UTF-16 into consideration, for which a surrogate pair takes 2 UniChars - // locations - - size_t readingCursorIndex = 0; - size_t builderCursorIndex = [self getBuilderCursorIndex]; - - NSString *resultOfRear = @""; - NSString *resultOfFront = @""; - - for (std::vector::iterator wi = _walkedNodes.begin(), we = _walkedNodes.end(); wi != we; - ++wi) - { - if ((*wi).node) - { - std::string nodeStr = (*wi).node->currentKeyValue().value; - NSString *valueString = [NSString stringWithUTF8String:nodeStr.c_str()]; - [composingBuffer appendString:valueString]; - - NSArray *splited = [valueString split]; - NSInteger codepointCount = splited.count; - - // this re-aligns the cursor index in the composed string - // (the actual cursor on the screen) with the builder's logical - // cursor (reading) cursor; each built node has a "spanning length" - // (e.g. two reading blocks has a spanning length of 2), and we - // accumulate those lengths to calculate the displayed cursor - // index - size_t spanningLength = (*wi).spanningLength; - if (readingCursorIndex + spanningLength <= builderCursorIndex) - { - composedStringCursorIndex += [valueString length]; - readingCursorIndex += spanningLength; - } - else - { - if (codepointCount == spanningLength) - { - for (size_t i = 0; i < codepointCount && readingCursorIndex < builderCursorIndex; i++) - { - composedStringCursorIndex += [splited[i] length]; - readingCursorIndex++; - } - } - else - { - if (readingCursorIndex < builderCursorIndex) - { - composedStringCursorIndex += [valueString length]; - readingCursorIndex += spanningLength; - if (readingCursorIndex > builderCursorIndex) - { - readingCursorIndex = builderCursorIndex; - } - if (builderCursorIndex == 0) - { - resultOfFront = - [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex].c_str()]; - } - else if (builderCursorIndex >= _builder->readings().size()) - { - resultOfRear = [NSString - 
stringWithUTF8String:_builder->readings()[_builder->readings().size() - 1].c_str()]; - } - else - { - resultOfFront = - [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex].c_str()]; - resultOfRear = - [NSString stringWithUTF8String:_builder->readings()[builderCursorIndex - 1].c_str()]; - } - } - } - } - } - } - - // now we gather all the info, we separate the composing buffer to two parts, head and tail, - // and insert the reading text (the Mandarin syllable) in between them; - // the reading text is what the user is typing - NSString *head = [composingBuffer substringToIndex:composedStringCursorIndex]; - NSString *reading = [self getCompositionFromPhoneticReadingBuffer]; - NSString *tail = [composingBuffer substringFromIndex:composedStringCursorIndex]; - NSString *composedText = [head stringByAppendingString:[reading stringByAppendingString:tail]]; - NSInteger cursorIndex = composedStringCursorIndex + [reading length]; - - packagedComposedText = composedText; - packagedCursorIndex = cursorIndex; - packagedResultOfRear = resultOfRear; - packagedResultOfFront = resultOfFront; -} - -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -- (NSString *)getStrLocationResult:(BOOL)isFront -{ - if (isFront) - return packagedResultOfFront; - else - return packagedResultOfRear; -} - -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -- (NSString *)getComposedText -{ - return packagedComposedText; -} - -// NON-SWIFTIFIABLE DUE TO VARIABLE AVAILABLE ACCESSIBILITY RANGE. -- (NSInteger)getPackagedCursorIndex -{ - return packagedCursorIndex; -} - -// NON-SWIFTIFIABLE -- (void)_walk -{ - // retrieve the most likely trellis, i.e. 
a Maximum Likelihood Estimation - // of the best possible Mandarin characters given the input syllables, - // using the Viterbi algorithm implemented in the Gramambular library - Gramambular::Walker walker(&_builder->grid()); - - // the reverse walk traces the trellis from the end - _walkedNodes = walker.reverseWalk(_builder->grid().width()); - - // then we reverse the nodes so that we get the forward-walked nodes - reverse(_walkedNodes.begin(), _walkedNodes.end()); - - // if DEBUG is defined, a GraphViz file is written to kGraphVizOutputfile -#if DEBUG - std::string dotDump = _builder->grid().dumpDOT(); - NSString *dotStr = [NSString stringWithUTF8String:dotDump.c_str()]; - NSError *error = nil; - - BOOL __unused success = [dotStr writeToFile:kGraphVizOutputfile - atomically:YES - encoding:NSUTF8StringEncoding - error:&error]; -#endif -} - -// NON-SWIFTIFIABLE -- (NSString *)_popOverflowComposingTextAndWalk -{ - // in an ideal world, we can as well let the user type forever, - // but because the Viterbi algorithm has a complexity of O(N^2), - // the walk will become slower as the number of nodes increase, - // therefore we need to auto-commit overflown texts which usually - // lose their influence over the whole MLE anyway -- so that when - // the user type along, the already composed text in the rear side - // of the buffer will be committed (i.e. "popped out"). 
- - NSString *poppedText = @""; - NSInteger composingBufferSize = mgrPrefs.composingBufferSize; - - if (_builder->grid().width() > (size_t)composingBufferSize) - { - if (_walkedNodes.size() > 0) - { - Gramambular::NodeAnchor &anchor = _walkedNodes[0]; - poppedText = [NSString stringWithUTF8String:anchor.node->currentKeyValue().value.c_str()]; - _builder->removeHeadReadings(anchor.spanningLength); - } - } - - [self _walk]; - return poppedText; -} - -// NON-SWIFTIFIABLE -- (NSArray *)_currentReadings -{ - NSMutableArray *readingsArray = [[NSMutableArray alloc] init]; - std::vector v = _builder->readings(); - for (std::vector::iterator it_i = v.begin(); it_i != v.end(); ++it_i) - [readingsArray addObject:[NSString stringWithUTF8String:it_i->c_str()]]; - return readingsArray; -} - -// NON-SWIFTIFIABLE -- (NSArray *)buildAssociatePhraseArrayWithKey:(NSString *)key -{ - NSMutableArray *array = [NSMutableArray array]; - std::string cppKey = std::string(key.UTF8String); - if (_languageModel->hasAssociatedPhrasesForKey(cppKey)) - { - std::vector phrases = _languageModel->associatedPhrasesForKey(cppKey); - for (auto phrase : phrases) - { - NSString *item = [[NSString alloc] initWithUTF8String:phrase.c_str()]; - [array addObject:item]; - } - } - return array; -} +// MARK: - 目前到這裡了 #pragma mark - 必須用 ObjCpp 處理的部分: Mandarin @@ -504,129 +145,6 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot"; } } -#pragma mark - 必須用 ObjCpp 處理的部分: Gramambular 等 - -- (void)removeBuilderAndReset:(BOOL)shouldReset -{ - if (_builder) - { - delete _builder; - if (shouldReset) - [self createNewBuilder]; - } - else if (shouldReset) - [self createNewBuilder]; -} - -- (void)createNewBuilder -{ - _builder = new Gramambular::BlockReadingBuilder(_languageModel); - // Each Mandarin syllable is separated by a hyphen. - _builder->setJoinSeparator("-"); -} - -- (void)setInputModesToLM:(BOOL)isCHS -{ - _languageModel = isCHS ? 
[mgrLangModel lmCHS] : [mgrLangModel lmCHT]; - _userOverrideModel = isCHS ? [mgrLangModel userOverrideModelCHS] : [mgrLangModel userOverrideModelCHT]; -} - -- (void)syncBaseLMPrefs -{ - if (_languageModel) - { - _languageModel->setPhraseReplacementEnabled(mgrPrefs.phraseReplacementEnabled); - _languageModel->setSymbolEnabled(mgrPrefs.symbolInputEnabled); - _languageModel->setCNSEnabled(mgrPrefs.cns11643Enabled); - } -} - -// ---- - -- (BOOL)ifLangModelHasUnigramsForKey:(NSString *)reading -{ - return _languageModel->hasUnigramsForKey((std::string)[reading UTF8String]); -} - -- (void)insertReadingToBuilderAtCursor:(NSString *)reading -{ - _builder->insertReadingAtCursor((std::string)[reading UTF8String]); -} - -- (void)dealWithOverrideModelSuggestions -{ - // 這一整段都太 C++ 且只出現一次,就整個端過來了。 - // 拆開封裝的話,只會把問題搞得更麻煩而已。 - std::string overrideValue = (mgrPrefs.useSCPCTypingMode) - ? "" - : _userOverrideModel->suggest(_walkedNodes, [self getBuilderCursorIndex], - [[NSDate date] timeIntervalSince1970]); - - if (!overrideValue.empty()) - { - NSInteger cursorIndex = [self getActualCandidateCursorIndex]; - std::vector nodes = mgrPrefs.setRearCursorMode - ? 
_builder->grid().nodesCrossingOrEndingAt(cursorIndex) - : _builder->grid().nodesEndingAt(cursorIndex); - double highestScore = FindHighestScore(nodes, kEpsilon); - _builder->grid().overrideNodeScoreForSelectedCandidate(cursorIndex, overrideValue, - static_cast(highestScore)); - } -} - -- (void)setBuilderCursorIndex:(NSInteger)value -{ - _builder->setCursorIndex(value); -} - -- (NSInteger)getBuilderCursorIndex -{ - return _builder->cursorIndex(); -} - -- (NSInteger)getBuilderLength -{ - return _builder->length(); -} - -- (void)deleteBuilderReadingInFrontOfCursor -{ - _builder->deleteReadingBeforeCursor(); -} - -- (void)deleteBuilderReadingAfterCursor -{ - _builder->deleteReadingAfterCursor(); -} - -- (NSArray *)getCandidatesArray -{ - NSMutableArray *candidatesArray = [[NSMutableArray alloc] init]; - - NSInteger cursorIndex = [self getActualCandidateCursorIndex]; - std::vector nodes = mgrPrefs.setRearCursorMode - ? _builder->grid().nodesCrossingOrEndingAt(cursorIndex) - : _builder->grid().nodesEndingAt(cursorIndex); - - // sort the nodes, so that longer nodes (representing longer phrases) are placed at the top of the candidate list - stable_sort(nodes.begin(), nodes.end(), NodeAnchorDescendingSorter()); - - // then use the C++ trick to retrieve the candidates for each node at/crossing the cursor - for (std::vector::iterator ni = nodes.begin(), ne = nodes.end(); ni != ne; ++ni) - { - const std::vector &candidates = (*ni).node->candidates(); - for (std::vector::const_iterator ci = candidates.begin(), ce = candidates.end(); - ci != ce; ++ci) - [candidatesArray addObject:[NSString stringWithUTF8String:(*ci).value.c_str()]]; - } - return candidatesArray; -} - -- (NSInteger)getKeyLengthAtIndexZero -{ - return [NSString stringWithUTF8String:_walkedNodes[0].node->currentKeyValue().value.c_str()].length; -} - #pragma mark - 威注音認為有必要單獨拿出來處理的部分,交給 Swift 則有些困難。 - (BOOL)isPrintable:(UniChar)charCode diff --git a/Source/Modules/ControllerModules/KeyHandlerSputnik.swift 
b/Source/Modules/ControllerModules/KeyHandlerSputnik.swift index fa1b6cb3..c6a933cf 100644 --- a/Source/Modules/ControllerModules/KeyHandlerSputnik.swift +++ b/Source/Modules/ControllerModules/KeyHandlerSputnik.swift @@ -29,6 +29,10 @@ import Megrez // MARK: - KeyHandler Sputnik. +// Swift Extension 不允許直接存放這些變數,所以就寫了這個衛星型別。 +// 一旦 Mandarin 模組被 Swift 化,整個 KeyHandler 就可以都用 Swift。 +// 屆時會考慮將該衛星型別內的變數與常數都挪回 KeyHandler_Kernel 內。 + class KeyHandlerSputnik: NSObject { static let kEpsilon: Double = 0.000001 static var inputMode: String = "" diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift index 39dac754..4f23b995 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift @@ -160,17 +160,17 @@ import Cocoa let reading = getSyllableCompositionFromPhoneticReadingBuffer() if !ifLangModelHasUnigrams(forKey: reading) { - IME.prtDebugIntel("B49C0979") + IME.prtDebugIntel("B49C0979:語彙庫內無「\(reading)」的匹配記錄。") errorCallback() stateCallback(buildInputtingState()) return true } // ... and insert it into the lattice grid... - insertReadingToBuilder(atCursor: reading) + insertReadingToBuilderAtCursor(reading: reading) // ... then walk the lattice grid... - let poppedText = _popOverflowComposingTextAndWalk() + let poppedText = popOverflowComposingTextAndWalk() // ... get and tweak override model suggestion if possible... 
dealWithOverrideModelSuggestions() @@ -233,8 +233,8 @@ import Cocoa stateCallback(InputState.Committing(poppedText: " ")) stateCallback(InputState.Empty()) } else if ifLangModelHasUnigrams(forKey: " ") { - insertReadingToBuilder(atCursor: " ") - let poppedText = _popOverflowComposingTextAndWalk() + insertReadingToBuilderAtCursor(reading: " ") + let poppedText = popOverflowComposingTextAndWalk() let inputting = buildInputtingState() inputting.poppedText = poppedText stateCallback(inputting) @@ -330,8 +330,8 @@ import Cocoa if !input.isOptionHold { if ifLangModelHasUnigrams(forKey: "_punctuation_list") { if isPhoneticReadingBufferEmpty() { - insertReadingToBuilder(atCursor: "_punctuation_list") - let poppedText: String! = _popOverflowComposingTextAndWalk() + insertReadingToBuilderAtCursor(reading: "_punctuation_list") + let poppedText: String! = popOverflowComposingTextAndWalk() let inputting = buildInputtingState() inputting.poppedText = poppedText stateCallback(inputting) @@ -354,7 +354,7 @@ import Cocoa // MARK: Punctuation - // if nothing is matched, see if it's a punctuation key for current layout. + // If nothing is matched, see if it's a punctuation key for current layout. var punctuationNamePrefix = "" diff --git a/Source/Modules/ControllerModules/KeyHandler_Kernel.swift b/Source/Modules/ControllerModules/KeyHandler_Kernel.swift index 59d81461..f6e55991 100644 --- a/Source/Modules/ControllerModules/KeyHandler_Kernel.swift +++ b/Source/Modules/ControllerModules/KeyHandler_Kernel.swift @@ -25,7 +25,286 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ import Cocoa +import Megrez -@objc extension KeyHandler { - +public enum InputMode: String { + case imeModeCHS = "org.atelierInmu.inputmethod.vChewing.IMECHS" + case imeModeCHT = "org.atelierInmu.inputmethod.vChewing.IMECHT" + case imeModeNULL = "" +} + +// MARK: - Delegate. + +// MARK: - Kernel. 
+ +extension KeyHandler { + var kEpsilon: Double { + KeyHandlerSputnik.kEpsilon + } + + var inputMode: InputMode { + get { + switch KeyHandlerSputnik.inputMode { + case "org.atelierInmu.inputmethod.vChewing.IMECHS": + return InputMode.imeModeCHS + case "org.atelierInmu.inputmethod.vChewing.IMECHT": + return InputMode.imeModeCHT + default: + return InputMode.imeModeNULL + } + } + set { setInputMode(newValue.rawValue) } + } + + // TODO: Will reenable this once Mandarin gets Swiftified. + // override public init() { + // self.ensurePhoneticParser() + // self.setInputMode(ctlInputMethod.currentInputMode) + // super.init() + // } + + func clear() { + clearPhoneticReadingBuffer() + KeyHandlerSputnik.builder.clear() + KeyHandlerSputnik.walkedNodes.removeAll() + } + + // 這個函數得獨立出來給 ObjC 使用。 + @objc func setInputMode(_ value: String) { + // 下面這句的「isKindOfClass」是做類型檢查, + // 為了應對出現輸入法 plist 被改壞掉這樣的極端情況。 + let isCHS: Bool = (value == InputMode.imeModeCHS.rawValue) + + // 緊接著將新的簡繁輸入模式提報給 ctlInputMethod: + ctlInputMethod.currentInputMode = isCHS ? InputMode.imeModeCHS.rawValue : InputMode.imeModeCHT.rawValue + mgrPrefs.mostRecentInputMode = ctlInputMethod.currentInputMode + + // 拿當前的 _inputMode 與 ctlInputMethod 的提報結果對比,不同的話則套用新設定: + if KeyHandlerSputnik.inputMode != ctlInputMethod.currentInputMode { + // Reinitiate language models if necessary + setInputModesToLM(isCHS: isCHS) + + // Synchronize the sub-languageModel state settings to the new LM. + syncBaseLMPrefs() + + // Create new grid builder. + createNewBuilder() + + if !isPhoneticReadingBufferEmpty() { + clearPhoneticReadingBuffer() + } + } + // 直接寫到衛星模組內,省得類型轉換 + KeyHandlerSputnik.inputMode = ctlInputMethod.currentInputMode + } + + // MARK: - Functions dealing with Megrez. + + func walk() { + // Retrieve the most likely trellis, i.e. 
a Maximum Likelihood Estimation + // of the best possible Mandarin characters given the input syllables, + // using the Viterbi algorithm implemented in the Megrez library + let walker = Megrez.Walker(grid: KeyHandlerSputnik.builder.grid()) + + // the reverse walk traces the trellis from the end + let walked: [Megrez.NodeAnchor] = walker.reverseWalk(at: KeyHandlerSputnik.builder.grid().width()) + + // then we use ".reversed()" to reverse the nodes so that we get the forward-walked nodes + KeyHandlerSputnik.walkedNodes.removeAll() + KeyHandlerSputnik.walkedNodes.append(contentsOf: walked.reversed()) + } + + func popOverflowComposingTextAndWalk() -> String { + // In ideal situations we can allow users to type infinitely in a buffer. + // However, the Viterbi algorithm has a complexity of O(N^2), so the walk will + // become slower as the number of nodes increases. Therefore, we need to + // auto-commit overflown texts which usually lose their influence over + // the whole MLE anyway -- so that when the user types along, the already + // composed text in the rear side of the buffer will be committed out. + // (i.e. popped out.) 
+ + var poppedText = "" + if KeyHandlerSputnik.builder.grid().width() > mgrPrefs.composingBufferSize { + if KeyHandlerSputnik.walkedNodes.count > 0 { + let anchor: Megrez.NodeAnchor = KeyHandlerSputnik.walkedNodes[0] + if let theNode = anchor.node { + poppedText = theNode.currentKeyValue().value + } + KeyHandlerSputnik.builder.removeHeadReadings(count: anchor.spanningLength) + } + } + walk() + return poppedText + } + + func buildAssociatePhraseArray(withKey key: String) -> [String] { + var arrResult: [String] = [] + if KeyHandlerSputnik.languageModel.hasAssociatedPhrasesForKey(key) { + arrResult.append(contentsOf: KeyHandlerSputnik.languageModel.associatedPhrasesForKey(key)) + } + return arrResult + } + + func fixNode(value: String) { + let cursorIndex: Int = getActualCandidateCursorIndex() + let selectedNode: Megrez.NodeAnchor = KeyHandlerSputnik.builder.grid().fixNodeSelectedCandidate( + location: cursorIndex, value: value + ) + // 不要針對逐字選字模式啟用臨時半衰記憶模型。 + if !mgrPrefs.useSCPCTypingMode { + // If the length of the readings and the characters do not match, + // it often means it is a special symbol and it should not be stored + // in the user override model. 
+ var addToUserOverrideModel = true + if selectedNode.spanningLength != value.count { + addToUserOverrideModel = false + } + if addToUserOverrideModel { + if let theNode = selectedNode.node { + // 威注音的 SymbolLM 的 Score 是 -12。 + if theNode.scoreFor(candidate: value) <= -12 { + addToUserOverrideModel = false + } + } + } + if addToUserOverrideModel { + KeyHandlerSputnik.userOverrideModel.observe( + walkedNodes: KeyHandlerSputnik.walkedNodes, cursorIndex: cursorIndex, candidate: value, + timestamp: NSDate().timeIntervalSince1970 + ) + } + } + walk() + + if mgrPrefs.moveCursorAfterSelectingCandidate { + var nextPosition = 0 + for node in KeyHandlerSputnik.walkedNodes { + if nextPosition >= cursorIndex { break } + nextPosition += node.spanningLength + } + if nextPosition <= getBuilderLength() { + setBuilderCursorIndex(value: nextPosition) + } + } + } + + func getCandidatesArray() -> [String] { + var arrCandidates: [String] = [] + var arrNodes: [Megrez.NodeAnchor] = [] + arrNodes.append(contentsOf: getRawNodes()) + + /// 原理:nodes 這個回饋結果包含一堆子陣列,分別對應不同詞長的候選字。 + /// 這裡先對陣列排序、讓最長候選字的子陣列的優先權最高。 + /// 這個過程不會傷到子陣列內部的排序。 + if !arrNodes.isEmpty { + // sort the nodes, so that longer nodes (representing longer phrases) + // are placed at the top of the candidate list + arrNodes.sort { $0.keyLength > $1.keyLength } + + // then use the Swift trick to retrieve the candidates for each node at/crossing the cursor + for currentNodeAnchor in arrNodes { + if let currentNode = currentNodeAnchor.node { + for currentCandidate in currentNode.candidates() { + arrCandidates.append(currentCandidate.value) + } + } + } + } + return arrCandidates + } + + func dealWithOverrideModelSuggestions() { + var overrideValue = + mgrPrefs.useSCPCTypingMode + ? 
"" + : KeyHandlerSputnik.userOverrideModel.suggest( + walkedNodes: KeyHandlerSputnik.walkedNodes, cursorIndex: getBuilderCursorIndex(), + timestamp: NSDate().timeIntervalSince1970 + ) + + if !overrideValue.isEmpty { + KeyHandlerSputnik.builder.grid().overrideNodeScoreForSelectedCandidate( + location: getActualCandidateCursorIndex(), + value: &overrideValue, + overridingScore: findHighestScore(nodes: getRawNodes(), epsilon: kEpsilon) + ) + } + } + + func findHighestScore(nodes: [Megrez.NodeAnchor], epsilon: Double) -> Double { + var highestScore: Double = 0 + for currentAnchor in nodes { + if let theNode = currentAnchor.node { + let score = theNode.highestUnigramScore() + if score > highestScore { + highestScore = score + } + } + } + return highestScore + epsilon + } + + // MARK: - Extracted methods and functions. + + func isBuilderEmpty() -> Bool { KeyHandlerSputnik.builder.grid().width() == 0 } + + func getRawNodes() -> [Megrez.NodeAnchor] { + /// 警告:不要對游標前置風格使用 nodesCrossing,否則會導致游標行為與 macOS 內建注音輸入法不一致。 + /// 微軟新注音輸入法的游標後置風格也是不允許 nodeCrossing 的,但目前 Megrez 暫時缺乏對該特性的支援。 + /// 所以暫時只能將威注音的游標後置風格描述成「跟 Windows 版雅虎奇摩注音一致」。 + mgrPrefs.setRearCursorMode + ? KeyHandlerSputnik.builder.grid().nodesCrossingOrEndingAt(location: getActualCandidateCursorIndex()) + : KeyHandlerSputnik.builder.grid().nodesEndingAt(location: getActualCandidateCursorIndex()) + } + + func setInputModesToLM(isCHS: Bool) { + KeyHandlerSputnik.languageModel = isCHS ? mgrLangModel.lmCHS : mgrLangModel.lmCHT + KeyHandlerSputnik.userOverrideModel = isCHS ? 
mgrLangModel.uomCHS : mgrLangModel.uomCHT
+  }
+
+  func syncBaseLMPrefs() {
+    KeyHandlerSputnik.languageModel.isPhraseReplacementEnabled = mgrPrefs.phraseReplacementEnabled
+    KeyHandlerSputnik.languageModel.isCNSEnabled = mgrPrefs.cns11643Enabled
+    KeyHandlerSputnik.languageModel.isSymbolEnabled = mgrPrefs.symbolInputEnabled
+  }
+
+  func createNewBuilder() {
+    KeyHandlerSputnik.builder = Megrez.BlockReadingBuilder(lm: KeyHandlerSputnik.languageModel)
+    // Each Mandarin syllable is separated by a hyphen.
+    KeyHandlerSputnik.builder.setJoinSeparator(separator: "-")
+  }
+
+  func currentReadings() -> [String] { KeyHandlerSputnik.builder.readings() }
+
+  func ifLangModelHasUnigrams(forKey reading: String) -> Bool {
+    KeyHandlerSputnik.languageModel.hasUnigramsFor(key: reading)
+  }
+
+  func insertReadingToBuilderAtCursor(reading: String) {
+    KeyHandlerSputnik.builder.insertReadingAtCursor(reading: reading)
+  }
+
+  func setBuilderCursorIndex(value: Int) {
+    KeyHandlerSputnik.builder.setCursorIndex(newIndex: value)
+  }
+
+  func getBuilderCursorIndex() -> Int {
+    KeyHandlerSputnik.builder.cursorIndex()
+  }
+
+  func getBuilderLength() -> Int {
+    KeyHandlerSputnik.builder.length()
+  }
+
+  func deleteBuilderReadingInFrontOfCursor() {
+    KeyHandlerSputnik.builder.deleteReadingBeforeCursor()
+  }
+
+  func deleteBuilderReadingAfterCursor() {
+    KeyHandlerSputnik.builder.deleteReadingAfterCursor()
+  }
+
+  func getKeyLengthAtIndexZero() -> Int {
+    KeyHandlerSputnik.walkedNodes.first?.node?.currentKeyValue().value.count ?? 0  // 0 if there is no first node
+  }
+}
diff --git a/Source/Modules/ControllerModules/KeyHandler_Misc.swift b/Source/Modules/ControllerModules/KeyHandler_Misc.swift
index 5b79ed19..ffa6ec7c 100644
--- a/Source/Modules/ControllerModules/KeyHandler_Misc.swift
+++ b/Source/Modules/ControllerModules/KeyHandler_Misc.swift
@@ -43,7 +43,7 @@ import Cocoa
       && (cursorIndex < getBuilderLength()))
       || cursorIndex == 0
     {
-      if cursorIndex == 0 && !mgrPrefs.setRearCursorMode {
+      if cursorIndex == 0, !mgrPrefs.setRearCursorMode {
         cursorIndex += getKeyLengthAtIndexZero()
       } else {
         cursorIndex += 1
diff --git a/Source/Modules/ControllerModules/KeyHandler_States.swift b/Source/Modules/ControllerModules/KeyHandler_States.swift
index d531b770..e36be109 100644
--- a/Source/Modules/ControllerModules/KeyHandler_States.swift
+++ b/Source/Modules/ControllerModules/KeyHandler_States.swift
@@ -25,6 +25,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 
 import Cocoa
+import Megrez
 
 // MARK: - § State managements.
 
@@ -32,45 +33,67 @@ import Cocoa
   // MARK: - 構築狀態(State Building)
 
   func buildInputtingState() -> InputState.Inputting {
-    // 觸發資料封裝更新,否則下文拿到的資料會是過期的。
-    packageBufferStateMaterials()
-    // 獲取封裝好的資料
-    let composedText = getComposedText()
-    let packagedCursorIndex = UInt(getPackagedCursorIndex())
-    let resultOfRear = getStrLocationResult(isFront: false)
-    let resultOfFront = getStrLocationResult(isFront: true)
+    // Builds the Inputting state, i.e. the "composing text" that the client is asked to refresh
+    // its text input buffer with, plus the cursor position inside that text (in UTF-16 units).
+    var composingBuffer = ""
+    var composedStringCursorIndex = 0
 
-    // 初期化狀態
-    let newState = InputState.Inputting(composingBuffer: composedText, cursorIndex: packagedCursorIndex)
+    var readingCursorIndex: size_t = 0
+    let builderCursorIndex: size_t = getBuilderCursorIndex()
 
-    // 組建提示文本
-    var tooltip = ""
+    // The client addresses the buffer in UTF-16 units (a surrogate pair takes 2 UniChars), so
+    // the reading-based builder cursor must be converted by walking the nodes and summing
+    // NSString lengths. This is inherited from the ObjC++ version of this class and might be
+    // unnecessary in Swift, but this deduction requires further experiments.
+    for walkedNode in KeyHandlerSputnik.walkedNodes {
+      if let theNode = walkedNode.node {
+        let strNodeValue = theNode.currentKeyValue().value
+        composingBuffer += strNodeValue
 
-    // 如果在用特定的模式的話,則始終顯示對應的提示。
-    // TODO: 該功能無法正常運作,暫時註釋掉。
-    // if ctlInputMethod.currentKeyHandler.inputMode == InputMode.imeModeCHT {
-    // if mgrPrefs.chineseConversionEnabled && !mgrPrefs.shiftJISShinjitaiOutputEnabled {
-    // tooltip = String(
-    // format: "%@%@%@", NSLocalizedString("Force KangXi Writing", comment: ""), "\n",
-    // NSLocalizedString("NotificationSwitchON", comment: ""))
-    // } else if mgrPrefs.shiftJISShinjitaiOutputEnabled {
-    // tooltip = String(
-    // format: "%@%@%@", NSLocalizedString("JIS Shinjitai Output", comment: ""), "\n",
-    // NSLocalizedString("NotificationSwitchON", comment: ""))
-    // }
-    // }
+        let arrSplit: [NSString] = (strNodeValue as NSString).split()
+        let codepointCount = arrSplit.count
 
-    // 備註:因為目前的輸入法已經有了 NSString Emoji 支援,所以這個工具提示可能不會出現了。
-    // 姑且留下來用作萬一時的偵錯用途。
-    if resultOfRear != "" || resultOfFront != "" {
-      tooltip = String(
-        format: NSLocalizedString("Cursor is between \"%@\" and \"%@\".", comment: ""),
-        resultOfFront, resultOfRear
-      )
+        // This re-aligns the cursor index in the composed string
+        // (the actual cursor on the screen) with the builder's logical
+        // (reading) cursor; each built node has a "spanning length"
+        // (e.g. two reading blocks have a spanning length of 2), and we
+        // accumulate those lengths to calculate the displayed cursor
+        // index.
+        let spanningLength: Int = walkedNode.spanningLength
+        if readingCursorIndex + spanningLength <= builderCursorIndex {
+          composedStringCursorIndex += (strNodeValue as NSString).length
+          readingCursorIndex += spanningLength
+        } else {
+          if codepointCount == spanningLength {
+            var i = 0
+            while i < codepointCount, readingCursorIndex < builderCursorIndex {
+              composedStringCursorIndex += arrSplit[i].length
+              readingCursorIndex += 1
+              i += 1
+            }
+          } else {
+            if readingCursorIndex < builderCursorIndex {
+              composedStringCursorIndex += (strNodeValue as NSString).length
+              readingCursorIndex += spanningLength
+              if readingCursorIndex > builderCursorIndex {
+                readingCursorIndex = builderCursorIndex
+              }
+            }
+          }
+        }
+      }
     }
+    // Now, we gather all the intel, separate the composing buffer into two parts (head and tail),
+    // and insert the reading text (the Mandarin syllable) in between them. The reading text is what
+    // the user is typing; its length is counted in UTF-16 units to match composedStringCursorIndex.
 
-    newState.tooltip = tooltip
-    return newState
+    let head = String((composingBuffer as NSString).substring(to: composedStringCursorIndex))
+    let reading = getCompositionFromPhoneticReadingBuffer()
+    let tail = String((composingBuffer as NSString).substring(from: composedStringCursorIndex))
+    let composedText = head + reading + tail
+    let cursorIndex = composedStringCursorIndex + reading.utf16.count
+
+    return InputState.Inputting(composingBuffer: composedText, cursorIndex: UInt(cursorIndex))
   }
 
   // MARK: - 用以生成候選詞陣列及狀態
@@ -102,7 +125,8 @@ import Cocoa
   ) -> InputState.AssociatedPhrases! 
{ // 上一行必須要用驚嘆號,否則 Xcode 會誤導你砍掉某些實際上必需的語句。 InputState.AssociatedPhrases( - candidates: buildAssociatePhraseArray(withKey: key), useVerticalMode: useVerticalMode) + candidates: buildAssociatePhraseArray(withKey: key), useVerticalMode: useVerticalMode + ) } // MARK: - 用以處理就地新增自訂語彙時的行為 @@ -191,8 +215,8 @@ import Cocoa } if isPhoneticReadingBufferEmpty() { - insertReadingToBuilder(atCursor: customPunctuation) - let poppedText = _popOverflowComposingTextAndWalk() + insertReadingToBuilderAtCursor(reading: customPunctuation) + let poppedText = popOverflowComposingTextAndWalk() let inputting = buildInputtingState() inputting.poppedText = poppedText stateCallback(inputting) @@ -256,7 +280,7 @@ import Cocoa return false } - let readings: [String] = _currentReadings() + let readings: [String] = currentReadings() let composingBuffer = (IME.areWeUsingOurOwnPhraseEditor) ? readings.joined(separator: "-") @@ -283,7 +307,7 @@ import Cocoa if isPhoneticReadingBufferEmpty() { if getBuilderCursorIndex() >= 0 { deleteBuilderReadingInFrontOfCursor() - _walk() + walk() } else { IME.prtDebugIntel("9D69908D") errorCallback() @@ -316,7 +340,7 @@ import Cocoa if isPhoneticReadingBufferEmpty() { if getBuilderCursorIndex() != getBuilderLength() { deleteBuilderReadingAfterCursor() - _walk() + walk() let inputting = buildInputtingState() // 這裡不用「count > 0」,因為該整數變數只要「!isEmpty」那就必定滿足這個條件。 if !inputting.composingBuffer.isEmpty { @@ -375,7 +399,7 @@ import Cocoa } if getBuilderCursorIndex() != 0 { - setBuilderCursorIndex(0) + setBuilderCursorIndex(value: 0) stateCallback(buildInputtingState()) } else { IME.prtDebugIntel("66D97F90") @@ -405,7 +429,7 @@ import Cocoa } if getBuilderCursorIndex() != getBuilderLength() { - setBuilderCursorIndex(getBuilderLength()) + setBuilderCursorIndex(value: getBuilderLength()) stateCallback(buildInputtingState()) } else { IME.prtDebugIntel("9B69908E") @@ -475,7 +499,7 @@ import Cocoa composingBuffer: currentState.composingBuffer, cursorIndex: 
currentState.cursorIndex, markerIndex: UInt(nextPosition), - readings: _currentReadings() + readings: currentReadings() ) marking.tooltipForInputting = currentState.tooltip stateCallback(marking) @@ -486,7 +510,7 @@ import Cocoa } } else { if getBuilderCursorIndex() < getBuilderLength() { - setBuilderCursorIndex(getBuilderCursorIndex() + 1) + setBuilderCursorIndex(value: getBuilderCursorIndex() + 1) stateCallback(buildInputtingState()) } else { IME.prtDebugIntel("A96AAD58") @@ -526,7 +550,7 @@ import Cocoa composingBuffer: currentState.composingBuffer, cursorIndex: currentState.cursorIndex, markerIndex: UInt(previousPosition), - readings: _currentReadings() + readings: currentReadings() ) marking.tooltipForInputting = currentState.tooltip stateCallback(marking) @@ -537,7 +561,7 @@ import Cocoa } } else { if getBuilderCursorIndex() > 0 { - setBuilderCursorIndex(getBuilderCursorIndex() - 1) + setBuilderCursorIndex(value: getBuilderCursorIndex() - 1) stateCallback(buildInputtingState()) } else { IME.prtDebugIntel("7045E6F3")