From dc041e0a000e40a304db8ffad62e8dc4642e2e96 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sat, 7 May 2022 01:50:00 +0800 Subject: [PATCH 01/20] ctlIME // Force using vertical candidate when typing kaomoji. (#74) --- Source/Modules/IMEModules/ctlInputMethod.swift | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Source/Modules/IMEModules/ctlInputMethod.swift b/Source/Modules/IMEModules/ctlInputMethod.swift index 6edbbb5b..a01c275e 100644 --- a/Source/Modules/IMEModules/ctlInputMethod.swift +++ b/Source/Modules/IMEModules/ctlInputMethod.swift @@ -407,10 +407,19 @@ extension ctlInputMethod { candidates.sort { $0.count > $1.count } - // If there is a candidate which is too long, we use the vertical - // candidate list window automatically. - if candidates.first?.count ?? 0 > 8 { - // return true // 禁用這一項。威注音回頭會換候選窗格。 + if let candidateFirst = candidates.first { + // If there is a candidate which is too long, we use the vertical + // candidate list window automatically. + if candidateFirst.count > 8 { + // return true // 禁用這一項。威注音回頭會換候選窗格。 + } + } + // 如果是顏文字選單的話,則強行使用縱排候選字窗。 + // 有些顏文字會比較長,所以這裡用 for 判斷。 + for candidate in candidates { + if ["顏文字", "颜文字"].contains(candidate), mgrPrefs.symbolInputEnabled { + return true + } } return false }() From c0c0e39c9b4efc767d1eaaf6328f7cddccdb20ba Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sat, 7 May 2022 02:08:10 +0800 Subject: [PATCH 02/20] PrefUI // Fix a bug of not syncing prefs to LMInstantiator. (#76) --- Source/UI/PrefUI/suiPrefPaneDictionary.swift | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/UI/PrefUI/suiPrefPaneDictionary.swift b/Source/UI/PrefUI/suiPrefPaneDictionary.swift index b1f7a635..ba9e5170 100644 --- a/Source/UI/PrefUI/suiPrefPaneDictionary.swift +++ b/Source/UI/PrefUI/suiPrefPaneDictionary.swift @@ -116,6 +116,7 @@ struct suiPrefPaneDictionary: View { Toggle(LocalizedStringKey("Enable CNS11643 Support (2022-01-27)"), isOn: $selEnableCNS11643) .onChange(of: selEnableCNS11643) { value in mgrPrefs.cns11643Enabled = value + mgrLangModel.setCNSEnabled(value) } Toggle( LocalizedStringKey("Enable symbol input support (incl. certain emoji symbols)"), @@ -123,6 +124,7 @@ struct suiPrefPaneDictionary: View { ) .onChange(of: selEnableSymbolInputSupport) { value in mgrPrefs.symbolInputEnabled = value + mgrLangModel.setSymbolEnabled(value) } } } From d8fd39862595e74257c057e35f25b95fecc6c530 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sat, 7 May 2022 10:05:41 +0800 Subject: [PATCH 03/20] KeyHandler // Modifies the behavior of sole tone mark. (UPR315) (#79) --- Source/3rdParty/OVMandarin/Composer.hh | 3 ++- Source/3rdParty/OVMandarin/Composer.mm | 7 ++++++- Source/3rdParty/OVMandarin/Mandarin.h | 6 ++++++ .../KeyHandler_HandleInput.swift | 21 ++++++++++++++++--- .../ControllerModules/KeyHandler_States.swift | 6 ++++-- 5 files changed, 36 insertions(+), 7 deletions(-) diff --git a/Source/3rdParty/OVMandarin/Composer.hh b/Source/3rdParty/OVMandarin/Composer.hh index d620b766..fa49864f 100644 --- a/Source/3rdParty/OVMandarin/Composer.hh +++ b/Source/3rdParty/OVMandarin/Composer.hh @@ -33,7 +33,8 @@ NS_ASSUME_NONNULL_BEGIN + (BOOL)isBufferEmpty; + (void)clearBuffer; + (void)combineReadingKey:(UniChar)charCode; -+ (BOOL)checkWhetherToneMarkerConfirms; ++ (BOOL)hasToneMarker; ++ (BOOL)hasToneMarkerOnly; + (NSString *)getSyllableComposition; + (void)doBackSpaceToBuffer; + (NSString *)getComposition; diff --git a/Source/3rdParty/OVMandarin/Composer.mm b/Source/3rdParty/OVMandarin/Composer.mm index 09379bc7..6efb5d08 100644 --- a/Source/3rdParty/OVMandarin/Composer.mm +++ b/Source/3rdParty/OVMandarin/Composer.mm @@ -52,11 +52,16 @@ static Mandarin::BopomofoReadingBuffer *PhoneticBuffer; PhoneticBuffer->combineKey((char)charCode); } -+ (BOOL)checkWhetherToneMarkerConfirms ++ (BOOL)hasToneMarker { return PhoneticBuffer->hasToneMarker(); } ++ (BOOL)hasToneMarkerOnly +{ + return PhoneticBuffer->hasToneMarkerOnly(); +} + + (NSString *)getSyllableComposition { return [NSString stringWithUTF8String:PhoneticBuffer->syllable().composedString().c_str()]; diff --git a/Source/3rdParty/OVMandarin/Mandarin.h b/Source/3rdParty/OVMandarin/Mandarin.h index 03a46c08..50ae8718 100644 --- a/Source/3rdParty/OVMandarin/Mandarin.h +++ b/Source/3rdParty/OVMandarin/Mandarin.h @@ -569,6 +569,12 @@ extern "C" class BopomofoReadingBuffer return syllable_.hasToneMarker(); } + bool hasToneMarkerOnly() const + { + return syllable_.hasToneMarker() && + !(syllable_.hasConsonant() || syllable_.hasMiddleVowel() || syllable_.hasVowel()); + } + protected: const BopomofoKeyboardLayout *layout_; BPMF syllable_; diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift index 8d425a49..cdea1012 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift @@ -135,23 +135,26 @@ extension KeyHandler { // MARK: Handle BPMF Keys. - var composeReading = false + var keyConsumedByReading = false let skipPhoneticHandling = input.isReservedKey || input.isControlHold || input.isOptionHold // See if Phonetic reading is valid. if !skipPhoneticHandling && Composer.chkKeyValidity(charCode) { Composer.combineReadingKey(charCode) + keyConsumedByReading = true // If we have a tone marker, we have to insert the reading to the // builder in other words, if we don't have a tone marker, we just // update the composing buffer. - composeReading = Composer.checkWhetherToneMarkerConfirms() + let composeReading = Composer.hasToneMarker() if !composeReading { stateCallback(buildInputtingState()) return true } } + var composeReading = Composer.hasToneMarker() || Composer.hasToneMarkerOnly() + // See if we have composition if Enter/Space is hit and buffer is not empty. // We use "|=" conditioning so that the tone marker key is also taken into account. // However, Swift does not support "|=". @@ -159,10 +162,12 @@ extension KeyHandler { if composeReading { let reading = Composer.getSyllableComposition() + // See whether we have a unigram for this... if !ifLangModelHasUnigrams(forKey: reading) { IME.prtDebugIntel("B49C0979:語彙庫內無「\(reading)」的匹配記錄。") errorCallback() - stateCallback(buildInputtingState()) + Composer.clearBuffer() + stateCallback((getBuilderLength() == 0) ? InputState.EmptyIgnoringPreviousState() : buildInputtingState()) return true } @@ -210,6 +215,16 @@ extension KeyHandler { stateCallback(choosingCandidates) } } + return true // Telling the client that the key is consumed. + } + + // The only possibility for this to be true is that the Bopomofo reading + // already has a tone marker but the last key is *not* a tone marker key. An + // example is the sequence "6u" with the Standard layout, which produces "ㄧˊ" + // but does not compose. Only sequences such as "ㄧˊ", "ˊㄧˊ", "ˊㄧˇ", or "ˊㄧ " + // would compose. + if keyConsumedByReading { + stateCallback(buildInputtingState()) return true } diff --git a/Source/Modules/ControllerModules/KeyHandler_States.swift b/Source/Modules/ControllerModules/KeyHandler_States.swift index a11cd900..103e02cd 100644 --- a/Source/Modules/ControllerModules/KeyHandler_States.swift +++ b/Source/Modules/ControllerModules/KeyHandler_States.swift @@ -303,7 +303,9 @@ extension KeyHandler { return false } - if Composer.isBufferEmpty() { + if Composer.hasToneMarkerOnly() { + Composer.clearBuffer() + } else if Composer.isBufferEmpty() { if getBuilderCursorIndex() >= 0 { deleteBuilderReadingInFrontOfCursor() walk() @@ -462,7 +464,7 @@ extension KeyHandler { if !Composer.isBufferEmpty() { Composer.clearBuffer() if getBuilderLength() == 0 { - stateCallback(InputState.Empty()) + stateCallback(InputState.EmptyIgnoringPreviousState()) } else { stateCallback(buildInputtingState()) } From bbf551290c4359288d1b7ec7d86da9f67876b4b3 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sat, 7 May 2022 10:20:31 +0800 Subject: [PATCH 04/20] KeyHander // Comment updates. --- Source/Modules/ControllerModules/KeyHandler_Core.swift | 6 +++--- .../ControllerModules/KeyHandler_HandleInput.swift | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Source/Modules/ControllerModules/KeyHandler_Core.swift b/Source/Modules/ControllerModules/KeyHandler_Core.swift index 4508d704..79e7cee3 100644 --- a/Source/Modules/ControllerModules/KeyHandler_Core.swift +++ b/Source/Modules/ControllerModules/KeyHandler_Core.swift @@ -115,12 +115,12 @@ class KeyHandler: NSObject { // MARK: - Functions dealing with Megrez. func walk() { - // Retrieve the most likely trellis, i.e. a Maximum Likelihood Estimation + // Retrieve the most likely grid, i.e. a Maximum Likelihood Estimation // of the best possible Mandarin characters given the input syllables, - // using the Viterbi algorithm implemented in the Gramambular library + // using the Viterbi algorithm implemented in the Megrez library let walker = Megrez.Walker(grid: _builder.grid()) - // the reverse walk traces the trellis from the end + // the reverse walk traces the grid from the end let walked: [Megrez.NodeAnchor] = walker.reverseWalk(at: _builder.grid().width()) // then we use ".reversed()" to reverse the nodes so that we get the forward-walked nodes diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift index cdea1012..d9a6d990 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift @@ -56,7 +56,7 @@ extension KeyHandler { // MARK: Caps Lock processing. - // If Caps Lock is ON, temporarily disable bopomofo. + // If Caps Lock is ON, temporarily disable phonetic reading. // Note: Alphanumerical mode processing. if input.isBackSpace || input.isEnter || input.isAbsorbedArrowKey || input.isExtraChooseCandidateKey || input.isExtraChooseCandidateKeyReverse || input.isCursorForward || input.isCursorBackward @@ -171,10 +171,10 @@ extension KeyHandler { return true } - // ... and insert it into the lattice grid... + // ... and insert it into the grid... insertReadingToBuilderAtCursor(reading: reading) - // ... then walk the lattice grid... + // ... then walk the grid... let poppedText = popOverflowComposingTextAndWalk() // ... get and tweak override model suggestion if possible... @@ -218,7 +218,7 @@ extension KeyHandler { return true // Telling the client that the key is consumed. } - // The only possibility for this to be true is that the Bopomofo reading + // The only possibility for this to be true is that the Phonetic reading // already has a tone marker but the last key is *not* a tone marker key. An // example is the sequence "6u" with the Standard layout, which produces "ㄧˊ" // but does not compose. Only sequences such as "ㄧˊ", "ˊㄧˊ", "ˊㄧˇ", or "ˊㄧ " From 183b9dc335d4e8c95cd8ccc0334e229ac317c901 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sat, 7 May 2022 12:15:02 +0800 Subject: [PATCH 05/20] Xcode // Add a standalone workspace for debugging. --- .../contents.xcworkspacedata | 4 ++ .../xcshareddata/xcschemes/vChewing.xcscheme | 47 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 vChewingDebug.xcworkspace/contents.xcworkspacedata create mode 100644 vChewingDebug.xcworkspace/xcshareddata/xcschemes/vChewing.xcscheme diff --git a/vChewingDebug.xcworkspace/contents.xcworkspacedata b/vChewingDebug.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000..94b2795e --- /dev/null +++ b/vChewingDebug.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,4 @@ + + + diff --git a/vChewingDebug.xcworkspace/xcshareddata/xcschemes/vChewing.xcscheme b/vChewingDebug.xcworkspace/xcshareddata/xcschemes/vChewing.xcscheme new file mode 100644 index 00000000..f0c8830b --- /dev/null +++ b/vChewingDebug.xcworkspace/xcshareddata/xcschemes/vChewing.xcscheme @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + From a4085b2377b57e97dcc941bc181252ed26e3305a Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sat, 7 May 2022 23:02:04 +0800 Subject: [PATCH 06/20] IME & mgrLM // Disable DispatchQueue. --- Source/Modules/IMEModules/IME.swift | 14 ++-- .../LangModelRelated/mgrLangModel.swift | 72 +++++++++---------- 2 files changed, 39 insertions(+), 47 deletions(-) diff --git a/Source/Modules/IMEModules/IME.swift b/Source/Modules/IMEModules/IME.swift index fbdf2f4d..09b769eb 100644 --- a/Source/Modules/IMEModules/IME.swift +++ b/Source/Modules/IMEModules/IME.swift @@ -64,14 +64,12 @@ public class IME: NSObject { // MARK: - Initializing Language Models. static func initLangModels(userOnly: Bool) { - DispatchQueue.global(qos: .userInitiated).async { - // mgrLangModel 的 loadUserPhrases 等函數在自動讀取 dataFolderPath 時, - // 如果發現自訂目錄不可用,則會自動抹去自訂目錄設定、改採預設目錄。 - // 所以這裡不需要特別處理。 - mgrLangModel.loadUserAssociatedPhrases() - mgrLangModel.loadUserPhraseReplacement() - mgrLangModel.loadUserPhrases() - } + // mgrLangModel 的 loadUserPhrases 等函數在自動讀取 dataFolderPath 時, + // 如果發現自訂目錄不可用,則會自動抹去自訂目錄設定、改採預設目錄。 + // 所以這裡不需要特別處理。 + mgrLangModel.loadUserAssociatedPhrases() + mgrLangModel.loadUserPhraseReplacement() + mgrLangModel.loadUserPhrases() if !userOnly { // mgrLangModel.loadDataModels() } diff --git a/Source/Modules/LangModelRelated/mgrLangModel.swift b/Source/Modules/LangModelRelated/mgrLangModel.swift index 8568cf62..0f941952 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.swift +++ b/Source/Modules/LangModelRelated/mgrLangModel.swift @@ -51,25 +51,23 @@ class mgrLangModel: NSObject { } public static func loadDataModels() { - DispatchQueue.global(qos: .userInitiated).async { - if !gLangModelCHT.isCNSDataLoaded() { - gLangModelCHT.loadCNSData(path: getBundleDataPath("char-kanji-cns")) - } - if !gLangModelCHT.isMiscDataLoaded() { - gLangModelCHT.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) - } - if !gLangModelCHT.isSymbolDataLoaded() { - gLangModelCHT.loadSymbolData(path: getBundleDataPath("data-symbols")) - } - if !gLangModelCHS.isCNSDataLoaded() { - gLangModelCHS.loadCNSData(path: getBundleDataPath("char-kanji-cns")) - } - if !gLangModelCHS.isMiscDataLoaded() { - gLangModelCHS.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) - } - if !gLangModelCHS.isSymbolDataLoaded() { - gLangModelCHS.loadSymbolData(path: getBundleDataPath("data-symbols")) - } + if !gLangModelCHT.isCNSDataLoaded() { + gLangModelCHT.loadCNSData(path: getBundleDataPath("char-kanji-cns")) + } + if !gLangModelCHT.isMiscDataLoaded() { + gLangModelCHT.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) + } + if !gLangModelCHT.isSymbolDataLoaded() { + gLangModelCHT.loadSymbolData(path: getBundleDataPath("data-symbols")) + } + if !gLangModelCHS.isCNSDataLoaded() { + gLangModelCHS.loadCNSData(path: getBundleDataPath("char-kanji-cns")) + } + if !gLangModelCHS.isMiscDataLoaded() { + gLangModelCHS.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) + } + if !gLangModelCHS.isSymbolDataLoaded() { + gLangModelCHS.loadSymbolData(path: getBundleDataPath("data-symbols")) } if !gLangModelCHT.isDataModelLoaded() { NotifierController.notify( @@ -101,16 +99,14 @@ class mgrLangModel: NSObject { public static func loadDataModel(_ mode: InputMode) { if mode == InputMode.imeModeCHS { - DispatchQueue.global(qos: .userInitiated).async { - if !gLangModelCHS.isMiscDataLoaded() { - gLangModelCHS.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) - } - if !gLangModelCHS.isSymbolDataLoaded() { - gLangModelCHS.loadSymbolData(path: getBundleDataPath("data-symbols")) - } - if !gLangModelCHS.isCNSDataLoaded() { - gLangModelCHS.loadCNSData(path: getBundleDataPath("char-kanji-cns")) - } + if !gLangModelCHS.isMiscDataLoaded() { + gLangModelCHS.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) + } + if !gLangModelCHS.isSymbolDataLoaded() { + gLangModelCHS.loadSymbolData(path: getBundleDataPath("data-symbols")) + } + if !gLangModelCHS.isCNSDataLoaded() { + gLangModelCHS.loadCNSData(path: getBundleDataPath("char-kanji-cns")) } if !gLangModelCHS.isDataModelLoaded() { NotifierController.notify( @@ -126,16 +122,14 @@ class mgrLangModel: NSObject { ) } } else if mode == InputMode.imeModeCHT { - DispatchQueue.global(qos: .userInitiated).async { - if !gLangModelCHT.isMiscDataLoaded() { - gLangModelCHT.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) - } - if !gLangModelCHT.isSymbolDataLoaded() { - gLangModelCHT.loadSymbolData(path: getBundleDataPath("data-symbols")) - } - if !gLangModelCHT.isCNSDataLoaded() { - gLangModelCHT.loadCNSData(path: getBundleDataPath("char-kanji-cns")) - } + if !gLangModelCHT.isMiscDataLoaded() { + gLangModelCHT.loadMiscData(path: getBundleDataPath("data-zhuyinwen")) + } + if !gLangModelCHT.isSymbolDataLoaded() { + gLangModelCHT.loadSymbolData(path: getBundleDataPath("data-symbols")) + } + if !gLangModelCHT.isCNSDataLoaded() { + gLangModelCHT.loadCNSData(path: getBundleDataPath("char-kanji-cns")) } if !gLangModelCHT.isDataModelLoaded() { NotifierController.notify( From 17329c0b6a809db726f153be4fdbbe5c084fea3d Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sun, 8 May 2022 09:24:10 +0800 Subject: [PATCH 07/20] LMs // Fix bugged debug outputs. --- Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift | 2 +- Source/Modules/LangModelRelated/SubLMs/lmCore.swift | 2 +- Source/Modules/LangModelRelated/SubLMs/lmLite.swift | 2 +- Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift index e5aca9f7..62e5c026 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift @@ -56,7 +56,7 @@ extension vChewing { arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n") } catch { IME.prtDebugIntel("\(error)") - IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.") + IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") return false } diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCore.swift b/Source/Modules/LangModelRelated/SubLMs/lmCore.swift index 8e63ceee..607c14f1 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCore.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCore.swift @@ -69,7 +69,7 @@ extension vChewing { arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n") } catch { IME.prtDebugIntel("\(error)") - IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.") + IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") return false } diff --git a/Source/Modules/LangModelRelated/SubLMs/lmLite.swift b/Source/Modules/LangModelRelated/SubLMs/lmLite.swift index 62e3ebac..e381bb1b 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmLite.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmLite.swift @@ -60,7 +60,7 @@ extension vChewing { arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n") } catch { IME.prtDebugIntel("\(error)") - IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.") + IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") return false } diff --git a/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift index 9cfc5ff5..1b7e2047 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift @@ -57,7 +57,7 @@ extension vChewing { } catch { IME.prtDebugIntel("\(error)") - IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.") + IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") return false } From ed5fe63b2e149884e3c2a8081a910527a9c29eab Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sun, 8 May 2022 18:27:32 +0800 Subject: [PATCH 08/20] dataCompiler // Tune down the rate for 0-count phrases. --- DataCompiler/dataCompiler.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataCompiler/dataCompiler.swift b/DataCompiler/dataCompiler.swift index 9122721d..08261761 100644 --- a/DataCompiler/dataCompiler.swift +++ b/DataCompiler/dataCompiler.swift @@ -356,7 +356,7 @@ func weightAndSort(_ arrStructUncalculated: [Entry], isCHS: Bool) -> [Entry] { weight = -13 case 0: // 墊底低頻漢字與詞語 weight = log10( - fscale ** (Float(entry.valPhrase.count) / 3.0 - 1.0) * 0.5 / norm) + fscale ** (Float(entry.valPhrase.count) / 3.0 - 1.0) * 0.25 / norm) default: weight = log10( fscale ** (Float(entry.valPhrase.count) / 3.0 - 1.0) From 6c66fd26c03bb5b5c8efeb4c5caaa88a822c2a45 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sun, 8 May 2022 09:23:42 +0800 Subject: [PATCH 09/20] LMs // +LMCoreEX, the Swift successor of ParselessLM (WIP). Co-Authored-By: ix4n33 <16833681+isaacxen@users.noreply.github.com> --- .../LangModelRelated/SubLMs/lmCoreEX.swift | 154 ++++++++++++++++++ vChewing.xcodeproj/project.pbxproj | 4 + 2 files changed, 158 insertions(+) create mode 100644 Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift new file mode 100644 index 00000000..de69e9ef --- /dev/null +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift @@ -0,0 +1,154 @@ +// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// StringView Ranges extension by (c) 2022 and onwards Isaac Xen (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/// 與之前的 LMCore 不同,LMCoreEX 不在辭典內記錄實體,而是記錄 range 範圍。 +/// 需要資料的時候,直接拿 range 去 strData 取資料。 +/// 資料記錄原理與上游 C++ 的 ParselessLM 差不多,但用的是 Swift 原生手段。 +/// 主要時間消耗仍在 For 迴圈,但這個算法可以顯著減少記憶體佔用。 + +import Foundation + +extension vChewing { + @frozen public struct LMCoreEX { + var rangeMap: [String: [Range]] = [:] + var strData: String = "" + var shouldReverse: Bool = false + var allowConsolidation: Bool = false + var defaultScore: Double = 0 + var shouldForceDefaultScore: Bool = false + + public var count: Int { + rangeMap.count + } + + public init( + reverse: Bool = false, consolidate: Bool = false, defaultScore scoreDefault: Double = 0, + forceDefaultScore: Bool = false + ) { + rangeMap = [:] + allowConsolidation = consolidate + shouldReverse = reverse + defaultScore = scoreDefault + shouldForceDefaultScore = forceDefaultScore + } + + public func isLoaded() -> Bool { + !rangeMap.isEmpty + } + + @discardableResult public mutating func open(_ path: String) -> Bool { + if isLoaded() { + return false + } + + if allowConsolidation { + LMConsolidator.fixEOF(path: path) + LMConsolidator.consolidate(path: path, pragma: true) + } + + do { + strData = try String(contentsOfFile: path, encoding: .utf8).replacingOccurrences(of: "\t", with: " ") + strData.ranges(splitBy: "\n").forEach { + let neta = strData[$0].components(separatedBy: " ") + if neta.count >= 2 { + let theKey = shouldReverse ? neta[1] : neta[0] + if !neta[0].isEmpty, !neta[1].isEmpty, theKey.first != "#" { + let theValue = $0 + rangeMap[theKey, default: []].append(theValue) + } + } + } + } catch { + IME.prtDebugIntel("\(error)") + IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") + return false + } + + return true + } + + public mutating func close() { + if isLoaded() { + rangeMap.removeAll() + } + } + + // MARK: - Advanced features + + public func dump() { + var strDump = "" + for entry in rangeMap { + let netaRanges: [Range] = entry.value + for netaRange in netaRanges { + let neta = strData[netaRange] + let addline = neta + "\n" + strDump += addline + } + } + IME.prtDebugIntel(strDump) + } + + public func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] { + // 這裡用了點廢話處理,不然函數構建體會被 Swift 格式整理工具給毀掉。 + // 其實只要一句「[Megrez.Bigram]()」就夠了。 + precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() + } + + public func unigramsFor(key: String) -> [Megrez.Unigram] { + var grams: [Megrez.Unigram] = [] + if let arrRangeRecords: [Range] = rangeMap[key] { + for netaRange in arrRangeRecords { + let neta = strData[netaRange].components(separatedBy: " ") + let theValue: String = shouldReverse ? neta[0] : neta[1] + let kvPair = Megrez.KeyValuePair(key: key, value: theValue) + var theScore = defaultScore + if neta.count >= 3, !shouldForceDefaultScore { + theScore = .init(neta[2]) ?? defaultScore + } + grams.append(Megrez.Unigram(keyValue: kvPair, score: theScore)) + } + } + return grams + } + + public func hasUnigramsFor(key: String) -> Bool { + rangeMap[key] != nil + } + } +} + +// MARK: - StringView Ranges Extension (by Isaac Xen) + +extension String { + fileprivate func ranges(splitBy separator: Element) -> [Range] { + var startIndex = startIndex + return split(separator: separator).reduce(into: []) { ranges, substring in + _ = range(of: substring, range: startIndex.. Date: Sun, 8 May 2022 22:22:56 +0800 Subject: [PATCH 10/20] LMs // Make LMAssociates and LMReplacements parseless. Co-Authored-By: ix4n33 <16833681+isaacxen@users.noreply.github.com> --- .../SubLMs/lmAssociates.swift | 87 ++++++++++--------- .../SubLMs/lmReplacements.swift | 83 ++++++++++-------- 2 files changed, 92 insertions(+), 78 deletions(-) diff --git a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift index 62e5c026..495ca22d 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift @@ -1,6 +1,5 @@ // Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). -// Refactored from the ObjCpp-version of this class by: -// (c) 2011 and onwards The OpenVanilla Project (MIT License). +// StringView Ranges extension by (c) 2022 and onwards Isaac Xen (MIT License). /* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -28,18 +27,19 @@ import Foundation extension vChewing { @frozen public struct LMAssociates { - var keyValueMap: [String: [Megrez.KeyValuePair]] = [:] + var rangeMap: [String: [Range]] = [:] + var strData: String = "" public var count: Int { - keyValueMap.count + rangeMap.count } public init() { - keyValueMap = [:] + rangeMap = [:] } public func isLoaded() -> Bool { - !keyValueMap.isEmpty + !rangeMap.isEmpty } @discardableResult public mutating func open(_ path: String) -> Bool { @@ -50,53 +50,40 @@ extension vChewing { LMConsolidator.fixEOF(path: path) LMConsolidator.consolidate(path: path, pragma: true) - var arrData: [String] = [] - do { - arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n") + strData = try String(contentsOfFile: path, encoding: .utf8).replacingOccurrences(of: "\t", with: " ") + strData.ranges(splitBy: "\n").forEach { + let neta = strData[$0].components(separatedBy: " ") + if neta.count >= 2 { + let theKey = neta[0] + if !neta[0].isEmpty, !neta[1].isEmpty, theKey.first != "#" { + let theValue = $0 + rangeMap[theKey, default: []].append(theValue) + } + } + } } catch { IME.prtDebugIntel("\(error)") IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") return false } - for (lineID, lineContent) in arrData.enumerated() { - if !lineContent.hasPrefix("#") { - let lineContent = lineContent.replacingOccurrences(of: "\t", with: " ") - if lineContent.components(separatedBy: " ").count < 2 { - if lineContent != "", lineContent != " " { - IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)") - } - continue - } - var currentKV = Megrez.KeyValuePair() - for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() { - switch unitID { - case 0: - currentKV.key = unitContent - case 1: - currentKV.value = unitContent - default: break - } - } - keyValueMap[currentKV.key, default: []].append(currentKV) - } - } return true } public mutating func close() { if isLoaded() { - keyValueMap.removeAll() + rangeMap.removeAll() } } public func dump() { var strDump = "" - for entry in keyValueMap { - let rows: [Megrez.KeyValuePair] = entry.value - for row in rows { - let addline = row.key + " " + row.value + "\n" + for entry in rangeMap { + let netaRanges: [Range] = entry.value + for netaRange in netaRanges { + let neta = strData[netaRange] + let addline = neta + "\n" strDump += addline } } @@ -104,17 +91,33 @@ extension vChewing { } public func valuesFor(key: String) -> [String]? { - var v: [String] = [] - if let matched = keyValueMap[key] { - for entry in matched as [Megrez.KeyValuePair] { - v.append(entry.value) + var pairs: [String] = [] + if let arrRangeRecords: [Range] = rangeMap[key] { + for netaRange in arrRangeRecords { + let neta = strData[netaRange].components(separatedBy: " ") + let theValue: String = neta[1] + pairs.append(theValue) } } - return v + return pairs } public func hasValuesFor(key: String) -> Bool { - keyValueMap[key] != nil + rangeMap[key] != nil + } + } +} + +// MARK: - StringView Ranges Extension (by Isaac Xen) + +extension String { + fileprivate func ranges(splitBy separator: Element) -> [Range] { + var startIndex = startIndex + return split(separator: separator).reduce(into: []) { ranges, substring in + _ = range(of: substring, range: startIndex..] = [:] + var strData: String = "" public var count: Int { - keyValueMap.count + rangeMap.count } public init() { - keyValueMap = [:] + rangeMap = [:] } public func isLoaded() -> Bool { - !keyValueMap.isEmpty + !rangeMap.isEmpty } @discardableResult public mutating func open(_ path: String) -> Bool { @@ -50,58 +50,69 @@ extension vChewing { LMConsolidator.fixEOF(path: path) LMConsolidator.consolidate(path: path, pragma: true) - var arrData: [String] = [] - do { - arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n") - + strData = try String(contentsOfFile: path, encoding: .utf8).replacingOccurrences(of: "\t", with: " ") + strData.ranges(splitBy: "\n").forEach { + let neta = strData[$0].components(separatedBy: " ") + if neta.count >= 2 { + let theKey = neta[0] + if !neta[0].isEmpty, !neta[1].isEmpty, theKey.first != "#" { + let theValue = $0 + rangeMap[theKey] = theValue + } + } + } } catch { IME.prtDebugIntel("\(error)") IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") return false } - for (lineID, lineContent) in arrData.enumerated() { - if !lineContent.hasPrefix("#") { - let lineContent = lineContent.replacingOccurrences(of: "\t", with: " ") - if lineContent.components(separatedBy: " ").count < 2 { - if lineContent != "", lineContent != " " { - IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)") - } - continue - } - var currentKV = Megrez.KeyValuePair() - for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() { - switch unitID { - case 0: - currentKV.key = unitContent - case 1: - currentKV.value = unitContent - default: break - } - } - keyValueMap[currentKV.key] = currentKV.value - } - } return true } public mutating func close() { if isLoaded() { - keyValueMap.removeAll() + rangeMap.removeAll() } } public func dump() { var strDump = "" - for entry in keyValueMap { - strDump += entry.key + " " + entry.value + "\n" + for entry in rangeMap { + strDump += strData[entry.value] + "\n" } IME.prtDebugIntel(strDump) } public func valuesFor(key: String) -> String { - keyValueMap[key] ?? "" + guard let range = rangeMap[key] else { + return "" + } + let arrNeta = strData[range].components(separatedBy: " ") + guard arrNeta.count >= 2 else { + return "" + } + return String(arrNeta[1]) + } + + public func hasValuesFor(key: String) -> Bool { + rangeMap[key] != nil + } + + } +} + +// MARK: - StringView Ranges Extension (by Isaac Xen) + +extension String { + fileprivate func ranges(splitBy separator: Element) -> [Range] { + var startIndex = startIndex + return split(separator: separator).reduce(into: []) { ranges, substring in + _ = range(of: substring, range: startIndex.. Date: Sun, 8 May 2022 22:22:58 +0800 Subject: [PATCH 11/20] LMInstantiator // Deploying LMCoreEX. --- .../LangModelRelated/LMInstantiator.swift | 49 +++++++++---------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index c075e5cd..31b2e80c 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -31,8 +31,10 @@ import Foundation // 簡體中文模式與繁體中文模式共用全字庫擴展模組,故單獨處理。 // 塞在 LMInstantiator 內的話,每個模式都會讀入一份全字庫,會多佔用 100MB 記憶體。 -private var lmCNS = vChewing.LMLite(consolidate: false) -private var lmSymbols = vChewing.LMCore(reverse: true, consolidate: false, defaultScore: -13.0, forceDefaultScore: true) +private var lmCNS = vChewing.LMCoreEX( + reverse: true, consolidate: false, defaultScore: -11.0, forceDefaultScore: false) +private var lmSymbols = vChewing.LMCoreEX( + reverse: true, consolidate: false, defaultScore: -13.0, forceDefaultScore: false) extension vChewing { /// LMInstantiator is a facade for managing a set of models including @@ -62,26 +64,30 @@ extension vChewing { public var isCNSEnabled = false public var isSymbolEnabled = false - /// 介紹一下三個通用的語言模組型別: - /// LMCore 是全功能通用型的模組,每一筆辭典記錄以 key 為注音、以 [Unigram] 陣列作為記錄內容。 + /// 介紹一下幾個通用的語言模組型別: + /// ---------------------- + /// LMCoreEX 是全功能通用型的模組,每一筆辭典記錄以 key 為注音、以 [Unigram] 陣列作為記錄內容。 /// 比較適合那種每筆記錄都有不同的權重數值的語言模組,雖然也可以強制施加權重數值就是了。 - /// 然而缺點是:哪怕你強制施加權重數值,也不會減輕記憶體佔用。 - /// 至於像全字庫這樣所有記錄都使用同一權重數值的模組,可以用 LMLite 以節省記憶體佔用。 - /// LMLite 的辭典內不會存儲權重資料,只會在每次讀取記錄時施加您給定的權重數值。 - /// LMLite 與 LMCore 都會用到多執行緒、以加速載入(不然的話,全部資料載入會耗費八秒左右)。 - /// LMReplacements 與 LMAssociates 均為特種模組,分別擔當語彙置換表資料與使用者聯想詞的資料承載工作。 + /// LMCoreEX 的辭典陣列不承載 Unigram 本體、而是承載索引範圍,這樣可以節約記憶體。 + /// 一個 LMCoreEX 就可以滿足威注音幾乎所有語言模組副本的需求,當然也有這兩個例外: + /// LMReplacements 與 LMAssociates 分別擔當語彙置換表資料與使用者聯想詞的資料承載工作。 // 聲明原廠語言模組 /// Reverse 的話,第一欄是注音,第二欄是對應的漢字,第三欄是可能的權重。 /// 不 Reverse 的話,第一欄是漢字,第二欄是對應的注音,第三欄是可能的權重。 - var lmCore = LMCore(reverse: false, consolidate: false, defaultScore: -9.5, forceDefaultScore: false) - var lmMisc = LMCore(reverse: true, consolidate: false, defaultScore: -1, forceDefaultScore: false) + var lmCore = LMCoreEX( + reverse: false, consolidate: false, defaultScore: -9.9, forceDefaultScore: false) + var lmMisc = LMCoreEX( + reverse: true, consolidate: false, defaultScore: -1.0, forceDefaultScore: false) // 聲明使用者語言模組。 // 使用者語言模組使用多執行緒的話,可能會導致一些問題。有時間再仔細排查看看。 - var lmUserPhrases = LMLite(consolidate: true) - var lmFiltered = LMLite(consolidate: true) - var lmUserSymbols = LMLite(consolidate: true) + var lmUserPhrases = LMCoreEX( + reverse: true, consolidate: true, defaultScore: 0, forceDefaultScore: true) + var lmFiltered = LMCoreEX( + reverse: true, consolidate: true, defaultScore: 0, forceDefaultScore: true) + var lmUserSymbols = LMCoreEX( + reverse: true, consolidate: true, defaultScore: -12.0, forceDefaultScore: true) var lmReplacements = LMReplacments() var lmAssociates = LMAssociates() @@ -201,7 +207,7 @@ extension vChewing { // 用 reversed 指令讓使用者語彙檔案內的詞條優先順序隨著行數增加而逐漸增高。 // 這樣一來就可以在就地新增語彙時徹底複寫優先權。 // 將兩句差分也是為了讓 rawUserUnigrams 的類型不受可能的影響。 - rawAllUnigrams += lmUserPhrases.unigramsFor(key: key, score: 0.0).reversed() + rawAllUnigrams += lmUserPhrases.unigramsFor(key: key).reversed() if lmUserPhrases.unigramsFor(key: key).isEmpty { IME.prtDebugIntel("Not found in UserPhrasesUnigram(\(lmUserPhrases.count)): \(key)") } @@ -211,11 +217,11 @@ extension vChewing { rawAllUnigrams += lmCore.unigramsFor(key: key) if isCNSEnabled { - rawAllUnigrams += lmCNS.unigramsFor(key: key, score: -11) + rawAllUnigrams += lmCNS.unigramsFor(key: key) } if isSymbolEnabled { - rawAllUnigrams += lmUserSymbols.unigramsFor(key: key, score: -12.0) + rawAllUnigrams += lmUserSymbols.unigramsFor(key: key) if lmUserSymbols.unigramsFor(key: key).isEmpty { IME.prtDebugIntel("Not found in UserSymbolUnigram(\(lmUserSymbols.count)): \(key)") } @@ -232,15 +238,6 @@ extension vChewing { filteredPairs.insert(unigram.keyValue) } - var debugOutput = "\n" - for neta in rawAllUnigrams { - debugOutput += "RAW: \(neta.keyValue.key) \(neta.keyValue.value) \(neta.score)\n" - } - if debugOutput == "\n" { - debugOutput = "RAW: No match found in all unigrams." - } - IME.prtDebugIntel(debugOutput) - return filterAndTransform( unigrams: rawAllUnigrams, filter: filteredPairs, inserted: &insertedPairs From 1fa09fe1ebc564056ba310c97772804ebff41074 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sun, 8 May 2022 22:37:37 +0800 Subject: [PATCH 12/20] LMs // Deprecating LMCore and LMLite. --- .../LangModelRelated/SubLMs/lmCore.swift | 155 ------------------ .../LangModelRelated/SubLMs/lmLite.swift | 124 -------------- vChewing.xcodeproj/project.pbxproj | 8 - 3 files changed, 287 deletions(-) delete mode 100644 Source/Modules/LangModelRelated/SubLMs/lmCore.swift delete mode 100644 Source/Modules/LangModelRelated/SubLMs/lmLite.swift diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCore.swift b/Source/Modules/LangModelRelated/SubLMs/lmCore.swift deleted file mode 100644 index 607c14f1..00000000 --- a/Source/Modules/LangModelRelated/SubLMs/lmCore.swift +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -// 威注音重新設計原廠詞庫語言模組。不排序,但使用 Swift 內建的 String 處理。 - -import Foundation - -extension vChewing { - @frozen public struct LMCore { - var keyValueScoreMap: [String: [Megrez.Unigram]] = [:] - var shouldReverse: Bool = false - var allowConsolidation: Bool = false - var defaultScore: Double = 0 - var shouldForceDefaultScore: Bool = false - - public var count: Int { - keyValueScoreMap.count - } - - public init( - reverse: Bool = false, consolidate: Bool = false, defaultScore scoreDefault: Double = 0, - forceDefaultScore: Bool = false - ) { - keyValueScoreMap = [:] - allowConsolidation = consolidate - shouldReverse = reverse - defaultScore = scoreDefault - shouldForceDefaultScore = forceDefaultScore - } - - public func isLoaded() -> Bool { - !keyValueScoreMap.isEmpty - } - - @discardableResult public mutating func open(_ path: String) -> Bool { - if isLoaded() { - return false - } - - if allowConsolidation { - LMConsolidator.fixEOF(path: path) - LMConsolidator.consolidate(path: path, pragma: true) - } - - var arrData: [String] = [] - - do { - arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n") - } catch { - IME.prtDebugIntel("\(error)") - IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") - return false - } - - for (lineID, lineContent) in arrData.enumerated() { - if !lineContent.hasPrefix("#") { - let lineContent = lineContent.replacingOccurrences(of: "\t", with: " ") - if lineContent.components(separatedBy: " ").count < 2 { - if lineContent != "", lineContent != " " { - IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)") - } - continue - } - var currentUnigram = Megrez.Unigram(keyValue: Megrez.KeyValuePair(), score: defaultScore) - var columnOne = "" - var columnTwo = "" - for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() { - switch unitID { - case 0: - columnOne = unitContent - case 1: - columnTwo = unitContent - case 2: - if !shouldForceDefaultScore { - if let unitContentConverted = Double(unitContent) { - currentUnigram.score = unitContentConverted - } else { - IME.prtDebugIntel("Line #\(lineID) Score Data Wrecked: \(lineContent)") - } - } - default: break - } - } - // 標點符號的頻率最好鎖定一下。 - if columnOne.contains("_punctuation_") { - currentUnigram.score -= (Double(lineID) * 0.000001) - } - let kvPair = - shouldReverse - ? Megrez.KeyValuePair(key: columnTwo, value: columnOne) - : Megrez.KeyValuePair(key: columnOne, value: columnTwo) - currentUnigram.keyValue = kvPair - let key = shouldReverse ? columnTwo : columnOne - keyValueScoreMap[key, default: []].append(currentUnigram) - } - } - return true - } - - public mutating func close() { - if isLoaded() { - keyValueScoreMap.removeAll() - } - } - - // MARK: - Advanced features - - public func dump() { - var strDump = "" - for entry in keyValueScoreMap { - let rows: [Megrez.Unigram] = entry.value - for row in rows { - let addline = row.keyValue.key + " " + row.keyValue.value + " " + String(row.score) + "\n" - strDump += addline - } - } - IME.prtDebugIntel(strDump) - } - - public func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] { - // 這裡用了點廢話處理,不然函數構建體會被 Swift 格式整理工具給毀掉。 - // 其實只要一句「[Megrez.Bigram]()」就夠了。 - precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]() - } - - public func unigramsFor(key: String) -> [Megrez.Unigram] { - keyValueScoreMap[key] ?? [Megrez.Unigram]() - } - - public func hasUnigramsFor(key: String) -> Bool { - keyValueScoreMap[key] != nil - } - } -} diff --git a/Source/Modules/LangModelRelated/SubLMs/lmLite.swift b/Source/Modules/LangModelRelated/SubLMs/lmLite.swift deleted file mode 100644 index e381bb1b..00000000 --- a/Source/Modules/LangModelRelated/SubLMs/lmLite.swift +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). -// Refactored from the ObjCpp-version of this class by: -// (c) 2011 and onwards The OpenVanilla Project (MIT License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -import Foundation - -extension vChewing { - @frozen public struct LMLite { - var keyValueMap: [String: [Megrez.KeyValuePair]] = [:] - var allowConsolidation = false - - public var count: Int { - keyValueMap.count - } - - public init(consolidate: Bool = false) { - keyValueMap = [:] - allowConsolidation = consolidate - } - - public func isLoaded() -> Bool { - !keyValueMap.isEmpty - } - - @discardableResult public mutating func open(_ path: String) -> Bool { - if isLoaded() { - return false - } - - if allowConsolidation { - LMConsolidator.fixEOF(path: path) - LMConsolidator.consolidate(path: path, pragma: true) - } - - var arrData: [String] = [] - - do { - arrData = try String(contentsOfFile: path, encoding: .utf8).components(separatedBy: "\n") - } catch { - IME.prtDebugIntel("\(error)") - IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).") - return false - } - - for (lineID, lineContent) in arrData.enumerated() { - if !lineContent.hasPrefix("#") { - let lineContent = lineContent.replacingOccurrences(of: "\t", with: " ") - if lineContent.components(separatedBy: " ").count < 2 { - if lineContent != "", lineContent != " " { - IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)") - } - continue - } - var currentKV = Megrez.KeyValuePair() - for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() { - switch unitID { - case 0: - currentKV.value = unitContent - case 1: - currentKV.key = unitContent - default: break - } - } - keyValueMap[currentKV.key, default: []].append(currentKV) - } - } - return true - } - - public mutating func close() { - if isLoaded() { - keyValueMap.removeAll() - } - } - - public func dump() { - var strDump = "" - for entry in keyValueMap { - let rows: [Megrez.KeyValuePair] = entry.value - for row in rows { - let addline = row.key + " " + row.value + "\n" - strDump += addline - } - } - IME.prtDebugIntel(strDump) - } - - public func unigramsFor(key: String, score givenScore: Double = 0.0) -> [Megrez.Unigram] { - var v: [Megrez.Unigram] = [] - if let matched = keyValueMap[key] { - for entry in matched as [Megrez.KeyValuePair] { - v.append(Megrez.Unigram(keyValue: entry, score: givenScore)) - } - } - return v - } - - public func hasUnigramsFor(key: String) -> Bool { - keyValueMap[key] != nil - } - } -} diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index 5685aff1..58647c9c 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -7,7 +7,6 @@ objects = { /* Begin PBXBuildFile section */ - 5B00A230282011980058E5DB /* lmLite.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B00A22F282011980058E5DB /* lmLite.swift */; }; 5B0AF8B527B2C8290096FE54 /* StringExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B0AF8B427B2C8290096FE54 /* StringExtension.swift */; }; 5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B11328827B94CFB00E58451 /* AppleKeyboardConverter.swift */; }; 5B27AD6A27CB1F9B000ED75B /* data-symbols.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B27AD6827CB1F9B000ED75B /* data-symbols.txt */; }; @@ -52,7 +51,6 @@ 5B949BD92816DC5400D87B5D /* LineReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B949BD82816DC5400D87B5D /* LineReader.swift */; }; 5B949BDB2816DDBC00D87B5D /* LMConsolidator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */; }; 5BA0DF312817857D009E73BB /* lmUserOverride.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */; }; - 5BA0DF322817857D009E73BB /* lmCore.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA0DF2F2817857D009E73BB /* lmCore.swift */; }; 5BA9FD0F27FEDB6B002DE248 /* suiPrefPaneGeneral.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA9FD0A27FEDB6B002DE248 /* suiPrefPaneGeneral.swift */; }; 5BA9FD1027FEDB6B002DE248 /* suiPrefPaneKeyboard.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA9FD0B27FEDB6B002DE248 /* suiPrefPaneKeyboard.swift */; }; 5BA9FD1127FEDB6B002DE248 /* ctlPrefUI.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA9FD0C27FEDB6B002DE248 /* ctlPrefUI.swift */; }; @@ -162,7 +160,6 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ - 5B00A22F282011980058E5DB /* lmLite.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmLite.swift; sourceTree = ""; usesTabs = 0; }; 5B04305327B529D800CB65BC /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "zh-Hans.lproj/InfoPlist.strings"; sourceTree = ""; }; 5B04305427B529D800CB65BC /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "zh-Hans.lproj/Localizable.strings"; sourceTree = ""; }; 5B04305527B529D800CB65BC /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "zh-Hans.lproj/MainMenu.strings"; sourceTree = ""; }; @@ -230,7 +227,6 @@ 5B949BD82816DC5400D87B5D /* LineReader.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LineReader.swift; sourceTree = ""; usesTabs = 0; }; 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LMConsolidator.swift; sourceTree = ""; usesTabs = 0; }; 5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmUserOverride.swift; sourceTree = ""; usesTabs = 0; }; - 5BA0DF2F2817857D009E73BB /* lmCore.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmCore.swift; sourceTree = ""; usesTabs = 0; }; 5BA9FD0A27FEDB6B002DE248 /* suiPrefPaneGeneral.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = suiPrefPaneGeneral.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; 5BA9FD0B27FEDB6B002DE248 /* suiPrefPaneKeyboard.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = suiPrefPaneKeyboard.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; 5BA9FD0C27FEDB6B002DE248 /* ctlPrefUI.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlPrefUI.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; @@ -395,9 +391,7 @@ isa = PBXGroup; children = ( 5B407309281672610023DFFF /* lmAssociates.swift */, - 5BA0DF2F2817857D009E73BB /* lmCore.swift */, 5B887F2F2826AEA400B6651E /* lmCoreEX.swift */, - 5B00A22F282011980058E5DB /* lmLite.swift */, 5B40730A281672610023DFFF /* lmReplacements.swift */, 5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */, ); @@ -1080,7 +1074,6 @@ 5B707CE827D9F4590099EF99 /* OpenCCBridge.swift in Sources */, D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */, 5BA9FD4527FEF3C9002DE248 /* ToolbarItemStyleViewController.swift in Sources */, - 5BA0DF322817857D009E73BB /* lmCore.swift in Sources */, 5BA9FD4127FEF3C8002DE248 /* PreferencesStyle.swift in Sources */, 5B7F225D2808501000DDD3CB /* KeyHandler_HandleInput.swift in Sources */, 5BA9FD1227FEDB6B002DE248 /* suiPrefPaneExperience.swift in Sources */, @@ -1134,7 +1127,6 @@ 5B62A34827AE7CD900A19448 /* ctlCandidateVertical.swift in Sources */, 5BA9FD4027FEF3C8002DE248 /* Localization.swift in Sources */, 5BA9FD1327FEDB6B002DE248 /* suiPrefPaneDictionary.swift in Sources */, - 5B00A230282011980058E5DB /* lmLite.swift in Sources */, 5BBBB77A27AEDC690023B93A /* clsSFX.swift in Sources */, 5BA9FD4727FEF3C9002DE248 /* PreferencesStyleController.swift in Sources */, 5BF8423127BAA942008E7E4C /* vChewingKanjiConverter.swift in Sources */, From cbddea132a4e8b52055e5445789453955c65ffb0 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sun, 8 May 2022 23:20:36 +0800 Subject: [PATCH 13/20] Repo // Add credits for Isaac Xen. --- AUTHORS | 16 ++++++++++------ Installer/Resources/Base.lproj/MainMenu.xib | 2 +- Installer/Resources/en.lproj/MainMenu.strings | 4 ++-- Installer/Resources/ja.lproj/MainMenu.strings | 4 ++-- .../Resources/zh-Hans.lproj/MainMenu.strings | 4 ++-- .../Resources/zh-Hant.lproj/MainMenu.strings | 4 ++-- LICENSE-CHS.txt | 2 +- LICENSE-CHT.txt | 2 +- LICENSE-JPN.txt | 2 +- LICENSE.txt | 2 +- Source/WindowNIBs/Base.lproj/frmAboutWindow.xib | 9 ++++----- .../WindowNIBs/en.lproj/frmAboutWindow.strings | 4 ++-- .../WindowNIBs/ja.lproj/frmAboutWindow.strings | 4 ++-- .../zh-Hans.lproj/frmAboutWindow.strings | 4 ++-- .../zh-Hant.lproj/frmAboutWindow.strings | 4 ++-- 15 files changed, 35 insertions(+), 32 deletions(-) diff --git a/AUTHORS b/AUTHORS index 158911b6..e9eaf28d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,11 +1,15 @@ $ Main contributors and volunteers of this repository (vChewing for macOS): -Shiki Suen // Main developer of vChewing for macOS. -Hiraku Wang // Technical assistant in Cocoa. - +- Shiki Suen // Main developer of vChewing for macOS. +- Hiraku Wang // Technical reinforcement in Cocoa during the Object-Cpp dev period of this project. +- Isaac Xen // Technical reinforcement in Swift: SFX Module and StringView Ranges Extension. $ Contributors and volunteeres of the upstream repo, having no responsibility in discussing anything in the current repo: -Mengjuei Hsieh // McBopomofo for macOS 1.x main developer and architect. -Zonble Yang // McBopomofo for macOS 2.x architect. -Lukhnos D Liu // Mandarin and Gramambular engine developer. +- Mengjuei Hsieh // McBopomofo for macOS 1.x main developer and architect. +- Zonble Yang // McBopomofo for macOS 2.x architect, especially state-based IME behavior management. +- Lukhnos D Liu // Developer of the Mandarin syllable input processor. + +$ Special thanks to: + +- All supporters from Cocoaheads Taipei and Mobile01 community. diff --git a/Installer/Resources/Base.lproj/MainMenu.xib b/Installer/Resources/Base.lproj/MainMenu.xib index 2a204c55..71ebe7b7 100644 --- a/Installer/Resources/Base.lproj/MainMenu.xib +++ b/Installer/Resources/Base.lproj/MainMenu.xib @@ -198,7 +198,7 @@ McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al. -vChewing macOS Development: Shiki Suen, Hiraku Wang, etc.
vChewing Phrase Database Maintained by Shiki Suen. +vChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc.
vChewing Phrase Database Maintained by Shiki Suen. diff --git a/Installer/Resources/en.lproj/MainMenu.strings b/Installer/Resources/en.lproj/MainMenu.strings index cb7ec3c0..f45a3b26 100644 --- a/Installer/Resources/en.lproj/MainMenu.strings +++ b/Installer/Resources/en.lproj/MainMenu.strings @@ -56,8 +56,8 @@ /* Class = "NSTextFieldCell"; title = "Derived from OpenVanilla McBopopmofo Project."; ObjectID = "QYf-Nf-hoi"; */ "QYf-Nf-hoi.title" = "Derived from OpenVanilla McBopopmofo Project."; -/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "VW8-s5-Wpn"; */ -"VW8-s5-Wpn.title" = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; +/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "VW8-s5-Wpn"; */ +"VW8-s5-Wpn.title" = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; /* Class = "NSTextFieldCell"; title = "Placeholder for showing copyright information."; ObjectID = "eo3-TK-0rB"; */ // "eo3-TK-0rB.title" = "Placeholder for showing copyright information."; diff --git a/Installer/Resources/ja.lproj/MainMenu.strings b/Installer/Resources/ja.lproj/MainMenu.strings index cf864cc7..63f67c93 100644 --- a/Installer/Resources/ja.lproj/MainMenu.strings +++ b/Installer/Resources/ja.lproj/MainMenu.strings @@ -56,8 +56,8 @@ /* Class = "NSTextFieldCell"; title = "Derived from OpenVanilla McBopopmofo Project."; ObjectID = "QYf-Nf-hoi"; */ "QYf-Nf-hoi.title" = "OpenVanilla 小麦注音プロジェクトから派生。"; -/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "VW8-s5-Wpn"; */ -"VW8-s5-Wpn.title" = "ボポモフォエンジン開発:Lukhnos Liu。\n入力状態管理システム開発:Zonble Yang。\nmacOS 版威注音の開発:Shiki Suen, Hiraku Wang, など。\n威注音語彙データの維持:Shiki Suen。\nMegrez 辞書処理エンジン:Shiki Suen(Lukhnos の Gramambular C++ エンジンを Swift で再開発したものである)。"; +/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "VW8-s5-Wpn"; */ +"VW8-s5-Wpn.title" = "ボポモフォエンジン開発:Lukhnos Liu。\n入力状態管理システム開発:Zonble Yang。\nmacOS 版威注音の開発:Shiki Suen, Isaac Xen, Hiraku Wang, など。\n威注音語彙データの維持:Shiki Suen。\nMegrez 辞書処理エンジン:Shiki Suen(Lukhnos の Gramambular C++ エンジンを Swift で再開発したものである)。"; /* Class = "NSTextFieldCell"; title = "Placeholder for showing copyright information."; ObjectID = "eo3-TK-0rB"; */ "eo3-TK-0rB.title" = "Placeholder for showing copyright information."; diff --git a/Installer/Resources/zh-Hans.lproj/MainMenu.strings b/Installer/Resources/zh-Hans.lproj/MainMenu.strings index 457d2d87..b5ab37d3 100644 --- a/Installer/Resources/zh-Hans.lproj/MainMenu.strings +++ b/Installer/Resources/zh-Hans.lproj/MainMenu.strings @@ -56,9 +56,9 @@ /* Class = "NSTextFieldCell"; title = "Derived from OpenVanilla McBopopmofo Project."; ObjectID = "QYf-Nf-hoi"; */ "QYf-Nf-hoi.title" = "该专案由 OpenVanilla 小麦注音专案衍生而来。"; -/* Class = "NSTextFieldCell"; title = "McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc. +/* Class = "NSTextFieldCell"; title = "McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc. vChewing Phrase Database Maintained by Shiki Suen."; ObjectID = "VW8-s5-Wpn"; */ -"VW8-s5-Wpn.title" = "注音拼音输入处理引擎研发:Lukhnos Liu。\n输入法状态管理引擎研发:Zonble Yang。\n威注音 macOS 程式研发:Shiki Suen, Hiraku Wang, 等。\n威注音词库维护:Shiki Suen。\n天权星语汇引擎:Shiki Suen,用 Swift 将 Lukhnos 的 C++ Gramambular 重写而得。"; +"VW8-s5-Wpn.title" = "注音拼音输入处理引擎研发:Lukhnos Liu。\n输入法状态管理引擎研发:Zonble Yang。\n威注音 macOS 程式研发:Shiki Suen, Isaac Xen, Hiraku Wang, 等。\n威注音词库维护:Shiki Suen。\n天权星语汇引擎:Shiki Suen,用 Swift 将 Lukhnos 的 C++ Gramambular 重写而得。"; /* Class = "NSTextFieldCell"; title = "Placeholder for showing copyright information."; ObjectID = "eo3-TK-0rB"; */ // "eo3-TK-0rB.title" = "Placeholder for showing copyright information."; diff --git a/Installer/Resources/zh-Hant.lproj/MainMenu.strings b/Installer/Resources/zh-Hant.lproj/MainMenu.strings index f55afe6e..0996d412 100644 --- a/Installer/Resources/zh-Hant.lproj/MainMenu.strings +++ b/Installer/Resources/zh-Hant.lproj/MainMenu.strings @@ -56,9 +56,9 @@ /* Class = "NSTextFieldCell"; title = "Derived from OpenVanilla McBopopmofo Project."; ObjectID = "QYf-Nf-hoi"; */ "QYf-Nf-hoi.title" = "該專案由 OpenVanilla 小麥注音專案衍生而來。"; -/* Class = "NSTextFieldCell"; title = "McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc. +/* Class = "NSTextFieldCell"; title = "McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc. vChewing Phrase Database Maintained by Shiki Suen."; ObjectID = "VW8-s5-Wpn"; */ -"VW8-s5-Wpn.title" = "注音拼音輸入處理引擎研發:Lukhnos Liu。\n輸入法狀態管理引擎研發:Zonble Yang。\n威注音 macOS 程式研發:Shiki Suen, Hiraku Wang, 等。\n威注音詞庫維護:Shiki Suen。\n天權星語彙引擎:Shiki Suen,用 Swift 將 Lukhnos 的 C++ Gramambular 重寫而得。"; +"VW8-s5-Wpn.title" = "注音拼音輸入處理引擎研發:Lukhnos Liu。\n輸入法狀態管理引擎研發:Zonble Yang。\n威注音 macOS 程式研發:Shiki Suen, Isaac Xen, Hiraku Wang, 等。\n威注音詞庫維護:Shiki Suen。\n天權星語彙引擎:Shiki Suen,用 Swift 將 Lukhnos 的 C++ Gramambular 重寫而得。"; /* Class = "NSTextFieldCell"; title = "Placeholder for showing copyright information."; ObjectID = "eo3-TK-0rB"; */ // "eo3-TK-0rB.title" = "Placeholder for showing copyright information."; diff --git a/LICENSE-CHS.txt b/LICENSE-CHS.txt index bdaf5b3c..3539bc05 100644 --- a/LICENSE-CHS.txt +++ b/LICENSE-CHS.txt @@ -5,7 +5,7 @@ vChewing macOS: MIT-NTL License 麻理(去商标)授权合约 © 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project. 注音拼音输入处理引擎研发:Lukhnos Liu。 输入法状态管理引擎研发:Zonble Yang。 -威注音 macOS 程式研发:Shiki Suen, Hiraku Wang, 等。 +威注音 macOS 程式研发:Shiki Suen, Isaac Xen, Hiraku Wang, 等。 威注音词库维护:Shiki Suen。 天权星语汇引擎:Shiki Suen,用 Swift 将 Lukhnos 的 C++ Gramambular 重写而得。 diff --git a/LICENSE-CHT.txt b/LICENSE-CHT.txt index e2e8a84f..b629fd9e 100644 --- a/LICENSE-CHT.txt +++ b/LICENSE-CHT.txt @@ -5,7 +5,7 @@ vChewing macOS: MIT-NTL License 麻理(去商標)授權合約 © 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project. 注音拼音輸入處理引擎研發:Lukhnos Liu。 輸入法狀態管理引擎研發:Zonble Yang。 -威注音 macOS 程式研發:Shiki Suen, Hiraku Wang, 等。 +威注音 macOS 程式研發:Shiki Suen, Isaac Xen, Hiraku Wang, 等。 威注音詞庫維護:Shiki Suen。 天權星語彙引擎:Shiki Suen,用 Swift 將 Lukhnos 的 C++ Gramambular 重寫而得。 diff --git a/LICENSE-JPN.txt b/LICENSE-JPN.txt index 6bf08d35..1061f95e 100644 --- a/LICENSE-JPN.txt +++ b/LICENSE-JPN.txt @@ -4,7 +4,7 @@ vChewing macOS: MIT商標不許可ライセンス (MIT-NTL License) ボポモフォエンジン開発:Lukhnos Liu。 入力状態管理システム開発:Zonble Yang。 -macOS 版威注音の開発:Shiki Suen, Hiraku Wang, など。 +macOS 版威注音の開発:Shiki Suen, Isaac Xen, Hiraku Wang, など。 威注音語彙データの維持:Shiki Suen。 Megrez 辞書処理エンジン:Shiki Suen(Lukhnos の Gramambular C++ エンジンを Swift で再開発したものである)。 diff --git a/LICENSE.txt b/LICENSE.txt index 1d8868ba..e37f0d24 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -5,7 +5,7 @@ vChewing macOS: MIT-NTL License © 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project. Mandarin Syllable Composer Engine by Lukhnos Liu. Input State Management Architecture by Zonble Yang. -vChewing macOS Development: Shiki Suen, Hiraku Wang, etc. +vChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc. vChewing Phrase Database Maintained by Shiki Suen. Megrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine. diff --git a/Source/WindowNIBs/Base.lproj/frmAboutWindow.xib b/Source/WindowNIBs/Base.lproj/frmAboutWindow.xib index ed7cce75..68677eea 100644 --- a/Source/WindowNIBs/Base.lproj/frmAboutWindow.xib +++ b/Source/WindowNIBs/Base.lproj/frmAboutWindow.xib @@ -1,8 +1,7 @@ - + - - + @@ -58,7 +57,7 @@ McBopomofo Engine by Mengjuei Hsieh, Lukhnos Liu, Zonble Yang, et al. -vChewing macOS Development: Shiki Suen, Hiraku Wang, etc. +vChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc. vChewing Phrase Database Maintained by Shiki Suen. @@ -125,7 +124,7 @@ DQ - + diff --git a/Source/WindowNIBs/en.lproj/frmAboutWindow.strings b/Source/WindowNIBs/en.lproj/frmAboutWindow.strings index 0ab50907..1d0d5e01 100644 --- a/Source/WindowNIBs/en.lproj/frmAboutWindow.strings +++ b/Source/WindowNIBs/en.lproj/frmAboutWindow.strings @@ -23,5 +23,5 @@ /* Class = "NSTextFieldCell"; title = "© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project."; ObjectID = "lblCopyright"; */ // "lblCopyright.title" = "© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project."; -/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "lblCredits"; */ -"lblCredits.title" = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; +/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "lblCredits"; */ +"lblCredits.title" = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; diff --git a/Source/WindowNIBs/ja.lproj/frmAboutWindow.strings b/Source/WindowNIBs/ja.lproj/frmAboutWindow.strings index 7a918a02..1ff90c89 100644 --- a/Source/WindowNIBs/ja.lproj/frmAboutWindow.strings +++ b/Source/WindowNIBs/ja.lproj/frmAboutWindow.strings @@ -23,5 +23,5 @@ /* Class = "NSTextFieldCell"; title = "© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project."; ObjectID = "lblCopyright"; */ // "lblCopyright.title" = "© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project."; -/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "lblCredits"; */ -"lblCredits.title" = "ボポモフォエンジン開発:Lukhnos Liu。\n入力状態管理システム開発:Zonble Yang。\nmacOS 版威注音の開発:Shiki Suen, Hiraku Wang, など。\n威注音語彙データの維持:Shiki Suen。\nMegrez 辞書処理エンジン:Shiki Suen(Lukhnos の Gramambular C++ エンジンを Swift で再開発したものである)。"; +/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "lblCredits"; */ +"lblCredits.title" = "ボポモフォエンジン開発:Lukhnos Liu。\n入力状態管理システム開発:Zonble Yang。\nmacOS 版威注音の開発:Shiki Suen, Isaac Xen, Hiraku Wang, など。\n威注音語彙データの維持:Shiki Suen。\nMegrez 辞書処理エンジン:Shiki Suen(Lukhnos の Gramambular C++ エンジンを Swift で再開発したものである)。"; diff --git a/Source/WindowNIBs/zh-Hans.lproj/frmAboutWindow.strings b/Source/WindowNIBs/zh-Hans.lproj/frmAboutWindow.strings index b31df929..d9bf31a9 100644 --- a/Source/WindowNIBs/zh-Hans.lproj/frmAboutWindow.strings +++ b/Source/WindowNIBs/zh-Hans.lproj/frmAboutWindow.strings @@ -23,5 +23,5 @@ /* Class = "NSTextFieldCell"; title = "© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project."; ObjectID = "lblCopyright"; */ // "lblCopyright.title" = "© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project."; -/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "lblCredits"; */ -"lblCredits.title" = "注音拼音输入处理引擎研发:Lukhnos Liu。\n输入法状态管理引擎研发:Zonble Yang。\n威注音 macOS 程式研发:Shiki Suen, Hiraku Wang, 等。\n威注音词库维护:Shiki Suen。\n天权星语汇引擎:Shiki Suen,用 Swift 将 Lukhnos 的 C++ Gramambular 重写而得。"; +/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "lblCredits"; */ +"lblCredits.title" = "注音拼音输入处理引擎研发:Lukhnos Liu。\n输入法状态管理引擎研发:Zonble Yang。\n威注音 macOS 程式研发:Shiki Suen, Isaac Xen, Hiraku Wang, 等。\n威注音词库维护:Shiki Suen。\n天权星语汇引擎:Shiki Suen,用 Swift 将 Lukhnos 的 C++ Gramambular 重写而得。"; diff --git a/Source/WindowNIBs/zh-Hant.lproj/frmAboutWindow.strings b/Source/WindowNIBs/zh-Hant.lproj/frmAboutWindow.strings index 151c31e6..e4bd24be 100644 --- a/Source/WindowNIBs/zh-Hant.lproj/frmAboutWindow.strings +++ b/Source/WindowNIBs/zh-Hant.lproj/frmAboutWindow.strings @@ -23,5 +23,5 @@ /* Class = "NSTextFieldCell"; title = "© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project."; ObjectID = "lblCopyright"; */ // "lblCopyright.title" = "© 2011-2022 OpenVanilla Project & © 2021-2022 vChewing Project."; -/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "lblCredits"; */ -"lblCredits.title" = "注音拼音輸入處理引擎研發:Lukhnos Liu。\n輸入法狀態管理引擎研發:Zonble Yang。\n威注音 macOS 程式研發:Shiki Suen, Hiraku Wang, 等。\n威注音詞庫維護:Shiki Suen。\n天權星語彙引擎:Shiki Suen,用 Swift 將 Lukhnos 的 C++ Gramambular 重寫而得。"; +/* Class = "NSTextFieldCell"; title = "Mandarin Syllable Composer Engine by Lukhnos Liu.\nInput State Management Architecture by Zonble Yang.\nvChewing macOS Development: Shiki Suen, Isaac Xen, Hiraku Wang, etc.\nvChewing Phrase Database Maintained by Shiki Suen.\nMegrez is a rewritten unigram engine by Shiki Suen using Swift, replacing Lukhnos' C++ Gramambular engine."; ObjectID = "lblCredits"; */ +"lblCredits.title" = "注音拼音輸入處理引擎研發:Lukhnos Liu。\n輸入法狀態管理引擎研發:Zonble Yang。\n威注音 macOS 程式研發:Shiki Suen, Isaac Xen, Hiraku Wang, 等。\n威注音詞庫維護:Shiki Suen。\n天權星語彙引擎:Shiki Suen,用 Swift 將 Lukhnos 的 C++ Gramambular 重寫而得。"; From 8cf6272a076f1579bdaff9b6fce824064e2e5208 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 9 May 2022 09:49:33 +0800 Subject: [PATCH 14/20] LMCoreEX // Patch the score double if it is > 0 (mistyped). --- Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift index de69e9ef..0f07eaaf 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCoreEX.swift @@ -127,6 +127,9 @@ extension vChewing { if neta.count >= 3, !shouldForceDefaultScore { theScore = .init(neta[2]) ?? defaultScore } + if theScore > 0 { + theScore *= -1 // 應對可能忘記寫負號的情形 + } grams.append(Megrez.Unigram(keyValue: kvPair, score: theScore)) } } From d8c2f668d2050cd922a220a0547f2da1ccb0eadb Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 9 May 2022 09:58:55 +0800 Subject: [PATCH 15/20] LMI, etc. // Nomenclature update, etc. --- Source/Modules/IMEModules/IME.swift | 4 +-- .../IMEModules/ctlInputMethod_Menu.swift | 4 +-- .../LangModelRelated/LMInstantiator.swift | 29 ++++++++++++------- .../SubLMs/lmReplacements.swift | 1 - .../LangModelRelated/mgrLangModel.swift | 26 ++++++++--------- 5 files changed, 35 insertions(+), 29 deletions(-) diff --git a/Source/Modules/IMEModules/IME.swift b/Source/Modules/IMEModules/IME.swift index 09b769eb..9afa12fe 100644 --- a/Source/Modules/IMEModules/IME.swift +++ b/Source/Modules/IMEModules/IME.swift @@ -67,9 +67,9 @@ public class IME: NSObject { // mgrLangModel 的 loadUserPhrases 等函數在自動讀取 dataFolderPath 時, // 如果發現自訂目錄不可用,則會自動抹去自訂目錄設定、改採預設目錄。 // 所以這裡不需要特別處理。 - mgrLangModel.loadUserAssociatedPhrases() + mgrLangModel.loadUserAssociatesData() mgrLangModel.loadUserPhraseReplacement() - mgrLangModel.loadUserPhrases() + mgrLangModel.loadUserPhrasesData() if !userOnly { // mgrLangModel.loadDataModels() } diff --git a/Source/Modules/IMEModules/ctlInputMethod_Menu.swift b/Source/Modules/IMEModules/ctlInputMethod_Menu.swift index eb08908b..82236031 100644 --- a/Source/Modules/IMEModules/ctlInputMethod_Menu.swift +++ b/Source/Modules/IMEModules/ctlInputMethod_Menu.swift @@ -135,7 +135,7 @@ extension ctlInputMethod { if optionKeyPressed || !mgrPrefs.shouldAutoReloadUserDataFiles { menu.addItem( withTitle: NSLocalizedString("Reload User Phrases", comment: ""), - action: #selector(reloadUserPhrases(_:)), keyEquivalent: "" + action: #selector(reloadUserPhrasesData(_:)), keyEquivalent: "" ) } @@ -346,7 +346,7 @@ extension ctlInputMethod { } } - @objc func reloadUserPhrases(_: Any?) { + @objc func reloadUserPhrasesData(_: Any?) { IME.initLangModels(userOnly: true) } diff --git a/Source/Modules/LangModelRelated/LMInstantiator.swift b/Source/Modules/LangModelRelated/LMInstantiator.swift index 31b2e80c..529a11e3 100644 --- a/Source/Modules/LangModelRelated/LMInstantiator.swift +++ b/Source/Modules/LangModelRelated/LMInstantiator.swift @@ -32,9 +32,11 @@ import Foundation // 簡體中文模式與繁體中文模式共用全字庫擴展模組,故單獨處理。 // 塞在 LMInstantiator 內的話,每個模式都會讀入一份全字庫,會多佔用 100MB 記憶體。 private var lmCNS = vChewing.LMCoreEX( - reverse: true, consolidate: false, defaultScore: -11.0, forceDefaultScore: false) + reverse: true, consolidate: false, defaultScore: -11.0, forceDefaultScore: false +) private var lmSymbols = vChewing.LMCoreEX( - reverse: true, consolidate: false, defaultScore: -13.0, forceDefaultScore: false) + reverse: true, consolidate: false, defaultScore: -13.0, forceDefaultScore: false +) extension vChewing { /// LMInstantiator is a facade for managing a set of models including @@ -76,18 +78,23 @@ extension vChewing { /// Reverse 的話,第一欄是注音,第二欄是對應的漢字,第三欄是可能的權重。 /// 不 Reverse 的話,第一欄是漢字,第二欄是對應的注音,第三欄是可能的權重。 var lmCore = LMCoreEX( - reverse: false, consolidate: false, defaultScore: -9.9, forceDefaultScore: false) + reverse: false, consolidate: false, defaultScore: -9.9, forceDefaultScore: false + ) var lmMisc = LMCoreEX( - reverse: true, consolidate: false, defaultScore: -1.0, forceDefaultScore: false) + reverse: true, consolidate: false, defaultScore: -1.0, forceDefaultScore: false + ) // 聲明使用者語言模組。 // 使用者語言模組使用多執行緒的話,可能會導致一些問題。有時間再仔細排查看看。 var lmUserPhrases = LMCoreEX( - reverse: true, consolidate: true, defaultScore: 0, forceDefaultScore: true) + reverse: true, consolidate: true, defaultScore: 0, forceDefaultScore: true + ) var lmFiltered = LMCoreEX( - reverse: true, consolidate: true, defaultScore: 0, forceDefaultScore: true) + reverse: true, consolidate: true, defaultScore: 0, forceDefaultScore: true + ) var lmUserSymbols = LMCoreEX( - reverse: true, consolidate: true, defaultScore: -12.0, forceDefaultScore: true) + reverse: true, consolidate: true, defaultScore: -12.0, forceDefaultScore: true + ) var lmReplacements = LMReplacments() var lmAssociates = LMAssociates() @@ -96,7 +103,7 @@ extension vChewing { // 以下這些函數命名暫時保持原樣,等弒神行動徹底結束了再調整。 - public func isDataModelLoaded() -> Bool { lmCore.isLoaded() } + public func isLanguageModelLoaded() -> Bool { lmCore.isLoaded() } public func loadLanguageModel(path: String) { if FileManager.default.isReadableFile(atPath: path) { lmCore.open(path) @@ -136,7 +143,7 @@ extension vChewing { } } - public func loadUserPhrases(path: String, filterPath: String) { + public func loadUserPhrasesData(path: String, filterPath: String) { if FileManager.default.isReadableFile(atPath: path) { lmUserPhrases.close() lmUserPhrases.open(path) @@ -163,7 +170,7 @@ extension vChewing { } } - public func loadUserAssociatedPhrases(path: String) { + public func loadUserAssociatesData(path: String) { if FileManager.default.isReadableFile(atPath: path) { lmAssociates.close() lmAssociates.open(path) @@ -173,7 +180,7 @@ extension vChewing { } } - public func loadPhraseReplacementMap(path: String) { + public func loadReplacementsData(path: String) { if FileManager.default.isReadableFile(atPath: path) { lmReplacements.close() lmReplacements.open(path) diff --git a/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift index 3cce9a45..989a7625 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift @@ -99,7 +99,6 @@ extension vChewing { public func hasValuesFor(key: String) -> Bool { rangeMap[key] != nil } - } } diff --git a/Source/Modules/LangModelRelated/mgrLangModel.swift b/Source/Modules/LangModelRelated/mgrLangModel.swift index 0f941952..b72c0441 100644 --- a/Source/Modules/LangModelRelated/mgrLangModel.swift +++ b/Source/Modules/LangModelRelated/mgrLangModel.swift @@ -69,7 +69,7 @@ class mgrLangModel: NSObject { if !gLangModelCHS.isSymbolDataLoaded() { gLangModelCHS.loadSymbolData(path: getBundleDataPath("data-symbols")) } - if !gLangModelCHT.isDataModelLoaded() { + if !gLangModelCHT.isLanguageModelLoaded() { NotifierController.notify( message: String( format: "%@", NSLocalizedString("Loading CHT Core Dict...", comment: "") @@ -82,7 +82,7 @@ class mgrLangModel: NSObject { ) ) } - if !gLangModelCHS.isDataModelLoaded() { + if !gLangModelCHS.isLanguageModelLoaded() { NotifierController.notify( message: String( format: "%@", NSLocalizedString("Loading CHS Core Dict...", comment: "") @@ -108,7 +108,7 @@ class mgrLangModel: NSObject { if !gLangModelCHS.isCNSDataLoaded() { gLangModelCHS.loadCNSData(path: getBundleDataPath("char-kanji-cns")) } - if !gLangModelCHS.isDataModelLoaded() { + if !gLangModelCHS.isLanguageModelLoaded() { NotifierController.notify( message: String( format: "%@", NSLocalizedString("Loading CHS Core Dict...", comment: "") @@ -131,7 +131,7 @@ class mgrLangModel: NSObject { if !gLangModelCHT.isCNSDataLoaded() { gLangModelCHT.loadCNSData(path: getBundleDataPath("char-kanji-cns")) } - if !gLangModelCHT.isDataModelLoaded() { + if !gLangModelCHT.isLanguageModelLoaded() { NotifierController.notify( message: String( format: "%@", NSLocalizedString("Loading CHT Core Dict...", comment: "") @@ -147,12 +147,12 @@ class mgrLangModel: NSObject { } } - public static func loadUserPhrases() { - gLangModelCHT.loadUserPhrases( + public static func loadUserPhrasesData() { + gLangModelCHT.loadUserPhrasesData( path: userPhrasesDataPath(InputMode.imeModeCHT), filterPath: excludedPhrasesDataPath(InputMode.imeModeCHT) ) - gLangModelCHS.loadUserPhrases( + gLangModelCHS.loadUserPhrasesData( path: userPhrasesDataPath(InputMode.imeModeCHS), filterPath: excludedPhrasesDataPath(InputMode.imeModeCHS) ) @@ -160,20 +160,20 @@ class mgrLangModel: NSObject { gLangModelCHS.loadUserSymbolData(path: userSymbolDataPath(InputMode.imeModeCHS)) } - public static func loadUserAssociatedPhrases() { - gLangModelCHT.loadUserAssociatedPhrases( + public static func loadUserAssociatesData() { + gLangModelCHT.loadUserAssociatesData( path: mgrLangModel.userAssociatedPhrasesDataPath(InputMode.imeModeCHT) ) - gLangModelCHS.loadUserAssociatedPhrases( + gLangModelCHS.loadUserAssociatesData( path: mgrLangModel.userAssociatedPhrasesDataPath(InputMode.imeModeCHS) ) } public static func loadUserPhraseReplacement() { - gLangModelCHT.loadPhraseReplacementMap( + gLangModelCHT.loadReplacementsData( path: mgrLangModel.phraseReplacementDataPath(InputMode.imeModeCHT) ) - gLangModelCHS.loadPhraseReplacementMap( + gLangModelCHS.loadReplacementsData( path: mgrLangModel.phraseReplacementDataPath(InputMode.imeModeCHS) ) } @@ -424,7 +424,7 @@ class mgrLangModel: NSObject { // We use FSEventStream to monitor possible changes of the user phrase folder, hence the // lack of the needs of manually load data here unless FSEventStream is disabled by user. if !mgrPrefs.shouldAutoReloadUserDataFiles { - loadUserPhrases() + loadUserPhrasesData() } return true } From bd65f2c00068c882f1a134bd774eeb12ada04391 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 9 May 2022 12:04:15 +0800 Subject: [PATCH 16/20] KeyHandler & InputHandler // Refuse non-ANSI charCodes. --- .../ControllerModules/InputHandler.swift | 55 +++++++++++++++++-- .../KeyHandler_HandleInput.swift | 8 +++ 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/Source/Modules/ControllerModules/InputHandler.swift b/Source/Modules/ControllerModules/InputHandler.swift index 66de0ba7..67360a2a 100644 --- a/Source/Modules/ControllerModules/InputHandler.swift +++ b/Source/Modules/ControllerModules/InputHandler.swift @@ -61,16 +61,16 @@ enum KeyCode: UInt16 { case kF8 = 100 case kF9 = 101 case kF11 = 103 - case kF13 = 105 + case kF13 = 105 // PrtSc case kF16 = 106 case kF14 = 107 case kF10 = 109 case kF12 = 111 case kF15 = 113 - case kHelp = 114 + case kHelp = 114 // Insert case kHome = 115 case kPageUp = 116 - case kWindowDelete = 117 // Renamed from "kForwardDelete" to avoid nomenclatural confusions. + case kWindowsDelete = 117 // Renamed from "kForwardDelete" to avoid nomenclatural confusions. case kF4 = 118 case kEnd = 119 case kF2 = 120 @@ -82,6 +82,33 @@ enum KeyCode: UInt16 { case kUpArrow = 126 } +enum KeyCodeBlackListed: UInt16 { + case kF17 = 64 + case kVolumeUp = 72 + case kVolumeDown = 73 + case kMute = 74 + case kF18 = 79 + case kF19 = 80 + case kF20 = 90 + case kF5 = 96 + case kF6 = 97 + case kF7 = 98 + case kF3 = 99 + case kF8 = 100 + case kF9 = 101 + case kF11 = 103 + case kF13 = 105 // PrtSc + case kF16 = 106 + case kF14 = 107 + case kF10 = 109 + case kF12 = 111 + case kF15 = 113 + case kHelp = 114 // Insert + case kF4 = 118 + case kF2 = 120 + case kF1 = 122 +} + // CharCodes: https://theasciicode.com.ar/ascii-control-characters/horizontal-tab-ascii-code-9.html enum CharCode: UInt /* 16 */ { case yajuusenpai = 114_514_191_191_810_893 @@ -172,6 +199,26 @@ class InputHandler: NSObject { "<\(super.description) inputText:\(String(describing: inputText)), inputTextIgnoringModifiers:\(String(describing: inputTextIgnoringModifiers)) charCode:\(charCode), keyCode:\(keyCode), flags:\(flags), cursorForwardKey:\(cursorForwardKey), cursorBackwardKey:\(cursorBackwardKey), extraChooseCandidateKey:\(extraChooseCandidateKey), extraChooseCandidateKeyReverse:\(extraChooseCandidateKeyReverse), absorbedArrowKey:\(absorbedArrowKey), verticalModeOnlyChooseCandidateKey:\(verticalModeOnlyChooseCandidateKey), emacsKey:\(emacsKey), useVerticalMode:\(useVerticalMode)>" } + // 除了 ANSI charCode 以外,其餘一律過濾掉,免得純 Swift 版 KeyHandler 被餵屎。 + var isInvalidInput: Bool { + switch charCode { + case 0x20...0xFF: // ANSI charCode 範圍 + return false + default: + if isReservedKey, !isKeyCodeBlacklisted { + return false + } + return true + } + } + + var isKeyCodeBlacklisted: Bool { + guard let code = KeyCodeBlackListed(rawValue: keyCode) else { + return false + } + return code.rawValue != KeyCode.kNone.rawValue + } + var isShiftHold: Bool { flags.contains([.shift]) } @@ -269,7 +316,7 @@ class InputHandler: NSObject { } var isDelete: Bool { - KeyCode(rawValue: keyCode) == KeyCode.kWindowDelete + KeyCode(rawValue: keyCode) == KeyCode.kWindowsDelete } var isCursorBackward: Bool { diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift index d9a6d990..fa95b327 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift @@ -46,6 +46,14 @@ extension KeyHandler { return false } + // 提前過濾掉一些不合規的按鍵訊號輸入,免得相關按鍵訊號被送給 Megrez 引發輸入法崩潰。 + if input.isInvalidInput { + IME.prtDebugIntel("550BCF7B: KeyHandler just refused an invalid input.") + errorCallback() + stateCallback(state) + return true + } + // Ignore the input if the composing buffer is empty with no reading // and there is some function key combination. let isFunctionKey: Bool = From 8d0babf95438389edc743d712ffb078e8b8b8599 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 9 May 2022 16:18:44 +0800 Subject: [PATCH 17/20] IME // Change debug output header to "vChewingDebug". --- Source/Modules/IMEModules/IME.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Modules/IMEModules/IME.swift b/Source/Modules/IMEModules/IME.swift index 9afa12fe..0704414a 100644 --- a/Source/Modules/IMEModules/IME.swift +++ b/Source/Modules/IMEModules/IME.swift @@ -51,7 +51,7 @@ public class IME: NSObject { static func prtDebugIntel(_ strPrint: String) { if mgrPrefs.isDebugModeEnabled { - NSLog("vChewingErrorCallback: %@", strPrint) + NSLog("vChewingDebug: %@", strPrint) } } From 2540b3137c2dd6d8f08200abe8f81ba83c25e652 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 9 May 2022 22:08:55 +0800 Subject: [PATCH 18/20] =?UTF-8?q?KeyHandler=20//=20Commiting=20HTML=20Ruby?= =?UTF-8?q?=20Composition=20by=20=E2=8C=98=E2=8C=83=E2=8C=A5Enter.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../KeyHandler_HandleInput.swift | 4 ++- .../ControllerModules/KeyHandler_States.swift | 34 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift index fa95b327..57153b4d 100644 --- a/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift +++ b/Source/Modules/ControllerModules/KeyHandler_HandleInput.swift @@ -341,7 +341,9 @@ extension KeyHandler { if input.isEnter { return (input.isCommandHold && input.isControlHold) - ? handleCtrlCommandEnter(state: state, stateCallback: stateCallback, errorCallback: errorCallback) + ? (input.isOptionHold + ? handleCtrlOptionCommandEnter(state: state, stateCallback: stateCallback, errorCallback: errorCallback) + : handleCtrlCommandEnter(state: state, stateCallback: stateCallback, errorCallback: errorCallback)) : handleEnter(state: state, stateCallback: stateCallback, errorCallback: errorCallback) } diff --git a/Source/Modules/ControllerModules/KeyHandler_States.swift b/Source/Modules/ControllerModules/KeyHandler_States.swift index 103e02cd..4ab80928 100644 --- a/Source/Modules/ControllerModules/KeyHandler_States.swift +++ b/Source/Modules/ControllerModules/KeyHandler_States.swift @@ -268,7 +268,7 @@ extension KeyHandler { return true } - // MARK: - CMD+Enter 鍵處理 + // MARK: - CMD+Enter 鍵處理(注音文) func handleCtrlCommandEnter( state: InputState, @@ -292,6 +292,38 @@ extension KeyHandler { return true } + // MARK: - CMD+Alt+Enter 鍵處理(網頁 Ruby 注音文標記) + + func handleCtrlOptionCommandEnter( + state: InputState, + stateCallback: @escaping (InputState) -> Void, + errorCallback _: @escaping () -> Void + ) -> Bool { + if !(state is InputState.Inputting) { + return false + } + + var composed = "" + + for theAnchor in _walkedNodes { + if let node = theAnchor.node { + let key = node.currentKeyValue().key.replacingOccurrences(of: "-", with: " ") + let value = node.currentKeyValue().value + if key.contains("_") { // 不要給標點符號等特殊元素加注音 + composed += value + } else { + composed += "\(value)(\(key))" + } + } + } + + clear() + + stateCallback(InputState.Committing(poppedText: composed)) + stateCallback(InputState.Empty()) + return true + } + // MARK: - 處理 Backspace (macOS Delete) 按鍵行為 func handleBackspace( From 2ac4234992c216d816f587d4be05414023ca6d44 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 9 May 2022 21:14:03 +0800 Subject: [PATCH 19/20] Update Data - 20220509 --- Source/Data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Data b/Source/Data index 4065cb72..4f922087 160000 --- a/Source/Data +++ b/Source/Data @@ -1 +1 @@ -Subproject commit 4065cb727373ab12a3401eb3526e4a6208671e59 +Subproject commit 4f922087d6c20964a59f7838e05ae82beef493d1 From 63f5e777cc88047aae5db12bd8a27bd98acc5eb3 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 9 May 2022 22:39:59 +0800 Subject: [PATCH 20/20] Bump version to 1.5.5 Build 1955. --- Update-Info.plist | 4 ++-- vChewing.pkgproj | 2 +- vChewing.xcodeproj/project.pbxproj | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Update-Info.plist b/Update-Info.plist index 98491d87..871e0c7e 100644 --- a/Update-Info.plist +++ b/Update-Info.plist @@ -3,9 +3,9 @@ CFBundleShortVersionString - 1.5.4 + 1.5.5 CFBundleVersion - 1954 + 1955 UpdateInfoEndpoint https://gitee.com/vchewing/vChewing-macOS/raw/main/Update-Info.plist UpdateInfoSite diff --git a/vChewing.pkgproj b/vChewing.pkgproj index 6941216c..535324c3 100644 --- a/vChewing.pkgproj +++ b/vChewing.pkgproj @@ -726,7 +726,7 @@ USE_HFS+_COMPRESSION VERSION - 1.5.4 + 1.5.5 TYPE 0 diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index 58647c9c..63324db9 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -1332,7 +1332,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1954; + CURRENT_PROJECT_VERSION = 1955; DEBUG_INFORMATION_FORMAT = dwarf; GCC_C_LANGUAGE_STANDARD = gnu11; GCC_DYNAMIC_NO_PIC = NO; @@ -1355,7 +1355,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.4; + MARKETING_VERSION = 1.5.5; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.vChewing.vChewingPhraseEditor; @@ -1388,7 +1388,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1954; + CURRENT_PROJECT_VERSION = 1955; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; GCC_C_LANGUAGE_STANDARD = gnu11; @@ -1407,7 +1407,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.4; + MARKETING_VERSION = 1.5.5; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.vChewing.vChewingPhraseEditor; @@ -1522,7 +1522,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1954; + CURRENT_PROJECT_VERSION = 1955; DEVELOPMENT_ASSET_PATHS = ""; DEVELOPMENT_TEAM = ""; GCC_C_LANGUAGE_STANDARD = gnu99; @@ -1557,7 +1557,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.4; + MARKETING_VERSION = 1.5.5; ONLY_ACTIVE_ARCH = YES; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.inputmethod.vChewing; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -1589,7 +1589,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1954; + CURRENT_PROJECT_VERSION = 1955; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEVELOPMENT_ASSET_PATHS = ""; DEVELOPMENT_TEAM = ""; @@ -1619,7 +1619,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.4; + MARKETING_VERSION = 1.5.5; PRODUCT_BUNDLE_IDENTIFIER = org.atelierInmu.inputmethod.vChewing; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = ""; @@ -1702,7 +1702,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1954; + CURRENT_PROJECT_VERSION = 1955; DEVELOPMENT_TEAM = ""; GCC_C_LANGUAGE_STANDARD = gnu99; GCC_DYNAMIC_NO_PIC = NO; @@ -1727,7 +1727,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.4; + MARKETING_VERSION = 1.5.5; ONLY_ACTIVE_ARCH = YES; PRODUCT_BUNDLE_IDENTIFIER = "org.atelierInmu.vChewing.${PRODUCT_NAME:rfc1034identifier}"; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -1754,7 +1754,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; COPY_PHASE_STRIP = NO; - CURRENT_PROJECT_VERSION = 1954; + CURRENT_PROJECT_VERSION = 1955; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEVELOPMENT_TEAM = ""; GCC_C_LANGUAGE_STANDARD = gnu99; @@ -1774,7 +1774,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 10.11.5; - MARKETING_VERSION = 1.5.4; + MARKETING_VERSION = 1.5.5; PRODUCT_BUNDLE_IDENTIFIER = "org.atelierInmu.vChewing.${PRODUCT_NAME:rfc1034identifier}"; PRODUCT_NAME = "$(TARGET_NAME)"; PROVISIONING_PROFILE_SPECIFIER = "";