From cb8bb2a7bb64b3971def64f587a635b25af7228e Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 2 May 2022 21:31:58 +0800 Subject: [PATCH] LMs // Use multithreading to boost data loading speed. - This only applies to certain LMs. --- .../SubLMs/lmAssociates.swift | 2 +- .../LangModelRelated/SubLMs/lmCore.swift | 72 ++++----- .../LangModelRelated/SubLMs/lmLite.swift | 40 ++--- .../LangModelRelated/SubLMs/lmLiteMono.swift | 140 ++++++++++++++++++ .../SubLMs/lmReplacements.swift | 3 +- vChewing.xcodeproj/project.pbxproj | 12 +- 6 files changed, 212 insertions(+), 57 deletions(-) create mode 100644 Source/Modules/LangModelRelated/SubLMs/lmLiteMono.swift diff --git a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift index db15a73a..14d7bda6 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmAssociates.swift @@ -93,7 +93,7 @@ extension vChewing { keyValueMap[currentKV.key, default: []].append(currentKV) } } - IME.prtDebugIntel("\(keyValueMap.count) entries of data loaded from: \(path)") + // IME.prtDebugIntel("\(self.keyValueMap.count) entries of data loaded from: \(path)") theData = "" return true } diff --git a/Source/Modules/LangModelRelated/SubLMs/lmCore.swift b/Source/Modules/LangModelRelated/SubLMs/lmCore.swift index cd3c060c..9fba16d9 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmCore.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmCore.swift @@ -85,45 +85,51 @@ extension vChewing { } let arrData = theData.components(separatedBy: "\n") - for (lineID, lineContent) in arrData.enumerated() { - if !lineContent.hasPrefix("#") { - let lineContent = lineContent.replacingOccurrences(of: "\t", with: " ") - if lineContent.components(separatedBy: " ").count < 2 { - if arrData.last != "" { - IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)") + DispatchQueue.global(qos: .userInitiated).async { + for (lineID, lineContent) in arrData.enumerated() { + if !lineContent.hasPrefix("#") { + let lineContent = lineContent.replacingOccurrences(of: "\t", with: " ") + if lineContent.components(separatedBy: " ").count < 2 { + if arrData.last != "" { + IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)") + } + continue } - continue - } - var currentUnigram = Megrez.Unigram(keyValue: Megrez.KeyValuePair(), score: defaultScore) - var columnOne = "" - var columnTwo = "" - for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() { - switch unitID { - case 0: - columnOne = unitContent - case 1: - columnTwo = unitContent - case 2: - if !shouldForceDefaultScore { - if let unitContentConverted = Double(unitContent) { - currentUnigram.score = unitContentConverted - } else { - IME.prtDebugIntel("Line #\(lineID) Score Data Wrecked: \(lineContent)") - } + var currentUnigram = Megrez.Unigram(keyValue: Megrez.KeyValuePair(), score: self.defaultScore) + var columnOne = "" + var columnTwo = "" + DispatchQueue.global(qos: .userInitiated).async { + for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() { + switch unitID { + case 0: + columnOne = unitContent + case 1: + columnTwo = unitContent + case 2: + if !self.shouldForceDefaultScore { + if let unitContentConverted = Double(unitContent) { + currentUnigram.score = unitContentConverted + } else { + IME.prtDebugIntel("Line #\(lineID) Score Data Wrecked: \(lineContent)") + } + } + default: break } - default: break + } + DispatchQueue.main.async { + let kvPair = + self.shouldReverse + ? Megrez.KeyValuePair(key: columnTwo, value: columnOne) + : Megrez.KeyValuePair(key: columnOne, value: columnTwo) + currentUnigram.keyValue = kvPair + let key = self.shouldReverse ? columnTwo : columnOne + self.keyValueScoreMap[key, default: []].append(currentUnigram) + } } } - let kvPair = - shouldReverse - ? Megrez.KeyValuePair(key: columnTwo, value: columnOne) - : Megrez.KeyValuePair(key: columnOne, value: columnTwo) - currentUnigram.keyValue = kvPair - let key = shouldReverse ? columnTwo : columnOne - keyValueScoreMap[key, default: []].append(currentUnigram) } + // IME.prtDebugIntel("\(self.keyValueScoreMap.count) entries of data loaded from: \(path)") } - IME.prtDebugIntel("\(keyValueScoreMap.count) entries of data loaded from: \(path)") theData = "" return true } diff --git a/Source/Modules/LangModelRelated/SubLMs/lmLite.swift b/Source/Modules/LangModelRelated/SubLMs/lmLite.swift index 05afdd61..3af6a97f 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmLite.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmLite.swift @@ -76,28 +76,34 @@ extension vChewing { } let arrData = theData.components(separatedBy: "\n") - for (lineID, lineContent) in arrData.enumerated() { - if !lineContent.hasPrefix("#") { - if lineContent.components(separatedBy: " ").count < 2 { - if arrData.last != "" { - IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)") + DispatchQueue.global(qos: .userInitiated).async { + for (lineID, lineContent) in arrData.enumerated() { + if !lineContent.hasPrefix("#") { + if lineContent.components(separatedBy: " ").count < 2 { + if arrData.last != "" { + IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)") + } + continue } - continue - } - var currentKV = Megrez.KeyValuePair() - for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() { - switch unitID { - case 0: - currentKV.value = unitContent - case 1: - currentKV.key = unitContent - default: break + var currentKV = Megrez.KeyValuePair() + DispatchQueue.global(qos: .userInitiated).async { + for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() { + switch unitID { + case 0: + currentKV.value = unitContent + case 1: + currentKV.key = unitContent + default: break + } + } + DispatchQueue.main.async { + self.keyValueMap[currentKV.key, default: []].append(currentKV) + } } } - keyValueMap[currentKV.key, default: []].append(currentKV) } + // IME.prtDebugIntel("\(self.keyValueMap.count) entries of data loaded from: \(path)") } - IME.prtDebugIntel("\(keyValueMap.count) entries of data loaded from: \(path)") theData = "" if path.contains("vChewing/") { dump() diff --git a/Source/Modules/LangModelRelated/SubLMs/lmLiteMono.swift b/Source/Modules/LangModelRelated/SubLMs/lmLiteMono.swift new file mode 100644 index 00000000..adac6e2e --- /dev/null +++ b/Source/Modules/LangModelRelated/SubLMs/lmLiteMono.swift @@ -0,0 +1,140 @@ +// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License). +// Refactored from the ObjCpp-version of this class by: +// (c) 2011 and onwards The OpenVanilla Project (MIT License). +/* +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +1. The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +2. No trademark license is granted to use the trade names, trademarks, service +marks, or product names of Contributor, except as required to fulfill notice +requirements above. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +import Foundation + +extension vChewing { + public class LMLiteMono { + var keyValueMap: [String: [Megrez.KeyValuePair]] = [:] + var theData: String = "" + var allowConsolidation = false + + public init(consolidate: Bool = false) { + keyValueMap = [:] + theData = "" + allowConsolidation = consolidate + } + + deinit { + if isLoaded() { + close() + } + } + + public func isLoaded() -> Bool { + !keyValueMap.isEmpty + } + + @discardableResult public func open(_ path: String) -> Bool { + if isLoaded() { + return false + } + + if allowConsolidation { + if !LMConsolidator.fixEOF(path: path) { + return false + } + if !LMConsolidator.consolidate(path: path, pragma: true) { + return false + } + } + + do { + theData = try String(contentsOfFile: path, encoding: .utf8) + } catch { + IME.prtDebugIntel("\(error)") + IME.prtDebugIntel("↑ Exception happened when reading Associated Phrases data.") + return false + } + + let length = theData.count + guard length > 0 else { + return false + } + + let arrData = theData.components(separatedBy: "\n") + for (lineID, lineContent) in arrData.enumerated() { + if !lineContent.hasPrefix("#") { + if lineContent.components(separatedBy: " ").count < 2 { + if arrData.last != "" { + IME.prtDebugIntel("Line #\(lineID + 1) Wrecked: \(lineContent)") + } + continue + } + var currentKV = Megrez.KeyValuePair() + for (unitID, unitContent) in lineContent.components(separatedBy: " ").enumerated() { + switch unitID { + case 0: + currentKV.value = unitContent + case 1: + currentKV.key = unitContent + default: break + } + } + keyValueMap[currentKV.key, default: []].append(currentKV) + } + } + // IME.prtDebugIntel("\(self.keyValueMap.count) entries of data loaded from: \(path)") + theData = "" + if path.contains("vChewing/") { + dump() + } + return true + } + + public func close() { + if isLoaded() { + keyValueMap.removeAll() + } + } + + public func dump() { + var strDump = "" + for entry in keyValueMap { + let rows: [Megrez.KeyValuePair] = entry.1 + for row in rows { + let addline = row.key + " " + row.value + "\n" + strDump += addline + } + } + IME.prtDebugIntel(strDump) + } + + public func unigramsFor(key: String, score givenScore: Double = 0.0) -> [Megrez.Unigram] { + var v: [Megrez.Unigram] = [] + if let matched = keyValueMap[key] { + for entry in matched as [Megrez.KeyValuePair] { + v.append(Megrez.Unigram(keyValue: entry, score: givenScore)) + } + } + return v + } + + public func hasUnigramsFor(key: String) -> Bool { + keyValueMap[key] != nil + } + } +} diff --git a/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift index b189f8d5..ced9c9b5 100644 --- a/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift +++ b/Source/Modules/LangModelRelated/SubLMs/lmReplacements.swift @@ -93,8 +93,7 @@ extension vChewing { keyValueMap[currentKV.key] = currentKV.value } } - IME.prtDebugIntel("\(keyValueMap.count) entries of data loaded from: \(path)") - theData = "" + // IME.prtDebugIntel("\(self.keyValueMap.count) entries of data loaded from: \(path)") theData = "" return true } diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index 5e46a720..2e6bc5de 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -7,6 +7,7 @@ objects = { /* Begin PBXBuildFile section */ + 5B00A230282011980058E5DB /* lmLite.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B00A22F282011980058E5DB /* lmLite.swift */; }; 5B0AF8B527B2C8290096FE54 /* StringExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B0AF8B427B2C8290096FE54 /* StringExtension.swift */; }; 5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B11328827B94CFB00E58451 /* AppleKeyboardConverter.swift */; }; 5B27AD6A27CB1F9B000ED75B /* data-symbols.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B27AD6827CB1F9B000ED75B /* data-symbols.txt */; }; @@ -28,7 +29,7 @@ 5B407153281F94E6009C24CB /* Composer.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B407152281F94E6009C24CB /* Composer.mm */; }; 5B40730C281672610023DFFF /* lmAssociates.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B407309281672610023DFFF /* lmAssociates.swift */; }; 5B40730D281672610023DFFF /* lmReplacements.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B40730A281672610023DFFF /* lmReplacements.swift */; }; - 5B5D28AC281EA1E900523D4D /* lmLite.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B5D28AB281EA1E800523D4D /* lmLite.swift */; }; + 5B5D28AC281EA1E900523D4D /* lmLiteMono.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B5D28AB281EA1E800523D4D /* lmLiteMono.swift */; }; 5B5E535227EF261400C6AA1E /* IME.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B5E535127EF261400C6AA1E /* IME.swift */; }; 5B61B0CA280BEFD4002E3CFA /* KeyHandler_Misc.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B61B0C9280BEFD4002E3CFA /* KeyHandler_Misc.swift */; }; 5B62A32927AE77D100A19448 /* FSEventStreamHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */; }; @@ -162,6 +163,7 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + 5B00A22F282011980058E5DB /* lmLite.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmLite.swift; sourceTree = ""; }; 5B04305327B529D800CB65BC /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "zh-Hans.lproj/InfoPlist.strings"; sourceTree = ""; }; 5B04305427B529D800CB65BC /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "zh-Hans.lproj/Localizable.strings"; sourceTree = ""; }; 5B04305527B529D800CB65BC /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "zh-Hans.lproj/MainMenu.strings"; sourceTree = ""; }; @@ -205,7 +207,7 @@ 5B407152281F94E6009C24CB /* Composer.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = Composer.mm; sourceTree = ""; }; 5B407309281672610023DFFF /* lmAssociates.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmAssociates.swift; sourceTree = ""; usesTabs = 1; }; 5B40730A281672610023DFFF /* lmReplacements.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmReplacements.swift; sourceTree = ""; usesTabs = 1; }; - 5B5D28AB281EA1E800523D4D /* lmLite.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmLite.swift; sourceTree = ""; }; + 5B5D28AB281EA1E800523D4D /* lmLiteMono.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmLiteMono.swift; sourceTree = ""; }; 5B5E535127EF261400C6AA1E /* IME.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = IME.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B61B0C9280BEFD4002E3CFA /* KeyHandler_Misc.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_Misc.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; 5B62A32827AE77D100A19448 /* FSEventStreamHelper.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = FSEventStreamHelper.swift; sourceTree = ""; tabWidth = 2; usesTabs = 1; }; @@ -396,7 +398,8 @@ children = ( 5B407309281672610023DFFF /* lmAssociates.swift */, 5BA0DF2F2817857D009E73BB /* lmCore.swift */, - 5B5D28AB281EA1E800523D4D /* lmLite.swift */, + 5B5D28AB281EA1E800523D4D /* lmLiteMono.swift */, + 5B00A22F282011980058E5DB /* lmLite.swift */, 5B40730A281672610023DFFF /* lmReplacements.swift */, 5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */, ); @@ -1079,7 +1082,7 @@ 5B40730C281672610023DFFF /* lmAssociates.swift in Sources */, 5B707CE827D9F4590099EF99 /* OpenCCBridge.swift in Sources */, D427F76C278CA2B0004A2160 /* AppDelegate.swift in Sources */, - 5B5D28AC281EA1E900523D4D /* lmLite.swift in Sources */, + 5B5D28AC281EA1E900523D4D /* lmLiteMono.swift in Sources */, 5BA9FD4527FEF3C9002DE248 /* ToolbarItemStyleViewController.swift in Sources */, 5BA0DF322817857D009E73BB /* lmCore.swift in Sources */, 5BA9FD4127FEF3C8002DE248 /* PreferencesStyle.swift in Sources */, @@ -1135,6 +1138,7 @@ 5B62A34827AE7CD900A19448 /* ctlCandidateVertical.swift in Sources */, 5BA9FD4027FEF3C8002DE248 /* Localization.swift in Sources */, 5BA9FD1327FEDB6B002DE248 /* suiPrefPaneDictionary.swift in Sources */, + 5B00A230282011980058E5DB /* lmLite.swift in Sources */, 5BBBB77A27AEDC690023B93A /* clsSFX.swift in Sources */, 5BA9FD4727FEF3C9002DE248 /* PreferencesStyleController.swift in Sources */, 5BF8423127BAA942008E7E4C /* vChewingKanjiConverter.swift in Sources */,