LMs // +LMCoreEX, the Swift successor of ParselessLM (WIP).

Co-Authored-By: ix4n33 <16833681+isaacxen@users.noreply.github.com>
This commit is contained in:
ShikiSuen 2022-05-08 09:23:42 +08:00
parent ed5fe63b2e
commit 6c66fd26c0
2 changed files with 158 additions and 0 deletions

View File

@ -0,0 +1,154 @@
// Copyright (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// StringView Ranges extension by (c) 2022 and onwards Isaac Xen (MIT License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/// LMCore LMCoreEX range
/// range strData
/// C++ ParselessLM Swift
/// For
import Foundation
extension vChewing {
@frozen public struct LMCoreEX {
var rangeMap: [String: [Range<String.Index>]] = [:]
var strData: String = ""
var shouldReverse: Bool = false
var allowConsolidation: Bool = false
var defaultScore: Double = 0
var shouldForceDefaultScore: Bool = false
public var count: Int {
rangeMap.count
}
public init(
reverse: Bool = false, consolidate: Bool = false, defaultScore scoreDefault: Double = 0,
forceDefaultScore: Bool = false
) {
rangeMap = [:]
allowConsolidation = consolidate
shouldReverse = reverse
defaultScore = scoreDefault
shouldForceDefaultScore = forceDefaultScore
}
public func isLoaded() -> Bool {
!rangeMap.isEmpty
}
@discardableResult public mutating func open(_ path: String) -> Bool {
if isLoaded() {
return false
}
if allowConsolidation {
LMConsolidator.fixEOF(path: path)
LMConsolidator.consolidate(path: path, pragma: true)
}
do {
strData = try String(contentsOfFile: path, encoding: .utf8).replacingOccurrences(of: "\t", with: " ")
strData.ranges(splitBy: "\n").forEach {
let neta = strData[$0].components(separatedBy: " ")
if neta.count >= 2 {
let theKey = shouldReverse ? neta[1] : neta[0]
if !neta[0].isEmpty, !neta[1].isEmpty, theKey.first != "#" {
let theValue = $0
rangeMap[theKey, default: []].append(theValue)
}
}
}
} catch {
IME.prtDebugIntel("\(error)")
IME.prtDebugIntel("↑ Exception happened when reading data at: \(path).")
return false
}
return true
}
public mutating func close() {
if isLoaded() {
rangeMap.removeAll()
}
}
// MARK: - Advanced features
public func dump() {
var strDump = ""
for entry in rangeMap {
let netaRanges: [Range<String.Index>] = entry.value
for netaRange in netaRanges {
let neta = strData[netaRange]
let addline = neta + "\n"
strDump += addline
}
}
IME.prtDebugIntel(strDump)
}
public func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
// Swift
// [Megrez.Bigram]()
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
}
public func unigramsFor(key: String) -> [Megrez.Unigram] {
var grams: [Megrez.Unigram] = []
if let arrRangeRecords: [Range<String.Index>] = rangeMap[key] {
for netaRange in arrRangeRecords {
let neta = strData[netaRange].components(separatedBy: " ")
let theValue: String = shouldReverse ? neta[0] : neta[1]
let kvPair = Megrez.KeyValuePair(key: key, value: theValue)
var theScore = defaultScore
if neta.count >= 3, !shouldForceDefaultScore {
theScore = .init(neta[2]) ?? defaultScore
}
grams.append(Megrez.Unigram(keyValue: kvPair, score: theScore))
}
}
return grams
}
public func hasUnigramsFor(key: String) -> Bool {
rangeMap[key] != nil
}
}
}
// MARK: - StringView Ranges Extension (by Isaac Xen)
extension String {
fileprivate func ranges(splitBy separator: Element) -> [Range<String.Index>] {
var startIndex = startIndex
return split(separator: separator).reduce(into: []) { ranges, substring in
_ = range(of: substring, range: startIndex..<endIndex).map { range in
ranges.append(range)
startIndex = range.upperBound
}
}
}
}

View File

@ -48,6 +48,7 @@
5B782EC4280C243C007276DE /* KeyHandler_HandleCandidate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B782EC3280C243C007276DE /* KeyHandler_HandleCandidate.swift */; };
5B7BC4B027AFFBE800F66C24 /* frmPrefWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 5B7BC4AE27AFFBE800F66C24 /* frmPrefWindow.xib */; };
5B7F225D2808501000DDD3CB /* KeyHandler_HandleInput.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B7F225C2808501000DDD3CB /* KeyHandler_HandleInput.swift */; };
5B887F302826AEA400B6651E /* lmCoreEX.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B887F2F2826AEA400B6651E /* lmCoreEX.swift */; };
5B949BD92816DC5400D87B5D /* LineReader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B949BD82816DC5400D87B5D /* LineReader.swift */; };
5B949BDB2816DDBC00D87B5D /* LMConsolidator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */; };
5BA0DF312817857D009E73BB /* lmUserOverride.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */; };
@ -225,6 +226,7 @@
5B7BC4AF27AFFBE800F66C24 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/frmPrefWindow.xib; sourceTree = "<group>"; };
5B7BC4B227AFFC0B00F66C24 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/frmPrefWindow.strings; sourceTree = "<group>"; };
5B7F225C2808501000DDD3CB /* KeyHandler_HandleInput.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = KeyHandler_HandleInput.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
5B887F2F2826AEA400B6651E /* lmCoreEX.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = lmCoreEX.swift; sourceTree = "<group>"; usesTabs = 0; };
5B949BD82816DC5400D87B5D /* LineReader.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LineReader.swift; sourceTree = "<group>"; usesTabs = 0; };
5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = LMConsolidator.swift; sourceTree = "<group>"; usesTabs = 0; };
5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; lineEnding = 0; path = lmUserOverride.swift; sourceTree = "<group>"; usesTabs = 0; };
@ -394,6 +396,7 @@
children = (
5B407309281672610023DFFF /* lmAssociates.swift */,
5BA0DF2F2817857D009E73BB /* lmCore.swift */,
5B887F2F2826AEA400B6651E /* lmCoreEX.swift */,
5B00A22F282011980058E5DB /* lmLite.swift */,
5B40730A281672610023DFFF /* lmReplacements.swift */,
5BA0DF2E2817857D009E73BB /* lmUserOverride.swift */,
@ -1088,6 +1091,7 @@
5BA9FD4827FEF3C9002DE248 /* PreferencesWindowController.swift in Sources */,
5BD0113B28180D6100609769 /* LMInstantiator.swift in Sources */,
D4E569DC27A34D0E00AC2CEF /* CTools.m in Sources */,
5B887F302826AEA400B6651E /* lmCoreEX.swift in Sources */,
5BA9FD4627FEF3C9002DE248 /* Container.swift in Sources */,
D47F7DD0278C0897002F9DD7 /* ctlNonModalAlertWindow.swift in Sources */,
5B38F5A2281E2E49007D5F5D /* 0_Megrez.swift in Sources */,