Megrez // Introducing v1.2.6 update.

This commit is contained in:
ShikiSuen 2022-06-21 23:08:07 +08:00
parent 543d827bd4
commit 4073938d02
12 changed files with 118 additions and 123 deletions

View File

@ -197,7 +197,7 @@ extension vChewing {
if key == " " {
///
let spaceUnigram = Megrez.Unigram(
keyValue: Megrez.KeyValuePair(key: " ", value: " "),
keyValue: Megrez.KeyValuePaired(key: " ", value: " "),
score: 0
)
return [spaceUnigram]
@ -225,7 +225,7 @@ extension vChewing {
}
// Swift 使 NSOrderedSet
var filteredPairs: Set<Megrez.KeyValuePair> = []
var filteredPairs: Set<Megrez.KeyValuePaired> = []
// KeyValuePair
for unigram in lmFiltered.unigramsFor(key: key) {
@ -268,12 +268,12 @@ extension vChewing {
/// - Returns:
func filterAndTransform(
unigrams: [Megrez.Unigram],
filter filteredPairs: Set<Megrez.KeyValuePair>
filter filteredPairs: Set<Megrez.KeyValuePaired>
) -> [Megrez.Unigram] {
var results: [Megrez.Unigram] = []
var insertedPairs: Set<Megrez.KeyValuePair> = []
var insertedPairs: Set<Megrez.KeyValuePaired> = []
for unigram in unigrams {
var pair: Megrez.KeyValuePair = unigram.keyValue
var pair: Megrez.KeyValuePaired = unigram.keyValue
if filteredPairs.contains(pair) { continue }
if isPhraseReplacementEnabled {
let replacement = lmReplacements.valuesFor(key: pair.value)

View File

@ -151,7 +151,7 @@ extension vChewing {
for netaRange in arrRangeRecords {
let neta = strData[netaRange].split(separator: " ")
let theValue: String = shouldReverse ? String(neta[0]) : String(neta[1])
let kvPair = Megrez.KeyValuePair(key: key, value: theValue)
let kvPair = Megrez.KeyValuePaired(key: key, value: theValue)
var theScore = defaultScore
if neta.count >= 3, !shouldForceDefaultScore {
theScore = .init(String(neta[2])) ?? defaultScore

View File

@ -146,7 +146,7 @@ extension vChewing {
let strNetaSet = String(decoding: netaSet, as: UTF8.self)
let neta = Array(strNetaSet.split(separator: " ").reversed())
let theValue: String = .init(neta[0])
let kvPair = Megrez.KeyValuePair(key: key, value: theValue)
let kvPair = Megrez.KeyValuePaired(key: key, value: theValue)
var theScore = defaultScore
if neta.count >= 2, !shouldForceDefaultScore {
theScore = .init(String(neta[1])) ?? defaultScore

View File

@ -35,7 +35,7 @@ extension Megrez {
///
private var mutGrid: Grid = .init()
/// 使
private var mutLM: LanguageModel
private var mutLM: LanguageModelProtocol
///
public var maxBuildSpanLength: Int { mutGrid.maxBuildSpanLength }
@ -62,7 +62,7 @@ extension Megrez {
/// - lm: Megrez.LanguageModel
/// - length: 10
/// - separator:
public init(lm: LanguageModel, length: Int = 10, separator: String = "") {
public init(lm: LanguageModelProtocol, length: Int = 10, separator: String = "") {
mutLM = lm
mutGrid = .init(spanLength: abs(length)) //
joinSeparator = separator
@ -140,8 +140,8 @@ extension Megrez {
///
/// - Parameters:
/// - at:
/// - score: 0
/// - location:
/// - accumulatedScore: 0
/// - joinedPhrase: 使
/// - longPhrases: 使
public func walk(
@ -160,8 +160,8 @@ extension Megrez {
///
/// - Parameters:
/// - at:
/// - score: 0
/// - location:
/// - accumulatedScore: 0
/// - joinedPhrase: 使
/// - longPhrases: 使
public func reverseWalk(
@ -219,11 +219,9 @@ extension Megrez {
} else {
//
var longPhrases = [String]()
for theAnchor in nodes {
for theAnchor in nodes.lazy.filter({ $0.spanningLength > 1 }) {
guard let theNode = theAnchor.node else { continue }
if theAnchor.spanningLength > 1 {
longPhrases.append(theNode.currentKeyValue.value)
}
longPhrases.append(theNode.currentKeyValue.value)
}
longPhrases = longPhrases.stableSorted {
@ -249,10 +247,10 @@ extension Megrez {
}
var result: [NodeAnchor] = paths[0]
for neta in paths {
if neta.last!.accumulatedScore > result.last!.accumulatedScore {
result = neta
}
for neta in paths.lazy.filter({
$0.last!.accumulatedScore > result.last!.accumulatedScore
}) {
result = neta
}
return result
@ -267,29 +265,20 @@ extension Megrez {
for p in itrBegin..<itrEnd {
for q in 1..<maxBuildSpanLength {
if p + q > itrEnd {
break
}
if p + q > itrEnd { break }
let arrSlice = mutReadings[p..<(p + q)]
let combinedReading: String = join(slice: arrSlice, separator: joinSeparator)
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
if !unigrams.isEmpty {
let n = Node(key: combinedReading, unigrams: unigrams)
mutGrid.insertNode(node: n, location: p, spanningLength: q)
}
}
if mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { continue }
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
if unigrams.isEmpty { continue }
let n = Node(key: combinedReading, unigrams: unigrams)
mutGrid.insertNode(node: n, location: p, spanningLength: q)
}
}
}
/// Concatenates the strings in `arrSlice`, placing `separator` between adjacent elements.
///
/// - Parameters:
///   - arrSlice: The slice of strings to concatenate, in order.
///   - separator: The text inserted between consecutive elements.
/// - Returns: The concatenated string; an empty string when `arrSlice` is empty.
private func join(slice arrSlice: ArraySlice<String>, separator: String) -> String {
  // An ArraySlice<String> is already a sequence of strings, so it can be joined
  // directly without first copying its elements into a temporary array.
  arrSlice.joined(separator: separator)
}
}
}
@ -303,7 +292,7 @@ extension Sequence {
///
/// - Parameter areInIncreasingOrder: Return nil when two elements are equal.
/// - Returns: The sorted collection.
func stableSorted(
fileprivate func stableSorted(
by areInIncreasingOrder: (Element, Element) throws -> Bool
)
rethrows -> [Element]

View File

@ -91,11 +91,10 @@ extension Megrez {
/// Inserts one empty span at `location`, then trims nodes in the preceding spans.
///
/// - Parameter location: Insertion index; its absolute value is used. A value
///   greater than the current number of spans is ignored instead of trapping in
///   `insert(_:at:)`.
public func expandGridByOneAt(location: Int) {
  let location = abs(location)
  // Array.insert(_:at:) requires 0...count; anything larger would crash.
  guard location <= mutSpans.count else { return }
  mutSpans.insert(Span(), at: location)
  // Nothing precedes index 0, so there is nothing to trim. (After the insertion
  // `mutSpans.count` is always greater than `location`, so comparing the two
  // for equality here could never succeed and is intentionally omitted.)
  guard location > 0 else { return }
  for i in 0..<location {
    // Zaps overlapping spans: drop nodes in span `i` longer than `location - i`.
    mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
  }
}
@ -121,18 +120,18 @@ extension Megrez {
public func nodesBeginningAt(location: Int) -> [NodeAnchor] {
let location = abs(location) //
var results = [NodeAnchor]()
if location < mutSpans.count { // mutSpans
let span = mutSpans[location]
for i in 1...maxBuildSpanLength {
if let np = span.node(length: i) {
results.append(
NodeAnchor(
node: np,
location: location,
spanningLength: i
)
if location >= mutSpans.count { return results }
// mutSpans location 0
let span = mutSpans[location]
for i in 1...maxBuildSpanLength {
if let np = span.node(length: i) {
results.append(
.init(
node: np,
location: location,
spanningLength: i
)
}
)
}
}
return results
@ -144,20 +143,18 @@ extension Megrez {
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
let location = abs(location) //
var results = [NodeAnchor]()
if !mutSpans.isEmpty, location <= mutSpans.count {
for i in 0..<location {
let span = mutSpans[i]
if i + span.maximumLength >= location {
if let np = span.node(length: location - i) {
results.append(
NodeAnchor(
node: np,
location: i,
spanningLength: location - i
)
)
}
}
if mutSpans.isEmpty || location > mutSpans.count { return results }
for i in 0..<location {
let span = mutSpans[i]
if i + span.maximumLength < location { continue }
if let np = span.node(length: location - i) {
results.append(
.init(
node: np,
location: i,
spanningLength: location - i
)
)
}
}
return results
@ -169,24 +166,20 @@ extension Megrez {
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
let location = abs(location) //
var results = [NodeAnchor]()
if !mutSpans.isEmpty, location <= mutSpans.count {
for i in 0..<location {
let span = mutSpans[i]
if i + span.maximumLength >= location {
for j in 1...span.maximumLength {
if i + j < location {
continue
}
if let np = span.node(length: j) {
results.append(
NodeAnchor(
node: np,
location: i,
spanningLength: location - i
)
)
}
}
if mutSpans.isEmpty || location > mutSpans.count { return results }
for i in 0..<location {
let span = mutSpans[i]
if i + span.maximumLength < location { continue }
for j in 1...span.maximumLength {
if i + j < location { continue }
if let np = span.node(length: j) {
results.append(
.init(
node: np,
location: i,
spanningLength: location - i
)
)
}
}
}

View File

@ -77,7 +77,8 @@ extension Megrez {
/// - Parameters:
/// - length:
/// Returns the node stored for the given length, if there is one.
///
/// - Parameter length: The length to look up; its absolute value is used as the key.
/// - Returns: The node stored under `abs(length)`, or `nil` when none is stored.
public func node(length: Int) -> Node? {
  // Subscripting the map already yields nil for a missing key, so a separate
  // `keys.contains` check would only repeat the lookup.
  // NOTE(review): assumes `mutLengthNodeMap` is a `[Int: Node]` dictionary — confirm.
  mutLengthNodeMap[abs(length)]
}
}
}

View File

@ -35,11 +35,11 @@ extension Megrez {
///
private var mutBigrams: [Bigram]
///
private var mutCandidates: [KeyValuePair] = []
private var mutCandidates: [KeyValuePaired] = []
/// 調
private var mutValueUnigramIndexMap: [String: Int] = [:]
///
private var mutPrecedingBigramMap: [KeyValuePair: [Megrez.Bigram]] = [:]
private var mutPrecedingBigramMap: [KeyValuePaired: [Megrez.Bigram]] = [:]
///
private var mutCandidateFixed: Bool = false
///
@ -52,7 +52,7 @@ extension Megrez {
}
///
public var candidates: [KeyValuePair] { mutCandidates }
public var candidates: [KeyValuePaired] { mutCandidates }
///
public var isCandidateFixed: Bool { mutCandidateFixed }
@ -61,8 +61,8 @@ extension Megrez {
///
public var score: Double { mutScore }
///
/// The candidate at the currently selected unigram index.
///
/// Falls back to an empty `KeyValuePaired()` when the selected index does not
/// address an element of `mutCandidates`.
public var currentKeyValue: KeyValuePaired {
  // Validate the index against the array that is actually subscripted, so an
  // out-of-range (or negative) index can never trap here.
  mutCandidates.indices.contains(mutSelectedUnigramIndex)
    ? mutCandidates[mutSelectedUnigramIndex]
    : KeyValuePaired()
}
///
@ -91,7 +91,9 @@ extension Megrez {
mutCandidates.append(gram.keyValue)
}
for gram in bigrams {
for gram in bigrams.lazy.filter({ [self] in
mutPrecedingBigramMap.keys.contains($0.precedingKeyValue)
}) {
mutPrecedingBigramMap[gram.precedingKeyValue]?.append(gram)
}
}
@ -99,19 +101,18 @@ extension Megrez {
///
/// - Parameters:
/// - precedingKeyValues:
public func primeNodeWith(precedingKeyValues: [KeyValuePair]) {
public func primeNodeWith(precedingKeyValues: [KeyValuePaired]) {
var newIndex = mutSelectedUnigramIndex
var max = mutScore
if !isCandidateFixed {
for neta in precedingKeyValues {
let bigrams = mutPrecedingBigramMap[neta] ?? []
for bigram in bigrams {
guard bigram.score > max else { continue }
if let valRetrieved = mutValueUnigramIndexMap[bigram.keyValue.value] {
newIndex = valRetrieved as Int
max = bigram.score
}
for bigram in bigrams.lazy.filter({ [self] in
$0.score > max && mutValueUnigramIndexMap.keys.contains($0.keyValue.value)
}) {
newIndex = mutValueUnigramIndexMap[bigram.keyValue.value] ?? newIndex
max = bigram.score
}
}
}
@ -154,10 +155,8 @@ extension Megrez {
/// - Parameters:
/// - candidate:
/// Returns the score of the first unigram whose value equals `candidate`.
///
/// - Parameter candidate: The value string to search for among `mutUnigrams`.
/// - Returns: The score of the first matching unigram, or `0.0` when none matches.
public func scoreFor(candidate: String) -> Double {
  // `first(where:)` stops at the first match, which is exactly what the
  // early-returning loop did.
  mutUnigrams.first(where: { $0.keyValue.value == candidate })?.score ?? 0.0
}

View File

@ -23,9 +23,20 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/// The lookup interface a language model must provide: unigram lookup, bigram
/// lookup, and a unigram existence check.
/// NOTE(review): exact lookup semantics are defined by each conforming type.
public protocol LanguageModelProtocol {
/// Returns the unigrams associated with `key`.
func unigramsFor(key: String) -> [Megrez.Unigram]
/// Returns the bigrams associated with `key` when it is preceded by `precedingKey`.
func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram]
/// Reports whether any unigram is available for `key`.
func hasUnigramsFor(key: String) -> Bool
}
extension Megrez {
/// 使
open class LanguageModel {
open class LanguageModel: LanguageModelProtocol {
public init() {}
// Swift

View File

@ -27,9 +27,9 @@ extension Megrez {
///
@frozen public struct Bigram: Equatable, CustomStringConvertible {
///
public var keyValue: KeyValuePair
public var keyValue: KeyValuePaired
///
public var precedingKeyValue: KeyValuePair
public var precedingKeyValue: KeyValuePaired
///
public var score: Double
///
@ -42,7 +42,7 @@ extension Megrez {
/// - precedingKeyValue:
/// - keyValue:
/// - score:
public init(precedingKeyValue: KeyValuePair, keyValue: KeyValuePair, score: Double) {
public init(precedingKeyValue: KeyValuePaired, keyValue: KeyValuePaired, score: Double) {
self.keyValue = keyValue
self.precedingKeyValue = precedingKeyValue
self.score = score

View File

@ -27,7 +27,7 @@ extension Megrez {
///
@frozen public struct Unigram: Equatable, CustomStringConvertible {
///
public var keyValue: KeyValuePair
public var keyValue: KeyValuePaired
///
public var score: Double
///
@ -39,7 +39,7 @@ extension Megrez {
/// - Parameters:
/// - keyValue:
/// - score:
public init(keyValue: KeyValuePair, score: Double) {
public init(keyValue: KeyValuePaired, score: Double) {
self.keyValue = keyValue
self.score = score
}

View File

@ -25,15 +25,17 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez {
///
@frozen public struct KeyValuePair: Equatable, Hashable, Comparable, CustomStringConvertible {
@frozen public struct KeyValuePaired: Equatable, Hashable, Comparable, CustomStringConvertible {
///
public var key: String
///
public var value: String
///
public var description: String {
"(" + key + "," + value + ")"
}
public var description: String { "(" + key + "," + value + ")" }
/// false
public var isValid: Bool { !key.isEmpty && !value.isEmpty }
/// ()
public var toNGramKey: String { !isValid ? "()" : "(" + key + "," + value + ")" }
///
/// - Parameters:
@ -49,23 +51,23 @@ extension Megrez {
hasher.combine(value)
}
/// Two pairs are equal when both their keys and their values are equal.
///
/// Equality must agree with hashing: values that compare equal have to produce
/// the same hash. Comparing only the key *length* would treat pairs with
/// different keys of the same length as equal even though they hash
/// differently, which makes `Set` / `Dictionary` lookups unreliable.
/// NOTE(review): assumes `hash(into:)` combines `key` as well as `value` — confirm.
public static func == (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  lhs.key == rhs.key && lhs.value == rhs.value
}
public static func < (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
(lhs.key.count < rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value < rhs.value)
}
public static func > (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
public static func > (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
(lhs.key.count > rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value > rhs.value)
}
/// Orders by key length first, then by value: `lhs` is less than or equal to
/// `rhs` when its key is strictly shorter, or when the key lengths tie and its
/// value is less than or equal to the other value.
public static func <= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  // The first clause must be a strict `<`; with `<=` a tie in key length would
  // return true regardless of the values, contradicting the `<` / `>` operators.
  (lhs.key.count < rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value <= rhs.value)
}
/// Orders by key length first, then by value: `lhs` is greater than or equal to
/// `rhs` when its key is strictly longer, or when the key lengths tie and its
/// value is greater than or equal to the other value.
public static func >= (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  // The first clause must be a strict `>`; with `>=` a tie in key length would
  // return true regardless of the values, contradicting the `<` / `>` operators.
  (lhs.key.count > rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value >= rhs.value)
}
}

View File

@ -12,7 +12,7 @@
5B242403284B0D6500520FE4 /* ctlCandidateUniversal.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B242402284B0D6500520FE4 /* ctlCandidateUniversal.swift */; };
5B3133BF280B229700A4A505 /* KeyHandler_States.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B3133BE280B229700A4A505 /* KeyHandler_States.swift */; };
5B38F59A281E2E49007D5F5D /* 6_Unigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */; };
5B38F59B281E2E49007D5F5D /* 7_KeyValuePair.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */; };
5B38F59B281E2E49007D5F5D /* 7_KeyValuePaired.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePaired.swift */; };
5B38F59C281E2E49007D5F5D /* 2_Grid.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */; };
5B38F59D281E2E49007D5F5D /* 4_Node.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */; };
5B38F59E281E2E49007D5F5D /* 6_Bigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */; };
@ -287,7 +287,7 @@
6A0D4F1515FC0EB100ABF4B3 /* 1_Compositor.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 1_Compositor.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.swift; lineEnding = 0; path = 0_Megrez.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 2_Grid.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 7_KeyValuePair.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePaired.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 7_KeyValuePaired.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 5_LanguageModel.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 4_Node.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 3_NodeAnchor.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
@ -769,7 +769,7 @@
6A0D4F1915FC0EB100ABF4B3 /* 5_LanguageModel.swift */,
6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */,
6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */,
6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePair.swift */,
6A0D4F1815FC0EB100ABF4B3 /* 7_KeyValuePaired.swift */,
);
path = Megrez;
sourceTree = "<group>";
@ -1084,7 +1084,7 @@
5B11328927B94CFB00E58451 /* AppleKeyboardConverter.swift in Sources */,
5B54E743283A7D89001ECBDC /* lmCoreNS.swift in Sources */,
5B62A32927AE77D100A19448 /* FSEventStreamHelper.swift in Sources */,
5B38F59B281E2E49007D5F5D /* 7_KeyValuePair.swift in Sources */,
5B38F59B281E2E49007D5F5D /* 7_KeyValuePaired.swift in Sources */,
5B62A33627AE795800A19448 /* mgrPrefs.swift in Sources */,
5B38F5A4281E2E49007D5F5D /* 5_LanguageModel.swift in Sources */,
5BAEFAD028012565001F42C9 /* mgrLangModel.swift in Sources */,