KeyHandler & Megrez // Updates to Megrez v1.1.2.

This commit is contained in:
ShikiSuen 2022-05-20 01:09:03 +08:00
parent af2bdc4343
commit 494e9cf637
14 changed files with 446 additions and 317 deletions

View File

@ -72,7 +72,7 @@ class KeyHandler {
}
public init() {
_builder = Megrez.BlockReadingBuilder(lm: _languageModel)
_builder = Megrez.BlockReadingBuilder(lm: _languageModel, separator: "-")
ensureParser()
setInputMode(ctlInputMethod.currentInputMode)
}
@ -118,9 +118,7 @@ class KeyHandler {
// of the best possible Mandarin characters given the input syllables,
// using the Viterbi algorithm implemented in the Megrez library.
// The walk() traces the grid to the end, hence no need to use .reversed() here.
_walkedNodes = Megrez.Walker(
grid: _builder.grid()
).walk(at: _builder.grid().width(), nodesLimit: 3, balanced: true)
_walkedNodes = _builder.walk(at: _builder.grid.width, nodesLimit: 3, balanced: true)
}
func popOverflowComposingTextAndWalk() -> String {
@ -133,11 +131,11 @@ class KeyHandler {
// (i.e. popped out.)
var poppedText = ""
if _builder.grid().width() > mgrPrefs.composingBufferSize {
if _builder.grid.width > mgrPrefs.composingBufferSize {
if _walkedNodes.count > 0 {
let anchor: Megrez.NodeAnchor = _walkedNodes[0]
if let theNode = anchor.node {
poppedText = theNode.currentKeyValue().value
poppedText = theNode.currentKeyValue.value
}
_builder.removeHeadReadings(count: anchor.spanningLength)
}
@ -156,7 +154,7 @@ class KeyHandler {
func fixNode(value: String) {
let cursorIndex: Int = getActualCandidateCursorIndex()
let selectedNode: Megrez.NodeAnchor = _builder.grid().fixNodeSelectedCandidate(
let selectedNode: Megrez.NodeAnchor = _builder.grid.fixNodeSelectedCandidate(
location: cursorIndex, value: value
)
//
@ -216,7 +214,7 @@ class KeyHandler {
// then use the Swift trick to retrieve the candidates for each node at/crossing the cursor
for currentNodeAnchor in arrNodes {
if let currentNode = currentNodeAnchor.node {
for currentCandidate in currentNode.candidates() {
for currentCandidate in currentNode.candidates {
arrCandidates.append(currentCandidate.value)
}
}
@ -237,7 +235,7 @@ class KeyHandler {
if !overrideValue.isEmpty {
IME.prtDebugIntel(
"UOM: Suggestion retrieved, overriding the node score of the selected candidate.")
_builder.grid().overrideNodeScoreForSelectedCandidate(
_builder.grid.overrideNodeScoreForSelectedCandidate(
location: getActualCandidateCursorIndex(),
value: overrideValue,
overridingScore: findHighestScore(nodes: getRawNodes(), epsilon: kEpsilon)
@ -251,7 +249,7 @@ class KeyHandler {
var highestScore: Double = 0
for currentAnchor in nodes {
if let theNode = currentAnchor.node {
let score = theNode.highestUnigramScore()
let score = theNode.highestUnigramScore
if score > highestScore {
highestScore = score
}
@ -262,15 +260,15 @@ class KeyHandler {
// MARK: - Extracted methods and functions (Megrez).
func isBuilderEmpty() -> Bool { _builder.grid().width() == 0 }
func isBuilderEmpty() -> Bool { _builder.grid.width == 0 }
func getRawNodes() -> [Megrez.NodeAnchor] {
/// 使 nodesCrossing macOS
/// nodeCrossing Megrez
/// Windows
mgrPrefs.setRearCursorMode
? _builder.grid().nodesCrossingOrEndingAt(location: getActualCandidateCursorIndex())
: _builder.grid().nodesEndingAt(location: getActualCandidateCursorIndex())
? _builder.grid.nodesCrossingOrEndingAt(location: getActualCandidateCursorIndex())
: _builder.grid.nodesEndingAt(location: getActualCandidateCursorIndex())
}
func setInputModesToLM(isCHS: Bool) {
@ -285,12 +283,11 @@ class KeyHandler {
}
func createNewBuilder() {
_builder = Megrez.BlockReadingBuilder(lm: _languageModel)
// Each Mandarin syllable is separated by a hyphen.
_builder.setJoinSeparator(separator: "-")
_builder = Megrez.BlockReadingBuilder(lm: _languageModel, separator: "-")
}
func currentReadings() -> [String] { _builder.readings() }
func currentReadings() -> [String] { _builder.readings }
func ifLangModelHasUnigrams(forKey reading: String) -> Bool {
_languageModel.hasUnigramsFor(key: reading)
@ -301,15 +298,15 @@ class KeyHandler {
}
func setBuilderCursorIndex(value: Int) {
_builder.setCursorIndex(newIndex: value)
_builder.cursorIndex = value
}
func getBuilderCursorIndex() -> Int {
_builder.cursorIndex()
_builder.cursorIndex
}
func getBuilderLength() -> Int {
_builder.length()
_builder.length
}
func deleteBuilderReadingInFrontOfCursor() {
@ -321,7 +318,7 @@ class KeyHandler {
}
/// Returns the character count of the value currently selected by the first walked node.
///
/// Yields 0 when there are no walked nodes or when the first anchor carries no node.
func getKeyLengthAtIndexZero() -> Int {
  // `first` is nil on an empty array, whereas `_walkedNodes[0]` would trap.
  _walkedNodes.first?.node?.currentKeyValue.value.count ?? 0
}
// MARK: - Extracted methods and functions (Tekkon).

View File

@ -45,7 +45,7 @@ extension KeyHandler {
continue
}
let valueString = node.currentKeyValue().value
let valueString = node.currentKeyValue.value
composingBuffer += valueString
let codepointCount = valueString.count
@ -303,7 +303,7 @@ extension KeyHandler {
for theAnchor in _walkedNodes {
if let node = theAnchor.node {
var key = node.currentKeyValue().key
var key = node.currentKeyValue.key
if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin {
key = restoreToneOneInZhuyinKey(target: key) //
key = Tekkon.cnvPhonaToHanyuPinyin(target: key) //
@ -313,7 +313,7 @@ extension KeyHandler {
key = cnvZhuyinKeyToTextbookReading(target: key, newSeparator: " ")
}
let value = node.currentKeyValue().value
let value = node.currentKeyValue.value
if key.contains("_") { //
composed += value
} else {

View File

@ -186,7 +186,7 @@ extension vChewing {
var strPrevious = "()"
var strAnterior = "()"
guard let kvCurrent = arrNodesReversed[0].node?.currentKeyValue(),
guard let kvCurrent = arrNodesReversed[0].node?.currentKeyValue,
!arrEndingPunctuation.contains(kvCurrent.value)
else {
return ""
@ -196,14 +196,14 @@ extension vChewing {
strCurrent = kvCurrent.key
if arrNodesReversed.count >= 2,
let kvPrevious = arrNodesReversed[1].node?.currentKeyValue(),
let kvPrevious = arrNodesReversed[1].node?.currentKeyValue,
!arrEndingPunctuation.contains(kvPrevious.value)
{
strPrevious = "(\(kvPrevious.key),\(kvPrevious.value))"
}
if arrNodesReversed.count >= 3,
let kvAnterior = arrNodesReversed[2].node?.currentKeyValue(),
let kvAnterior = arrNodesReversed[2].node?.currentKeyValue,
!arrEndingPunctuation.contains(kvAnterior.value)
{
strAnterior = "(\(kvAnterior.key),\(kvAnterior.value))"

View File

@ -24,33 +24,55 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
///
public class BlockReadingBuilder {
var mutMaximumBuildSpanLength = 10
var mutCursorIndex: Int = 0
var mutReadings: [String] = []
var mutGrid: Grid = .init()
var mutLM: LanguageModel
var mutJoinSeparator: String = ""
///
private var mutMaximumBuildSpanLength = 10
///
private var mutCursorIndex: Int = 0
///
private var mutReadings: [String] = []
///
private var mutGrid: Grid = .init()
/// 使
private var mutLM: LanguageModel
public init(lm: LanguageModel, length: Int = 10) {
mutLM = lm
mutMaximumBuildSpanLength = length
///
public var joinSeparator: String = ""
///
/// Cursor position inside the reading sequence.
///
/// Reading returns the stored position. Writing clamps the new value into
/// `0 ... mutReadings.count` before storing it.
public var cursorIndex: Int {
  get {
    mutCursorIndex
  }
  set {
    let upperBound = mutReadings.count
    mutCursorIndex = max(0, min(newValue, upperBound))
  }
}
///
public var grid: Grid { mutGrid }
///
public var length: Int { mutReadings.count }
///
public var readings: [String] { mutReadings }
///
/// - Parameters:
/// - lm: Megrez.LanguageModel
/// - length: 10
/// - separator:
public init(lm: LanguageModel, length: Int = 10, separator: String = "") {
mutLM = lm
mutMaximumBuildSpanLength = length
joinSeparator = separator
}
///
public func clear() {
mutCursorIndex = 0
mutReadings.removeAll()
mutGrid.clear()
}
public func length() -> Int { mutReadings.count }
public func cursorIndex() -> Int { mutCursorIndex }
public func setCursorIndex(newIndex: Int) {
mutCursorIndex = min(newIndex, mutReadings.count)
}
///
/// - Parameters:
/// - reading:
public func insertReadingAtCursor(reading: String) {
mutReadings.insert(reading, at: mutCursorIndex)
mutGrid.expandGridByOneAt(location: mutCursorIndex)
@ -58,8 +80,8 @@ extension Megrez {
mutCursorIndex += 1
}
public func readings() -> [String] { mutReadings }
///
/// Rear
@discardableResult public func deleteReadingAtTheRearOfCursor() -> Bool {
if mutCursorIndex == 0 {
return false
@ -72,6 +94,8 @@ extension Megrez {
return true
}
///
/// Front
@discardableResult public func deleteReadingToTheFrontOfCursor() -> Bool {
if mutCursorIndex == mutReadings.count {
return false
@ -83,8 +107,12 @@ extension Megrez {
return true
}
///
///
///
///
@discardableResult public func removeHeadReadings(count: Int) -> Bool {
if count > length() {
if count > length {
return false
}
@ -100,17 +128,108 @@ extension Megrez {
return true
}
public func setJoinSeparator(separator: String) {
mutJoinSeparator = separator
// MARK: - Walker
/// Walks the grid and returns the chosen node anchors in the reverse of the order
/// produced by `reverseWalk`.
///
/// All arguments are forwarded unchanged to `reverseWalk`.
/// - Parameters:
///   - location: Grid location the walk starts from; `reverseWalk` yields an empty
///     result when this is 0 or exceeds the grid width.
///   - accumulatedScore: Score carried into the walk; it is added to each candidate node's score.
///   - nodesLimit: When non-zero, bounds how many of the nodes ending at `location` are examined.
///   - balanced: When `true`, candidate nodes are ordered by `balancedScore` and a
///     spanning-length bonus is added to their accumulated score.
/// - Returns: The anchors returned by `reverseWalk`, in reversed order.
public func walk(
  at location: Int,
  score accumulatedScore: Double = 0.0,
  nodesLimit: Int = 0,
  balanced: Bool = false
) -> [NodeAnchor] {
  let tailFirstPath: [NodeAnchor] = reverseWalk(
    at: location,
    score: accumulatedScore,
    nodesLimit: nodesLimit,
    balanced: balanced
  )
  return tailFirstPath.reversed()
}
public func joinSeparator() -> String { mutJoinSeparator }
///
/// - Parameters:
/// - at:
/// - score: 0
/// - nodesLimit:
/// - balanced:
public func reverseWalk(
at location: Int,
score accumulatedScore: Double = 0.0,
nodesLimit: Int = 0,
balanced: Bool = false
) -> [NodeAnchor] {
if location == 0 || location > mutGrid.width {
return [] as [NodeAnchor]
}
public func grid() -> Grid { mutGrid }
var paths: [[NodeAnchor]] = []
var nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location)
public func build() {
// if (mutLM == nil) { return } // nil
if balanced {
nodes.sort {
$0.balancedScore > $1.balancedScore
}
}
for (i, n) in nodes.enumerated() {
// X NodeAnchor node
// abs
if abs(nodesLimit) > 0, i == abs(nodesLimit) - 1 {
break
}
var n = n
guard let nNode = n.node else {
continue
}
n.accumulatedScore = accumulatedScore + nNode.score
//
//
if balanced {
let weightedScore: Double = (Double(n.spanningLength) - 1) * 2
n.accumulatedScore += weightedScore
}
var path: [NodeAnchor] = reverseWalk(
at: location - n.spanningLength,
score: n.accumulatedScore
)
path.insert(n, at: 0)
paths.append(path)
// 使
if balanced, nNode.score >= 0 {
break
}
}
if !paths.isEmpty {
if var result = paths.first {
for value in paths {
if let vLast = value.last, let rLast = result.last {
if vLast.accumulatedScore > rLast.accumulatedScore {
result = value
}
}
}
return result
}
}
return [] as [NodeAnchor]
}
// MARK: - Private functions
private func build() {
let itrBegin: Int =
(mutCursorIndex < mutMaximumBuildSpanLength) ? 0 : mutCursorIndex - mutMaximumBuildSpanLength
let itrEnd: Int = min(mutCursorIndex + mutMaximumBuildSpanLength, mutReadings.count)
@ -121,7 +240,7 @@ extension Megrez {
break
}
let strSlice = mutReadings[p..<(p + q)]
let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator)
let combinedReading: String = join(slice: strSlice, separator: joinSeparator)
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
@ -134,7 +253,7 @@ extension Megrez {
}
}
public func join(slice strSlice: ArraySlice<String>, separator: String) -> String {
private func join(slice strSlice: ArraySlice<String>, separator: String) -> String {
var arrResult: [String] = []
for value in strSlice {
arrResult.append(value)

View File

@ -1,123 +0,0 @@
// Swiftified by (c) 2022 and onwards The vChewing Project (MIT-NTL License).
// Rebranded from (c) Lukhnos Liu's C++ library "Gramambular" (MIT License).
/*
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
1. The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
2. No trademark license is granted to use the trade names, trademarks, service
marks, or product names of Contributor, except as required to fulfill notice
requirements above.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
public class Walker {
var mutGrid: Grid
public init(grid: Megrez.Grid = Megrez.Grid()) {
mutGrid = grid
}
public func walk(
at location: Int,
score accumulatedScore: Double = 0.0,
nodesLimit: Int = 0,
balanced: Bool = false
) -> [NodeAnchor] {
var arrReturn: [NodeAnchor] = []
let arrReversedSource = reverseWalk(
at: location, score: accumulatedScore,
nodesLimit: nodesLimit, balanced: balanced
).reversed()
for neta in arrReversedSource {
arrReturn.append(neta)
}
return arrReturn
}
public func reverseWalk(
at location: Int,
score accumulatedScore: Double = 0.0,
nodesLimit: Int = 0,
balanced: Bool = false
) -> [NodeAnchor] {
if location == 0 || location > mutGrid.width() {
return [] as [NodeAnchor]
}
var paths: [[NodeAnchor]] = []
var nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location)
if balanced {
nodes.sort {
$0.balancedScore > $1.balancedScore
}
}
for (i, n) in nodes.enumerated() {
// X NodeAnchor node
// abs
if abs(nodesLimit) > 0, i == abs(nodesLimit) - 1 {
break
}
var n = n
guard let nNode = n.node else {
continue
}
n.accumulatedScore = accumulatedScore + nNode.score()
// Spanning Length
//
if balanced {
let weightedScore: Double = (Double(n.spanningLength) - 1) * 2
n.accumulatedScore += weightedScore
}
var path: [NodeAnchor] = reverseWalk(
at: location - n.spanningLength,
score: n.accumulatedScore
)
path.insert(n, at: 0)
paths.append(path)
// 使
if balanced, nNode.score() >= 0 {
break
}
}
if !paths.isEmpty {
if var result = paths.first {
for value in paths {
if let vLast = value.last, let rLast = result.last {
if vLast.accumulatedScore > rLast.accumulatedScore {
result = value
}
}
}
return result
}
}
return [] as [NodeAnchor]
}
}
}

View File

@ -24,17 +24,28 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
///
public class Grid {
var mutSpans: [Megrez.Span]
///
private var mutSpans: [Megrez.Span]
///
var width: Int { mutSpans.count }
public init() {
mutSpans = [Megrez.Span]()
}
///
public func clear() {
mutSpans = [Megrez.Span]()
}
///
/// - Parameters:
/// - node:
/// - location:
/// - spanningLength:
public func insertNode(node: Node, location: Int, spanningLength: Int) {
if location >= mutSpans.count {
let diff = location - mutSpans.count + 1
@ -45,15 +56,23 @@ extension Megrez {
mutSpans[location].insert(node: node, length: spanningLength)
}
/// Reports whether the span at `location` holds a node of the given spanning length
/// whose key equals `key`.
/// - Parameters:
///   - location: Index of the span to inspect.
///   - spanningLength: Spanning length used to look the node up inside that span.
///   - key: Key the found node must carry.
/// - Returns: `false` when `location` is outside the grid or the span has no node of
///   that length; otherwise whether the node's key equals `key`.
public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool {
  // `indices` excludes negative values and `mutSpans.count` itself. The earlier
  // `location > mutSpans.count` check let `location == count` reach the subscript.
  guard mutSpans.indices.contains(location) else { return false }
  guard let matched = mutSpans[location].node(length: spanningLength) else { return false }
  return matched.key == key
}
///
/// - Parameters:
/// - location:
public func expandGridByOneAt(location: Int) {
// abs
mutSpans.insert(Span(), at: abs(location))
@ -65,6 +84,9 @@ extension Megrez {
}
}
///
/// - Parameters:
/// - location:
public func shrinkGridByOneAt(location: Int) {
if location >= mutSpans.count {
return
@ -77,8 +99,9 @@ extension Megrez {
}
}
public func width() -> Int { mutSpans.count }
///
/// - Parameters:
/// - location:
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = []
if !mutSpans.isEmpty, location <= mutSpans.count {
@ -100,6 +123,9 @@ extension Megrez {
return results
}
///
/// - Parameters:
/// - location:
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = []
if !mutSpans.isEmpty, location <= mutSpans.count {
@ -126,14 +152,18 @@ extension Megrez {
return results
}
public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor {
///
/// - Parameters:
/// - location:
/// - value:
@discardableResult public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor {
var node = NodeAnchor()
for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
guard let theNode = nodeAnchor.node else {
continue
}
let candidates = theNode.candidates()
// Reset the candidate-fixed state of every node at the location.
let candidates = theNode.candidates
//
theNode.resetCandidate()
for (i, candidate) in candidates.enumerated() {
if candidate.value == value {
@ -146,13 +176,18 @@ extension Megrez {
return node
}
///
/// - Parameters:
/// - location:
/// - value:
/// - overridingScore:
public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) {
for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
guard let theNode = nodeAnchor.node else {
continue
}
let candidates = theNode.candidates()
// Reset the candidate-fixed state of every node at the location.
let candidates = theNode.candidates
//
theNode.resetCandidate()
for (i, candidate) in candidates.enumerated() {
if candidate.value == value {
@ -164,3 +199,38 @@ extension Megrez {
}
}
}
// MARK: - DumpDOT-related functions.
extension Megrez.Grid {
  /// Text of a DOT-format `digraph` describing the nodes currently held by the grid.
  ///
  /// Each node is labelled with the value of its current key-value pair. Nodes in the
  /// first span get an edge from `BOS`, nodes whose span reaches exactly the end of the
  /// grid get an edge to `EOS`, and every other node gets an edge to each node stored
  /// in the span where it ends.
  public var dumpDOT: String {
    var dot = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n"
    for (spanIndex, span) in mutSpans.enumerated() {
      for length in 0...(span.maximumLength) {
        guard let node: Megrez.Node = span.node(length: length) else {
          continue
        }
        let label = node.currentKeyValue.value
        if spanIndex == 0 {
          dot += "BOS -> \(label);\n"
        }
        dot += "\(label);\n"
        // Index of the span that begins right after this node ends.
        let targetIndex = spanIndex + length
        if targetIndex < mutSpans.count {
          let targetSpan = mutSpans[targetIndex]
          for targetLength in 0...(targetSpan.maximumLength) {
            guard let target = targetSpan.node(length: targetLength) else {
              continue
            }
            dot += "\(label) -> \(target.currentKeyValue.value);\n"
          }
        } else if targetIndex == mutSpans.count {
          dot += "\(label) -> EOS;\n"
        }
      }
    }
    dot += "EOS;\n}\n"
    return dot
  }
}

View File

@ -24,19 +24,52 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
@frozen public struct NodeAnchor {
///
@frozen public struct NodeAnchor: CustomStringConvertible {
///
public var node: Node?
///
public var location: Int = 0
///
public var spanningLength: Int = 0
///
public var accumulatedScore: Double = 0.0
///
public var keyLength: Int {
node?.key().count ?? 0
node?.key.count ?? 0
}
///
public var description: String {
var stream = ""
stream += "{@(" + String(location) + "," + String(spanningLength) + "),"
if let node = node {
stream += node.description
} else {
stream += "null"
}
stream += "}"
return stream
}
///
public var balancedScore: Double {
let weightedScore: Double = (Double(spanningLength) - 1) * 2
let nodeScore: Double = node?.score() ?? 0
let nodeScore: Double = node?.score ?? 0
return weightedScore + nodeScore
}
}
}
// MARK: - DumpDOT-related functions.
extension Array where Element == Megrez.NodeAnchor {
  /// The descriptions of all anchors, each one preceded by a `<-` separator.
  ///
  /// An empty array yields an empty string.
  public var description: String {
    // The leading empty string makes the separator appear before the first anchor too.
    let pieces: [String] = [""] + map { $0.description }
    return pieces.joined(separator: "<-")
  }
}

View File

@ -24,23 +24,28 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
///
@frozen public struct Span {
private var mutLengthNodeMap: [Int: Megrez.Node]
private var mutMaximumLength: Int
///
private var mutLengthNodeMap: [Int: Megrez.Node] = [:]
///
private var mutMaximumLength: Int = 0
///
var maximumLength: Int {
mutMaximumLength
}
public init() {
mutLengthNodeMap = [:]
mutMaximumLength = 0
}
///
mutating func clear() {
mutLengthNodeMap.removeAll()
mutMaximumLength = 0
}
///
/// - Parameters:
/// - node:
/// - length:
mutating func insert(node: Node, length: Int) {
mutLengthNodeMap[length] = node
if length > mutMaximumLength {
@ -48,6 +53,9 @@ extension Megrez {
}
}
///
/// - Parameters:
/// - length:
mutating func removeNodeOfLengthGreaterThan(_ length: Int) {
if length > mutMaximumLength { return }
var max = 0
@ -67,6 +75,9 @@ extension Megrez {
mutMaximumLength = max
}
///
/// - Parameters:
/// - length:
public func node(length: Int) -> Node? {
mutLengthNodeMap[length]
}

View File

@ -24,55 +24,69 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
public class Node {
let mutLM: LanguageModel
var mutKey: String
var mutScore: Double = 0
var mutUnigrams: [Unigram]
var mutCandidates: [KeyValuePair]
var mutValueUnigramIndexMap: [String: Int]
var mutPrecedingBigramMap: [KeyValuePair: [Megrez.Bigram]]
var mutCandidateFixed: Bool = false
var mutSelectedUnigramIndex: Int = 0
let kSelectedCandidateScore: Double = 99
public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
mutLM = LanguageModel()
mutKey = key
mutScore = 0
mutUnigrams = unigrams
mutCandidates = []
mutValueUnigramIndexMap = [:]
mutPrecedingBigramMap = [:]
mutCandidateFixed = false
mutSelectedUnigramIndex = 0
if bigrams == [] {
node(key: key, unigrams: unigrams, bigrams: bigrams)
} else {
node(key: key, unigrams: unigrams)
}
///
public class Node: CustomStringConvertible {
///
private let mutLM: LanguageModel = .init()
///
private var mutKey: String = ""
///
private var mutScore: Double = 0
///
private var mutUnigrams: [Unigram]
///
private var mutBigrams: [Bigram]
///
private var mutCandidates: [KeyValuePair] = []
/// 調
private var mutValueUnigramIndexMap: [String: Int] = [:]
///
private var mutPrecedingBigramMap: [KeyValuePair: [Megrez.Bigram]] = [:]
///
private var mutCandidateFixed: Bool = false
///
private var mutSelectedUnigramIndex: Int = 0
///
private let kSelectedCandidateScore: Double = 99
///
public var description: String {
"(node,key:\(mutKey),fixed:\(mutCandidateFixed ? "true" : "false"),selected:\(mutSelectedUnigramIndex),\(mutUnigrams))"
}
public func node(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
var unigrams = unigrams
///
var candidates: [KeyValuePair] { mutCandidates }
///
var isCandidateFixed: Bool { mutCandidateFixed }
///
var key: String { mutKey }
///
var score: Double { mutScore }
///
var currentKeyValue: KeyValuePair {
mutSelectedUnigramIndex >= mutUnigrams.count ? KeyValuePair() : mutCandidates[mutSelectedUnigramIndex]
}
///
var highestUnigramScore: Double { mutUnigrams.isEmpty ? 0.0 : mutUnigrams[0].score }
///
/// - Parameters:
/// - key:
/// - unigrams:
/// - bigrams:
public init(key: String, unigrams: [Megrez.Unigram], bigrams: [Megrez.Bigram] = []) {
mutKey = key
unigrams.sort {
mutUnigrams = unigrams
mutBigrams = bigrams
mutUnigrams.sort {
$0.score > $1.score
}
if !mutUnigrams.isEmpty {
mutScore = mutUnigrams[0].score
}
for (i, theGram) in unigrams.enumerated() {
mutValueUnigramIndexMap[theGram.keyValue.value] = i
mutCandidates.append(theGram.keyValue)
for (i, gram) in mutUnigrams.enumerated() {
mutValueUnigramIndexMap[gram.keyValue.value] = i
mutCandidates.append(gram.keyValue)
}
for gram in bigrams {
@ -80,11 +94,14 @@ extension Megrez {
}
}
///
/// - Parameters:
/// - precedingKeyValues:
public func primeNodeWith(precedingKeyValues: [KeyValuePair]) {
var newIndex = mutSelectedUnigramIndex
var max = mutScore
if !isCandidateFixed() {
if !isCandidateFixed {
for neta in precedingKeyValues {
let bigrams = mutPrecedingBigramMap[neta] ?? []
for bigram in bigrams {
@ -107,16 +124,17 @@ extension Megrez {
}
}
public func isCandidateFixed() -> Bool { mutCandidateFixed }
public func candidates() -> [KeyValuePair] { mutCandidates }
/// Selects the candidate at `index` and gives the node the selected-candidate score.
/// - Parameters:
///   - index: Position of the candidate to select. Any value outside
///     `0 ..< mutUnigrams.count` falls back to 0.
///   - fix: Stored as the node's candidate-fixed flag.
public func selectCandidateAt(index: Int = 0, fix: Bool = false) {
  // A negative index used to be stored as-is and would later trap when used as a subscript.
  mutSelectedUnigramIndex = mutUnigrams.indices.contains(index) ? index : 0
  mutCandidateFixed = fix
  mutScore = kSelectedCandidateScore
}
///
public func resetCandidate() {
mutSelectedUnigramIndex = 0
mutCandidateFixed = false
@ -125,16 +143,19 @@ extension Megrez {
}
}
/// Selects the candidate at `index` without fixing it, and sets the node score to `score`.
/// - Parameters:
///   - index: Position of the candidate to select. Any value outside
///     `0 ..< mutUnigrams.count` falls back to 0.
///   - score: Value stored as the node's score.
public func selectFloatingCandidateAt(index: Int, score: Double) {
  // A negative index used to be stored as-is and would later trap when used as a subscript.
  mutSelectedUnigramIndex = mutUnigrams.indices.contains(index) ? index : 0
  mutCandidateFixed = false
  mutScore = score
}
public func key() -> String { mutKey }
public func score() -> Double { mutScore }
///
/// - Parameters:
/// - candidate:
public func scoreFor(candidate: String) -> Double {
for unigram in mutUnigrams {
if unigram.keyValue.value == candidate {
@ -144,14 +165,6 @@ extension Megrez {
return 0.0
}
public func currentKeyValue() -> KeyValuePair {
mutSelectedUnigramIndex >= mutUnigrams.count ? KeyValuePair() : mutCandidates[mutSelectedUnigramIndex]
}
public func highestUnigramScore() -> Double {
mutUnigrams.isEmpty ? 0.0 : mutUnigrams[0].score
}
public static func == (lhs: Node, rhs: Node) -> Bool {
lhs.mutUnigrams == rhs.mutUnigrams && lhs.mutCandidates == rhs.mutCandidates
&& lhs.mutValueUnigramIndexMap == rhs.mutValueUnigramIndexMap

View File

@ -24,19 +24,23 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
// 使
// Swift
/// 使
open class LanguageModel {
public init() {}
// Swift
///
open func unigramsFor(key: String) -> [Megrez.Unigram] {
key.isEmpty ? [Megrez.Unigram]() : [Megrez.Unigram]()
}
///
open func bigramsForKeys(precedingKey: String, key: String) -> [Megrez.Bigram] {
precedingKey == key ? [Megrez.Bigram]() : [Megrez.Bigram]()
}
///
open func hasUnigramsFor(key: String) -> Bool {
key.count != 0
}

View File

@ -24,17 +24,28 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
@frozen public struct Bigram: Equatable {
///
@frozen public struct Bigram: Equatable, CustomStringConvertible {
///
public var keyValue: KeyValuePair
///
public var precedingKeyValue: KeyValuePair
///
public var score: Double
// var paired: String
///
public var description: String {
"(" + keyValue.description + "|" + precedingKeyValue.description + "," + String(score) + ")"
}
///
/// - Parameters:
/// - precedingKeyValue:
/// - keyValue:
/// - score:
public init(precedingKeyValue: KeyValuePair, keyValue: KeyValuePair, score: Double) {
self.keyValue = keyValue
self.precedingKeyValue = precedingKeyValue
self.score = score
// paired = "(" + keyValue.paired + "|" + precedingKeyValue.paired + "," + String(score) + ")"
}
public func hash(into hasher: inout Hasher) {
@ -44,16 +55,6 @@ extension Megrez {
// hasher.combine(paired)
}
// static func getPairedBigrams(grams: [Bigram]) -> String {
// var arrOutputContent = [""]
// var index = 0
// for gram in grams {
// arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.paired])
// index += 1
// }
// return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}"
// }
public static func == (lhs: Bigram, rhs: Bigram) -> Bool {
lhs.precedingKeyValue == rhs.precedingKeyValue && lhs.keyValue == rhs.keyValue && lhs.score == rhs.score
}
@ -62,13 +63,18 @@ extension Megrez {
lhs.precedingKeyValue < rhs.precedingKeyValue
|| (lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.keyValue < rhs.keyValue))
}
var description: String {
"\(keyValue):\(score)"
}
var debugDescription: String {
"Bigram(keyValue: \(keyValue), score: \(score))"
}
}
}
// MARK: - DumpDOT-related functions.
extension Array where Element == Megrez.Bigram {
  /// A dump of the array in the form `[count]=>{,0=>first,1=>second,...}`.
  ///
  /// Every entry, including the first, is preceded by a comma.
  public var description: String {
    let entries: [String] = enumerated().map { pair in
      "\(pair.offset)=>\(pair.element.description)"
    }
    // The leading empty string produces the comma in front of the first entry.
    let body = ([""] + entries).joined(separator: ",")
    return "[\(count)]=>{\(body)}"
  }
}

View File

@ -24,21 +24,29 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
@frozen public struct Unigram: Equatable {
///
@frozen public struct Unigram: Equatable, CustomStringConvertible {
///
public var keyValue: KeyValuePair
///
public var score: Double
// var paired: String
///
public var description: String {
"(" + keyValue.description + "," + String(score) + ")"
}
///
/// - Parameters:
/// - keyValue:
/// - score:
public init(keyValue: KeyValuePair, score: Double) {
self.keyValue = keyValue
self.score = score
// paired = "(" + keyValue.paired + "," + String(score) + ")"
}
public func hash(into hasher: inout Hasher) {
hasher.combine(keyValue)
hasher.combine(score)
// hasher.combine(paired)
}
//
@ -46,16 +54,6 @@ extension Megrez {
a.score > b.score
}
// static func getPairedUnigrams(grams: [Unigram]) -> String {
// var arrOutputContent = [""]
// var index = 0
// for gram in grams {
// arrOutputContent.append(contentsOf: [String(index) + "=>" + gram.paired])
// index += 1
// }
// return "[" + String(grams.count) + "]=>{" + arrOutputContent.joined(separator: ",") + "}"
// }
public static func == (lhs: Unigram, rhs: Unigram) -> Bool {
lhs.keyValue == rhs.keyValue && lhs.score == rhs.score
}
@ -63,13 +61,18 @@ extension Megrez {
/// Orders unigrams by key-value pair first and, when the pairs are equal, by score.
public static func < (lhs: Unigram, rhs: Unigram) -> Bool {
  // The tie-break used to re-test `lhs.keyValue < rhs.keyValue` under
  // `lhs.keyValue == rhs.keyValue`, which made the clause dead; compare scores instead.
  // NOTE(review): assumes score is the intended secondary key — confirm against callers.
  lhs.keyValue < rhs.keyValue || (lhs.keyValue == rhs.keyValue && lhs.score < rhs.score)
}
var description: String {
"\(keyValue):\(score)"
}
var debugDescription: String {
"Unigram(keyValue: \(keyValue), score: \(score))"
}
}
}
// MARK: - DumpDOT-related functions.
extension Array where Element == Megrez.Unigram {
  /// A dump of the array in the form `[count]=>{,0=>first,1=>second,...}`.
  ///
  /// Every entry, including the first, is preceded by a comma.
  public var description: String {
    let entries: [String] = enumerated().map { pair in
      "\(pair.offset)=>\(pair.element.description)"
    }
    // The leading empty string produces the comma in front of the first entry.
    let body = ([""] + entries).joined(separator: ",")
    return "[\(count)]=>{\(body)}"
  }
}

View File

@ -24,21 +24,29 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
extension Megrez {
@frozen public struct KeyValuePair: Equatable, Hashable, Comparable {
///
@frozen public struct KeyValuePair: Equatable, Hashable, Comparable, CustomStringConvertible {
///
public var key: String
///
public var value: String
// public var paired: String
///
public var description: String {
"(" + key + "," + value + ")"
}
///
/// - Parameters:
/// - key:
/// - value:
public init(key: String = "", value: String = "") {
self.key = key
self.value = value
// paired = "(" + key + "," + value + ")"
}
public func hash(into hasher: inout Hasher) {
hasher.combine(key)
hasher.combine(value)
// hasher.combine(paired)
}
public static func == (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
@ -60,13 +68,5 @@ extension Megrez {
/// Compares two pairs by the character count of their keys.
///
/// As written, the result is `true` exactly when `lhs.key.count >= rhs.key.count`.
// NOTE(review): the first clause is already true whenever the key counts are equal,
// so the value comparison in the second clause never affects the result. Confirm
// whether `>` was intended in the first clause.
public static func >= (lhs: KeyValuePair, rhs: KeyValuePair) -> Bool {
(lhs.key.count >= rhs.key.count) || (lhs.key.count == rhs.key.count && lhs.value >= rhs.value)
}
public var description: String {
"(\(key), \(value))"
}
public var debugDescription: String {
"KeyValuePair(key: \(key), value: \(value))"
}
}
}

View File

@ -20,7 +20,6 @@
5B38F59D281E2E49007D5F5D /* 4_Node.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1A15FC0EB100ABF4B3 /* 4_Node.swift */; };
5B38F59E281E2E49007D5F5D /* 6_Bigram.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1415FC0EB100ABF4B3 /* 6_Bigram.swift */; };
5B38F59F281E2E49007D5F5D /* 3_NodeAnchor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */; };
5B38F5A0281E2E49007D5F5D /* 1_Walker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */; };
5B38F5A1281E2E49007D5F5D /* 1_BlockReadingBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */; };
5B38F5A2281E2E49007D5F5D /* 0_Megrez.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */; };
5B38F5A3281E2E49007D5F5D /* 3_Span.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */; };
@ -300,7 +299,6 @@
6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 3_NodeAnchor.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 3_Span.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1D15FC0EB100ABF4B3 /* 6_Unigram.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 6_Unigram.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = 1_Walker.swift; sourceTree = "<group>"; tabWidth = 2; usesTabs = 0; };
6A15B32421A51F2300B92CD3 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = "<group>"; };
6A15B32521A51F2300B92CD3 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = "<group>"; };
6A225A1E23679F2600F685C6 /* NotarizedArchives */ = {isa = PBXFileReference; lastKnownFileType = folder; path = NotarizedArchives; sourceTree = "<group>"; };
@ -769,7 +767,6 @@
children = (
6A0D4F1615FC0EB100ABF4B3 /* 0_Megrez.swift */,
6A0D4F1515FC0EB100ABF4B3 /* 1_BlockReadingBuilder.swift */,
6A0D4F1E15FC0EB100ABF4B3 /* 1_Walker.swift */,
6A0D4F1715FC0EB100ABF4B3 /* 2_Grid.swift */,
6A0D4F1B15FC0EB100ABF4B3 /* 3_NodeAnchor.swift */,
6A0D4F1C15FC0EB100ABF4B3 /* 3_Span.swift */,
@ -1086,7 +1083,6 @@
5B62A34927AE7CD900A19448 /* TooltipController.swift in Sources */,
5B61B0CA280BEFD4002E3CFA /* KeyHandler_Misc.swift in Sources */,
5B38F59A281E2E49007D5F5D /* 6_Unigram.swift in Sources */,
5B38F5A0281E2E49007D5F5D /* 1_Walker.swift in Sources */,
5B62A34827AE7CD900A19448 /* ctlCandidateVertical.swift in Sources */,
5BA9FD4027FEF3C8002DE248 /* Localization.swift in Sources */,
5BAA8FBE282CAF380066C406 /* SyllableComposer.swift in Sources */,