Megrez // v1.2.8 update + UOM punctuation conditioning fix.

This commit is contained in:
ShikiSuen 2022-07-09 10:15:12 +08:00
parent 3db04310a2
commit 4ff9051c17
13 changed files with 450 additions and 419 deletions

View File

@ -56,7 +56,7 @@ class KeyHandler {
var compositor: Megrez.Compositor // var compositor: Megrez.Compositor //
var currentLM: vChewing.LMInstantiator = .init() // var currentLM: vChewing.LMInstantiator = .init() //
var currentUOM: vChewing.LMUserOverride = .init() // var currentUOM: vChewing.LMUserOverride = .init() //
var walkedAnchors: [Megrez.NodeAnchor] = [] // var walkedAnchors: [Megrez.NodeAnchor] { compositor.walkedAnchors } //
/// (ctlInputMethod)便 /// (ctlInputMethod)便
var delegate: KeyHandlerDelegate? var delegate: KeyHandlerDelegate?
@ -95,7 +95,6 @@ class KeyHandler {
func clear() { func clear() {
composer.clear() composer.clear()
compositor.clear() compositor.clear()
walkedAnchors.removeAll()
} }
// MARK: - Functions dealing with Megrez. // MARK: - Functions dealing with Megrez.
@ -103,7 +102,7 @@ class KeyHandler {
/// Megrez 使便 /// Megrez 使便
/// ///
/// 使 Node Crossing /// 使 Node Crossing
var actualCandidateCursorIndex: Int { var actualCandidateCursor: Int {
mgrPrefs.useRearCursorMode ? min(compositorCursorIndex, compositorLength - 1) : max(compositorCursorIndex, 1) mgrPrefs.useRearCursorMode ? min(compositorCursorIndex, compositorLength - 1) : max(compositorCursorIndex, 1)
} }
@ -113,11 +112,11 @@ class KeyHandler {
/// ///
/// ///
func walk() { func walk() {
walkedAnchors = compositor.walk() compositor.walk()
// GraphViz // GraphViz
if mgrPrefs.isDebugModeEnabled { if mgrPrefs.isDebugModeEnabled {
let result = compositor.grid.dumpDOT let result = compositor.dumpDOT
do { do {
try result.write( try result.write(
toFile: "/private/var/tmp/vChewing-visualization.dot", toFile: "/private/var/tmp/vChewing-visualization.dot",
@ -137,12 +136,10 @@ class KeyHandler {
/// ///
var commitOverflownCompositionAndWalk: String { var commitOverflownCompositionAndWalk: String {
var textToCommit = "" var textToCommit = ""
if compositor.grid.width > mgrPrefs.composingBufferSize, !walkedAnchors.isEmpty { if compositor.width > mgrPrefs.composingBufferSize, !walkedAnchors.isEmpty {
let anchor: Megrez.NodeAnchor = walkedAnchors[0] let anchor: Megrez.NodeAnchor = walkedAnchors[0]
if let theNode = anchor.node { textToCommit = anchor.node.currentPair.value
textToCommit = theNode.currentKeyValue.value compositor.removeHeadReadings(count: anchor.spanLength)
}
compositor.removeHeadReadings(count: anchor.spanningLength)
} }
walk() walk()
return textToCommit return textToCommit
@ -166,26 +163,22 @@ class KeyHandler {
/// - value: /// - value:
/// - respectCursorPushing: true /// - respectCursorPushing: true
func fixNode(value: String, respectCursorPushing: Bool = true) { func fixNode(value: String, respectCursorPushing: Bool = true) {
let adjustedIndex = max(0, min(actualCandidateCursorIndex + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength)) let adjustedCursor = max(0, min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength))
// //
let selectedNode: Megrez.NodeAnchor = compositor.grid.fixNodeSelectedCandidate( let selectedNode: Megrez.NodeAnchor = compositor.fixNodeSelectedCandidate(value, at: adjustedCursor)
location: adjustedIndex, value: value
)
// //
if !mgrPrefs.useSCPCTypingMode { if !mgrPrefs.useSCPCTypingMode {
var addToUserOverrideModel = true var addToUserOverrideModel = true
// //
if selectedNode.spanningLength != value.count { if selectedNode.spanLength != value.count {
IME.prtDebugIntel("UOM: SpanningLength != value.count, dismissing.") IME.prtDebugIntel("UOM: SpanningLength != value.count, dismissing.")
addToUserOverrideModel = false addToUserOverrideModel = false
} }
if addToUserOverrideModel { if addToUserOverrideModel {
if let theNode = selectedNode.node { // SymbolLM Score -12
// SymbolLM Score -12 if selectedNode.node.scoreFor(candidate: value) <= -12 {
if theNode.scoreFor(candidate: value) <= -12 { IME.prtDebugIntel("UOM: Score <= -12, dismissing.")
IME.prtDebugIntel("UOM: Score <= -12, dismissing.") addToUserOverrideModel = false
addToUserOverrideModel = false
}
} }
} }
if addToUserOverrideModel { if addToUserOverrideModel {
@ -193,7 +186,7 @@ class KeyHandler {
// //
// //
currentUOM.observe( currentUOM.observe(
walkedAnchors: walkedAnchors, cursorIndex: adjustedIndex, candidate: value, walkedAnchors: walkedAnchors, cursorIndex: adjustedCursor, candidate: value,
timestamp: NSDate().timeIntervalSince1970 timestamp: NSDate().timeIntervalSince1970
) )
} }
@ -206,8 +199,8 @@ class KeyHandler {
if mgrPrefs.moveCursorAfterSelectingCandidate, respectCursorPushing { if mgrPrefs.moveCursorAfterSelectingCandidate, respectCursorPushing {
var nextPosition = 0 var nextPosition = 0
for theAnchor in walkedAnchors { for theAnchor in walkedAnchors {
if nextPosition >= adjustedIndex { break } if nextPosition >= adjustedCursor { break }
nextPosition += theAnchor.spanningLength nextPosition += theAnchor.spanLength
} }
if nextPosition <= compositorLength { if nextPosition <= compositorLength {
compositorCursorIndex = nextPosition compositorCursorIndex = nextPosition
@ -217,20 +210,17 @@ class KeyHandler {
/// ///
func markNodesFixedIfNecessary() { func markNodesFixedIfNecessary() {
let width = compositor.grid.width let width = compositor.width
if width <= kMaxComposingBufferNeedsToWalkSize { if width <= kMaxComposingBufferNeedsToWalkSize {
return return
} }
var index = 0 var index = 0
for anchor in walkedAnchors { for anchor in walkedAnchors {
guard let node = anchor.node else { break }
if index >= width - kMaxComposingBufferNeedsToWalkSize { break } if index >= width - kMaxComposingBufferNeedsToWalkSize { break }
if node.score < node.kSelectedCandidateScore { if anchor.node.score < Megrez.Node.kSelectedCandidateScore {
compositor.grid.fixNodeSelectedCandidate( compositor.fixNodeSelectedCandidate(anchor.node.currentPair.value, at: index + anchor.spanLength)
location: index + anchor.spanningLength, value: node.currentKeyValue.value
)
} }
index += anchor.spanningLength index += anchor.spanLength
} }
} }
@ -248,14 +238,11 @@ class KeyHandler {
arrAnchors = arrAnchors.stableSort { $0.keyLength > $1.keyLength } arrAnchors = arrAnchors.stableSort { $0.keyLength > $1.keyLength }
// //
for currentNodeAnchor in arrAnchors { for currentCandidate in arrAnchors.map(\.node.candidates).joined() {
guard let currentNode = currentNodeAnchor.node else { continue } // / JIS
for currentCandidate in currentNode.candidates { //
// / JIS //
// arrCandidates.append(currentCandidate.value)
//
arrCandidates.append(currentCandidate.value)
}
} }
// 調 // 調
if !mgrPrefs.fetchSuggestionsFromUserOverrideModel || mgrPrefs.useSCPCTypingMode || fixOrder { if !mgrPrefs.fetchSuggestionsFromUserOverrideModel || mgrPrefs.useSCPCTypingMode || fixOrder {
@ -291,8 +278,8 @@ class KeyHandler {
if !overrideValue.isEmpty { if !overrideValue.isEmpty {
IME.prtDebugIntel( IME.prtDebugIntel(
"UOM: Suggestion retrieved, overriding the node score of the selected candidate.") "UOM: Suggestion retrieved, overriding the node score of the selected candidate.")
compositor.grid.overrideNodeScoreForSelectedCandidate( compositor.overrideNodeScoreForSelectedCandidate(
location: min(actualCandidateCursorIndex + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength), location: min(actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength),
value: overrideValue, value: overrideValue,
overridingScore: findHighestScore(nodeAnchors: rawAnchorsOfNodes, epsilon: kEpsilon) overridingScore: findHighestScore(nodeAnchors: rawAnchorsOfNodes, epsilon: kEpsilon)
) )
@ -307,7 +294,7 @@ class KeyHandler {
/// - epsilon: /// - epsilon:
/// - Returns: /// - Returns:
func findHighestScore(nodeAnchors: [Megrez.NodeAnchor], epsilon: Double) -> Double { func findHighestScore(nodeAnchors: [Megrez.NodeAnchor], epsilon: Double) -> Double {
return nodeAnchors.compactMap(\.node?.highestUnigramScore).max() ?? 0 + epsilon return nodeAnchors.map(\.node.highestUnigramScore).max() ?? 0 + epsilon
} }
// MARK: - Extracted methods and functions (Tekkon). // MARK: - Extracted methods and functions (Tekkon).
@ -363,8 +350,8 @@ class KeyHandler {
/// 使 nodesCrossing macOS /// 使 nodesCrossing macOS
/// nodeCrossing /// nodeCrossing
mgrPrefs.useRearCursorMode mgrPrefs.useRearCursorMode
? compositor.grid.nodesBeginningAt(location: actualCandidateCursorIndex) ? compositor.nodesBeginningAt(location: actualCandidateCursor)
: compositor.grid.nodesEndingAt(location: actualCandidateCursorIndex) : compositor.nodesEndingAt(location: actualCandidateCursor)
} }
/// ///
@ -390,7 +377,7 @@ class KeyHandler {
/// ///
func insertToCompositorAtCursor(reading: String) { func insertToCompositorAtCursor(reading: String) {
compositor.insertReadingAtCursor(reading: reading) compositor.insertReading(reading)
} }
/// ///
@ -408,28 +395,27 @@ class KeyHandler {
/// ///
/// Rear /// Rear
func deleteCompositorReadingAtTheRearOfCursor() { func deleteCompositorReadingAtTheRearOfCursor() {
compositor.deleteReadingAtTheRearOfCursor() compositor.dropReading(direction: .rear)
} }
/// ///
/// ///
/// Front /// Front
func deleteCompositorReadingToTheFrontOfCursor() { func deleteCompositorReadingToTheFrontOfCursor() {
compositor.deleteReadingToTheFrontOfCursor() compositor.dropReading(direction: .front)
} }
/// ///
/// - Returns: /// - Returns:
var keyLengthAtCurrentIndex: Int { var keyLengthAtCurrentIndex: Int {
guard let node = walkedAnchors[compositorCursorIndex].node else { return 0 } walkedAnchors[compositorCursorIndex].node.key.split(separator: "-").count
return node.key.split(separator: "-").count
} }
var nextPhrasePosition: Int { var nextPhrasePosition: Int {
var nextPosition = 0 var nextPosition = 0
for theAnchor in walkedAnchors { for theAnchor in walkedAnchors {
if nextPosition > actualCandidateCursorIndex { break } if nextPosition > actualCandidateCursor { break }
nextPosition += theAnchor.spanningLength nextPosition += theAnchor.spanLength
} }
return min(nextPosition, compositorLength) return min(nextPosition, compositorLength)
} }

View File

@ -45,15 +45,15 @@ extension KeyHandler {
/// Swift.utf16NSString.length() /// Swift.utf16NSString.length()
/// ///
for theAnchor in walkedAnchors { for theAnchor in walkedAnchors {
guard let theNode = theAnchor.node else { continue } let theNode = theAnchor.node
let strNodeValue = theNode.currentKeyValue.value let strNodeValue = theNode.currentPair.value
composingBuffer += strNodeValue composingBuffer += strNodeValue
let arrSplit: [String] = Array(strNodeValue).map { String($0) } let arrSplit: [String] = Array(strNodeValue).map { String($0) }
let codepointCount = arrSplit.count let codepointCount = arrSplit.count
/// ///
/// NodeAnchorspanningLength /// NodeAnchorspanningLength
/// ///
let spanningLength: Int = theAnchor.spanningLength let spanningLength: Int = theAnchor.spanLength
if readingCursorIndex + spanningLength <= compositorCursorIndex { if readingCursorIndex + spanningLength <= compositorCursorIndex {
composedStringCursorIndex += strNodeValue.utf16.count composedStringCursorIndex += strNodeValue.utf16.count
readingCursorIndex += spanningLength readingCursorIndex += spanningLength
@ -406,22 +406,20 @@ extension KeyHandler {
var composed = "" var composed = ""
for theAnchor in walkedAnchors { for node in walkedAnchors.map(\.node) {
if let node = theAnchor.node { var key = node.key
var key = node.key if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin {
if mgrPrefs.inlineDumpPinyinInLieuOfZhuyin { key = Tekkon.restoreToneOneInZhuyinKey(target: key) //
key = Tekkon.restoreToneOneInZhuyinKey(target: key) // key = Tekkon.cnvPhonaToHanyuPinyin(target: key) //
key = Tekkon.cnvPhonaToHanyuPinyin(target: key) // key = Tekkon.cnvHanyuPinyinToTextbookStyle(target: key) // 調
key = Tekkon.cnvHanyuPinyinToTextbookStyle(target: key) // 調 key = key.replacingOccurrences(of: "-", with: " ")
key = key.replacingOccurrences(of: "-", with: " ") } else {
} else { key = Tekkon.cnvZhuyinChainToTextbookReading(target: key, newSeparator: " ")
key = Tekkon.cnvZhuyinChainToTextbookReading(target: key, newSeparator: " ")
}
let value = node.currentKeyValue.value
//
composed += key.contains("_") ? value : "<ruby>\(value)<rp>(</rp><rt>\(key)</rt><rp>)</rp></ruby>"
} }
let value = node.currentPair.value
//
composed += key.contains("_") ? value : "<ruby>\(value)<rp>(</rp><rt>\(key)</rt><rp>)</rp></ruby>"
} }
clear() clear()
@ -796,26 +794,21 @@ extension KeyHandler {
var length = 0 var length = 0
var currentAnchor = Megrez.NodeAnchor() var currentAnchor = Megrez.NodeAnchor()
let cursorIndex = min( let cursorIndex = min(
actualCandidateCursorIndex + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength actualCandidateCursor + (mgrPrefs.useRearCursorMode ? 1 : 0), compositorLength
) )
for anchor in walkedAnchors { for anchor in walkedAnchors {
length += anchor.spanningLength length += anchor.spanLength
if length >= cursorIndex { if length >= cursorIndex {
currentAnchor = anchor currentAnchor = anchor
break break
} }
} }
guard let currentNode = currentAnchor.node else { let currentNode = currentAnchor.node
IME.prtDebugIntel("4F2DEC2F") let currentValue = currentNode.currentPair.value
errorCallback()
return true
}
let currentValue = currentNode.currentKeyValue.value
var currentIndex = 0 var currentIndex = 0
if currentNode.score < currentNode.kSelectedCandidateScore { if currentNode.score < Megrez.Node.kSelectedCandidateScore {
/// 使 /// 使
/// 使 /// 使
/// 2 使 /// 2 使

View File

@ -28,7 +28,7 @@ import Foundation
extension vChewing { extension vChewing {
/// LMInstantiatorLMI /// LMInstantiatorLMI
/// LanguageModelProtocol 使 /// LangModelProtocol 使
/// ///
/// ///
/// LMI 調 /// LMI 調
@ -44,7 +44,7 @@ extension vChewing {
/// ///
/// LMI LMI /// LMI LMI
/// ///
public class LMInstantiator: LanguageModelProtocol { public class LMInstantiator: LangModelProtocol {
// //
public var isPhraseReplacementEnabled = false public var isPhraseReplacementEnabled = false
public var isCNSEnabled = false public var isCNSEnabled = false
@ -256,7 +256,7 @@ extension vChewing {
lmAssociates.hasValuesFor(key: key) lmAssociates.hasValuesFor(key: key)
} }
/// 滿 LanguageModelProtocol /// 滿 LangModelProtocol
public func bigramsForKeys(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() } public func bigramsForKeys(precedingKey _: String, key _: String) -> [Megrez.Bigram] { .init() }
// MARK: - // MARK: -

View File

@ -130,13 +130,12 @@ extension vChewing {
func convertKeyFrom( func convertKeyFrom(
walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false walkedAnchors: [Megrez.NodeAnchor], cursorIndex: Int, readingOnly: Bool = false
) -> String { ) -> String {
let arrEndingPunctuation = ["", "", "", "", "", "", "", ""]
let whiteList = "你他妳她祢衪它牠再在" let whiteList = "你他妳她祢衪它牠再在"
var arrNodes: [Megrez.NodeAnchor] = [] var arrNodes: [Megrez.NodeAnchor] = []
var intLength = 0 var intLength = 0
for theNodeAnchor in walkedAnchors { for theNodeAnchor in walkedAnchors {
arrNodes.append(theNodeAnchor) arrNodes.append(theNodeAnchor)
intLength += theNodeAnchor.spanningLength intLength += theNodeAnchor.spanLength
if intLength >= cursorIndex { if intLength >= cursorIndex {
break break
} }
@ -146,9 +145,8 @@ extension vChewing {
arrNodes = Array(arrNodes.reversed()) arrNodes = Array(arrNodes.reversed())
guard let kvCurrent = arrNodes[0].node?.currentKeyValue, let kvCurrent = arrNodes[0].node.currentPair
!arrEndingPunctuation.contains(kvCurrent.value) guard !kvCurrent.key.contains("_") else {
else {
return "" return ""
} }
@ -173,20 +171,18 @@ extension vChewing {
} }
if arrNodes.count >= 2, if arrNodes.count >= 2,
let kvPreviousThisOne = arrNodes[1].node?.currentKeyValue, !kvPrevious.key.contains("_"),
!arrEndingPunctuation.contains(kvPrevious.value),
kvPrevious.key.split(separator: "-").count == kvPrevious.value.count kvPrevious.key.split(separator: "-").count == kvPrevious.value.count
{ {
kvPrevious = kvPreviousThisOne kvPrevious = arrNodes[1].node.currentPair
readingStack = kvPrevious.key + readingStack readingStack = kvPrevious.key + readingStack
} }
if arrNodes.count >= 3, if arrNodes.count >= 3,
let kvAnteriorThisOne = arrNodes[2].node?.currentKeyValue, !kvAnterior.key.contains("_"),
!arrEndingPunctuation.contains(kvAnterior.value),
kvAnterior.key.split(separator: "-").count == kvAnterior.value.count kvAnterior.key.split(separator: "-").count == kvAnterior.value.count
{ {
kvAnterior = kvAnteriorThisOne kvAnterior = arrNodes[2].node.currentPair
readingStack = kvAnterior.key + readingStack readingStack = kvAnterior.key + readingStack
} }

View File

@ -25,89 +25,106 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez { extension Megrez {
/// ///
public class Compositor { public class Compositor: Grid {
///
public enum TypingDirection { case front, rear }
/// ///
private let kDroppedPathScore: Double = -999 private let kDroppedPathScore: Double = -999
/// ///
private var mutCursorIndex: Int = 0 public var cursor: Int = 0 { didSet { cursor = max(0, min(cursor, readings.count)) } }
/// ///
private var mutReadings: [String] = [] private(set) var readings: [String] = []
///
private var mutGrid: Grid = .init()
/// 使 /// 使
private var mutLM: LanguageModelProtocol private var langModel: LangModelProtocol
/// 0
private(set) var cursorRegionMap: [Int: Int] = .init()
private(set) var walkedAnchors: [Megrez.NodeAnchor] = [] //
///
public var maxBuildSpanLength: Int { mutGrid.maxBuildSpanLength }
/// ///
public var joinSeparator: String = "" public var joinSeparator: String = "-"
///
public var cursorIndex: Int {
get { mutCursorIndex }
set { mutCursorIndex = (newValue < 0) ? 0 : min(newValue, mutReadings.count) }
}
///
public var isEmpty: Bool { mutGrid.isEmpty }
///
public var grid: Grid { mutGrid }
/// ///
public var length: Int { mutReadings.count } public var length: Int { readings.count }
///
public var readings: [String] { mutReadings } ///
/// - Parameter direction:
/// - Returns:
@discardableResult public func jumpCursorBySpan(to direction: TypingDirection) -> Bool {
switch direction {
case .front:
if cursor == width { return false }
case .rear:
if cursor == 0 { return false }
}
guard let currentRegion = cursorRegionMap[cursor] else { return false }
let aRegionForward = max(currentRegion - 1, 0)
let currentRegionBorderRear: Int = walkedAnchors[0..<currentRegion].map(\.spanLength).reduce(0, +)
switch cursor {
case currentRegionBorderRear:
switch direction {
case .front:
cursor =
(currentRegion > walkedAnchors.count)
? readings.count : walkedAnchors[0...currentRegion].map(\.spanLength).reduce(0, +)
case .rear:
cursor = walkedAnchors[0..<aRegionForward].map(\.spanLength).reduce(0, +)
}
default:
switch direction {
case .front:
cursor = currentRegionBorderRear + walkedAnchors[currentRegion].spanLength
case .rear:
cursor = currentRegionBorderRear
}
}
return true
}
/// ///
/// - Parameters: /// - Parameters:
/// - lm: Megrez.LanguageModel /// - lm: Megrez.LangModel
/// - length: 10 /// - length: 10
/// - separator: /// - separator:
public init(lm: LanguageModelProtocol, length: Int = 10, separator: String = "") { public init(lm: LangModelProtocol, length: Int = 10, separator: String = "-") {
mutLM = lm langModel = lm
mutGrid = .init(spanLength: abs(length)) // super.init(spanLength: abs(length)) //
joinSeparator = separator joinSeparator = separator
} }
/// ///
public func clear() { override public func clear() {
mutCursorIndex = 0 super.clear()
mutReadings.removeAll() cursor = 0
mutGrid.clear() readings.removeAll()
walkedAnchors.removeAll()
} }
/// ///
/// - Parameters: /// - Parameters:
/// - reading: /// - reading:
public func insertReadingAtCursor(reading: String) { @discardableResult public func insertReading(_ reading: String) -> Bool {
mutReadings.insert(reading, at: mutCursorIndex) guard !reading.isEmpty, langModel.hasUnigramsFor(key: reading) else { return false }
mutGrid.expandGridByOneAt(location: mutCursorIndex) readings.insert(reading, at: cursor)
build() resizeGridByOneAt(location: cursor, to: .expand)
mutCursorIndex += 1
}
///
/// Rear
@discardableResult public func deleteReadingAtTheRearOfCursor() -> Bool {
if mutCursorIndex == 0 {
return false
}
mutReadings.remove(at: mutCursorIndex - 1)
mutCursorIndex -= 1
mutGrid.shrinkGridByOneAt(location: mutCursorIndex)
build() build()
cursor += 1
return true return true
} }
/// ///
/// Front ///
@discardableResult public func deleteReadingToTheFrontOfCursor() -> Bool { /// RearFront
if mutCursorIndex == mutReadings.count { /// - Parameter direction:
/// - Returns:
@discardableResult public func dropReading(direction: TypingDirection) -> Bool {
let isBackSpace = direction == .rear
if cursor == (isBackSpace ? 0 : readings.count) {
return false return false
} }
readings.remove(at: cursor - (isBackSpace ? 1 : 0))
mutReadings.remove(at: mutCursorIndex) cursor -= (isBackSpace ? 1 : 0)
mutGrid.shrinkGridByOneAt(location: mutCursorIndex) resizeGridByOneAt(location: cursor, to: .shrink)
build() build()
return true return true
} }
@ -118,98 +135,84 @@ extension Megrez {
/// ///
@discardableResult public func removeHeadReadings(count: Int) -> Bool { @discardableResult public func removeHeadReadings(count: Int) -> Bool {
let count = abs(count) // let count = abs(count) //
if count > length { if count > length { return false }
return false
}
for _ in 0..<count { for _ in 0..<count {
if mutCursorIndex > 0 { cursor = max(cursor - 1, 0)
mutCursorIndex -= 1 if !readings.isEmpty {
} readings.removeFirst()
if !mutReadings.isEmpty { resizeGridByOneAt(location: 0, to: .shrink)
mutReadings.removeFirst()
mutGrid.shrinkGridByOneAt(location: 0)
} }
build() build()
} }
return true return true
} }
// MARK: - Walker
/// ///
/// - Parameters: /// - Returns:
/// - location: @discardableResult public func walk() -> [NodeAnchor] {
/// - accumulatedScore: 0 let newLocation = width
/// - joinedPhrase: 使 //
/// - longPhrases: 使 walkedAnchors = Array(
public func walk( reverseWalk(at: newLocation).reversed()
at location: Int = 0, ).lazy.filter { !$0.isEmpty }
score accumulatedScore: Double = 0.0, updateCursorJumpingTables(walkedAnchors)
joinedPhrase: String = "", return walkedAnchors
longPhrases: [String] = .init()
) -> [NodeAnchor] {
let newLocation = (mutGrid.width) - abs(location) //
return Array(
reverseWalk(
at: newLocation, score: accumulatedScore,
joinedPhrase: joinedPhrase, longPhrases: longPhrases
).reversed())
} }
/// // MARK: - Private functions
///
/// - Parameters: /// - Parameters:
/// - location: /// - location:
/// - accumulatedScore: 0 /// - mass: 0
/// - joinedPhrase: 使 /// - joinedPhrase: 使
/// - longPhrases: 使 /// - longPhrases: 使
public func reverseWalk( /// - Returns:
private func reverseWalk(
at location: Int, at location: Int,
score accumulatedScore: Double = 0.0, mass: Double = 0.0,
joinedPhrase: String = "", joinedPhrase: String = "",
longPhrases: [String] = .init() longPhrases: [String] = .init()
) -> [NodeAnchor] { ) -> [NodeAnchor] {
let location = abs(location) // let location = abs(location) //
if location == 0 || location > mutGrid.width { if location == 0 || location > width {
return .init() return .init()
} }
var paths = [[NodeAnchor]]() var paths = [[NodeAnchor]]()
var nodes = mutGrid.nodesEndingAt(location: location) let nodes = nodesEndingAt(location: location).stableSorted {
nodes = nodes.stableSorted {
$0.scoreForSort > $1.scoreForSort $0.scoreForSort > $1.scoreForSort
} }
if let nodeZero = nodes[0].node, nodeZero.score >= nodeZero.kSelectedCandidateScore { guard !nodes.isEmpty else { return .init() } //
if nodes[0].node.score >= Node.kSelectedCandidateScore {
// 使 // 使
var anchorZero = nodes[0] var theAnchor = nodes[0]
anchorZero.accumulatedScore = accumulatedScore + nodeZero.score theAnchor.mass = mass + nodes[0].node.score
var path: [NodeAnchor] = reverseWalk( var path: [NodeAnchor] = reverseWalk(
at: location - anchorZero.spanningLength, score: anchorZero.accumulatedScore at: location - theAnchor.spanLength, mass: theAnchor.mass
) )
path.insert(anchorZero, at: 0) path.insert(theAnchor, at: 0)
paths.append(path) paths.append(path)
} else if !longPhrases.isEmpty { } else if !longPhrases.isEmpty {
var path = [NodeAnchor]() var path = [NodeAnchor]()
for theAnchor in nodes { for theAnchor in nodes {
guard let theNode = theAnchor.node else { continue }
var theAnchor = theAnchor var theAnchor = theAnchor
let joinedValue = theNode.currentKeyValue.value + joinedPhrase let joinedValue = theAnchor.node.currentPair.value + joinedPhrase
// //
// /////////使 // /////////使
// //
if longPhrases.contains(joinedValue) { if longPhrases.contains(joinedValue) {
theAnchor.accumulatedScore = kDroppedPathScore theAnchor.mass = kDroppedPathScore
path.insert(theAnchor, at: 0) path.insert(theAnchor, at: 0)
paths.append(path) paths.append(path)
continue continue
} }
theAnchor.accumulatedScore = accumulatedScore + theNode.score theAnchor.mass = mass + theAnchor.node.score
path = reverseWalk( path = reverseWalk(
at: location - theAnchor.spanningLength, at: location - theAnchor.spanLength,
score: theAnchor.accumulatedScore, mass: theAnchor.mass,
joinedPhrase: (joinedValue.count >= longPhrases[0].count) ? "" : joinedValue, joinedPhrase: (joinedValue.count >= longPhrases[0].count) ? "" : joinedValue,
longPhrases: .init() longPhrases: .init()
) )
@ -219,9 +222,8 @@ extension Megrez {
} else { } else {
// //
var longPhrases = [String]() var longPhrases = [String]()
for theAnchor in nodes.lazy.filter({ $0.spanningLength > 1 }) { for theAnchor in nodes.lazy.filter({ $0.spanLength > 1 }) {
guard let theNode = theAnchor.node else { continue } longPhrases.append(theAnchor.node.currentPair.value)
longPhrases.append(theNode.currentKeyValue.value)
} }
longPhrases = longPhrases.stableSorted { longPhrases = longPhrases.stableSorted {
@ -229,12 +231,11 @@ extension Megrez {
} }
for theAnchor in nodes { for theAnchor in nodes {
var theAnchor = theAnchor var theAnchor = theAnchor
guard let theNode = theAnchor.node else { continue } theAnchor.mass = mass + theAnchor.node.score
theAnchor.accumulatedScore = accumulatedScore + theNode.score
var path = [NodeAnchor]() var path = [NodeAnchor]()
path = reverseWalk( path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, at: location - theAnchor.spanLength, mass: theAnchor.mass,
joinedPhrase: (theAnchor.spanningLength > 1) ? "" : theNode.currentKeyValue.value, joinedPhrase: (theAnchor.spanLength > 1) ? "" : theAnchor.node.currentPair.value,
longPhrases: .init() longPhrases: .init()
) )
path.insert(theAnchor, at: 0) path.insert(theAnchor, at: 0)
@ -248,31 +249,29 @@ extension Megrez {
var result: [NodeAnchor] = paths[0] var result: [NodeAnchor] = paths[0]
for neta in paths.lazy.filter({ for neta in paths.lazy.filter({
$0.last!.accumulatedScore > result.last!.accumulatedScore $0.last!.mass > result.last!.mass
}) { }) {
result = neta result = neta
} }
return result return result // walk()
} }
// MARK: - Private functions
private func build() { private func build() {
let itrBegin: Int = let itrBegin: Int =
(mutCursorIndex < maxBuildSpanLength) ? 0 : mutCursorIndex - maxBuildSpanLength (cursor < maxBuildSpanLength) ? 0 : cursor - maxBuildSpanLength
let itrEnd: Int = min(mutCursorIndex + maxBuildSpanLength, mutReadings.count) let itrEnd: Int = min(cursor + maxBuildSpanLength, readings.count)
for p in itrBegin..<itrEnd { for p in itrBegin..<itrEnd {
for q in 1..<maxBuildSpanLength { for q in 1..<maxBuildSpanLength {
if p + q > itrEnd { break } if p + q > itrEnd { break }
let arrSlice = mutReadings[p..<(p + q)] let arrSlice = readings[p..<(p + q)]
let combinedReading: String = join(slice: arrSlice, separator: joinSeparator) let combinedReading: String = join(slice: arrSlice, separator: joinSeparator)
if mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { continue } if hasMatchedNode(location: p, spanLength: q, key: combinedReading) { continue }
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading) let unigrams: [Unigram] = langModel.unigramsFor(key: combinedReading)
if unigrams.isEmpty { continue } if unigrams.isEmpty { continue }
let n = Node(key: combinedReading, unigrams: unigrams) let n = Node(key: combinedReading, unigrams: unigrams)
mutGrid.insertNode(node: n, location: p, spanningLength: q) insertNode(node: n, location: p, spanLength: q)
} }
} }
} }
@ -280,6 +279,20 @@ extension Megrez {
private func join(slice arrSlice: ArraySlice<String>, separator: String) -> String { private func join(slice arrSlice: ArraySlice<String>, separator: String) -> String {
arrSlice.joined(separator: separator) arrSlice.joined(separator: separator)
} }
internal func updateCursorJumpingTables(_ anchors: [NodeAnchor]) {
var cursorRegionMapDict = [Int: Int]()
var counter = 0
for (i, anchor) in anchors.enumerated() {
for _ in 0..<anchor.spanLength {
cursorRegionMapDict[counter] = i
counter += 1
}
}
cursorRegionMapDict[counter] = anchors.count
cursorRegionMapDict[-1] = 0 //
cursorRegionMap = cursorRegionMapDict
}
} }
} }

View File

@ -24,93 +24,82 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
extension Megrez { extension Megrez {
/// ///
public class Grid { public class Grid {
///
public enum ResizeBehavior { case expand, shrink }
/// ///
private var mutSpans: [Megrez.Span] private(set) var spans: [Megrez.SpanUnit]
/// ///
private var mutMaxBuildSpanLength = 10 private(set) var maxBuildSpanLength = 10
///
public var maxBuildSpanLength: Int { mutMaxBuildSpanLength }
/// ///
public var width: Int { mutSpans.count } public var width: Int { spans.count }
/// ///
public var isEmpty: Bool { mutSpans.isEmpty } public var isEmpty: Bool { spans.isEmpty }
/// ///
public init(spanLength: Int = 10) { public init(spanLength: Int = 10) {
mutMaxBuildSpanLength = spanLength maxBuildSpanLength = spanLength
mutSpans = [Megrez.Span]() spans = [Megrez.SpanUnit]()
} }
/// ///
public func clear() { public func clear() {
mutSpans.removeAll() spans.removeAll()
} }
/// ///
/// - Parameters: /// - Parameters:
/// - node: /// - node:
/// - location: /// - location:
/// - spanningLength: /// - spanLength:
public func insertNode(node: Node, location: Int, spanningLength: Int) { public func insertNode(node: Node, location: Int, spanLength: Int) {
let location = abs(location) // let location = abs(location) //
let spanningLength = abs(spanningLength) // let spanLength = abs(spanLength) //
if location >= mutSpans.count { if location >= spans.count {
let diff = location - mutSpans.count + 1 let diff = location - spans.count + 1
for _ in 0..<diff { for _ in 0..<diff {
mutSpans.append(Span()) spans.append(SpanUnit())
} }
} }
mutSpans[location].insert(node: node, length: spanningLength) spans[location].insert(node: node, length: spanLength)
} }
/// ///
/// - Parameters: /// - Parameters:
/// - location: /// - location:
/// - spanningLength: /// - spanLength:
/// - key: /// - key:
public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool { public func hasMatchedNode(location: Int, spanLength: Int, key: String) -> Bool {
let location = abs(location) // let location = abs(location) //
let spanningLength = abs(spanningLength) // let spanLength = abs(spanLength) //
if location > mutSpans.count { if location > spans.count {
return false return false
} }
let n = mutSpans[location].node(length: spanningLength) let n = spans[location].nodeOf(length: spanLength)
return n != nil && key == n?.key return n != nil && key == n?.key
} }
/// Inserts or removes one span at `location`, then trims every earlier span's nodes that
/// would reach past that position.
/// - Parameters:
///   - location: Target position; clamped to `0...width`.
///     NOTE(review): `width` is declared outside this excerpt — presumably the span count; confirm.
///   - behavior: `.expand` inserts an empty span, `.shrink` removes the span at that position.
public func resizeGridByOneAt(location: Int, to behavior: ResizeBehavior) {
  let pivot = max(0, min(width, location))
  switch behavior {
  case .expand:
    spans.insert(SpanUnit(), at: pivot)
    // Nothing to trim when the new span sits at either edge.
    guard pivot != spans.count, pivot != 0 else { return }
  case .shrink:
    guard pivot < spans.count else { return }
    spans.remove(at: pivot)
  }
  // Drop nodes of earlier spans that would overlap the changed position.
  for start in 0..<pivot {
    spans[start].dropNodesBeyond(length: pivot - start)
  }
}
@ -120,21 +109,21 @@ extension Megrez {
/// Collects anchors for every node stored in the span at `location`.
/// - Parameter location: Index of the span to read; its absolute value is used.
/// - Returns: One anchor per stored node length from 1 through `maxBuildSpanLength`, in ascending
///   length order; empty when `location` is past the last span.
public func nodesBeginningAt(location: Int) -> [NodeAnchor] {
  let location = abs(location)
  guard location < spans.count else { return [] }
  let span = spans[location]
  // `stride` yields nothing when `maxBuildSpanLength < 1`, whereas `1...maxBuildSpanLength` would trap.
  return stride(from: 1, through: maxBuildSpanLength, by: 1).compactMap { length in
    span.nodeOf(length: length).map { NodeAnchor(node: $0, location: location, spanLength: length) }
  }
}
/// ///
@ -143,21 +132,21 @@ extension Megrez {
/// Collects anchors for every node that ends exactly at `location`.
/// - Parameter location: End position; its absolute value is used.
/// - Returns: Anchors ordered by starting span; empty when the grid is empty or `location`
///   exceeds the span count.
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
  let end = abs(location)
  guard !spans.isEmpty, end <= spans.count else { return [] }
  return (0..<end).compactMap { start -> NodeAnchor? in
    let span = spans[start]
    // A span whose longest node stops short of `end` cannot contribute.
    guard start + span.maxLength >= end else { return nil }
    guard let node = span.nodeOf(length: end - start) else { return nil }
    return NodeAnchor(node: node, location: start, spanLength: end - start)
  }
}
/// ///
@ -166,46 +155,76 @@ extension Megrez {
/// Collects anchors for every node that starts before `location` and reaches it or extends past it.
/// - Parameter location: Position to test; its absolute value is used.
/// - Returns: Anchors ordered by starting span, then by node length; empty when the grid is empty
///   or `location` exceeds the span count.
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
  let cursor = abs(location)
  var anchors: [NodeAnchor] = []
  guard !spans.isEmpty, cursor <= spans.count else { return anchors }
  for (start, span) in spans.prefix(cursor).enumerated() where start + span.maxLength >= cursor {
    // Shortest length that still reaches the cursor from this span.
    let reach = cursor - start
    for length in reach...span.maxLength {
      guard let node = span.nodeOf(length: length) else { continue }
      // NOTE(review): the anchor records `reach`, not the node's own `length` — confirm this is intended.
      anchors.append(NodeAnchor(node: node, location: start, spanLength: reach))
    }
  }
  return anchors
}
/// Collects the anchors of all nodes that begin at, cross, or end at `location`, without duplicates.
///
/// The result keeps a stable order: nodes beginning at `location` first, then the crossing or
/// ending ones. Callers that keep only the last match therefore behave deterministically.
/// - Parameter location: Position to test.
/// - Returns: De-duplicated anchors in first-seen order.
public func nodesOverlappedAt(location: Int) -> [NodeAnchor] {
  var seen = Set<NodeAnchor>()
  let merged = nodesBeginningAt(location: location) + nodesCrossingOrEndingAt(location: location)
  // `insert` reports whether the element was new, which filters duplicates while keeping order.
  return merged.filter { seen.insert($0).inserted }
}
/// Selects, in every node overlapping `location`, the first candidate whose value equals `value`.
///
/// Each overlapping node is reset before the search; a node without a matching candidate stays reset.
/// This variant compares candidate values only; `fixNodeWithCandidate()` compares whole pairs.
/// - Parameters:
///   - value: Candidate value to look for.
///   - location: Position to test; its absolute value is used.
/// - Returns: The last anchor in which a match was selected, or an empty `NodeAnchor()` when none matched.
@discardableResult public func fixNodeWithCandidateLiteral(_ value: String, at location: Int) -> NodeAnchor {
  var fixed = NodeAnchor()
  for anchor in nodesOverlappedAt(location: abs(location)) {
    let target = anchor.node
    let candidates = target.candidates
    // Clear any earlier selection before applying the new one.
    target.resetCandidate()
    guard let index = candidates.firstIndex(where: { $0.value == value }) else { continue }
    target.selectCandidateAt(index: index)
    fixed = anchor
  }
  return fixed
}
/// 使
/// ///
/// ///
/// - Parameters: /// - Parameters:
/// - location: /// - location:
/// - value: /// - value:
@discardableResult public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor { @discardableResult public func fixNodeWithCandidate(_ pair: KeyValuePaired, at location: Int) -> NodeAnchor {
let location = abs(location) // let location = abs(location) //
var node = NodeAnchor() var node = NodeAnchor()
for nodeAnchor in nodesCrossingOrEndingAt(location: location) { for theAnchor in nodesOverlappedAt(location: location) {
guard let theNode = nodeAnchor.node else { let candidates = theAnchor.node.candidates
continue
}
let candidates = theNode.candidates
// //
theNode.resetCandidate() theAnchor.node.resetCandidate()
for (i, candidate) in candidates.enumerated() { for (i, candidate) in candidates.enumerated() {
if candidate.value == value { if candidate == pair {
theNode.selectCandidateAt(index: i) theAnchor.node.selectCandidateAt(index: i)
node = nodeAnchor node = theAnchor
break break
} }
} }
@ -220,16 +239,13 @@ extension Megrez {
/// - overridingScore: /// - overridingScore:
public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) { public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) {
let location = abs(location) // let location = abs(location) //
for nodeAnchor in nodesCrossingOrEndingAt(location: location) { for theAnchor in nodesOverlappedAt(location: location) {
guard let theNode = nodeAnchor.node else { let candidates = theAnchor.node.candidates
continue
}
let candidates = theNode.candidates
// //
theNode.resetCandidate() theAnchor.node.resetCandidate()
for (i, candidate) in candidates.enumerated() { for (i, candidate) in candidates.enumerated() {
if candidate.value == value { if candidate.value == value {
theNode.selectFloatingCandidateAt(index: i, score: overridingScore) theAnchor.node.selectFloatingCandidateAt(index: i, score: overridingScore)
break break
} }
} }
@ -244,29 +260,22 @@ extension Megrez.Grid {
/// GraphViz /// GraphViz
public var dumpDOT: String { public var dumpDOT: String {
var strOutput = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n" var strOutput = "digraph {\ngraph [ rankdir=LR ];\nBOS;\n"
for (p, span) in mutSpans.enumerated() { for (p, span) in spans.enumerated() {
for ni in 0...(span.maximumLength) { for ni in 0...(span.maxLength) {
guard let np: Megrez.Node = span.node(length: ni) else { guard let np = span.nodeOf(length: ni) else { continue }
continue
}
if p == 0 { if p == 0 {
strOutput += "BOS -> \(np.currentKeyValue.value);\n" strOutput += "BOS -> \(np.currentPair.value);\n"
} }
strOutput += "\(np.currentPair.value);\n"
strOutput += "\(np.currentKeyValue.value);\n" if (p + ni) < spans.count {
let destinationSpan = spans[p + ni]
if (p + ni) < mutSpans.count { for q in 0...(destinationSpan.maxLength) {
let destinationSpan = mutSpans[p + ni] guard let dn = destinationSpan.nodeOf(length: q) else { continue }
for q in 0...(destinationSpan.maximumLength) { strOutput += np.currentPair.value + " -> " + dn.currentPair.value + ";\n"
if let dn = destinationSpan.node(length: q) {
strOutput += np.currentKeyValue.value + " -> " + dn.currentKeyValue.value + ";\n"
}
} }
} }
guard (p + ni) == spans.count else { continue }
if (p + ni) == mutSpans.count { strOutput += np.currentPair.value + " -> EOS;\n"
strOutput += np.currentKeyValue.value + " -> EOS;\n"
}
} }
} }
strOutput += "EOS;\n}\n" strOutput += "EOS;\n}\n"

View File

@ -25,25 +25,34 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez { extension Megrez {
/// ///
@frozen public struct NodeAnchor: CustomStringConvertible { @frozen public struct NodeAnchor: Hashable {
/// Whether this anchor's node has an empty key.
public var isEmpty: Bool {
  node.key.isEmpty
}
/// ///
public var node: Node? public var node: Node = .init()
/// ///
public var location: Int = 0 public var location: Int = 0
/// ///
public var spanningLength: Int = 0 public var spanLength: Int = 0
/// ///
public var accumulatedScore: Double = 0.0 public var mass: Double = 0.0
/// Number of characters in the node's key; 0 for an empty anchor.
public var keyLength: Int {
  // Only a non-empty anchor has a key worth counting.
  isEmpty ? 0 : node.key.count
}
/// Feeds the node, location, span length and mass into `hasher`, in that order.
public func hash(into hasher: inout Hasher) {
  hasher.combine(node)
  hasher.combine(location)
  hasher.combine(spanLength)
  hasher.combine(mass)
}
/// ///
public var description: String { public var description: String {
var stream = "" var stream = ""
stream += "{@(" + String(location) + "," + String(spanningLength) + ")," stream += "{@(" + String(location) + "," + String(spanLength) + "),"
if let node = node { if node.key.isEmpty {
stream += node.description stream += node.description
} else { } else {
stream += "null" stream += "null"
@ -54,12 +63,12 @@ extension Megrez {
/// ///
public var scoreForSort: Double {
  // An empty anchor sorts with score 0; otherwise the node's current score is used.
  isEmpty ? 0 : node.score
}
} }
} }
// MARK: - DumpDOT-related functions. // MARK: - Array Extensions.
extension Array where Element == Megrez.NodeAnchor { extension Array where Element == Megrez.NodeAnchor {
/// ///
@ -70,4 +79,14 @@ extension Array where Element == Megrez.NodeAnchor {
} }
return arrOutputContent.joined(separator: "<-") return arrOutputContent.joined(separator: "<-")
} }
/// The value of each anchor's currently selected pair, in array order.
public var values: [String] {
  map { $0.node.currentPair.value }
}
/// The key of each anchor's currently selected pair, in array order.
public var keys: [String] {
  map { $0.node.currentPair.key }
}
} }

View File

@ -25,21 +25,16 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez { extension Megrez {
/// ///
@frozen public struct Span { @frozen public struct SpanUnit {
/// ///
private var mutLengthNodeMap: [Int: Megrez.Node] = [:] private var lengthNodeMap: [Int: Megrez.Node] = [:]
/// ///
private var mutMaximumLength: Int = 0 private(set) var maxLength: Int = 0
///
public var maximumLength: Int {
mutMaximumLength
}
/// Removes every stored node and resets the recorded maximum length to 0.
mutating func clear() {
  maxLength = 0
  lengthNodeMap.removeAll()
}
/// ///
@ -48,37 +43,37 @@ extension Megrez {
/// - length: /// - length:
mutating func insert(node: Node, length: Int) {
  // Negative lengths are folded to their absolute value.
  let span = abs(length)
  // A node already filed under this length is replaced.
  lengthNodeMap[span] = node
  if span > maxLength { maxLength = span }
}
/// Removes every node filed under a length greater than `length` and updates `maxLength`.
/// - Parameters:
///   - length: Largest length allowed to remain; its absolute value is used. Nothing happens when
///     it exceeds the current `maxLength`.
mutating func dropNodesBeyond(length: Int) {
  let limit = abs(length)
  guard limit <= maxLength else { return }
  lengthNodeMap = lengthNodeMap.filter { $0.key <= limit }
  // The new maximum is the longest surviving length, or 0 when nothing is left.
  maxLength = lengthNodeMap.keys.max() ?? 0
}
/// Looks up the node filed under the given length.
/// - Parameters:
///   - length: Length to look up; its absolute value is used.
/// - Returns: The stored node, or `nil` when none is filed under that length.
public func nodeOf(length: Int) -> Node? {
  // Dictionary subscripting already yields nil for a missing key.
  lengthNodeMap[abs(length)]
}
} }
} }

View File

@ -25,76 +25,86 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez { extension Megrez {
/// ///
public class Node { public class Node: Equatable, Hashable {
/// Two nodes are equal when their key, score, unigrams, bigrams, candidates, lookup maps,
/// fixed flag and selected index are all equal.
public static func == (lhs: Megrez.Node, rhs: Megrez.Node) -> Bool {
  guard lhs.key == rhs.key, lhs.score == rhs.score else { return false }
  guard lhs.unigrams == rhs.unigrams, lhs.bigrams == rhs.bigrams else { return false }
  guard lhs.candidates == rhs.candidates else { return false }
  guard lhs.valueUnigramIndexMap == rhs.valueUnigramIndexMap else { return false }
  guard lhs.precedingBigramMap == rhs.precedingBigramMap else { return false }
  guard lhs.isCandidateFixed == rhs.isCandidateFixed else { return false }
  return lhs.selectedUnigramIndex == rhs.selectedUnigramIndex
}
/// Hashes the same fields that `==` compares, in the same order.
public func hash(into hasher: inout Hasher) {
  hasher.combine(key)
  hasher.combine(score)
  hasher.combine(unigrams)
  hasher.combine(bigrams)
  hasher.combine(candidates)
  hasher.combine(valueUnigramIndexMap)
  hasher.combine(precedingBigramMap)
  hasher.combine(isCandidateFixed)
  hasher.combine(selectedUnigramIndex)
}
///
private(set) var key: String = ""
///
private(set) var score: Double = 0
///
private var unigrams: [Unigram]
///
private var bigrams: [Bigram]
///
private(set) var candidates: [KeyValuePaired] = []
/// 調
private var valueUnigramIndexMap: [String: Int] = [:]
///
private var precedingBigramMap: [KeyValuePaired: [Megrez.Bigram]] = [:]
///
private(set) var isCandidateFixed: Bool = false
///
private var selectedUnigramIndex: Int = 0
///
public static let kSelectedCandidateScore: Double = 99
/// Debug text listing the key, the fixed flag, the selected unigram index and the unigrams.
public var description: String {
  return "(node,key:\(key),fixed:\(isCandidateFixed ? "true" : "false"),selected:\(selectedUnigramIndex),\(unigrams))"
}
///
public var key: String { mutKey }
///
public var score: Double { mutScore }
/// The candidate pair at the selected unigram index, or an empty `KeyValuePaired()` when that
/// index is not below the unigram count.
public var currentPair: KeyValuePaired {
  guard selectedUnigramIndex < unigrams.count else { return KeyValuePaired() }
  return candidates[selectedUnigramIndex]
}
/// Score of the first stored unigram, or 0 when the node has no unigrams.
public var highestUnigramScore: Double {
  unigrams.first?.score ?? 0.0
}
/// Creates a node from a key and its language-model data.
///
/// Unigrams are stored sorted by descending score. The node's initial score is the best
/// unigram's score, or stays 0 when there are no unigrams.
/// - Parameters:
///   - key: The node's key.
///   - unigrams: Unigrams for the key, in any order.
///   - bigrams: Bigrams for the key; indexed by their preceding key-value pair.
public init(key: String = "", unigrams: [Megrez.Unigram] = [], bigrams: [Megrez.Bigram] = []) {
  self.key = key
  self.unigrams = unigrams.sorted { $0.score > $1.score }
  self.bigrams = bigrams
  // Read the SORTED copy: the `unigrams` parameter is still in caller order.
  if let best = self.unigrams.first {
    score = best.score
  }
  for (i, gram) in self.unigrams.enumerated() {
    valueUnigramIndexMap[gram.keyValue.value] = i
    candidates.append(gram.keyValue)
  }
  // Create the bucket on first use; appending only to existing keys would leave the map empty.
  for gram in bigrams {
    precedingBigramMap[gram.precedingKeyValue, default: []].append(gram)
  }
}
@ -102,22 +112,22 @@ extension Megrez {
/// - Parameters: /// - Parameters:
/// - precedingKeyValues: /// - precedingKeyValues:
public func primeNodeWith(precedingKeyValues: [KeyValuePaired]) {
  // Start from the current selection; only a strictly better bigram can replace it.
  var bestScore = score
  var bestIndex = selectedUnigramIndex
  if !isCandidateFixed {
    for preceding in precedingKeyValues {
      for bigram in precedingBigramMap[preceding] ?? [] {
        // Skip bigrams that do not beat the running best or whose value has no unigram index.
        guard bigram.score > bestScore,
          let index = valueUnigramIndexMap[bigram.keyValue.value]
        else { continue }
        bestIndex = index
        bestScore = bigram.score
      }
    }
  }
  score = bestScore
  selectedUnigramIndex = bestIndex
}
/// ///
@ -126,17 +136,17 @@ extension Megrez {
/// - fix: /// - fix:
public func selectCandidateAt(index: Int = 0, fix: Bool = false) {
  let requested = abs(index)
  // An index past the last unigram falls back to the first one.
  selectedUnigramIndex = requested < unigrams.count ? requested : 0
  isCandidateFixed = fix
  // A selected candidate always gets the constant selected-candidate score.
  score = Node.kSelectedCandidateScore
}
/// Returns the node to its unselected state: index 0, not fixed, and the first unigram's score.
/// The score is left untouched when the node has no unigrams.
public func resetCandidate() {
  selectedUnigramIndex = 0
  isCandidateFixed = false
  if let top = unigrams.first {
    score = top.score
  }
}
@ -146,16 +156,26 @@ extension Megrez {
/// - score: /// - score:
public func selectFloatingCandidateAt(index: Int, score: Double) {
  let requested = abs(index)
  // An index past the last unigram falls back to the first one.
  selectedUnigramIndex = requested < unigrams.count ? requested : 0
  // Unlike a fixed selection, this one stays open to later overrides.
  isCandidateFixed = false
  self.score = score
}
/// Looks up the score of the first unigram whose value equals `candidate`.
/// - Parameters:
///   - candidate: Candidate value to look for.
/// - Returns: That unigram's score, or 0 when no unigram carries the value.
public func scoreFor(candidate: String) -> Double {
  unigrams.first(where: { $0.keyValue.value == candidate })?.score ?? 0.0
}
///
/// - Parameters:
/// - candidate:
public func scoreForPaired(candidate: KeyValuePaired) -> Double {
for unigram in unigrams.lazy.filter({ $0.keyValue == candidate }) {
return unigram.score return unigram.score
} }
return 0.0 return 0.0

View File

@ -23,7 +23,7 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
public protocol LanguageModelProtocol { public protocol LangModelProtocol {
/// ///
func unigramsFor(key: String) -> [Megrez.Unigram] func unigramsFor(key: String) -> [Megrez.Unigram]
@ -36,7 +36,7 @@ public protocol LanguageModelProtocol {
extension Megrez { extension Megrez {
/// 使 /// 使
open class LanguageModel: LanguageModelProtocol { open class LangModel: LangModelProtocol {
public init() {} public init() {}
// Swift // Swift

View File

@ -25,7 +25,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez { extension Megrez {
/// ///
@frozen public struct Bigram: Equatable, CustomStringConvertible { @frozen public struct Bigram: Equatable, CustomStringConvertible, Hashable {
/// ///
public var keyValue: KeyValuePaired public var keyValue: KeyValuePaired
/// ///
@ -61,7 +61,7 @@ extension Megrez {
/// Orders bigrams by preceding pair, then by pair, then by score.
///
/// Each later field is consulted only when all earlier fields are equal, so `a < b` and
/// `b < a` can never both hold.
public static func < (lhs: Bigram, rhs: Bigram) -> Bool {
  if lhs.precedingKeyValue == rhs.precedingKeyValue {
    if lhs.keyValue == rhs.keyValue {
      return lhs.score < rhs.score
    }
    return lhs.keyValue < rhs.keyValue
  }
  return lhs.precedingKeyValue < rhs.precedingKeyValue
}
} }
} }

View File

@ -25,7 +25,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez { extension Megrez {
/// ///
@frozen public struct Unigram: Equatable, CustomStringConvertible { @frozen public struct Unigram: Equatable, CustomStringConvertible, Hashable {
/// ///
public var keyValue: KeyValuePaired public var keyValue: KeyValuePaired
/// ///
@ -54,7 +54,7 @@ extension Megrez {
} }
/// Orders unigrams by pair first; equal pairs are ordered by score.
public static func < (lhs: Unigram, rhs: Unigram) -> Bool {
  if lhs.keyValue < rhs.keyValue { return true }
  return lhs.keyValue == rhs.keyValue && lhs.score < rhs.score
}
} }
} }

View File

@ -52,7 +52,7 @@ extension Megrez {
} }
/// Two pairs are equal when both their keys and their values are equal.
public static func == (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {
  guard lhs.key == rhs.key else { return false }
  return lhs.value == rhs.value
}
public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool { public static func < (lhs: KeyValuePaired, rhs: KeyValuePaired) -> Bool {