Megrez v1.1.8 // Add nodesBeginningAt().

This commit is contained in:
ShikiSuen 2022-05-30 15:38:19 +08:00
parent 69be62bb69
commit 87e39bf943
5 changed files with 198 additions and 104 deletions

View File

@ -26,8 +26,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez { extension Megrez {
/// ///
public class BlockReadingBuilder { public class BlockReadingBuilder {
/// ///
private var mutMaximumBuildSpanLength = 10 private let kDroppedPathScore: Double = -999
/// ///
private var mutCursorIndex: Int = 0 private var mutCursorIndex: Int = 0
/// ///
@ -37,6 +37,8 @@ extension Megrez {
/// 使 /// 使
private var mutLM: LanguageModel private var mutLM: LanguageModel
///
public var maxBuildSpanLength: Int { mutGrid.maxBuildSpanLength }
/// ///
public var joinSeparator: String = "" public var joinSeparator: String = ""
/// ///
@ -55,11 +57,11 @@ extension Megrez {
/// ///
/// - Parameters: /// - Parameters:
/// - lm: Megrez.LanguageModel /// - lm: Megrez.LanguageModel
/// - length: 10 /// - length: 10
/// - separator: /// - separator:
public init(lm: LanguageModel, length: Int = 10, separator: String = "") { public init(lm: LanguageModel, length: Int = 10, separator: String = "") {
mutLM = lm mutLM = lm
mutMaximumBuildSpanLength = length mutGrid = .init(spanLength: abs(length)) //
joinSeparator = separator joinSeparator = separator
} }
@ -112,6 +114,7 @@ extension Megrez {
/// ///
/// ///
@discardableResult public func removeHeadReadings(count: Int) -> Bool { @discardableResult public func removeHeadReadings(count: Int) -> Bool {
let count = abs(count) //
if count > length { if count > length {
return false return false
} }
@ -120,8 +123,10 @@ extension Megrez {
if mutCursorIndex > 0 { if mutCursorIndex > 0 {
mutCursorIndex -= 1 mutCursorIndex -= 1
} }
mutReadings.removeFirst() if !mutReadings.isEmpty {
mutGrid.shrinkGridByOneAt(location: 0) mutReadings.removeFirst()
mutGrid.shrinkGridByOneAt(location: 0)
}
build() build()
} }
@ -131,23 +136,22 @@ extension Megrez {
// MARK: - Walker // MARK: - Walker
/// ///
///
/// 使
/// - Parameters: /// - Parameters:
/// - at: /// - at:
/// - score: 0 /// - score: 0
/// - nodesLimit: /// - joinedPhrase: 使
/// - balanced: /// - longPhrases: 使
public func walk( public func walk(
at location: Int, at location: Int = 0,
score accumulatedScore: Double = 0.0, score accumulatedScore: Double = 0.0,
nodesLimit: Int = 0, joinedPhrase: String = "",
balanced: Bool = false longPhrases: [String] = .init()
) -> [NodeAnchor] { ) -> [NodeAnchor] {
Array( let newLocation = (mutGrid.width) - abs(location) //
return Array(
reverseWalk( reverseWalk(
at: location, score: accumulatedScore, at: newLocation, score: accumulatedScore,
nodesLimit: nodesLimit, balanced: balanced joinedPhrase: joinedPhrase, longPhrases: longPhrases
).reversed()) ).reversed())
} }
@ -155,91 +159,125 @@ extension Megrez {
/// - Parameters: /// - Parameters:
/// - at: /// - at:
/// - score: 0 /// - score: 0
/// - nodesLimit: /// - joinedPhrase: 使
/// - balanced: /// - longPhrases: 使
public func reverseWalk( public func reverseWalk(
at location: Int, at location: Int,
score accumulatedScore: Double = 0.0, score accumulatedScore: Double = 0.0,
nodesLimit: Int = 0, joinedPhrase: String = "",
balanced: Bool = false longPhrases: [String] = .init()
) -> [NodeAnchor] { ) -> [NodeAnchor] {
let location = abs(location) //
if location == 0 || location > mutGrid.width { if location == 0 || location > mutGrid.width {
return [] as [NodeAnchor] return .init()
} }
var paths: [[NodeAnchor]] = [] var paths = [[NodeAnchor]]()
var nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location) var nodes = mutGrid.nodesEndingAt(location: location)
if balanced { nodes = nodes.stableSorted {
nodes.sort { $0.scoreForSort > $1.scoreForSort
$0.balancedScore > $1.balancedScore
}
} }
for (i, n) in nodes.enumerated() { if let nodeOfNodeZero = nodes[0].node, nodeOfNodeZero.score >= nodeOfNodeZero.kSelectedCandidateScore {
// X NodeAnchor node // 使
// abs var nodeZero = nodes[0]
if abs(nodesLimit) > 0, i == abs(nodesLimit) { nodeZero.accumulatedScore = accumulatedScore + nodeOfNodeZero.score
break var path: [NodeAnchor] = reverseWalk(at: location - nodeZero.spanningLength, score: nodeZero.accumulatedScore)
} path.insert(nodeZero, at: 0)
var n = n
guard let nNode = n.node else {
continue
}
n.accumulatedScore = accumulatedScore + nNode.score
//
//
if balanced {
n.accumulatedScore += n.additionalWeights
}
var path: [NodeAnchor] = reverseWalk(
at: location - n.spanningLength,
score: n.accumulatedScore
)
path.insert(n, at: 0)
paths.append(path) paths.append(path)
} else if !longPhrases.isEmpty {
// 使 var path = [NodeAnchor]()
if balanced, nNode.score >= 0 { for theAnchor in nodes {
break guard let theNode = theAnchor.node else { continue }
} var theAnchor = theAnchor
} let joinedValue = theNode.currentKeyValue.value + joinedPhrase
//
if !paths.isEmpty { // /////////使
if var result = paths.first { //
for value in paths { if longPhrases.contains(joinedValue) {
if let vLast = value.last, let rLast = result.last { theAnchor.accumulatedScore = kDroppedPathScore
if vLast.accumulatedScore > rLast.accumulatedScore { path.insert(theAnchor, at: 0)
result = value paths.append(path)
} continue
}
} }
return result theAnchor.accumulatedScore = accumulatedScore + theNode.score
if joinedValue.count >= longPhrases[0].count {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, joinedPhrase: "",
longPhrases: .init()
)
} else {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, joinedPhrase: joinedValue,
longPhrases: longPhrases
)
}
path.insert(theAnchor, at: 0)
paths.append(path)
}
} else {
//
var longPhrases = [String]()
for theAnchor in nodes {
guard let theNode = theAnchor.node else { continue }
if theAnchor.spanningLength > 1 {
longPhrases.append(theNode.currentKeyValue.value)
}
}
longPhrases = longPhrases.stableSorted {
$0.count > $1.count
}
for theAnchor in nodes {
var theAnchor = theAnchor
guard let theNode = theAnchor.node else { continue }
theAnchor.accumulatedScore = accumulatedScore + theNode.score
var path = [NodeAnchor]()
if theAnchor.spanningLength > 1 {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, joinedPhrase: "",
longPhrases: .init()
)
} else {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore,
joinedPhrase: theNode.currentKeyValue.value, longPhrases: longPhrases
)
}
path.insert(theAnchor, at: 0)
paths.append(path)
} }
} }
return [] as [NodeAnchor]
guard !paths.isEmpty else {
return .init()
}
var result: [NodeAnchor] = paths[0]
for neta in paths {
if neta.last!.accumulatedScore > result.last!.accumulatedScore {
result = neta
}
}
return result
} }
// MARK: - Private functions // MARK: - Private functions
private func build() { private func build() {
let itrBegin: Int = let itrBegin: Int =
(mutCursorIndex < mutMaximumBuildSpanLength) ? 0 : mutCursorIndex - mutMaximumBuildSpanLength (mutCursorIndex < maxBuildSpanLength) ? 0 : mutCursorIndex - maxBuildSpanLength
let itrEnd: Int = min(mutCursorIndex + mutMaximumBuildSpanLength, mutReadings.count) let itrEnd: Int = min(mutCursorIndex + maxBuildSpanLength, mutReadings.count)
for p in itrBegin..<itrEnd { for p in itrBegin..<itrEnd {
for q in 1..<mutMaximumBuildSpanLength { for q in 1..<maxBuildSpanLength {
if p + q > itrEnd { if p + q > itrEnd {
break break
} }
let strSlice = mutReadings[p..<(p + q)] let arrSlice = mutReadings[p..<(p + q)]
let combinedReading: String = join(slice: strSlice, separator: joinSeparator) let combinedReading: String = join(slice: arrSlice, separator: joinSeparator)
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading) let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
@ -252,12 +290,35 @@ extension Megrez {
} }
} }
private func join(slice strSlice: ArraySlice<String>, separator: String) -> String { private func join(slice arrSlice: ArraySlice<String>, separator: String) -> String {
var arrResult: [String] = [] var arrResult: [String] = []
for value in strSlice { for value in arrSlice {
arrResult.append(value) arrResult.append(value)
} }
return arrResult.joined(separator: separator) return arrResult.joined(separator: separator)
} }
} }
} }
// MARK: - Stable Sort Extension
// Reference: https://stackoverflow.com/a/50545761/4162914
extension Sequence {
  /// Returns the elements of the sequence sorted with a stable ordering.
  ///
  /// Elements that the predicate does not order relative to each other
  /// (neither one is "before" the other) keep the relative order they had
  /// in the original sequence.
  ///
  /// - Parameter areInIncreasingOrder: A predicate returning `true` when its
  ///   first argument should be ordered before its second argument.
  /// - Returns: A new array containing the sorted elements.
  /// - Throws: Rethrows any error thrown by `areInIncreasingOrder`.
  func stableSorted(
    by areInIncreasingOrder: (Element, Element) throws -> Bool
  ) rethrows -> [Element] {
    // Pair every element with its original position so ties can be broken by it.
    let indexedElements = Array(enumerated())
    let ordered = try indexedElements.sorted { lhs, rhs -> Bool in
      // The predicate says lhs goes first: done.
      if try areInIncreasingOrder(lhs.element, rhs.element) {
        return true
      }
      // lhs originally came after rhs, so it cannot win a tie.
      guard lhs.offset < rhs.offset else {
        return false
      }
      // lhs came first originally; it stays first unless rhs must precede it.
      return try !areInIncreasingOrder(rhs.element, lhs.element)
    }
    return ordered.map { $0.element }
  }
}

View File

@ -29,16 +29,23 @@ extension Megrez {
/// ///
private var mutSpans: [Megrez.Span] private var mutSpans: [Megrez.Span]
///
private var mutMaxBuildSpanLength = 10
///
public var maxBuildSpanLength: Int { mutMaxBuildSpanLength }
/// ///
var width: Int { mutSpans.count } var width: Int { mutSpans.count }
public init() { public init(spanLength: Int = 10) {
mutMaxBuildSpanLength = spanLength
mutSpans = [Megrez.Span]() mutSpans = [Megrez.Span]()
} }
/// ///
public func clear() { public func clear() {
mutSpans = [Megrez.Span]() mutSpans.removeAll()
} }
/// ///
@ -47,6 +54,8 @@ extension Megrez {
/// - location: /// - location:
/// - spanningLength: /// - spanningLength:
public func insertNode(node: Node, location: Int, spanningLength: Int) { public func insertNode(node: Node, location: Int, spanningLength: Int) {
let location = abs(location) //
let spanningLength = abs(spanningLength) //
if location >= mutSpans.count { if location >= mutSpans.count {
let diff = location - mutSpans.count + 1 let diff = location - mutSpans.count + 1
for _ in 0..<diff { for _ in 0..<diff {
@ -62,24 +71,26 @@ extension Megrez {
/// - spanningLength: /// - spanningLength:
/// - key: /// - key:
public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool { public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool {
let location = abs(location) //
let spanningLength = abs(spanningLength) //
if location > mutSpans.count { if location > mutSpans.count {
return false return false
} }
let n = mutSpans[location].node(length: spanningLength) let n = mutSpans[location].node(length: spanningLength)
return n == nil ? false : key == n?.key return n != nil && key == n?.key
} }
/// ///
/// - Parameters: /// - Parameters:
/// - location: /// - location:
public func expandGridByOneAt(location: Int) { public func expandGridByOneAt(location: Int) {
// abs let location = abs(location) //
mutSpans.insert(Span(), at: abs(location)) mutSpans.insert(Span(), at: location)
if location != 0, abs(location) != mutSpans.count { if location != 0, location != mutSpans.count {
for i in 0..<abs(location) { for i in 0..<location {
// zaps overlapping spans // zaps overlapping spans
mutSpans[i].removeNodeOfLengthGreaterThan(abs(location) - i) mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
} }
} }
} }
@ -88,6 +99,7 @@ extension Megrez {
/// - Parameters: /// - Parameters:
/// - location: /// - location:
public func shrinkGridByOneAt(location: Int) { public func shrinkGridByOneAt(location: Int) {
let location = abs(location) //
if location >= mutSpans.count { if location >= mutSpans.count {
return return
} }
@ -99,11 +111,35 @@ extension Megrez {
} }
} }
/// Collects an anchor for every node stored in the span at the given location.
///
/// The span at `location` is probed for a node of each length from 1 up to
/// `maxBuildSpanLength`; every node found is wrapped in a `NodeAnchor`
/// carrying that location and length.
///
/// - Parameters:
///   - location: Index of the span to inspect. The sign is ignored
///     (the absolute value is used).
/// - Returns: The anchors found, in ascending order of spanning length.
///   Empty when `location` is outside `mutSpans` or when
///   `maxBuildSpanLength` is less than 1.
public func nodesBeginningAt(location: Int) -> [NodeAnchor] {
  let location = abs(location)  // Negative input is treated as its magnitude.
  // `1...maxBuildSpanLength` traps at runtime if the upper bound is below 1,
  // so reject that case together with an out-of-range location.
  guard location < mutSpans.count, maxBuildSpanLength >= 1 else {
    return []
  }
  let span = mutSpans[location]
  var results = [NodeAnchor]()
  for i in 1...maxBuildSpanLength {
    guard let np = span.node(length: i) else { continue }
    results.append(
      NodeAnchor(
        node: np,
        location: location,
        spanningLength: i
      )
    )
  }
  return results
}
/// ///
/// - Parameters: /// - Parameters:
/// - location: /// - location:
public func nodesEndingAt(location: Int) -> [NodeAnchor] { public func nodesEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = [] let location = abs(location) //
var results = [NodeAnchor]()
if !mutSpans.isEmpty, location <= mutSpans.count { if !mutSpans.isEmpty, location <= mutSpans.count {
for i in 0..<location { for i in 0..<location {
let span = mutSpans[i] let span = mutSpans[i]
@ -127,7 +163,8 @@ extension Megrez {
/// - Parameters: /// - Parameters:
/// - location: /// - location:
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] { public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = [] let location = abs(location) //
var results = [NodeAnchor]()
if !mutSpans.isEmpty, location <= mutSpans.count { if !mutSpans.isEmpty, location <= mutSpans.count {
for i in 0..<location { for i in 0..<location {
let span = mutSpans[i] let span = mutSpans[i]
@ -157,6 +194,7 @@ extension Megrez {
/// - location: /// - location:
/// - value: /// - value:
@discardableResult public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor { @discardableResult public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor {
let location = abs(location) //
var node = NodeAnchor() var node = NodeAnchor()
for nodeAnchor in nodesCrossingOrEndingAt(location: location) { for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
guard let theNode = nodeAnchor.node else { guard let theNode = nodeAnchor.node else {
@ -182,6 +220,7 @@ extension Megrez {
/// - value: /// - value:
/// - overridingScore: /// - overridingScore:
public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) { public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) {
let location = abs(location) //
for nodeAnchor in nodesCrossingOrEndingAt(location: location) { for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
guard let theNode = nodeAnchor.node else { guard let theNode = nodeAnchor.node else {
continue continue

View File

@ -52,19 +52,9 @@ extension Megrez {
return stream return stream
} }
/// ///
public var additionalWeights: Double { public var scoreForSort: Double {
(Double(spanningLength) - 1) * 0.75 node?.score ?? 0
}
///
public var balancedScore: Double {
(node?.score ?? 0) + additionalWeights
}
///
public var balancedAccumulatedScore: Double {
accumulatedScore + additionalWeights
} }
} }
} }

View File

@ -47,6 +47,7 @@ extension Megrez {
/// - node: /// - node:
/// - length: /// - length:
mutating func insert(node: Node, length: Int) { mutating func insert(node: Node, length: Int) {
let length = abs(length) //
mutLengthNodeMap[length] = node mutLengthNodeMap[length] = node
if length > mutMaximumLength { if length > mutMaximumLength {
mutMaximumLength = length mutMaximumLength = length
@ -57,6 +58,7 @@ extension Megrez {
/// - Parameters: /// - Parameters:
/// - length: /// - length:
mutating func removeNodeOfLengthGreaterThan(_ length: Int) { mutating func removeNodeOfLengthGreaterThan(_ length: Int) {
let length = abs(length) //
if length > mutMaximumLength { return } if length > mutMaximumLength { return }
var max = 0 var max = 0
var removalList: [Int: Megrez.Node] = [:] var removalList: [Int: Megrez.Node] = [:]
@ -79,7 +81,7 @@ extension Megrez {
/// - Parameters: /// - Parameters:
/// - length: /// - length:
public func node(length: Int) -> Node? { public func node(length: Int) -> Node? {
mutLengthNodeMap[length] mutLengthNodeMap[abs(length)] //
} }
} }
} }

View File

@ -47,7 +47,7 @@ extension Megrez {
/// ///
private var mutSelectedUnigramIndex: Int = 0 private var mutSelectedUnigramIndex: Int = 0
/// ///
private let kSelectedCandidateScore: Double = 99 public let kSelectedCandidateScore: Double = 99
/// ///
public var description: String { public var description: String {
"(node,key:\(mutKey),fixed:\(mutCandidateFixed ? "true" : "false"),selected:\(mutSelectedUnigramIndex),\(mutUnigrams))" "(node,key:\(mutKey),fixed:\(mutCandidateFixed ? "true" : "false"),selected:\(mutSelectedUnigramIndex),\(mutUnigrams))"
@ -84,7 +84,7 @@ extension Megrez {
$0.score > $1.score $0.score > $1.score
} }
if mutUnigrams.count > 0 { if !mutUnigrams.isEmpty {
mutScore = mutUnigrams[0].score mutScore = mutUnigrams[0].score
} }
@ -133,6 +133,7 @@ extension Megrez {
/// - index: /// - index:
/// - fix: /// - fix:
public func selectCandidateAt(index: Int = 0, fix: Bool = false) { public func selectCandidateAt(index: Int = 0, fix: Bool = false) {
let index = abs(index)
mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index
mutCandidateFixed = fix mutCandidateFixed = fix
mutScore = kSelectedCandidateScore mutScore = kSelectedCandidateScore
@ -152,6 +153,7 @@ extension Megrez {
/// - index: /// - index:
/// - score: /// - score:
public func selectFloatingCandidateAt(index: Int, score: Double) { public func selectFloatingCandidateAt(index: Int, score: Double) {
let index = abs(index) //
mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index
mutCandidateFixed = false mutCandidateFixed = false
mutScore = score mutScore = score