Megrez v1.1.8 // Add nodesBeginningAt().

This commit is contained in:
ShikiSuen 2022-05-30 15:38:19 +08:00
parent 69be62bb69
commit 87e39bf943
5 changed files with 198 additions and 104 deletions

View File

@ -26,8 +26,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez {
///
public class BlockReadingBuilder {
///
private var mutMaximumBuildSpanLength = 10
///
private let kDroppedPathScore: Double = -999
///
private var mutCursorIndex: Int = 0
///
@ -37,6 +37,8 @@ extension Megrez {
/// 使
private var mutLM: LanguageModel
///
public var maxBuildSpanLength: Int { mutGrid.maxBuildSpanLength }
///
public var joinSeparator: String = ""
///
@ -55,11 +57,11 @@ extension Megrez {
///
/// - Parameters:
/// - lm: Megrez.LanguageModel
/// - length: 10
/// - length: 10
/// - separator:
public init(lm: LanguageModel, length: Int = 10, separator: String = "") {
mutLM = lm
mutMaximumBuildSpanLength = length
mutGrid = .init(spanLength: abs(length)) //
joinSeparator = separator
}
@ -112,6 +114,7 @@ extension Megrez {
///
///
@discardableResult public func removeHeadReadings(count: Int) -> Bool {
let count = abs(count) //
if count > length {
return false
}
@ -120,8 +123,10 @@ extension Megrez {
if mutCursorIndex > 0 {
mutCursorIndex -= 1
}
mutReadings.removeFirst()
mutGrid.shrinkGridByOneAt(location: 0)
if !mutReadings.isEmpty {
mutReadings.removeFirst()
mutGrid.shrinkGridByOneAt(location: 0)
}
build()
}
@ -131,23 +136,22 @@ extension Megrez {
// MARK: - Walker
///
///
/// 使
/// - Parameters:
/// - at:
/// - score: 0
/// - nodesLimit:
/// - balanced:
/// - joinedPhrase: 使
/// - longPhrases: 使
public func walk(
at location: Int,
at location: Int = 0,
score accumulatedScore: Double = 0.0,
nodesLimit: Int = 0,
balanced: Bool = false
joinedPhrase: String = "",
longPhrases: [String] = .init()
) -> [NodeAnchor] {
Array(
let newLocation = (mutGrid.width) - abs(location) //
return Array(
reverseWalk(
at: location, score: accumulatedScore,
nodesLimit: nodesLimit, balanced: balanced
at: newLocation, score: accumulatedScore,
joinedPhrase: joinedPhrase, longPhrases: longPhrases
).reversed())
}
@ -155,91 +159,125 @@ extension Megrez {
/// - Parameters:
/// - at:
/// - score: 0
/// - nodesLimit:
/// - balanced:
/// - joinedPhrase: 使
/// - longPhrases: 使
public func reverseWalk(
at location: Int,
score accumulatedScore: Double = 0.0,
nodesLimit: Int = 0,
balanced: Bool = false
joinedPhrase: String = "",
longPhrases: [String] = .init()
) -> [NodeAnchor] {
let location = abs(location) //
if location == 0 || location > mutGrid.width {
return [] as [NodeAnchor]
return .init()
}
var paths: [[NodeAnchor]] = []
var nodes: [NodeAnchor] = mutGrid.nodesEndingAt(location: location)
var paths = [[NodeAnchor]]()
var nodes = mutGrid.nodesEndingAt(location: location)
if balanced {
nodes.sort {
$0.balancedScore > $1.balancedScore
}
nodes = nodes.stableSorted {
$0.scoreForSort > $1.scoreForSort
}
for (i, n) in nodes.enumerated() {
// X NodeAnchor node
// abs
if abs(nodesLimit) > 0, i == abs(nodesLimit) {
break
}
var n = n
guard let nNode = n.node else {
continue
}
n.accumulatedScore = accumulatedScore + nNode.score
//
//
if balanced {
n.accumulatedScore += n.additionalWeights
}
var path: [NodeAnchor] = reverseWalk(
at: location - n.spanningLength,
score: n.accumulatedScore
)
path.insert(n, at: 0)
if let nodeOfNodeZero = nodes[0].node, nodeOfNodeZero.score >= nodeOfNodeZero.kSelectedCandidateScore {
// 使
var nodeZero = nodes[0]
nodeZero.accumulatedScore = accumulatedScore + nodeOfNodeZero.score
var path: [NodeAnchor] = reverseWalk(at: location - nodeZero.spanningLength, score: nodeZero.accumulatedScore)
path.insert(nodeZero, at: 0)
paths.append(path)
// 使
if balanced, nNode.score >= 0 {
break
}
}
if !paths.isEmpty {
if var result = paths.first {
for value in paths {
if let vLast = value.last, let rLast = result.last {
if vLast.accumulatedScore > rLast.accumulatedScore {
result = value
}
}
} else if !longPhrases.isEmpty {
var path = [NodeAnchor]()
for theAnchor in nodes {
guard let theNode = theAnchor.node else { continue }
var theAnchor = theAnchor
let joinedValue = theNode.currentKeyValue.value + joinedPhrase
//
// /////////使
//
if longPhrases.contains(joinedValue) {
theAnchor.accumulatedScore = kDroppedPathScore
path.insert(theAnchor, at: 0)
paths.append(path)
continue
}
return result
theAnchor.accumulatedScore = accumulatedScore + theNode.score
if joinedValue.count >= longPhrases[0].count {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, joinedPhrase: "",
longPhrases: .init()
)
} else {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, joinedPhrase: joinedValue,
longPhrases: longPhrases
)
}
path.insert(theAnchor, at: 0)
paths.append(path)
}
} else {
//
var longPhrases = [String]()
for theAnchor in nodes {
guard let theNode = theAnchor.node else { continue }
if theAnchor.spanningLength > 1 {
longPhrases.append(theNode.currentKeyValue.value)
}
}
longPhrases = longPhrases.stableSorted {
$0.count > $1.count
}
for theAnchor in nodes {
var theAnchor = theAnchor
guard let theNode = theAnchor.node else { continue }
theAnchor.accumulatedScore = accumulatedScore + theNode.score
var path = [NodeAnchor]()
if theAnchor.spanningLength > 1 {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore, joinedPhrase: "",
longPhrases: .init()
)
} else {
path = reverseWalk(
at: location - theAnchor.spanningLength, score: theAnchor.accumulatedScore,
joinedPhrase: theNode.currentKeyValue.value, longPhrases: longPhrases
)
}
path.insert(theAnchor, at: 0)
paths.append(path)
}
}
return [] as [NodeAnchor]
guard !paths.isEmpty else {
return .init()
}
var result: [NodeAnchor] = paths[0]
for neta in paths {
if neta.last!.accumulatedScore > result.last!.accumulatedScore {
result = neta
}
}
return result
}
// MARK: - Private functions
private func build() {
let itrBegin: Int =
(mutCursorIndex < mutMaximumBuildSpanLength) ? 0 : mutCursorIndex - mutMaximumBuildSpanLength
let itrEnd: Int = min(mutCursorIndex + mutMaximumBuildSpanLength, mutReadings.count)
(mutCursorIndex < maxBuildSpanLength) ? 0 : mutCursorIndex - maxBuildSpanLength
let itrEnd: Int = min(mutCursorIndex + maxBuildSpanLength, mutReadings.count)
for p in itrBegin..<itrEnd {
for q in 1..<mutMaximumBuildSpanLength {
for q in 1..<maxBuildSpanLength {
if p + q > itrEnd {
break
}
let strSlice = mutReadings[p..<(p + q)]
let combinedReading: String = join(slice: strSlice, separator: joinSeparator)
let arrSlice = mutReadings[p..<(p + q)]
let combinedReading: String = join(slice: arrSlice, separator: joinSeparator)
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
@ -252,12 +290,35 @@ extension Megrez {
}
}
/// Concatenates the readings in a slice into a single string.
///
/// - Parameters:
///   - arrSlice: The readings to concatenate, in order.
///   - separator: The text placed between adjacent readings.
/// - Returns: The joined string; an empty string when the slice is empty.
private func join(slice arrSlice: ArraySlice<String>, separator: String) -> String {
  // `joined(separator:)` works directly on the slice, so no intermediate array copy is needed.
  arrSlice.joined(separator: separator)
}
}
}
// MARK: - Stable Sort Extension
// Reference: https://stackoverflow.com/a/50545761/4162914
extension Sequence {
  /// Returns the elements of the sequence sorted by the given predicate, keeping
  /// elements that the predicate treats as equal in their original relative order.
  ///
  /// - Parameter areInIncreasingOrder: Returns `true` when its first argument
  ///   should be ordered before its second. Two elements count as equal when the
  ///   predicate returns `false` for both argument orders.
  /// - Returns: A new array holding the sorted elements.
  func stableSorted(
    by areInIncreasingOrder: (Element, Element) throws -> Bool
  ) rethrows -> [Element] {
    // Pair every element with its original position so ties can be broken by it.
    let indexed = Array(enumerated())
    let ordered = try indexed.sorted { lhs, rhs -> Bool in
      if try areInIncreasingOrder(lhs.element, rhs.element) {
        return true
      }
      // Not strictly before: `lhs` may still win, but only if it came first
      // originally and `rhs` is not strictly before it either.
      if lhs.offset >= rhs.offset {
        return false
      }
      return try !areInIncreasingOrder(rhs.element, lhs.element)
    }
    return ordered.map { $0.element }
  }
}

View File

@ -29,16 +29,23 @@ extension Megrez {
///
private var mutSpans: [Megrez.Span]
///
private var mutMaxBuildSpanLength = 10
///
public var maxBuildSpanLength: Int { mutMaxBuildSpanLength }
///
var width: Int { mutSpans.count }
public init() {
public init(spanLength: Int = 10) {
mutMaxBuildSpanLength = spanLength
mutSpans = [Megrez.Span]()
}
/// Removes every span from the grid, leaving `mutSpans` empty.
public func clear() {
  mutSpans.removeAll(keepingCapacity: false)
}
///
@ -47,6 +54,8 @@ extension Megrez {
/// - location:
/// - spanningLength:
public func insertNode(node: Node, location: Int, spanningLength: Int) {
let location = abs(location) //
let spanningLength = abs(spanningLength) //
if location >= mutSpans.count {
let diff = location - mutSpans.count + 1
for _ in 0..<diff {
@ -62,24 +71,26 @@ extension Megrez {
/// - spanningLength:
/// - key:
public func hasMatchedNode(location: Int, spanningLength: Int, key: String) -> Bool {
  // Reports whether the span at `location` already holds a node of length
  // `spanningLength` whose key equals `key`.
  // Negative arguments are replaced by their absolute values.
  let location = abs(location)
  let spanningLength = abs(spanningLength)
  // `mutSpans.count` is itself one past the last valid index, so it must be
  // rejected together with anything larger; otherwise the subscript below traps.
  guard location < mutSpans.count else {
    return false
  }
  guard let existing = mutSpans[location].node(length: spanningLength) else {
    return false
  }
  return key == existing.key
}
/// Inserts one empty span at the given position, then asks every span before
/// that position to drop nodes longer than its distance to the insertion point.
///
/// - Parameters:
///   - location: Index at which the new span is inserted. A negative value is
///     replaced by its absolute value.
public func expandGridByOneAt(location: Int) {
  let insertionIndex = abs(location)
  mutSpans.insert(Span(), at: insertionIndex)
  // NOTE(review): `mutSpans.count` is read after the insertion, so the second
  // comparison appears to always hold — confirm whether it was meant to be
  // evaluated before inserting.
  guard insertionIndex != 0, insertionIndex != mutSpans.count else {
    return
  }
  for spanIndex in 0..<insertionIndex {
    // zaps overlapping spans
    mutSpans[spanIndex].removeNodeOfLengthGreaterThan(insertionIndex - spanIndex)
  }
}
@ -88,6 +99,7 @@ extension Megrez {
/// - Parameters:
/// - location:
public func shrinkGridByOneAt(location: Int) {
let location = abs(location) //
if location >= mutSpans.count {
return
}
@ -99,11 +111,35 @@ extension Megrez {
}
}
/// Returns an anchor for every node stored in the span at the given position.
///
/// - Parameters:
///   - location: Index of the span to inspect. A negative value is replaced by
///     its absolute value.
/// - Returns: One `NodeAnchor` for each length from 1 through
///   `maxBuildSpanLength` for which the span holds a node, in ascending length
///   order. Empty when `location` is not a valid span index or when
///   `maxBuildSpanLength` is below 1.
public func nodesBeginningAt(location: Int) -> [NodeAnchor] {
  let location = abs(location)
  // `1...maxBuildSpanLength` traps when the upper bound is below 1, and the
  // stored span length is not validated on construction, so guard it here
  // together with the index bound.
  guard location < mutSpans.count, maxBuildSpanLength >= 1 else {
    return []
  }
  let span = mutSpans[location]
  return (1...maxBuildSpanLength).compactMap { length in
    span.node(length: length).map { node in
      NodeAnchor(
        node: node,
        location: location,
        spanningLength: length
      )
    }
  }
}
///
/// - Parameters:
/// - location:
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = []
let location = abs(location) //
var results = [NodeAnchor]()
if !mutSpans.isEmpty, location <= mutSpans.count {
for i in 0..<location {
let span = mutSpans[i]
@ -127,7 +163,8 @@ extension Megrez {
/// - Parameters:
/// - location:
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = []
let location = abs(location) //
var results = [NodeAnchor]()
if !mutSpans.isEmpty, location <= mutSpans.count {
for i in 0..<location {
let span = mutSpans[i]
@ -157,6 +194,7 @@ extension Megrez {
/// - location:
/// - value:
@discardableResult public func fixNodeSelectedCandidate(location: Int, value: String) -> NodeAnchor {
let location = abs(location) //
var node = NodeAnchor()
for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
guard let theNode = nodeAnchor.node else {
@ -182,6 +220,7 @@ extension Megrez {
/// - value:
/// - overridingScore:
public func overrideNodeScoreForSelectedCandidate(location: Int, value: String, overridingScore: Double) {
let location = abs(location) //
for nodeAnchor in nodesCrossingOrEndingAt(location: location) {
guard let theNode = nodeAnchor.node else {
continue

View File

@ -52,19 +52,9 @@ extension Megrez {
return stream
}
///
public var additionalWeights: Double {
(Double(spanningLength) - 1) * 0.75
}
///
public var balancedScore: Double {
(node?.score ?? 0) + additionalWeights
}
///
public var balancedAccumulatedScore: Double {
accumulatedScore + additionalWeights
/// Sort key for this anchor: the score of its node, or 0 when it has no node.
public var scoreForSort: Double {
  guard let attachedNode = node else {
    return 0
  }
  return attachedNode.score
}
}
}

View File

@ -47,6 +47,7 @@ extension Megrez {
/// - node:
/// - length:
mutating func insert(node: Node, length: Int) {
let length = abs(length) //
mutLengthNodeMap[length] = node
if length > mutMaximumLength {
mutMaximumLength = length
@ -57,6 +58,7 @@ extension Megrez {
/// - Parameters:
/// - length:
mutating func removeNodeOfLengthGreaterThan(_ length: Int) {
let length = abs(length) //
if length > mutMaximumLength { return }
var max = 0
var removalList: [Int: Megrez.Node] = [:]
@ -79,7 +81,7 @@ extension Megrez {
/// - Parameters:
/// - length:
public func node(length: Int) -> Node? {
mutLengthNodeMap[length]
mutLengthNodeMap[abs(length)] //
}
}
}

View File

@ -47,7 +47,7 @@ extension Megrez {
///
private var mutSelectedUnigramIndex: Int = 0
///
private let kSelectedCandidateScore: Double = 99
public let kSelectedCandidateScore: Double = 99
///
public var description: String {
"(node,key:\(mutKey),fixed:\(mutCandidateFixed ? "true" : "false"),selected:\(mutSelectedUnigramIndex),\(mutUnigrams))"
@ -84,7 +84,7 @@ extension Megrez {
$0.score > $1.score
}
if mutUnigrams.count > 0 {
if !mutUnigrams.isEmpty {
mutScore = mutUnigrams[0].score
}
@ -133,6 +133,7 @@ extension Megrez {
/// - index:
/// - fix:
public func selectCandidateAt(index: Int = 0, fix: Bool = false) {
let index = abs(index)
mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index
mutCandidateFixed = fix
mutScore = kSelectedCandidateScore
@ -152,6 +153,7 @@ extension Megrez {
/// - index:
/// - score:
public func selectFloatingCandidateAt(index: Int, score: Double) {
let index = abs(index) //
mutSelectedUnigramIndex = index >= mutUnigrams.count ? 0 : index
mutCandidateFixed = false
mutScore = score