Megrez // Let insertKey() return false if no new nodes added.

This commit is contained in:
ShikiSuen 2022-10-24 12:46:06 +08:00
parent 11d2ce635d
commit 32e462af43
3 changed files with 20 additions and 9 deletions

View File

@ -73,8 +73,14 @@ extension Megrez {
@discardableResult public mutating func insertKey(_ key: String) -> Bool {
  // Contract: returns true only when the key was inserted and the cursor advanced by one.
  // On every false return, `keys`, `spans` and `cursor` are left exactly as they were.
  guard !key.isEmpty, key != separator, langModel.hasUnigramsFor(key: key) else { return false }
  // Snapshot the grid before touching it so that a failed insertion can be undone.
  let gridBackup = spans
  keys.insert(key, at: cursor)
  resizeGrid(at: cursor, do: .expand)
  // Passing the hasUnigramsFor() check above does not guarantee that update() adds a node.
  // If it reports zero new nodes, roll back BOTH the grid and the key list; restoring only
  // `spans` would leave `keys` one element longer than the grid.
  guard update() > 0 else {
    spans = gridBackup
    keys.remove(at: cursor)
    return false
  }
  // The cursor may only move after update() has run, because update() reads `cursor`
  // to decide which range of the grid to refresh.
  cursor += 1
  return true
}
@ -219,7 +225,7 @@ extension Megrez.Compositor {
/// (XXXXXXX? <- /// (XXXXXXX? <-
/// ``` /// ```
/// - Parameter location: /// - Parameter location:
func dropWreckedNodes(at location: Int) { mutating func dropWreckedNodes(at location: Int) {
let location = max(min(location, spans.count), 0) // let location = max(min(location, spans.count), 0) //
guard !spans.isEmpty else { return } guard !spans.isEmpty else { return }
let affectedLength = Megrez.Compositor.maxSpanLength - 1 let affectedLength = Megrez.Compositor.maxSpanLength - 1
@ -230,7 +236,7 @@ extension Megrez.Compositor {
} }
} }
@discardableResult func insertNode(_ node: Node, at location: Int) -> Bool { @discardableResult mutating func insertNode(_ node: Node, at location: Int) -> Bool {
let location = max(min(location, spans.count - 1), 0) // let location = max(min(location, spans.count - 1), 0) //
spans[location].append(node: node) spans[location].append(node: node)
return true return true
@ -254,9 +260,12 @@ extension Megrez.Compositor {
return key == node.key return key == node.key
} }
func update() { ///
/// - Returns: The number of nodes newly inserted into the grid by this call.
@discardableResult mutating func update() -> Int {
let maxSpanLength = Megrez.Compositor.maxSpanLength let maxSpanLength = Megrez.Compositor.maxSpanLength
let range = max(0, cursor - maxSpanLength)..<min(cursor + maxSpanLength, keys.count) let range = max(0, cursor - maxSpanLength)..<min(cursor + maxSpanLength, keys.count)
var nodesInserted = 0
for position in range { for position in range {
for theLength in 1...min(maxSpanLength, range.upperBound - position) { for theLength in 1...min(maxSpanLength, range.upperBound - position) {
let jointKeyArray = getJointKeyArray(range: position..<(position + theLength)) let jointKeyArray = getJointKeyArray(range: position..<(position + theLength))
@ -268,8 +277,10 @@ extension Megrez.Compositor {
.init(keyArray: jointKeyArray, spanLength: theLength, unigrams: unigrams, keySeparator: separator), .init(keyArray: jointKeyArray, spanLength: theLength, unigrams: unigrams, keySeparator: separator),
at: position at: position
) )
nodesInserted += 1
} }
} }
return nodesInserted
} }
mutating func updateCursorJumpingTables(_ walkedNodes: [Node]) { mutating func updateCursorJumpingTables(_ walkedNodes: [Node]) {

View File

@ -5,7 +5,7 @@
extension Megrez.Compositor { extension Megrez.Compositor {
/// ///
public class Span { public struct Span {
private var nodes: [Node?] = [] private var nodes: [Node?] = []
public private(set) var maxLength = 0 public private(set) var maxLength = 0
private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength } private var maxSpanLength: Int { Megrez.Compositor.maxSpanLength }
@ -13,7 +13,7 @@ extension Megrez.Compositor {
clear() clear()
} }
public func clear() { public mutating func clear() {
nodes.removeAll() nodes.removeAll()
for _ in 0..<maxSpanLength { for _ in 0..<maxSpanLength {
nodes.append(nil) nodes.append(nil)
@ -24,7 +24,7 @@ extension Megrez.Compositor {
/// ///
/// - Parameter node: /// - Parameter node:
/// - Returns: /// - Returns:
@discardableResult public func append(node: Node) -> Bool { @discardableResult public mutating func append(node: Node) -> Bool {
guard (1...maxSpanLength).contains(node.spanLength) else { guard (1...maxSpanLength).contains(node.spanLength) else {
return false return false
} }
@ -36,7 +36,7 @@ extension Megrez.Compositor {
/// ///
/// - Parameter length: /// - Parameter length:
/// - Returns: /// - Returns:
@discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool { @discardableResult public mutating func dropNodesOfOrBeyond(length: Int) -> Bool {
guard (1...maxSpanLength).contains(length) else { guard (1...maxSpanLength).contains(length) else {
return false return false
} }

View File

@ -11,7 +11,7 @@ import XCTest
final class MegrezTests: XCTestCase { final class MegrezTests: XCTestCase {
func testSpan() throws { func testSpan() throws {
let langModel = SimpleLM(input: strSampleData) let langModel = SimpleLM(input: strSampleData)
let span = Megrez.Compositor.Span() var span = Megrez.Compositor.Span()
let n1 = Megrez.Compositor.Node(keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(key: "gao1")) let n1 = Megrez.Compositor.Node(keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(key: "gao1"))
let n3 = Megrez.Compositor.Node( let n3 = Megrez.Compositor.Node(
keyArray: ["gao1ke1ji4"], spanLength: 3, unigrams: langModel.unigramsFor(key: "gao1ke1ji4") keyArray: ["gao1ke1ji4"], spanLength: 3, unigrams: langModel.unigramsFor(key: "gao1ke1ji4")