Megrez // Sanitizing node-crossed candidates.

This commit is contained in:
ShikiSuen 2023-03-16 20:00:36 +08:00
parent d83e9c7e05
commit 2063b0e50b
4 changed files with 46 additions and 8 deletions

View File

@ -92,28 +92,41 @@ public extension Megrez.Compositor {
/// ///
/// ///
/// location - 1 /// location 1
/// - Parameter location: /// - Parameter location:
/// - Returns: /// - Returns:
func fetchCandidates(at location: Int, filter: CandidateFetchFilter = .all) -> [Megrez.KeyValuePaired] { func fetchCandidates(
at givenLocation: Int? = nil, filter givenFilter: CandidateFetchFilter = .all
) -> [Megrez.KeyValuePaired] {
var result = [Megrez.KeyValuePaired]() var result = [Megrez.KeyValuePaired]()
guard !keys.isEmpty else { return result } guard !keys.isEmpty else { return result }
let location = max(min(location, keys.count - 1), 0) // var location = max(min(givenLocation ?? cursor, keys.count), 0)
var filter = givenFilter
if filter == .endAt {
if location == keys.count { filter = .all }
location -= 1
}
location = max(min(location, keys.count - 1), 0)
let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted { let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted {
// //
$0.spanLength > $1.spanLength $0.spanLength > $1.spanLength
} }
let keyAtCursor = keys[location] let keyAtCursor = keys[location]
anchors.map(\.node).filter(\.keyArray.isEmpty.negative).forEach { theNode in anchors.forEach { theAnchor in
let theNode = theAnchor.node
theNode.unigrams.forEach { gram in theNode.unigrams.forEach { gram in
switch filter { switch filter {
case .all: case .all:
// //
if !theNode.keyArray.contains(keyAtCursor) { return } if !theNode.keyArray.contains(keyAtCursor) { return }
case .beginAt: case .beginAt:
if theNode.keyArray[0] != keyAtCursor { return } guard theAnchor.spanIndex == location else { return }
case .endAt: case .endAt:
if theNode.keyArray.reversed()[0] != keyAtCursor { return } guard theNode.keyArray.last == keyAtCursor else { return }
switch theNode.spanLength {
case 2... where theAnchor.spanIndex + theAnchor.spanLength - 1 != location: return
default: break
}
} }
result.append(.init(keyArray: theNode.keyArray, value: gram.value)) result.append(.init(keyArray: theNode.keyArray, value: gram.value))
} }

View File

@ -66,11 +66,13 @@ extension Megrez.Compositor {
/// - Returns: /// - Returns:
func fetchOverlappingNodes(at givenLocation: Int) -> [NodeAnchor] { func fetchOverlappingNodes(at givenLocation: Int) -> [NodeAnchor] {
var results = [NodeAnchor]() var results = [NodeAnchor]()
guard !spans.isEmpty, givenLocation < spans.count else { return results } let givenLocation = max(0, min(givenLocation, keys.count - 1))
guard !spans.isEmpty else { return results }
// //
(1 ... max(spans[givenLocation].maxLength, 1)).forEach { theSpanLength in (1 ... max(spans[givenLocation].maxLength, 1)).forEach { theSpanLength in
guard let node = spans[givenLocation][theSpanLength] else { return } guard let node = spans[givenLocation][theSpanLength] else { return }
guard !node.keyArray.joined().isEmpty else { return }
results.append(.init(node: node, spanIndex: givenLocation)) results.append(.init(node: node, spanIndex: givenLocation))
} }
@ -81,6 +83,7 @@ extension Megrez.Compositor {
guard A <= B else { return } guard A <= B else { return }
(A ... B).forEach { theLength in (A ... B).forEach { theLength in
guard let node = spans[theLocation][theLength] else { return } guard let node = spans[theLocation][theLength] else { return }
guard !node.keyArray.joined().isEmpty else { return }
results.append(.init(node: node, spanIndex: theLocation)) results.append(.init(node: node, spanIndex: theLocation))
} }
} }

View File

@ -173,6 +173,7 @@ nian2zhong1 年中 -11.373044
gao1ke1ji4 -9.842421 gao1ke1ji4 -9.842421
zhe4yang4 -6.000000 // Non-LibTaBE zhe4yang4 -6.000000 // Non-LibTaBE
ni3zhe4 -9.000000 // Non-LibTaBE ni3zhe4 -9.000000 // Non-LibTaBE
ke1ke1 -8.000000 // Non-LibTaBE
jiao4 -3.676169 jiao4 -3.676169
jiao4 -3.24869962 jiao4 -3.24869962
jiao4yu4 -3.32220565 jiao4yu4 -3.32220565

View File

@ -559,4 +559,25 @@ final class MegrezTests: XCTestCase {
let resultB = compositorB.walk().walkedNodes let resultB = compositorB.walk().walkedNodes
XCTAssertEqual(resultA, resultB) XCTAssertEqual(resultA, resultB)
} }
/// Checks the longest candidate returned by `fetchCandidates` for the reading sequence
/// "ke1 ke1", both before and after a third reading ("jin1") is appended at the end.
///
/// For every probe only the largest `keyArray.count` among the returned candidates is
/// compared (0 when no candidate is returned). Both rounds are expected to yield "1 1 2 2".
func test22_Compositor_SanitizingNodeCrossing() throws {
  let theLM = SimpleLM(input: strSampleData)
  var compositor = Megrez.Compositor(with: theLM)
  for reading in "ke1 ke1".split(separator: " ") {
    compositor.insertKey(reading.description)
  }

  /// Runs the four probes in a fixed order against the compositor's current state and
  /// joins the longest key-array length of each probe with single spaces.
  func probeSummary() -> String {
    let probes = [
      compositor.fetchCandidates(at: 1, filter: .beginAt),
      compositor.fetchCandidates(at: 1, filter: .endAt),
      compositor.fetchCandidates(at: 0, filter: .beginAt),
      compositor.fetchCandidates(at: 2, filter: .endAt),
    ]
    return probes.map { candidates in
      String(candidates.map(\.keyArray.count).max() ?? 0)
    }.joined(separator: " ")
  }

  XCTAssertEqual(probeSummary(), "1 1 2 2")

  // Put the cursor at the end before inserting, then repeat the very same probes:
  // the expected summary is unchanged.
  compositor.cursor = compositor.length
  compositor.insertKey("jin1")
  XCTAssertEqual(probeSummary(), "1 1 2 2")
}
} }