Megrez // Sanitizing node-crossed candidates.
This commit is contained in:
parent
d83e9c7e05
commit
2063b0e50b
|
@ -92,28 +92,41 @@ public extension Megrez.Compositor {
|
||||||
|
|
||||||
/// 返回在當前位置的所有候選字詞(以詞音配對的形式)。如果組字器內有幅位、且游標
|
/// 返回在當前位置的所有候選字詞(以詞音配對的形式)。如果組字器內有幅位、且游標
|
||||||
/// 位於組字器的(文字輸入順序的)最前方(也就是游標位置的數值是最大合規數值)的
|
/// 位於組字器的(文字輸入順序的)最前方(也就是游標位置的數值是最大合規數值)的
|
||||||
/// 話,那麼這裡會用到 location - 1、以免去在呼叫該函式後再處理的麻煩。
|
/// 話,那麼這裡會對 location 的位置自動減去 1、以免去在呼叫該函式後再處理的麻煩。
|
||||||
/// - Parameter location: 游標位置。
|
/// - Parameter location: 游標位置,必須是顯示的游標位置、不得做任何事先糾偏處理。
|
||||||
/// - Returns: 候選字音配對陣列。
|
/// - Returns: 候選字音配對陣列。
|
||||||
func fetchCandidates(at location: Int, filter: CandidateFetchFilter = .all) -> [Megrez.KeyValuePaired] {
|
func fetchCandidates(
|
||||||
|
at givenLocation: Int? = nil, filter givenFilter: CandidateFetchFilter = .all
|
||||||
|
) -> [Megrez.KeyValuePaired] {
|
||||||
var result = [Megrez.KeyValuePaired]()
|
var result = [Megrez.KeyValuePaired]()
|
||||||
guard !keys.isEmpty else { return result }
|
guard !keys.isEmpty else { return result }
|
||||||
let location = max(min(location, keys.count - 1), 0) // 防呆
|
var location = max(min(givenLocation ?? cursor, keys.count), 0)
|
||||||
|
var filter = givenFilter
|
||||||
|
if filter == .endAt {
|
||||||
|
if location == keys.count { filter = .all }
|
||||||
|
location -= 1
|
||||||
|
}
|
||||||
|
location = max(min(location, keys.count - 1), 0)
|
||||||
let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted {
|
let anchors: [NodeAnchor] = fetchOverlappingNodes(at: location).stableSorted {
|
||||||
// 按照讀音的長度(幅位長度)來給節點排序。
|
// 按照讀音的長度(幅位長度)來給節點排序。
|
||||||
$0.spanLength > $1.spanLength
|
$0.spanLength > $1.spanLength
|
||||||
}
|
}
|
||||||
let keyAtCursor = keys[location]
|
let keyAtCursor = keys[location]
|
||||||
anchors.map(\.node).filter(\.keyArray.isEmpty.negative).forEach { theNode in
|
anchors.forEach { theAnchor in
|
||||||
|
let theNode = theAnchor.node
|
||||||
theNode.unigrams.forEach { gram in
|
theNode.unigrams.forEach { gram in
|
||||||
switch filter {
|
switch filter {
|
||||||
case .all:
|
case .all:
|
||||||
// 得加上這道篩選,不然會出現很多無效結果。
|
// 得加上這道篩選,不然會出現很多無效結果。
|
||||||
if !theNode.keyArray.contains(keyAtCursor) { return }
|
if !theNode.keyArray.contains(keyAtCursor) { return }
|
||||||
case .beginAt:
|
case .beginAt:
|
||||||
if theNode.keyArray[0] != keyAtCursor { return }
|
guard theAnchor.spanIndex == location else { return }
|
||||||
case .endAt:
|
case .endAt:
|
||||||
if theNode.keyArray.reversed()[0] != keyAtCursor { return }
|
guard theNode.keyArray.last == keyAtCursor else { return }
|
||||||
|
switch theNode.spanLength {
|
||||||
|
case 2... where theAnchor.spanIndex + theAnchor.spanLength - 1 != location: return
|
||||||
|
default: break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
result.append(.init(keyArray: theNode.keyArray, value: gram.value))
|
result.append(.init(keyArray: theNode.keyArray, value: gram.value))
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,11 +66,13 @@ extension Megrez.Compositor {
|
||||||
/// - Returns: 一個包含所有與該位置重疊的節點的陣列。
|
/// - Returns: 一個包含所有與該位置重疊的節點的陣列。
|
||||||
func fetchOverlappingNodes(at givenLocation: Int) -> [NodeAnchor] {
|
func fetchOverlappingNodes(at givenLocation: Int) -> [NodeAnchor] {
|
||||||
var results = [NodeAnchor]()
|
var results = [NodeAnchor]()
|
||||||
guard !spans.isEmpty, givenLocation < spans.count else { return results }
|
let givenLocation = max(0, min(givenLocation, keys.count - 1))
|
||||||
|
guard !spans.isEmpty else { return results }
|
||||||
|
|
||||||
// 先獲取該位置的所有單字節點。
|
// 先獲取該位置的所有單字節點。
|
||||||
(1 ... max(spans[givenLocation].maxLength, 1)).forEach { theSpanLength in
|
(1 ... max(spans[givenLocation].maxLength, 1)).forEach { theSpanLength in
|
||||||
guard let node = spans[givenLocation][theSpanLength] else { return }
|
guard let node = spans[givenLocation][theSpanLength] else { return }
|
||||||
|
guard !node.keyArray.joined().isEmpty else { return }
|
||||||
results.append(.init(node: node, spanIndex: givenLocation))
|
results.append(.init(node: node, spanIndex: givenLocation))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,6 +83,7 @@ extension Megrez.Compositor {
|
||||||
guard A <= B else { return }
|
guard A <= B else { return }
|
||||||
(A ... B).forEach { theLength in
|
(A ... B).forEach { theLength in
|
||||||
guard let node = spans[theLocation][theLength] else { return }
|
guard let node = spans[theLocation][theLength] else { return }
|
||||||
|
guard !node.keyArray.joined().isEmpty else { return }
|
||||||
results.append(.init(node: node, spanIndex: theLocation))
|
results.append(.init(node: node, spanIndex: theLocation))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -173,6 +173,7 @@ nian2zhong1 年中 -11.373044
|
||||||
gao1ke1ji4 高科技 -9.842421
|
gao1ke1ji4 高科技 -9.842421
|
||||||
zhe4yang4 這樣 -6.000000 // Non-LibTaBE
|
zhe4yang4 這樣 -6.000000 // Non-LibTaBE
|
||||||
ni3zhe4 你這 -9.000000 // Non-LibTaBE
|
ni3zhe4 你這 -9.000000 // Non-LibTaBE
|
||||||
|
ke1ke1 顆顆 -8.000000 // Non-LibTaBE
|
||||||
jiao4 教 -3.676169
|
jiao4 教 -3.676169
|
||||||
jiao4 較 -3.24869962
|
jiao4 較 -3.24869962
|
||||||
jiao4yu4 教育 -3.32220565
|
jiao4yu4 教育 -3.32220565
|
||||||
|
|
|
@ -559,4 +559,25 @@ final class MegrezTests: XCTestCase {
|
||||||
let resultB = compositorB.walk().walkedNodes
|
let resultB = compositorB.walk().walkedNodes
|
||||||
XCTAssertEqual(resultA, resultB)
|
XCTAssertEqual(resultA, resultB)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func test22_Compositor_SanitizingNodeCrossing() throws {
|
||||||
|
let theLM = SimpleLM(input: strSampleData)
|
||||||
|
let rawReadings = "ke1 ke1"
|
||||||
|
var compositor = Megrez.Compositor(with: theLM)
|
||||||
|
rawReadings.split(separator: " ").forEach { key in
|
||||||
|
compositor.insertKey(key.description)
|
||||||
|
}
|
||||||
|
var a = compositor.fetchCandidates(at: 1, filter: .beginAt).map(\.keyArray.count).max() ?? 0
|
||||||
|
var b = compositor.fetchCandidates(at: 1, filter: .endAt).map(\.keyArray.count).max() ?? 0
|
||||||
|
var c = compositor.fetchCandidates(at: 0, filter: .beginAt).map(\.keyArray.count).max() ?? 0
|
||||||
|
var d = compositor.fetchCandidates(at: 2, filter: .endAt).map(\.keyArray.count).max() ?? 0
|
||||||
|
XCTAssertEqual("\(a) \(b) \(c) \(d)", "1 1 2 2")
|
||||||
|
compositor.cursor = compositor.length
|
||||||
|
compositor.insertKey("jin1")
|
||||||
|
a = compositor.fetchCandidates(at: 1, filter: .beginAt).map(\.keyArray.count).max() ?? 0
|
||||||
|
b = compositor.fetchCandidates(at: 1, filter: .endAt).map(\.keyArray.count).max() ?? 0
|
||||||
|
c = compositor.fetchCandidates(at: 0, filter: .beginAt).map(\.keyArray.count).max() ?? 0
|
||||||
|
d = compositor.fetchCandidates(at: 2, filter: .endAt).map(\.keyArray.count).max() ?? 0
|
||||||
|
XCTAssertEqual("\(a) \(b) \(c) \(d)", "1 1 2 2")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue