Megrez::Builder // Allow defining max span length on init.

- Also use `for`-`in` loops in lieu of `while` loops throughout Megrez.
This commit is contained in:
ShikiSuen 2022-05-12 22:25:18 +08:00
parent 47e15d9cbd
commit 9813dd19d2
3 changed files with 27 additions and 40 deletions

View File

@ -25,15 +25,16 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez { extension Megrez {
public class BlockReadingBuilder { public class BlockReadingBuilder {
let kMaximumBuildSpanLength = 10 // 10 var mutMaximumBuildSpanLength = 10
var mutCursorIndex: Int = 0 var mutCursorIndex: Int = 0
var mutReadings: [String] = [] var mutReadings: [String] = []
var mutGrid: Grid = .init() var mutGrid: Grid = .init()
var mutLM: LanguageModel var mutLM: LanguageModel
var mutJoinSeparator: String = "" var mutJoinSeparator: String = ""
public init(lm: LanguageModel) { public init(lm: LanguageModel, length: Int = 10) {
mutLM = lm mutLM = lm
mutMaximumBuildSpanLength = length
} }
public func clear() { public func clear() {
@ -87,15 +88,13 @@ extension Megrez {
return false return false
} }
var i = 0 for _ in 0..<count {
while i < count { if mutCursorIndex > 0 {
if mutCursorIndex != 0 {
mutCursorIndex -= 1 mutCursorIndex -= 1
} }
mutReadings.removeFirst() mutReadings.removeFirst()
mutGrid.shrinkGridByOneAt(location: 0) mutGrid.shrinkGridByOneAt(location: 0)
build() build()
i += 1
} }
return true return true
@ -113,15 +112,17 @@ extension Megrez {
// if (mutLM == nil) { return } // nil // if (mutLM == nil) { return } // nil
let itrBegin: Int = let itrBegin: Int =
(mutCursorIndex < kMaximumBuildSpanLength) ? 0 : mutCursorIndex - kMaximumBuildSpanLength (mutCursorIndex < mutMaximumBuildSpanLength) ? 0 : mutCursorIndex - mutMaximumBuildSpanLength
let itrEnd: Int = min(mutCursorIndex + kMaximumBuildSpanLength, mutReadings.count) let itrEnd: Int = min(mutCursorIndex + mutMaximumBuildSpanLength, mutReadings.count)
var p = itrBegin for p in itrBegin..<itrEnd {
while p < itrEnd { for q in 1..<mutMaximumBuildSpanLength {
var q = 1 if p + q > itrEnd {
while q <= kMaximumBuildSpanLength, p + q <= itrEnd { break
}
let strSlice = mutReadings[p..<(p + q)] let strSlice = mutReadings[p..<(p + q)]
let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator) let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator)
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading) let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
if !unigrams.isEmpty { if !unigrams.isEmpty {
@ -129,9 +130,7 @@ extension Megrez {
mutGrid.insertNode(node: n, location: p, spanningLength: q) mutGrid.insertNode(node: n, location: p, spanningLength: q)
} }
} }
q += 1
} }
p += 1
} }
} }

View File

@ -50,20 +50,20 @@ extension Megrez {
} }
} }
// X NodeAnchor node for (i, n) in nodes.enumerated() {
// abs // X NodeAnchor node
var border: Int = nodes.count // abs
if nodesLimit > 0 { if abs(nodesLimit) > 0, i == abs(nodesLimit) - 1 {
border = min(nodes.count, abs(nodesLimit)) break
} }
for n in nodes[0..<border] {
var n = n var n = n
guard let nNode = n.node else { guard let nNode = n.node else {
continue continue
} }
n.accumulatedScore = accumulatedScore + nNode.score() n.accumulatedScore = accumulatedScore + nNode.score()
// Spanning Length // Spanning Length
// //
if balanced { if balanced {
@ -75,6 +75,7 @@ extension Megrez {
at: location - n.spanningLength, at: location - n.spanningLength,
score: n.accumulatedScore score: n.accumulatedScore
) )
path.insert(n, at: 0) path.insert(n, at: 0)
paths.append(path) paths.append(path)

View File

@ -38,10 +38,8 @@ extension Megrez {
public func insertNode(node: Node, location: Int, spanningLength: Int) { public func insertNode(node: Node, location: Int, spanningLength: Int) {
if location >= mutSpans.count { if location >= mutSpans.count {
let diff = location - mutSpans.count + 1 let diff = location - mutSpans.count + 1
var i = 0 for _ in 0..<diff {
while i < diff {
mutSpans.append(Span()) mutSpans.append(Span())
i += 1
} }
} }
mutSpans[location].insert(node: node, length: spanningLength) mutSpans[location].insert(node: node, length: spanningLength)
@ -59,11 +57,9 @@ extension Megrez {
public func expandGridByOneAt(location: Int) { public func expandGridByOneAt(location: Int) {
mutSpans.append(Span()) mutSpans.append(Span())
if location > 0, location < mutSpans.count { if location > 0, location < mutSpans.count {
var i = 0 for i in 0..<location {
while i < location {
// zaps overlapping spans // zaps overlapping spans
mutSpans[i].removeNodeOfLengthGreaterThan(location - i) mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
i += 1
} }
} }
} }
@ -74,11 +70,9 @@ extension Megrez {
} }
mutSpans.remove(at: location) mutSpans.remove(at: location)
var i = 0 for i in 0..<location {
while i < location {
// zaps overlapping spans // zaps overlapping spans
mutSpans[i].removeNodeOfLengthGreaterThan(location - i) mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
i += 1
} }
} }
@ -87,8 +81,7 @@ extension Megrez {
public func nodesEndingAt(location: Int) -> [NodeAnchor] { public func nodesEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = [] var results: [NodeAnchor] = []
if !mutSpans.isEmpty, location <= mutSpans.count { if !mutSpans.isEmpty, location <= mutSpans.count {
var i = 0 for i in 0..<location {
while i < location {
let span = mutSpans[i] let span = mutSpans[i]
if i + span.maximumLength >= location { if i + span.maximumLength >= location {
if let np = span.node(length: location - i) { if let np = span.node(length: location - i) {
@ -101,7 +94,6 @@ extension Megrez {
) )
} }
} }
i += 1
} }
} }
return results return results
@ -110,14 +102,11 @@ extension Megrez {
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] { public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = [] var results: [NodeAnchor] = []
if !mutSpans.isEmpty, location <= mutSpans.count { if !mutSpans.isEmpty, location <= mutSpans.count {
var i = 0 for i in 0..<location {
while i < location {
let span = mutSpans[i] let span = mutSpans[i]
if i + span.maximumLength >= location { if i + span.maximumLength >= location {
var j = 1 for j in 1...span.maximumLength {
while j <= span.maximumLength {
if i + j < location { if i + j < location {
j += 1
continue continue
} }
if let np = span.node(length: j) { if let np = span.node(length: j) {
@ -129,10 +118,8 @@ extension Megrez {
) )
) )
} }
j += 1
} }
} }
i += 1
} }
} }
return results return results