Megrez::Builder // Allow defining max span length on init.

- Also use for-in loops in lieu of while loops throughout Megrez.
This commit is contained in:
ShikiSuen 2022-05-12 22:25:18 +08:00
parent 47e15d9cbd
commit 9813dd19d2
3 changed files with 27 additions and 40 deletions

View File

@ -25,15 +25,16 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
extension Megrez {
public class BlockReadingBuilder {
let kMaximumBuildSpanLength = 10 // 10
var mutMaximumBuildSpanLength = 10
var mutCursorIndex: Int = 0
var mutReadings: [String] = []
var mutGrid: Grid = .init()
var mutLM: LanguageModel
var mutJoinSeparator: String = ""
public init(lm: LanguageModel) {
public init(lm: LanguageModel, length: Int = 10) {
mutLM = lm
mutMaximumBuildSpanLength = length
}
public func clear() {
@ -87,15 +88,13 @@ extension Megrez {
return false
}
var i = 0
while i < count {
if mutCursorIndex != 0 {
for _ in 0..<count {
if mutCursorIndex > 0 {
mutCursorIndex -= 1
}
mutReadings.removeFirst()
mutGrid.shrinkGridByOneAt(location: 0)
build()
i += 1
}
return true
@ -113,15 +112,17 @@ extension Megrez {
// if (mutLM == nil) { return } // mutLM is declared non-optional and can never be nil, so this check stays commented out.
let itrBegin: Int =
(mutCursorIndex < kMaximumBuildSpanLength) ? 0 : mutCursorIndex - kMaximumBuildSpanLength
let itrEnd: Int = min(mutCursorIndex + kMaximumBuildSpanLength, mutReadings.count)
(mutCursorIndex < mutMaximumBuildSpanLength) ? 0 : mutCursorIndex - mutMaximumBuildSpanLength
let itrEnd: Int = min(mutCursorIndex + mutMaximumBuildSpanLength, mutReadings.count)
var p = itrBegin
while p < itrEnd {
var q = 1
while q <= kMaximumBuildSpanLength, p + q <= itrEnd {
for p in itrBegin..<itrEnd {
for q in 1..<mutMaximumBuildSpanLength {
if p + q > itrEnd {
break
}
let strSlice = mutReadings[p..<(p + q)]
let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator)
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
if !unigrams.isEmpty {
@ -129,9 +130,7 @@ extension Megrez {
mutGrid.insertNode(node: n, location: p, spanningLength: q)
}
}
q += 1
}
p += 1
}
}

View File

@ -50,20 +50,20 @@ extension Megrez {
}
}
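// Only inspect the leading X NodeAnchor entries for a node, where X comes from nodesLimit.
// abs() is applied to nodesLimit defensively, in case a negative value is passed in.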
var border: Int = nodes.count
if nodesLimit > 0 {
border = min(nodes.count, abs(nodesLimit))
}
for (i, n) in nodes.enumerated() {
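// Only inspect the leading X NodeAnchor entries for a node, where X comes from nodesLimit.
// abs() is applied to nodesLimit defensively, in case a negative value is passed in.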
if abs(nodesLimit) > 0, i == abs(nodesLimit) - 1 {
break
}
for n in nodes[0..<border] {
var n = n
guard let nNode = n.node else {
continue
}
n.accumulatedScore = accumulatedScore + nNode.score()
// Spanning Length
//
if balanced {
@ -75,6 +75,7 @@ extension Megrez {
at: location - n.spanningLength,
score: n.accumulatedScore
)
path.insert(n, at: 0)
paths.append(path)

View File

@ -38,10 +38,8 @@ extension Megrez {
public func insertNode(node: Node, location: Int, spanningLength: Int) {
if location >= mutSpans.count {
let diff = location - mutSpans.count + 1
var i = 0
while i < diff {
for _ in 0..<diff {
mutSpans.append(Span())
i += 1
}
}
mutSpans[location].insert(node: node, length: spanningLength)
@ -59,11 +57,9 @@ extension Megrez {
public func expandGridByOneAt(location: Int) {
mutSpans.append(Span())
if location > 0, location < mutSpans.count {
var i = 0
while i < location {
for i in 0..<location {
// zaps overlapping spans
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
i += 1
}
}
}
@ -74,11 +70,9 @@ extension Megrez {
}
mutSpans.remove(at: location)
var i = 0
while i < location {
for i in 0..<location {
// zaps overlapping spans
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
i += 1
}
}
@ -87,8 +81,7 @@ extension Megrez {
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = []
if !mutSpans.isEmpty, location <= mutSpans.count {
var i = 0
while i < location {
for i in 0..<location {
let span = mutSpans[i]
if i + span.maximumLength >= location {
if let np = span.node(length: location - i) {
@ -101,7 +94,6 @@ extension Megrez {
)
}
}
i += 1
}
}
return results
@ -110,14 +102,11 @@ extension Megrez {
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
var results: [NodeAnchor] = []
if !mutSpans.isEmpty, location <= mutSpans.count {
var i = 0
while i < location {
for i in 0..<location {
let span = mutSpans[i]
if i + span.maximumLength >= location {
var j = 1
while j <= span.maximumLength {
for j in 1...span.maximumLength {
if i + j < location {
j += 1
continue
}
if let np = span.node(length: j) {
@ -129,10 +118,8 @@ extension Megrez {
)
)
}
j += 1
}
}
i += 1
}
}
return results