Megrez::Builder // Allow defining max span length on init.
- Also use for loops in lieu of while loops in Megrez.
This commit is contained in:
parent
47e15d9cbd
commit
9813dd19d2
|
@ -25,15 +25,16 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|||
|
||||
extension Megrez {
|
||||
public class BlockReadingBuilder {
|
||||
let kMaximumBuildSpanLength = 10 // 規定最多可以組成的詞的字數上限為 10
|
||||
var mutMaximumBuildSpanLength = 10
|
||||
var mutCursorIndex: Int = 0
|
||||
var mutReadings: [String] = []
|
||||
var mutGrid: Grid = .init()
|
||||
var mutLM: LanguageModel
|
||||
var mutJoinSeparator: String = ""
|
||||
|
||||
public init(lm: LanguageModel) {
|
||||
public init(lm: LanguageModel, length: Int = 10) {
|
||||
mutLM = lm
|
||||
mutMaximumBuildSpanLength = length
|
||||
}
|
||||
|
||||
public func clear() {
|
||||
|
@ -87,15 +88,13 @@ extension Megrez {
|
|||
return false
|
||||
}
|
||||
|
||||
var i = 0
|
||||
while i < count {
|
||||
if mutCursorIndex != 0 {
|
||||
for _ in 0..<count {
|
||||
if mutCursorIndex > 0 {
|
||||
mutCursorIndex -= 1
|
||||
}
|
||||
mutReadings.removeFirst()
|
||||
mutGrid.shrinkGridByOneAt(location: 0)
|
||||
build()
|
||||
i += 1
|
||||
}
|
||||
|
||||
return true
|
||||
|
@ -113,15 +112,17 @@ extension Megrez {
|
|||
// if (mutLM == nil) { return } // 這個出不了 nil,所以註釋掉。
|
||||
|
||||
let itrBegin: Int =
|
||||
(mutCursorIndex < kMaximumBuildSpanLength) ? 0 : mutCursorIndex - kMaximumBuildSpanLength
|
||||
let itrEnd: Int = min(mutCursorIndex + kMaximumBuildSpanLength, mutReadings.count)
|
||||
(mutCursorIndex < mutMaximumBuildSpanLength) ? 0 : mutCursorIndex - mutMaximumBuildSpanLength
|
||||
let itrEnd: Int = min(mutCursorIndex + mutMaximumBuildSpanLength, mutReadings.count)
|
||||
|
||||
var p = itrBegin
|
||||
while p < itrEnd {
|
||||
var q = 1
|
||||
while q <= kMaximumBuildSpanLength, p + q <= itrEnd {
|
||||
for p in itrBegin..<itrEnd {
|
||||
for q in 1..<mutMaximumBuildSpanLength {
|
||||
if p + q > itrEnd {
|
||||
break
|
||||
}
|
||||
let strSlice = mutReadings[p..<(p + q)]
|
||||
let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator)
|
||||
|
||||
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
|
||||
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
|
||||
if !unigrams.isEmpty {
|
||||
|
@ -129,9 +130,7 @@ extension Megrez {
|
|||
mutGrid.insertNode(node: n, location: p, spanningLength: q)
|
||||
}
|
||||
}
|
||||
q += 1
|
||||
}
|
||||
p += 1
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -50,20 +50,20 @@ extension Megrez {
|
|||
}
|
||||
}
|
||||
|
||||
// 只檢查前 X 個 NodeAnchor 是否有 node。
|
||||
// 這裡有 abs 是為了防止有白癡填負數。
|
||||
var border: Int = nodes.count
|
||||
if nodesLimit > 0 {
|
||||
border = min(nodes.count, abs(nodesLimit))
|
||||
}
|
||||
for (i, n) in nodes.enumerated() {
|
||||
// 只檢查前 X 個 NodeAnchor 是否有 node。
|
||||
// 這裡有 abs 是為了防止有白癡填負數。
|
||||
if abs(nodesLimit) > 0, i == abs(nodesLimit) - 1 {
|
||||
break
|
||||
}
|
||||
|
||||
for n in nodes[0..<border] {
|
||||
var n = n
|
||||
guard let nNode = n.node else {
|
||||
continue
|
||||
}
|
||||
|
||||
n.accumulatedScore = accumulatedScore + nNode.score()
|
||||
|
||||
// 利用 Spanning Length 來決定權重。
|
||||
// 這樣一來,例:「再見」比「在」與「見」的權重更高。
|
||||
if balanced {
|
||||
|
@ -75,6 +75,7 @@ extension Megrez {
|
|||
at: location - n.spanningLength,
|
||||
score: n.accumulatedScore
|
||||
)
|
||||
|
||||
path.insert(n, at: 0)
|
||||
|
||||
paths.append(path)
|
||||
|
|
|
@ -38,10 +38,8 @@ extension Megrez {
|
|||
public func insertNode(node: Node, location: Int, spanningLength: Int) {
|
||||
if location >= mutSpans.count {
|
||||
let diff = location - mutSpans.count + 1
|
||||
var i = 0
|
||||
while i < diff {
|
||||
for _ in 0..<diff {
|
||||
mutSpans.append(Span())
|
||||
i += 1
|
||||
}
|
||||
}
|
||||
mutSpans[location].insert(node: node, length: spanningLength)
|
||||
|
@ -59,11 +57,9 @@ extension Megrez {
|
|||
public func expandGridByOneAt(location: Int) {
|
||||
mutSpans.append(Span())
|
||||
if location > 0, location < mutSpans.count {
|
||||
var i = 0
|
||||
while i < location {
|
||||
for i in 0..<location {
|
||||
// zaps overlapping spans
|
||||
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
|
||||
i += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -74,11 +70,9 @@ extension Megrez {
|
|||
}
|
||||
|
||||
mutSpans.remove(at: location)
|
||||
var i = 0
|
||||
while i < location {
|
||||
for i in 0..<location {
|
||||
// zaps overlapping spans
|
||||
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
|
||||
i += 1
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -87,8 +81,7 @@ extension Megrez {
|
|||
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
|
||||
var results: [NodeAnchor] = []
|
||||
if !mutSpans.isEmpty, location <= mutSpans.count {
|
||||
var i = 0
|
||||
while i < location {
|
||||
for i in 0..<location {
|
||||
let span = mutSpans[i]
|
||||
if i + span.maximumLength >= location {
|
||||
if let np = span.node(length: location - i) {
|
||||
|
@ -101,7 +94,6 @@ extension Megrez {
|
|||
)
|
||||
}
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
}
|
||||
return results
|
||||
|
@ -110,14 +102,11 @@ extension Megrez {
|
|||
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
|
||||
var results: [NodeAnchor] = []
|
||||
if !mutSpans.isEmpty, location <= mutSpans.count {
|
||||
var i = 0
|
||||
while i < location {
|
||||
for i in 0..<location {
|
||||
let span = mutSpans[i]
|
||||
if i + span.maximumLength >= location {
|
||||
var j = 1
|
||||
while j <= span.maximumLength {
|
||||
for j in 1...span.maximumLength {
|
||||
if i + j < location {
|
||||
j += 1
|
||||
continue
|
||||
}
|
||||
if let np = span.node(length: j) {
|
||||
|
@ -129,10 +118,8 @@ extension Megrez {
|
|||
)
|
||||
)
|
||||
}
|
||||
j += 1
|
||||
}
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
}
|
||||
return results
|
||||
|
|
Loading…
Reference in New Issue