Megrez::Builder // Allow defining the maximum span length on init.
- Also use for-in loops in lieu of while loops in Megrez.
This commit is contained in:
parent
47e15d9cbd
commit
9813dd19d2
|
@ -25,15 +25,16 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
extension Megrez {
|
extension Megrez {
|
||||||
public class BlockReadingBuilder {
|
public class BlockReadingBuilder {
|
||||||
let kMaximumBuildSpanLength = 10 // 規定最多可以組成的詞的字數上限為 10
|
var mutMaximumBuildSpanLength = 10
|
||||||
var mutCursorIndex: Int = 0
|
var mutCursorIndex: Int = 0
|
||||||
var mutReadings: [String] = []
|
var mutReadings: [String] = []
|
||||||
var mutGrid: Grid = .init()
|
var mutGrid: Grid = .init()
|
||||||
var mutLM: LanguageModel
|
var mutLM: LanguageModel
|
||||||
var mutJoinSeparator: String = ""
|
var mutJoinSeparator: String = ""
|
||||||
|
|
||||||
public init(lm: LanguageModel) {
|
public init(lm: LanguageModel, length: Int = 10) {
|
||||||
mutLM = lm
|
mutLM = lm
|
||||||
|
mutMaximumBuildSpanLength = length
|
||||||
}
|
}
|
||||||
|
|
||||||
public func clear() {
|
public func clear() {
|
||||||
|
@ -87,15 +88,13 @@ extension Megrez {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
var i = 0
|
for _ in 0..<count {
|
||||||
while i < count {
|
if mutCursorIndex > 0 {
|
||||||
if mutCursorIndex != 0 {
|
|
||||||
mutCursorIndex -= 1
|
mutCursorIndex -= 1
|
||||||
}
|
}
|
||||||
mutReadings.removeFirst()
|
mutReadings.removeFirst()
|
||||||
mutGrid.shrinkGridByOneAt(location: 0)
|
mutGrid.shrinkGridByOneAt(location: 0)
|
||||||
build()
|
build()
|
||||||
i += 1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
|
@ -113,15 +112,17 @@ extension Megrez {
|
||||||
// if (mutLM == nil) { return } // 這個出不了 nil,所以註釋掉。
|
// if (mutLM == nil) { return } // 這個出不了 nil,所以註釋掉。
|
||||||
|
|
||||||
let itrBegin: Int =
|
let itrBegin: Int =
|
||||||
(mutCursorIndex < kMaximumBuildSpanLength) ? 0 : mutCursorIndex - kMaximumBuildSpanLength
|
(mutCursorIndex < mutMaximumBuildSpanLength) ? 0 : mutCursorIndex - mutMaximumBuildSpanLength
|
||||||
let itrEnd: Int = min(mutCursorIndex + kMaximumBuildSpanLength, mutReadings.count)
|
let itrEnd: Int = min(mutCursorIndex + mutMaximumBuildSpanLength, mutReadings.count)
|
||||||
|
|
||||||
var p = itrBegin
|
for p in itrBegin..<itrEnd {
|
||||||
while p < itrEnd {
|
for q in 1..<mutMaximumBuildSpanLength {
|
||||||
var q = 1
|
if p + q > itrEnd {
|
||||||
while q <= kMaximumBuildSpanLength, p + q <= itrEnd {
|
break
|
||||||
|
}
|
||||||
let strSlice = mutReadings[p..<(p + q)]
|
let strSlice = mutReadings[p..<(p + q)]
|
||||||
let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator)
|
let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator)
|
||||||
|
|
||||||
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
|
if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) {
|
||||||
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
|
let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading)
|
||||||
if !unigrams.isEmpty {
|
if !unigrams.isEmpty {
|
||||||
|
@ -129,9 +130,7 @@ extension Megrez {
|
||||||
mutGrid.insertNode(node: n, location: p, spanningLength: q)
|
mutGrid.insertNode(node: n, location: p, spanningLength: q)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
q += 1
|
|
||||||
}
|
}
|
||||||
p += 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,20 +50,20 @@ extension Megrez {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 只檢查前 X 個 NodeAnchor 是否有 node。
|
for (i, n) in nodes.enumerated() {
|
||||||
// 這裡有 abs 是為了防止有白癡填負數。
|
// 只檢查前 X 個 NodeAnchor 是否有 node。
|
||||||
var border: Int = nodes.count
|
// 這裡有 abs 是為了防止有白癡填負數。
|
||||||
if nodesLimit > 0 {
|
if abs(nodesLimit) > 0, i == abs(nodesLimit) - 1 {
|
||||||
border = min(nodes.count, abs(nodesLimit))
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
for n in nodes[0..<border] {
|
|
||||||
var n = n
|
var n = n
|
||||||
guard let nNode = n.node else {
|
guard let nNode = n.node else {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
n.accumulatedScore = accumulatedScore + nNode.score()
|
n.accumulatedScore = accumulatedScore + nNode.score()
|
||||||
|
|
||||||
// 利用 Spanning Length 來決定權重。
|
// 利用 Spanning Length 來決定權重。
|
||||||
// 這樣一來,例:「再見」比「在」與「見」的權重更高。
|
// 這樣一來,例:「再見」比「在」與「見」的權重更高。
|
||||||
if balanced {
|
if balanced {
|
||||||
|
@ -75,6 +75,7 @@ extension Megrez {
|
||||||
at: location - n.spanningLength,
|
at: location - n.spanningLength,
|
||||||
score: n.accumulatedScore
|
score: n.accumulatedScore
|
||||||
)
|
)
|
||||||
|
|
||||||
path.insert(n, at: 0)
|
path.insert(n, at: 0)
|
||||||
|
|
||||||
paths.append(path)
|
paths.append(path)
|
||||||
|
|
|
@ -38,10 +38,8 @@ extension Megrez {
|
||||||
public func insertNode(node: Node, location: Int, spanningLength: Int) {
|
public func insertNode(node: Node, location: Int, spanningLength: Int) {
|
||||||
if location >= mutSpans.count {
|
if location >= mutSpans.count {
|
||||||
let diff = location - mutSpans.count + 1
|
let diff = location - mutSpans.count + 1
|
||||||
var i = 0
|
for _ in 0..<diff {
|
||||||
while i < diff {
|
|
||||||
mutSpans.append(Span())
|
mutSpans.append(Span())
|
||||||
i += 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mutSpans[location].insert(node: node, length: spanningLength)
|
mutSpans[location].insert(node: node, length: spanningLength)
|
||||||
|
@ -59,11 +57,9 @@ extension Megrez {
|
||||||
public func expandGridByOneAt(location: Int) {
|
public func expandGridByOneAt(location: Int) {
|
||||||
mutSpans.append(Span())
|
mutSpans.append(Span())
|
||||||
if location > 0, location < mutSpans.count {
|
if location > 0, location < mutSpans.count {
|
||||||
var i = 0
|
for i in 0..<location {
|
||||||
while i < location {
|
|
||||||
// zaps overlapping spans
|
// zaps overlapping spans
|
||||||
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
|
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
|
||||||
i += 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -74,11 +70,9 @@ extension Megrez {
|
||||||
}
|
}
|
||||||
|
|
||||||
mutSpans.remove(at: location)
|
mutSpans.remove(at: location)
|
||||||
var i = 0
|
for i in 0..<location {
|
||||||
while i < location {
|
|
||||||
// zaps overlapping spans
|
// zaps overlapping spans
|
||||||
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
|
mutSpans[i].removeNodeOfLengthGreaterThan(location - i)
|
||||||
i += 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -87,8 +81,7 @@ extension Megrez {
|
||||||
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
|
public func nodesEndingAt(location: Int) -> [NodeAnchor] {
|
||||||
var results: [NodeAnchor] = []
|
var results: [NodeAnchor] = []
|
||||||
if !mutSpans.isEmpty, location <= mutSpans.count {
|
if !mutSpans.isEmpty, location <= mutSpans.count {
|
||||||
var i = 0
|
for i in 0..<location {
|
||||||
while i < location {
|
|
||||||
let span = mutSpans[i]
|
let span = mutSpans[i]
|
||||||
if i + span.maximumLength >= location {
|
if i + span.maximumLength >= location {
|
||||||
if let np = span.node(length: location - i) {
|
if let np = span.node(length: location - i) {
|
||||||
|
@ -101,7 +94,6 @@ extension Megrez {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
i += 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return results
|
return results
|
||||||
|
@ -110,14 +102,11 @@ extension Megrez {
|
||||||
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
|
public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] {
|
||||||
var results: [NodeAnchor] = []
|
var results: [NodeAnchor] = []
|
||||||
if !mutSpans.isEmpty, location <= mutSpans.count {
|
if !mutSpans.isEmpty, location <= mutSpans.count {
|
||||||
var i = 0
|
for i in 0..<location {
|
||||||
while i < location {
|
|
||||||
let span = mutSpans[i]
|
let span = mutSpans[i]
|
||||||
if i + span.maximumLength >= location {
|
if i + span.maximumLength >= location {
|
||||||
var j = 1
|
for j in 1...span.maximumLength {
|
||||||
while j <= span.maximumLength {
|
|
||||||
if i + j < location {
|
if i + j < location {
|
||||||
j += 1
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if let np = span.node(length: j) {
|
if let np = span.node(length: j) {
|
||||||
|
@ -129,10 +118,8 @@ extension Megrez {
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
j += 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
i += 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return results
|
return results
|
||||||
|
|
Loading…
Reference in New Issue