From 9813dd19d2c8c41e85382a459973561501f707d4 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Thu, 12 May 2022 22:25:18 +0800 Subject: [PATCH] Megrez::Builder // Allow defining max span length on init. - Also use for loop in lieu of while loop in Megrez. --- .../Megrez/1_BlockReadingBuilder.swift | 27 +++++++++---------- .../LanguageParsers/Megrez/1_Walker.swift | 15 ++++++----- .../LanguageParsers/Megrez/2_Grid.swift | 25 +++++------------ 3 files changed, 27 insertions(+), 40 deletions(-) diff --git a/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift b/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift index 652ddc4c..d07d3af9 100644 --- a/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift +++ b/Source/Modules/LanguageParsers/Megrez/1_BlockReadingBuilder.swift @@ -25,15 +25,16 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. extension Megrez { public class BlockReadingBuilder { - let kMaximumBuildSpanLength = 10 // 規定最多可以組成的詞的字數上限為 10 + var mutMaximumBuildSpanLength = 10 var mutCursorIndex: Int = 0 var mutReadings: [String] = [] var mutGrid: Grid = .init() var mutLM: LanguageModel var mutJoinSeparator: String = "" - public init(lm: LanguageModel) { + public init(lm: LanguageModel, length: Int = 10) { mutLM = lm + mutMaximumBuildSpanLength = length } public func clear() { @@ -87,15 +88,13 @@ extension Megrez { return false } - var i = 0 - while i < count { - if mutCursorIndex != 0 { + for _ in 0.. 0 { mutCursorIndex -= 1 } mutReadings.removeFirst() mutGrid.shrinkGridByOneAt(location: 0) build() - i += 1 } return true @@ -113,15 +112,17 @@ extension Megrez { // if (mutLM == nil) { return } // 這個出不了 nil,所以註釋掉。 let itrBegin: Int = - (mutCursorIndex < kMaximumBuildSpanLength) ? 0 : mutCursorIndex - kMaximumBuildSpanLength - let itrEnd: Int = min(mutCursorIndex + kMaximumBuildSpanLength, mutReadings.count) + (mutCursorIndex < mutMaximumBuildSpanLength) ? 0 : mutCursorIndex - mutMaximumBuildSpanLength + let itrEnd: Int = min(mutCursorIndex + mutMaximumBuildSpanLength, mutReadings.count) - var p = itrBegin - while p < itrEnd { - var q = 1 - while q <= kMaximumBuildSpanLength, p + q <= itrEnd { + for p in itrBegin.. itrEnd { + break + } let strSlice = mutReadings[p..<(p + q)] let combinedReading: String = join(slice: strSlice, separator: mutJoinSeparator) + if !mutGrid.hasMatchedNode(location: p, spanningLength: q, key: combinedReading) { let unigrams: [Unigram] = mutLM.unigramsFor(key: combinedReading) if !unigrams.isEmpty { @@ -129,9 +130,7 @@ extension Megrez { mutGrid.insertNode(node: n, location: p, spanningLength: q) } } - q += 1 } - p += 1 } } diff --git a/Source/Modules/LanguageParsers/Megrez/1_Walker.swift b/Source/Modules/LanguageParsers/Megrez/1_Walker.swift index bf98aaea..d0f68b3a 100644 --- a/Source/Modules/LanguageParsers/Megrez/1_Walker.swift +++ b/Source/Modules/LanguageParsers/Megrez/1_Walker.swift @@ -50,20 +50,20 @@ extension Megrez { } } - // 只檢查前 X 個 NodeAnchor 是否有 node。 - // 這裡有 abs 是為了防止有白癡填負數。 - var border: Int = nodes.count - if nodesLimit > 0 { - border = min(nodes.count, abs(nodesLimit)) - } + for (i, n) in nodes.enumerated() { + // 只檢查前 X 個 NodeAnchor 是否有 node。 + // 這裡有 abs 是為了防止有白癡填負數。 + if abs(nodesLimit) > 0, i == abs(nodesLimit) - 1 { + break + } - for n in nodes[0..= mutSpans.count { let diff = location - mutSpans.count + 1 - var i = 0 - while i < diff { + for _ in 0.. 0, location < mutSpans.count { - var i = 0 - while i < location { + for i in 0.. [NodeAnchor] { var results: [NodeAnchor] = [] if !mutSpans.isEmpty, location <= mutSpans.count { - var i = 0 - while i < location { + for i in 0..= location { if let np = span.node(length: location - i) { @@ -101,7 +94,6 @@ extension Megrez { ) } } - i += 1 } } return results @@ -110,14 +102,11 @@ extension Megrez { public func nodesCrossingOrEndingAt(location: Int) -> [NodeAnchor] { var results: [NodeAnchor] = [] if !mutSpans.isEmpty, location <= mutSpans.count { - var i = 0 - while i < location { + for i in 0..= location { - var j = 1 - while j <= span.maximumLength { + for j in 1...span.maximumLength { if i + j < location { - j += 1 continue } if let np = span.node(length: j) { @@ -129,10 +118,8 @@ extension Megrez { ) ) } - j += 1 } } - i += 1 } } return results