vChewing-macOS/Packages/vChewing_LangModelAssembly/Sources/LangModelAssembly/LMConsolidator.swift

156 lines
6.6 KiB
Swift
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// (c) 2021 and onwards The vChewing Project (MIT-NTL License).
// ====================
// This code is released under the MIT license (SPDX-License-Identifier: MIT)
// ... with NTL restriction stating that:
// No trademark license is granted to use the trade names, trademarks, service
// marks, or product names of Contributor, except as required to fulfill notice
// requirements defined in MIT License.
import Foundation
import LineReader
import Shared
public extension vChewingLM {
  /// Helpers that verify and normalize the textual format of user language model data,
  /// either in memory or directly on disk.
  enum LMConsolidator {
    /// Marker line that a consolidated file carries as its very first line.
    public static let kPragmaHeader = "# 𝙵𝙾𝚁𝙼𝙰𝚃 𝚘𝚛𝚐.𝚊𝚝𝚎𝚕𝚒𝚎𝚛𝙸𝚗𝚖𝚞.𝚟𝚌𝚑𝚎𝚠𝚒𝚗𝚐.𝚞𝚜𝚎𝚛𝙻𝚊𝚗𝚐𝚞𝚊𝚐𝚎𝙼𝚘𝚍𝚎𝚕𝙳𝚊𝚝𝚊.𝚏𝚘𝚛𝚖𝚊𝚝𝚝𝚎𝚍"

    /// Checks whether the first line of the file at `path` equals `kPragmaHeader`.
    ///
    /// - Parameter path: File system path of the file to inspect.
    /// - Returns: `true` only when the file exists, can be opened for reading, and its first
    ///   line is exactly the pragma header; `false` in every other case.
    public static func checkPragma(path: String) -> Bool {
      guard FileManager.default.fileExists(atPath: path) else {
        vCLog("Header Verification Failed: File Missing.")
        return false
      }
      do {
        guard let fileHandle = FileHandle(forReadingAtPath: path) else {
          throw FileErrors.fileHandleError("")
        }
        let lineReader = try LineReader(file: fileHandle)
        // Only the first line is of interest, so both branches leave the loop immediately.
        for strLine in lineReader {
          guard strLine == kPragmaHeader else {
            vCLog("Header Mismatch, Starting In-Place Consolidation.")
            return false
          }
          vCLog("Header Verification Succeeded: \(strLine).")
          return true
        }
      } catch {
        vCLog("Header Verification Failed: File Access Error.")
        return false
      }
      // The file exists but the reader produced no line at all, so there is no header.
      vCLog("Header Verification Failed: File Empty.")
      return false
    }

    /// Makes sure the file at `path` ends with a line feed (0x0A), appending one if necessary.
    ///
    /// Redundant trailing line feeds are not collapsed here; `consolidate()` takes care of
    /// duplicated newlines when it is run on the same data.
    ///
    /// - Parameter path: File system path of the file to fix.
    /// - Returns: `true` when the file already ended with a line feed, was empty, or was fixed
    ///   successfully; `false` when its attributes could not be read or it could not be opened
    ///   for updating.
    @discardableResult public static func fixEOF(path: String) -> Bool {
      let fileSize: UInt64
      do {
        let attributes = try FileManager.default.attributesOfItem(atPath: path)
        guard let value = attributes[FileAttributeKey.size] as? UInt64 else { return false }
        fileSize = value
      } catch {
        vCLog("EOF Fix Failed: File Missing at \(path).")
        return false
      }
      guard let writeFile = FileHandle(forUpdatingAtPath: path) else {
        vCLog("EOF Fix Failed: File Not Writable at \(path).")
        return false
      }
      defer { writeFile.closeFile() }
      // An empty file has no last byte to inspect, and `fileSize - 1` would trap on
      // unsigned underflow. There is nothing to terminate, so report success.
      guard fileSize > 0 else { return true }
      // Read just the final byte; afterwards the handle is positioned at the end of the
      // file, so a subsequent write appends.
      writeFile.seek(toFileOffset: fileSize - 1)
      if writeFile.readDataToEndOfFile().first != 0x0A {
        vCLog("EOF Missing Confirmed, Start Fixing.")
        writeFile.write(Data([0x0A]))
        vCLog("EOF Successfully Assured.")
      }
      return true
    }

    /// Normalizes the given text in place: unifies whitespace and line breaks, removes
    /// duplicated lines (keeping the last occurrence of each), and puts `kPragmaHeader`
    /// on the first line.
    ///
    /// - Parameters:
    ///   - text: The text to consolidate; it is overwritten with the consolidated result.
    ///   - shouldCheckPragma: When `true`, text that already starts with the pragma header
    ///     line is considered consolidated and is left untouched.
    public static func consolidate(text strProcessed: inout String, pragma shouldCheckPragma: Bool) {
      // The header consists of multi-byte characters, so compare it as a string prefix
      // rather than slicing the UTF-8 buffer with Character counts.
      if shouldCheckPragma, strProcessed.hasPrefix(kPragmaHeader + "\n") { return }

      // Step 1: Consolidating formats per line.
      // -------
      // Runs of ASCII space, CJK white space (U+3000), non-breaking space (U+00A0) and tab
      // become a single ASCII space. The code points are spelled out as escapes so that
      // look-alike invisible characters cannot silently change the pattern.
      strProcessed.regReplace(pattern: #"[ \x{3000}\x{A0}\t]+"#, replaceWith: " ")
      // Drop a space at the beginning / end.
      // NOTE(review): whether `^` and `$` match per line depends on the options used by
      // `regReplace`, which is defined elsewhere — confirm.
      strProcessed.regReplace(pattern: #"(^ | $)"#, replaceWith: "")
      // Drop spaces adjacent to a line break.
      strProcessed.regReplace(pattern: #"(\n | \n)"#, replaceWith: "\n")
      // CR & FF to LF, collapsing consecutive line breaks.
      strProcessed.regReplace(pattern: #"(\f+|\r+|\n+)+"#, replaceWith: "\n")
      // Step 2: Remove existing pragma header lines; one is added back at the end.
      // The header contains `.` characters, so it must be escaped to be matched literally.
      let escapedHeader = NSRegularExpression.escapedPattern(for: kPragmaHeader)
      strProcessed.regReplace(pattern: "^\(escapedHeader)$", replaceWith: "")
      // Trim a single remaining space at the very beginning and the very end of the text.
      if strProcessed.first == " " {
        strProcessed.removeFirst()
      }
      if strProcessed.last == " " {
        strProcessed.removeLast()
      }

      // Step 3: Deduplication.
      // The lines are reversed before deduplication and reversed back afterwards, so that
      // the LAST occurrence of a duplicated line is the one which survives.
      let lines = strProcessed.split(separator: "\n").map(String.init)
      let newestFirst = NSOrderedSet(array: lines.reversed()).array.compactMap { $0 as? String }
      var body = newestFirst.reversed().joined(separator: "\n")
      // Terminate the last line, unless there is no line at all (avoids a stray blank line
      // after the header for empty input).
      if !body.isEmpty { body += "\n" }

      // Step 4: Remove duplicated newlines.
      body.regReplace(pattern: #"\n+"#, replaceWith: "\n")

      // Step 5: Add pragma header back.
      strProcessed = kPragmaHeader + "\n" + body
    }

    /// Consolidates the file at `path` in place by reading it as UTF-8, running
    /// `consolidate(text:pragma:)` on its contents and writing the result back.
    ///
    /// - Parameters:
    ///   - path: File system path of the file to consolidate.
    ///   - shouldCheckPragma: When `true`, a file whose first line already is the pragma
    ///     header is left untouched.
    /// - Returns: `true` when the file was consolidated or needed no consolidation; `false`
    ///   when the file is missing or reading / writing it failed.
    @discardableResult public static func consolidate(path: String, pragma shouldCheckPragma: Bool) -> Bool {
      // The header is only inspected when the caller asked for it; this avoids needless
      // file access and misleading log lines otherwise.
      if shouldCheckPragma, checkPragma(path: path) { return true }
      guard FileManager.default.fileExists(atPath: path) else {
        vCLog("Consolidation Failed: File Missing at \(path).")
        return false
      }
      let urlPath = URL(fileURLWithPath: path)
      do {
        var strProcessed = try String(contentsOf: urlPath, encoding: .utf8)
        consolidate(text: &strProcessed, pragma: shouldCheckPragma)
        // Write consolidated file contents.
        try strProcessed.write(to: urlPath, atomically: false, encoding: .utf8)
      } catch {
        vCLog("Consolidation Failed w/ File: \(path), error: \(error)")
        return false
      }
      vCLog("Either Consolidation Successful Or No-Need-To-Consolidate.")
      return true
    }
  }
}