OpenCC // Swift Clang-Format.

This commit is contained in:
ShikiSuen 2022-04-03 12:50:51 +08:00
parent b0b4976142
commit f0bea85698
8 changed files with 408 additions and 401 deletions

View File

@ -3,83 +3,83 @@
import PackageDescription import PackageDescription
/// Paths (relative to the `copencc` target directory) that are passed to the
/// target's `exclude:` parameter.
let copenccExcludedPaths: [String] = [
  // Upstream benchmark and command-line tool directories.
  "src/benchmark",
  "src/tools",
  // Individual upstream .cpp files kept out of the build.
  "src/BinaryDictTest.cpp",
  "src/Config.cpp",
  "src/ConfigTest.cpp",
  "src/ConversionChainTest.cpp",
  "src/ConversionTest.cpp",
  "src/DartsDictTest.cpp",
  "src/DictGroupTest.cpp",
  "src/MarisaDictTest.cpp",
  "src/MaxMatchSegmentationTest.cpp",
  "src/PhraseExtractTest.cpp",
  "src/SerializedValuesTest.cpp",
  "src/SimpleConverter.cpp",
  "src/SimpleConverterTest.cpp",
  "src/TextDictTest.cpp",
  "src/UTF8StringSliceTest.cpp",
  "src/UTF8UtilTest.cpp",
  // Third-party dependency directories kept out of the build.
  "deps/google-benchmark",
  "deps/gtest-1.11.0",
  "deps/pybind11-2.5.0",
  "deps/rapidjson-1.1.0",
  "deps/tclap-1.2.2",
  // Headers and the Python binding source.
  "src/CmdLineOutput.hpp",
  "src/Config.hpp",
  "src/ConfigTestBase.hpp",
  "src/DictGroupTestBase.hpp",
  "src/SimpleConverter.hpp",
  "src/TestUtils.hpp",
  "src/TestUtilsUTF8.hpp",
  "src/TextDictTestBase.hpp",
  "src/py_opencc.cpp",
  // Non-source files (docs, CMake scripts, license texts).
  // NOTE(review): the original comment here was just "???" — presumably these
  // are listed to avoid SwiftPM "unhandled file" warnings; confirm.
  "src/README.md",
  "src/CMakeLists.txt",
  "deps/marisa-0.2.6/AUTHORS",
  "deps/marisa-0.2.6/CMakeLists.txt",
  "deps/marisa-0.2.6/COPYING.md",
  "deps/marisa-0.2.6/README.md",
]

let package = Package(
  name: "SwiftyOpenCC",
  products: [
    .library(name: "OpenCC", targets: ["OpenCC"])
  ],
  targets: [
    // Swift API layer; ships the "Dictionary" directory as a copied resource.
    .target(
      name: "OpenCC",
      dependencies: ["copencc"],
      resources: [.copy("Dictionary")]
    ),
    // Tests, with benchmark text and conversion test cases copied verbatim.
    .testTarget(
      name: "OpenCCTests",
      dependencies: ["OpenCC"],
      resources: [
        .copy("benchmark"),
        .copy("testcases"),
      ]
    ),
    // C++ target that the Swift layer depends on.
    .target(
      name: "copencc",
      exclude: copenccExcludedPaths,
      sources: [
        "source.cpp",
        "src",
        "deps/marisa-0.2.6",
      ],
      cxxSettings: [
        .headerSearchPath("src"),
        .headerSearchPath("deps/darts-clone"),
        .headerSearchPath("deps/marisa-0.2.6/include"),
        .headerSearchPath("deps/marisa-0.2.6/lib"),
        .define("ENABLE_DARTS"),
      ]
    ),
  ],
  cxxLanguageStandard: .cxx14
)

View File

@ -23,67 +23,67 @@ import copencc
/// during the course of a conversion. /// during the course of a conversion.
public class ChineseConverter {

  /// Bit flags selecting the conversion direction and regional standard.
  public struct Options: OptionSet {
    public let rawValue: Int

    public init(rawValue: Int) {
      self.rawValue = rawValue
    }

    /// Convert to Traditional Chinese. (default)
    public static let traditionalize = Options(rawValue: 1 << 0)
    /// Convert to Simplified Chinese.
    public static let simplify = Options(rawValue: 1 << 1)

    /// Use Taiwan standard.
    public static let twStandard = Options(rawValue: 1 << 5)
    /// Use HongKong standard.
    public static let hkStandard = Options(rawValue: 1 << 6)

    /// Taiwanese idiom conversion.
    public static let twIdiom = Options(rawValue: 1 << 10)

    /// Cancel Taiwan standard.
    public static let twStandardRev = Options(rawValue: 1 << 15)
    /// Cancel HongKong standard.
    public static let hkStandardRev = Options(rawValue: 1 << 16)
  }

  // The dictionary wrappers are stored alongside the native converter.
  // NOTE(review): presumably so they outlive `converter` — confirm against
  // the copencc ownership rules.
  private let seg: ConversionDictionary
  private let chain: [ConversionDictionary]
  private let converter: CCConverterRef

  /// Loads the segmentation dictionary and the conversion chain for `options`
  /// and creates the native converter from them.
  private init(loader: DictionaryLoader, options: Options) throws {
    let segmentation = try loader.segmentation(options: options)
    let dictionaries = try loader.conversionChain(options: options)
    var dictRefs = dictionaries.map { $0.dict }
    seg = segmentation
    chain = dictionaries
    converter = CCConverterCreate("SwiftyOpenCC", segmentation.dict, &dictRefs, dictRefs.count)
  }

  /// Creates a converter for the given conversion options, loading the
  /// dictionaries from this module's resource bundle.
  ///
  /// - Parameter options: The conversion options.
  /// - Throws: `ConversionError` if a dictionary cannot be loaded.
  public convenience init(options: Options) throws {
    try self.init(loader: DictionaryLoader(bundle: .module), options: options)
  }

  /// Converts `text` according to the options this converter was created with.
  ///
  /// - Parameter text: The string to convert.
  /// - Returns: The converted string.
  public func convert(_ text: String) -> String {
    let converted = CCConverterCreateConvertedStringFromString(converter, text)!
    // The native string must be destroyed on every exit path.
    defer { STLStringDestroy(converted) }
    return String(utf8String: STLStringGetUTF8String(converted))!
  }
}

View File

@ -10,21 +10,21 @@ import copencc
/// Wraps a native `CCDictRef` that is either loaded from a file path or built
/// from a group of other dictionaries.
class ConversionDictionary {

  /// The member dictionaries when built with `init(group:)`; empty otherwise.
  let group: [ConversionDictionary]

  /// The native dictionary handle.
  let dict: CCDictRef

  /// Loads a dictionary from `path` via `CCDictCreateMarisaWithPath`.
  ///
  /// - Throws: `ConversionError` built from `ccErrorno` when creation fails.
  init(path: String) throws {
    guard let handle = CCDictCreateMarisaWithPath(path) else {
      throw ConversionError(ccErrorno)
    }
    dict = handle
    group = []
  }

  /// Builds a dictionary from the native handles of `group`.
  init(group: [ConversionDictionary]) {
    var memberRefs = group.map { $0.dict }
    dict = CCDictCreateWithGroup(&memberRefs, memberRefs.count)
    self.group = group
  }
}

View File

@ -10,28 +10,28 @@ import copencc
/// Errors reported while loading conversion dictionaries.
public enum ConversionError: Error {
  case fileNotFound
  case invalidFormat
  case invalidTextDictionary
  case invalidUTF8
  case unknown

  /// Maps a native `CCErrorCode` to the matching case; any code without a
  /// dedicated case becomes `.unknown`.
  init(_ code: CCErrorCode) {
    switch code {
    case .fileNotFound: self = .fileNotFound
    case .invalidFormat: self = .invalidFormat
    case .invalidTextDictionary: self = .invalidTextDictionary
    case .invalidUTF8: self = .invalidUTF8
    default: self = .unknown
    }
  }
}

View File

@ -10,46 +10,50 @@ import copencc
extension ChineseConverter {

  /// Locates `.ocd2` dictionary files in a bundle and loads them through a
  /// shared cache keyed by file path.
  struct DictionaryLoader {

    private static let subdirectory = "Dictionary"
    private static let dictCache = WeakValueCache<String, ConversionDictionary>()

    private let bundle: Bundle

    init(bundle: Bundle) {
      self.bundle = bundle
    }

    /// Returns the dictionary called `name`, reusing a cached instance when
    /// one is available for the same path.
    ///
    /// - Throws: `ConversionError.fileNotFound` when the bundle has no such
    ///   resource, or whatever `ConversionDictionary(path:)` throws.
    func dict(_ name: ChineseConverter.DictionaryName) throws -> ConversionDictionary {
      let located = bundle.path(
        forResource: name.description,
        ofType: "ocd2",
        inDirectory: DictionaryLoader.subdirectory)
      guard let resourcePath = located else {
        throw ConversionError.fileNotFound
      }
      return try DictionaryLoader.dictCache.value(for: resourcePath) {
        try ConversionDictionary(path: resourcePath)
      }
    }
  }
}
extension ChineseConverter.DictionaryLoader {

  /// Loads the dictionary named by `options.segmentationDictName`.
  func segmentation(options: ChineseConverter.Options) throws -> ConversionDictionary {
    return try dict(options.segmentationDictName)
  }

  /// Loads one dictionary per step of `options.conversionChain`.
  ///
  /// An empty step is skipped, a single-name step loads that dictionary
  /// directly, and a multi-name step is wrapped in a grouped dictionary.
  func conversionChain(options: ChineseConverter.Options) throws -> [ConversionDictionary] {
    var loaded: [ConversionDictionary] = []
    for names in options.conversionChain {
      guard let first = names.first else { continue }
      if names.count == 1 {
        try loaded.append(dict(first))
      } else {
        let members = try names.map(dict)
        loaded.append(ConversionDictionary(group: members))
      }
    }
    return loaded
  }
}

View File

@ -9,98 +9,98 @@ import Foundation
extension ChineseConverter {

  /// Identifies a dictionary resource; `description` is the resource's base
  /// file name.
  enum DictionaryName: CustomStringConvertible {
    case hkVariants, hkVariantsRev, hkVariantsRevPhrases
    case jpVariants
    case stCharacters, stPhrases
    case tsCharacters, tsPhrases
    case twPhrases, twPhrasesRev
    case twVariants, twVariantsRev, twVariantsRevPhrases

    var description: String {
      switch self {
      case .hkVariants:
        return "HKVariants"
      case .hkVariantsRev:
        return "HKVariantsRev"
      case .hkVariantsRevPhrases:
        return "HKVariantsRevPhrases"
      case .jpVariants:
        return "JPVariants"
      case .stCharacters:
        return "STCharacters"
      case .stPhrases:
        return "STPhrases"
      case .tsCharacters:
        return "TSCharacters"
      case .tsPhrases:
        return "TSPhrases"
      case .twPhrases:
        return "TWPhrases"
      case .twPhrasesRev:
        return "TWPhrasesRev"
      case .twVariants:
        return "TWVariants"
      case .twVariantsRev:
        return "TWVariantsRev"
      case .twVariantsRevPhrases:
        return "TWVariantsRevPhrases"
      }
    }
  }
}
extension ChineseConverter.Options {

  /// The dictionary used for segmentation: the first matching flag in the
  /// priority order below wins, and `.stPhrases` is used when none is set.
  var segmentationDictName: ChineseConverter.DictionaryName {
    let priority: [(ChineseConverter.Options, ChineseConverter.DictionaryName)] = [
      (.traditionalize, .stPhrases),
      (.simplify, .tsPhrases),
      (.hkStandard, .hkVariants),
      (.twStandard, .twVariants),
      (.hkStandardRev, .hkVariantsRev),
      (.twStandardRev, .twVariantsRev),
    ]
    for (flag, name) in priority where contains(flag) {
      return name
    }
    return .stPhrases
  }

  /// The ordered conversion steps for these options. Each inner array is one
  /// step; a step with several names is a dictionary group.
  var conversionChain: [[ChineseConverter.DictionaryName]] {
    let wantsHK = contains(.hkStandard)
    let wantsTW = contains(.twStandard)
    let wantsIdiom = contains(.twIdiom)

    // `.traditionalize` takes precedence over `.simplify` when both are set.
    if contains(.traditionalize) {
      var steps: [[ChineseConverter.DictionaryName]] = [[.stPhrases, .stCharacters]]
      if wantsIdiom {
        steps.append([.twPhrases])
      }
      if wantsHK {
        steps.append([.hkVariants])
      } else if wantsTW {
        steps.append([.twVariants])
      }
      return steps
    }

    if contains(.simplify) {
      var steps: [[ChineseConverter.DictionaryName]] = []
      // Regional variants are reverted before the Traditional-to-Simplified step.
      if wantsHK {
        steps.append([.hkVariantsRevPhrases, .hkVariantsRev])
      } else if wantsTW {
        steps.append([.twVariantsRevPhrases, .twVariantsRev])
      }
      if wantsIdiom {
        steps.append([.twPhrasesRev])
      }
      steps.append([.tsPhrases, .tsCharacters])
      return steps
    }

    // No direction flag: apply at most one variant step.
    if wantsHK { return [[.hkVariants]] }
    if wantsTW { return [[.twVariants]] }
    if contains(.hkStandardRev) { return [[.hkVariantsRev]] }
    if contains(.twStandardRev) { return [[.twVariantsRev]] }

    // Nothing applicable was set: fall back to Simplified-to-Traditional.
    return [[.stPhrases, .stCharacters]]
  }
}

View File

@ -9,34 +9,34 @@ import Foundation
/// Holds a weak reference to an object, so a container can refer to the
/// object without keeping it alive.
class WeakBox<Value: AnyObject> {

  /// The boxed object, or `nil` once it has been deallocated.
  private(set) weak var value: Value?

  /// Boxes `value` without retaining it.
  init(_ value: Value) {
    self.value = value
  }
}
/// A keyed cache that holds its values weakly: an entry yields a value only
/// while something else keeps that value alive.
///
/// Every access to `storage` is made while holding `lock`. An entry whose
/// value has been deallocated stays in `storage` as an empty box until the
/// same key is stored again.
class WeakValueCache<Key: Hashable, Value: AnyObject> {

  private var storage: [Key: WeakBox<Value>] = [:]
  private let lock = NSLock()

  /// Returns the live cached value for `key`, or `nil` if there is none.
  func value(for key: Key) -> Value? {
    // Reads take the lock too: a Dictionary must not be read while another
    // thread is mutating it.
    lock.lock()
    defer { lock.unlock() }
    return storage[key]?.value
  }

  /// Returns the live cached value for `key`, creating and caching it with
  /// `make` when there is none.
  ///
  /// `make` runs while the cache lock is held, so it must not call back into
  /// the same cache (`NSLock` is not recursive).
  ///
  /// - Parameters:
  ///   - key: The cache key.
  ///   - make: Builds the value on a cache miss.
  /// - Returns: The cached or newly created value.
  /// - Throws: Rethrows any error thrown by `make`; nothing is cached then.
  func value(for key: Key, make: () throws -> Value) rethrows -> Value {
    lock.lock()
    defer { lock.unlock() }
    if let cached = storage[key]?.value {
      return cached
    }
    let fresh = try make()
    storage[key] = WeakBox(fresh)
    return fresh
  }
}

View File

@ -1,64 +1,67 @@
import XCTest import XCTest
@testable import OpenCC @testable import OpenCC
/// Conversion fixtures: the first element is the base name of the
/// `<name>.in` / `<name>.ans` files under "testcases", the second is the
/// option set the converter is created with for that fixture.
let testCases: [(String, ChineseConverter.Options)] = [
  // Plain Simplified <-> Traditional.
  ("s2t", [.traditionalize]),
  ("t2s", [.simplify]),
  // Hong Kong standard.
  ("s2hk", [.traditionalize, .hkStandard]),
  ("hk2s", [.simplify, .hkStandard]),
  // Taiwan standard.
  ("s2tw", [.traditionalize, .twStandard]),
  ("tw2s", [.simplify, .twStandard]),
  // Taiwan standard with idiom conversion.
  ("s2twp", [.traditionalize, .twStandard, .twIdiom]),
  ("tw2sp", [.simplify, .twStandard, .twIdiom]),
]
/// Conversion correctness and performance tests for `ChineseConverter`.
///
/// Missing fixtures and failed converter setup are reported as test failures
/// (via `XCTUnwrap` / thrown errors) instead of crashing the test process.
class OpenCCTests: XCTestCase {

  /// Creates a converter for `option`, propagating any creation error.
  func converter(option: ChineseConverter.Options) throws -> ChineseConverter {
    return try ChineseConverter(options: option)
  }

  /// Converts every `<name>.in` fixture and compares it with `<name>.ans`.
  func testConversion() throws {
    // Reads a fixture from the "testcases" resource directory.
    func fixture(name: String, ext: String) throws -> String {
      let url = try XCTUnwrap(
        Bundle.module.url(forResource: name, withExtension: ext, subdirectory: "testcases"),
        "Missing test fixture \(name).\(ext)")
      return try String(contentsOf: url)
    }

    for (name, opt) in testCases {
      let chineseConverter = try converter(option: opt)
      let input = try fixture(name: name, ext: "in")
      let expected = try fixture(name: name, ext: "ans")
      let converted = chineseConverter.convert(input)
      XCTAssertEqual(converted, expected, "Conversion \(name) fails")
    }
  }

  /// Measures converter creation when no converter is kept alive between
  /// iterations.
  func testConverterCreationPerformance() {
    let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
    measure {
      for _ in 0..<10 {
        // `measure` takes a non-throwing closure, so creation failures are
        // reported through an assertion rather than `try!`.
        XCTAssertNoThrow(try ChineseConverter(options: options))
      }
    }
  }

  /// Measures converter creation while `holder`, created with the same
  /// options, is alive for the whole measurement.
  func testDictionaryCache() throws {
    let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
    let holder = try ChineseConverter(options: options)
    measure {
      for _ in 0..<1_000 {
        XCTAssertNoThrow(try ChineseConverter(options: options))
      }
    }
    // Using `holder` after the measurement keeps it alive throughout.
    _ = holder.convert("foo")
  }

  /// Measures converting the bundled benchmark text.
  func testConversionPerformance() throws {
    let cov = try converter(option: [.traditionalize, .twStandard, .twIdiom])
    let url = try XCTUnwrap(
      Bundle.module.url(forResource: "zuozhuan", withExtension: "txt", subdirectory: "benchmark"),
      "Missing benchmark fixture zuozhuan.txt")
    // 1.9 MB, 624k word
    let str = try String(contentsOf: url)
    measure {
      _ = cov.convert(str)
    }
  }
}