OpenCC // Swift Clang-Format.

This commit is contained in:
ShikiSuen 2022-04-03 12:50:51 +08:00
parent 8d68d9f380
commit 63c6a3e4be
8 changed files with 408 additions and 401 deletions

View File

@ -3,83 +3,83 @@
import PackageDescription import PackageDescription
/// Paths under the `copencc` target directory that are left out of the build.
let copenccExcludedPaths: [String] = [
  "src/benchmark",
  "src/tools",
  "src/BinaryDictTest.cpp",
  "src/Config.cpp",
  "src/ConfigTest.cpp",
  "src/ConversionChainTest.cpp",
  "src/ConversionTest.cpp",
  "src/DartsDictTest.cpp",
  "src/DictGroupTest.cpp",
  "src/MarisaDictTest.cpp",
  "src/MaxMatchSegmentationTest.cpp",
  "src/PhraseExtractTest.cpp",
  "src/SerializedValuesTest.cpp",
  "src/SimpleConverter.cpp",
  "src/SimpleConverterTest.cpp",
  "src/TextDictTest.cpp",
  "src/UTF8StringSliceTest.cpp",
  "src/UTF8UtilTest.cpp",
  "deps/google-benchmark",
  "deps/gtest-1.11.0",
  "deps/pybind11-2.5.0",
  "deps/rapidjson-1.1.0",
  "deps/tclap-1.2.2",
  "src/CmdLineOutput.hpp",
  "src/Config.hpp",
  "src/ConfigTestBase.hpp",
  "src/DictGroupTestBase.hpp",
  "src/SimpleConverter.hpp",
  "src/TestUtils.hpp",
  "src/TestUtilsUTF8.hpp",
  "src/TextDictTestBase.hpp",
  "src/py_opencc.cpp",
  // Non-source files, judging by their names (the original marker here read "???").
  "src/README.md",
  "src/CMakeLists.txt",
  "deps/marisa-0.2.6/AUTHORS",
  "deps/marisa-0.2.6/CMakeLists.txt",
  "deps/marisa-0.2.6/COPYING.md",
  "deps/marisa-0.2.6/README.md",
]

/// The C/C++ target that the Swift `OpenCC` target depends on.
let copenccTarget: Target = .target(
  name: "copencc",
  exclude: copenccExcludedPaths,
  sources: [
    "source.cpp",
    "src",
    "deps/marisa-0.2.6",
  ],
  cxxSettings: [
    .headerSearchPath("src"),
    .headerSearchPath("deps/darts-clone"),
    .headerSearchPath("deps/marisa-0.2.6/include"),
    .headerSearchPath("deps/marisa-0.2.6/lib"),
    .define("ENABLE_DARTS"),
  ])

/// Package manifest: one library product (`OpenCC`), its test target, and the
/// `copencc` target it is built on.
let package = Package(
  name: "SwiftyOpenCC",
  products: [
    .library(name: "OpenCC", targets: ["OpenCC"])
  ],
  targets: [
    .target(
      name: "OpenCC",
      dependencies: ["copencc"],
      resources: [
        .copy("Dictionary")
      ]),
    .testTarget(
      name: "OpenCCTests",
      dependencies: ["OpenCC"],
      resources: [
        .copy("benchmark"),
        .copy("testcases"),
      ]),
    copenccTarget,
  ],
  cxxLanguageStandard: .cxx14
)

View File

@ -22,68 +22,68 @@ import copencc
/// However, the string on which it is operating should not be mutated
/// during the course of a conversion.
public class ChineseConverter {

  /// Option flags accepted by `ChineseConverter.init(options:)`.
  public struct Options: OptionSet {

    public let rawValue: Int

    public init(rawValue: Int) {
      self.rawValue = rawValue
    }

    /// Convert to Traditional Chinese. (default)
    public static let traditionalize = Self(rawValue: 1 << 0)

    /// Convert to Simplified Chinese.
    public static let simplify = Self(rawValue: 1 << 1)

    /// Use Taiwan standard.
    public static let twStandard = Self(rawValue: 1 << 5)

    /// Use HongKong standard.
    public static let hkStandard = Self(rawValue: 1 << 6)

    /// Taiwanese idiom conversion.
    public static let twIdiom = Self(rawValue: 1 << 10)

    /// Cancel Taiwan standard.
    public static let twStandardRev = Self(rawValue: 1 << 15)

    /// Cancel HongKong standard.
    public static let hkStandardRev = Self(rawValue: 1 << 16)
  }

  // The dictionary objects are stored next to the native converter handle.
  // NOTE(review): presumably this keeps them alive for as long as `converter`
  // refers to their `dict` handles; confirm against the copencc API.
  private let seg: ConversionDictionary
  private let chain: [ConversionDictionary]

  // NOTE(review): no `deinit` is visible here, so nothing in this class
  // releases `converter`; confirm whether copencc expects an explicit destroy.
  private let converter: CCConverterRef

  /// Loads the segmentation dictionary and the conversion chain selected by
  /// `options`, then creates the native converter from their raw handles.
  private init(loader: DictionaryLoader, options: Options) throws {
    let segmentation = try loader.segmentation(options: options)
    let dictionaries = try loader.conversionChain(options: options)
    var rawChain = dictionaries.map { $0.dict }
    seg = segmentation
    chain = dictionaries
    converter = CCConverterCreate("SwiftyOpenCC", segmentation.dict, &rawChain, rawChain.count)
  }

  /// Creates a converter configured with the given conversion options.
  ///
  /// Dictionaries are looked up in this module's resource bundle.
  ///
  /// - Parameter options: The converter's options.
  /// - Throws: `ConversionError` if a dictionary cannot be located or loaded.
  public convenience init(options: Options) throws {
    try self.init(loader: DictionaryLoader(bundle: .module), options: options)
  }

  /// Converts `text` according to the options this converter was created with.
  ///
  /// - Parameter text: The string to convert.
  /// - Returns: The converted string.
  public func convert(_ text: String) -> String {
    // Both force unwraps trap if the native call yields no string or the
    // result cannot be read back as UTF-8.
    let nativeString = CCConverterCreateConvertedStringFromString(converter, text)!
    // The native string is destroyed once its contents are copied into a Swift String.
    defer { STLStringDestroy(nativeString) }
    return String(utf8String: STLStringGetUTF8String(nativeString))!
  }
}

View File

@ -9,22 +9,22 @@ import Foundation
import copencc import copencc
/// Wraps a native `CCDictRef`, either loaded from a file or composed from
/// other dictionaries.
class ConversionDictionary {

  /// The dictionaries this one was composed from; empty for a file-backed dictionary.
  // NOTE(review): presumably retained so the members outlive the group handle
  // created from them; confirm against the copencc API.
  let group: [ConversionDictionary]

  /// The underlying native dictionary handle.
  let dict: CCDictRef

  /// Loads a dictionary from the file at `path`.
  ///
  /// - Throws: `ConversionError` built from `ccErrorno` when the native loader
  ///   returns no dictionary.
  init(path: String) throws {
    guard let loaded = CCDictCreateMarisaWithPath(path) else {
      throw ConversionError(ccErrorno)
    }
    dict = loaded
    group = []
  }

  /// Creates a dictionary that groups the given dictionaries.
  init(group: [ConversionDictionary]) {
    var members = group.map { $0.dict }
    self.dict = CCDictCreateWithGroup(&members, members.count)
    self.group = group
  }
}

View File

@ -9,29 +9,29 @@ import Foundation
import copencc import copencc
/// Errors reported while loading conversion dictionaries.
public enum ConversionError: Error {

  case fileNotFound

  case invalidFormat

  case invalidTextDictionary

  case invalidUTF8

  case unknown

  /// Maps a native `CCErrorCode` to the equally named case; every other code
  /// becomes `.unknown`.
  init(_ code: CCErrorCode) {
    switch code {
    case .fileNotFound: self = .fileNotFound
    case .invalidFormat: self = .invalidFormat
    case .invalidTextDictionary: self = .invalidTextDictionary
    case .invalidUTF8: self = .invalidUTF8
    default: self = .unknown
    }
  }
}

View File

@ -9,47 +9,51 @@ import Foundation
import copencc import copencc
extension ChineseConverter {

  /// Locates dictionary resources in a bundle and loads them through a shared cache.
  struct DictionaryLoader {

    /// Bundle subdirectory that is searched for dictionary files.
    private static let subdirectory = "Dictionary"

    /// Loaded dictionaries keyed by file path, shared by every loader.
    private static let dictCache = WeakValueCache<String, ConversionDictionary>()

    private let bundle: Bundle

    init(bundle: Bundle) {
      self.bundle = bundle
    }

    /// Returns the dictionary called `name`, loading it from the bundle when
    /// the cache has no entry for its path.
    ///
    /// - Throws: `ConversionError.fileNotFound` when the bundle has no matching
    ///   `.ocd2` resource, or whatever error `ConversionDictionary(path:)` throws.
    func dict(_ name: ChineseConverter.DictionaryName) throws -> ConversionDictionary {
      let located = bundle.path(
        forResource: name.description,
        ofType: "ocd2",
        inDirectory: Self.subdirectory)
      guard let path = located else {
        throw ConversionError.fileNotFound
      }
      return try Self.dictCache.value(for: path) { try ConversionDictionary(path: path) }
    }
  }
}
extension ChineseConverter.DictionaryLoader {

  /// Loads the segmentation dictionary selected by `options`.
  func segmentation(options: ChineseConverter.Options) throws -> ConversionDictionary {
    try dict(options.segmentationDictName)
  }

  /// Loads one dictionary per stage of `options.conversionChain`.
  ///
  /// A stage naming a single dictionary yields that dictionary, a stage naming
  /// several yields a group dictionary, and an empty stage is skipped.
  func conversionChain(options: ChineseConverter.Options) throws -> [ConversionDictionary] {
    var stages: [ConversionDictionary] = []
    for names in options.conversionChain {
      if names.isEmpty {
        continue
      }
      if names.count == 1 {
        let single = try dict(names[0])
        stages.append(single)
      } else {
        let members = try names.map(dict)
        stages.append(ConversionDictionary(group: members))
      }
    }
    return stages
  }
}

View File

@ -8,99 +8,99 @@
import Foundation import Foundation
extension ChineseConverter {

  /// Identifies a conversion dictionary; `description` is the spelling used
  /// when the dictionary is looked up as a resource.
  enum DictionaryName: CustomStringConvertible {

    case hkVariants
    case hkVariantsRev
    case hkVariantsRevPhrases
    case jpVariants
    case stCharacters
    case stPhrases
    case tsCharacters
    case tsPhrases
    case twPhrases
    case twPhrasesRev
    case twVariants
    case twVariantsRev
    case twVariantsRevPhrases

    var description: String {
      switch self {
      case .hkVariants:
        return "HKVariants"
      case .hkVariantsRev:
        return "HKVariantsRev"
      case .hkVariantsRevPhrases:
        return "HKVariantsRevPhrases"
      case .jpVariants:
        return "JPVariants"
      case .stCharacters:
        return "STCharacters"
      case .stPhrases:
        return "STPhrases"
      case .tsCharacters:
        return "TSCharacters"
      case .tsPhrases:
        return "TSPhrases"
      case .twPhrases:
        return "TWPhrases"
      case .twPhrasesRev:
        return "TWPhrasesRev"
      case .twVariants:
        return "TWVariants"
      case .twVariantsRev:
        return "TWVariantsRev"
      case .twVariantsRevPhrases:
        return "TWVariantsRevPhrases"
      }
    }
  }
}
extension ChineseConverter.Options {

  /// The dictionary used for segmentation.
  ///
  /// The flags are checked in a fixed priority order and the first one present
  /// decides; with none of them set the result is `.stPhrases`.
  var segmentationDictName: ChineseConverter.DictionaryName {
    let priority: [(ChineseConverter.Options, ChineseConverter.DictionaryName)] = [
      (.traditionalize, .stPhrases),
      (.simplify, .tsPhrases),
      (.hkStandard, .hkVariants),
      (.twStandard, .twVariants),
      (.hkStandardRev, .hkVariantsRev),
      (.twStandardRev, .twVariantsRev),
    ]
    for (flag, name) in priority where contains(flag) {
      return name
    }
    return .stPhrases
  }

  /// The ordered conversion stages, each listing the dictionaries it uses.
  ///
  /// `.traditionalize` takes precedence over `.simplify`. Within each branch
  /// `.hkStandard` takes precedence over `.twStandard`. When no stage applies,
  /// the plain Simplified-to-Traditional stage is returned.
  var conversionChain: [[ChineseConverter.DictionaryName]] {
    var stages: [[ChineseConverter.DictionaryName]] = []

    if contains(.traditionalize) {
      // Base conversion first, then the optional idiom and regional stages.
      stages.append([.stPhrases, .stCharacters])
      if contains(.twIdiom) { stages.append([.twPhrases]) }
      if contains(.hkStandard) {
        stages.append([.hkVariants])
      } else if contains(.twStandard) {
        stages.append([.twVariants])
      }
    } else if contains(.simplify) {
      // Regional and idiom stages come before the base conversion here.
      if contains(.hkStandard) {
        stages.append([.hkVariantsRevPhrases, .hkVariantsRev])
      } else if contains(.twStandard) {
        stages.append([.twVariantsRevPhrases, .twVariantsRev])
      }
      if contains(.twIdiom) { stages.append([.twPhrasesRev]) }
      stages.append([.tsPhrases, .tsCharacters])
    } else if contains(.hkStandard) {
      stages.append([.hkVariants])
    } else if contains(.twStandard) {
      stages.append([.twVariants])
    } else if contains(.hkStandardRev) {
      stages.append([.hkVariantsRev])
    } else if contains(.twStandardRev) {
      stages.append([.twVariantsRev])
    }

    return stages.isEmpty ? [[.stPhrases, .stCharacters]] : stages
  }
}

View File

@ -8,35 +8,35 @@
import Foundation import Foundation
/// Holds a weak reference to a value, so keeping the box does not keep the
/// value alive.
class WeakBox<Value: AnyObject> {

  /// The boxed value, or `nil` once it has been deallocated.
  private(set) weak var value: Value?

  init(_ value: Value) {
    self.value = value
  }
}

/// A keyed cache that references its values weakly.
///
/// Every access to the backing dictionary happens while `lock` is held, so the
/// cache may be used from several threads at once.
class WeakValueCache<Key: Hashable, Value: AnyObject> {

  private var storage: [Key: WeakBox<Value>] = [:]
  private let lock = NSLock()

  /// Returns the cached value for `key`, or `nil` when there is no entry or
  /// the value has already been deallocated.
  func value(for key: Key) -> Value? {
    lock.lock()
    defer { lock.unlock() }
    return storage[key]?.value
  }

  /// Returns the cached value for `key`, creating and storing it with `make`
  /// when no live value is cached.
  ///
  /// `make` runs while the lock is held, so it runs at most once per missing
  /// key and must not call back into this cache.
  ///
  /// - Parameters:
  ///   - key: The key to look up.
  ///   - make: Produces the value when the cache has none for `key`.
  /// - Returns: The cached or newly created value.
  /// - Throws: Rethrows any error thrown by `make`; nothing is stored in that case.
  func value(for key: Key, make: () throws -> Value) rethrows -> Value {
    // No unlocked fast-path read: reading the dictionary while another thread
    // writes to it is a data race.
    lock.lock()
    defer { lock.unlock() }
    if let cached = storage[key]?.value {
      return cached
    }
    let created = try make()
    storage[key] = WeakBox(created)
    return created
  }
}

View File

@ -1,64 +1,67 @@
import XCTest import XCTest
@testable import OpenCC @testable import OpenCC
/// Conversion fixtures: the base name of the `.in` / `.ans` resource pair and
/// the converter options exercised against it.
let testCases: [(String, ChineseConverter.Options)] = [
  ("s2t", [.traditionalize]),
  ("t2s", [.simplify]),
  ("s2hk", [.traditionalize, .hkStandard]),
  ("hk2s", [.simplify, .hkStandard]),
  ("s2tw", [.traditionalize, .twStandard]),
  ("tw2s", [.simplify, .twStandard]),
  ("s2twp", [.traditionalize, .twStandard, .twIdiom]),
  ("tw2sp", [.simplify, .twStandard, .twIdiom]),
]

class OpenCCTests: XCTestCase {

  /// Creates a converter for the given options.
  func converter(option: ChineseConverter.Options) throws -> ChineseConverter {
    return try ChineseConverter(options: option)
  }

  /// Converts every fixture's `.in` file and compares it with its `.ans` file.
  func testConversion() throws {
    // A missing or unreadable fixture fails this test instead of crashing the
    // whole test process.
    func testCase(name: String, ext: String) throws -> String {
      let url = try XCTUnwrap(
        Bundle.module.url(forResource: name, withExtension: ext, subdirectory: "testcases"),
        "Missing test resource \(name).\(ext)")
      return try String(contentsOf: url)
    }

    for (name, opt) in testCases {
      let subject = try ChineseConverter(options: opt)
      let input = try testCase(name: name, ext: "in")
      let converted = subject.convert(input)
      let output = try testCase(name: name, ext: "ans")
      XCTAssertEqual(converted, output, "Conversion \(name) fails")
    }
  }

  /// Measures creating converters when nothing else holds on to one.
  func testConverterCreationPerformance() {
    let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
    measure {
      for _ in 0..<10 {
        // `measure` takes a non-throwing closure, so `try!` stays here.
        _ = try! ChineseConverter(options: options)
      }
    }
  }

  /// Measures creating converters while `holder` is still alive.
  func testDictionaryCache() throws {
    let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
    let holder = try ChineseConverter(options: options)
    measure {
      for _ in 0..<1_000 {
        // `measure` takes a non-throwing closure, so `try!` stays here.
        _ = try! ChineseConverter(options: options)
      }
    }
    // Uses `holder` after the measurement so it stays alive throughout.
    _ = holder.convert("foo")
  }

  /// Measures converting the bundled benchmark text.
  func testConversionPerformance() throws {
    let cov = try converter(option: [.traditionalize, .twStandard, .twIdiom])
    let url = try XCTUnwrap(
      Bundle.module.url(forResource: "zuozhuan", withExtension: "txt", subdirectory: "benchmark"),
      "Missing benchmark resource zuozhuan.txt")
    // 1.9 MB, 624k word
    let str = try String(contentsOf: url)
    measure {
      _ = cov.convert(str)
    }
  }
}