OpenCC // Swift Clang-Format.

This commit is contained in:
ShikiSuen 2022-04-03 12:50:51 +08:00
parent b0b4976142
commit f0bea85698
8 changed files with 408 additions and 401 deletions

View File

@ -3,83 +3,83 @@
import PackageDescription
// Package manifest: a Swift wrapper target (`OpenCC`), its tests, and the
// C++ target (`copencc`) that the wrapper depends on.
let package = Package(
  name: "SwiftyOpenCC",
  products: [
    .library(
      name: "OpenCC",
      targets: ["OpenCC"])
  ],
  targets: [
    // Swift API; the `Dictionary` directory is copied verbatim into the bundle.
    .target(
      name: "OpenCC",
      dependencies: ["copencc"],
      resources: [
        .copy("Dictionary")
      ]),
    // Tests read their fixtures from the copied `benchmark` and `testcases` directories.
    .testTarget(
      name: "OpenCCTests",
      dependencies: ["OpenCC"],
      resources: [
        .copy("benchmark"),
        .copy("testcases"),
      ]),
    .target(
      name: "copencc",
      exclude: [
        // Upstream directories, sources and vendored dependencies left out of the build.
        "src/benchmark",
        "src/tools",
        "src/BinaryDictTest.cpp",
        "src/Config.cpp",
        "src/ConfigTest.cpp",
        "src/ConversionChainTest.cpp",
        "src/ConversionTest.cpp",
        "src/DartsDictTest.cpp",
        "src/DictGroupTest.cpp",
        "src/MarisaDictTest.cpp",
        "src/MaxMatchSegmentationTest.cpp",
        "src/PhraseExtractTest.cpp",
        "src/SerializedValuesTest.cpp",
        "src/SimpleConverter.cpp",
        "src/SimpleConverterTest.cpp",
        "src/TextDictTest.cpp",
        "src/UTF8StringSliceTest.cpp",
        "src/UTF8UtilTest.cpp",
        "deps/google-benchmark",
        "deps/gtest-1.11.0",
        "deps/pybind11-2.5.0",
        "deps/rapidjson-1.1.0",
        "deps/tclap-1.2.2",

        // Headers and the `py_opencc.cpp` source, likewise excluded.
        "src/CmdLineOutput.hpp",
        "src/Config.hpp",
        "src/ConfigTestBase.hpp",
        "src/DictGroupTestBase.hpp",
        "src/SimpleConverter.hpp",
        "src/TestUtils.hpp",
        "src/TestUtilsUTF8.hpp",
        "src/TextDictTestBase.hpp",
        "src/py_opencc.cpp",

        // Non-source files (READMEs, CMake scripts, author/licence notes).
        // NOTE(review): presumably listed to silence SwiftPM's
        // "unhandled file" warnings -- confirm before pruning.
        "src/README.md",
        "src/CMakeLists.txt",
        "deps/marisa-0.2.6/AUTHORS",
        "deps/marisa-0.2.6/CMakeLists.txt",
        "deps/marisa-0.2.6/COPYING.md",
        "deps/marisa-0.2.6/README.md",
      ],
      sources: [
        "source.cpp",
        "src",
        "deps/marisa-0.2.6",
      ],
      cxxSettings: [
        .headerSearchPath("src"),
        .headerSearchPath("deps/darts-clone"),
        .headerSearchPath("deps/marisa-0.2.6/include"),
        .headerSearchPath("deps/marisa-0.2.6/lib"),
        .define("ENABLE_DARTS"),
      ]),
  ],
  cxxLanguageStandard: .cxx14
)

View File

@ -23,67 +23,67 @@ import copencc
/// during the course of a conversion.
public class ChineseConverter {

  /// These constants define the ChineseConverter options.
  public struct Options: OptionSet {

    public let rawValue: Int

    public init(rawValue: Int) {
      self.rawValue = rawValue
    }

    /// Convert to Traditional Chinese. (default)
    public static let traditionalize = Options(rawValue: 1 << 0)

    /// Convert to Simplified Chinese.
    public static let simplify = Options(rawValue: 1 << 1)

    /// Use Taiwan standard.
    public static let twStandard = Options(rawValue: 1 << 5)

    /// Use HongKong standard.
    public static let hkStandard = Options(rawValue: 1 << 6)

    /// Cancel Taiwan standard.
    public static let twStandardRev = Options(rawValue: 1 << 15)

    /// Cancel HongKong standard.
    public static let hkStandardRev = Options(rawValue: 1 << 16)

    /// Taiwanese idiom conversion.
    public static let twIdiom = Options(rawValue: 1 << 10)
  }

  // Strong references to the dictionaries whose raw handles are passed to
  // `CCConverterCreate`. The loader's cache only holds them weakly, so these
  // properties are what keeps them alive alongside `converter`.
  private let seg: ConversionDictionary
  private let chain: [ConversionDictionary]

  // NOTE(review): no code releasing this handle is visible in this view;
  // confirm whether the C wrapper expects an explicit destroy call.
  private let converter: CCConverterRef

  /// Loads the segmentation dictionary and the conversion chain for
  /// `options`, then creates the underlying converter from their raw handles.
  private init(loader: DictionaryLoader, options: Options) throws {
    seg = try loader.segmentation(options: options)
    chain = try loader.conversionChain(options: options)
    // `var` because the array is passed to the C function by pointer (`&`).
    var rawChain = chain.map { $0.dict }
    converter = CCConverterCreate("SwiftyOpenCC", seg.dict, &rawChain, rawChain.count)
  }

  /// Returns an initialized `ChineseConverter` instance with the specified
  /// conversion options.
  ///
  /// - Parameter options: The conversion options.
  /// - Throws: Throws `ConversionError` if failed.
  public convenience init(options: Options) throws {
    let loader = DictionaryLoader(bundle: .module)
    try self.init(loader: loader, options: options)
  }

  /// Returns a converted string using the converter's current options.
  ///
  /// Both the conversion result and its UTF-8 decoding are force-unwrapped,
  /// so a `nil` from either call traps at runtime.
  ///
  /// - Parameter text: The string to convert.
  /// - Returns: A converted string using the converter's current options.
  public func convert(_ text: String) -> String {
    let stlStr = CCConverterCreateConvertedStringFromString(converter, text)!
    // Release the temporary string once its contents have been copied into a Swift `String`.
    defer { STLStringDestroy(stlStr) }
    return String(utf8String: STLStringGetUTF8String(stlStr))!
  }
}

View File

@ -10,21 +10,21 @@ import copencc
/// Wraps a `CCDictRef` that is either loaded from a dictionary file or built
/// as a group of other dictionaries.
///
/// NOTE(review): no code releasing `dict` is visible in this view; confirm
/// whether the C wrapper expects an explicit destroy call.
class ConversionDictionary {

  /// The member dictionaries when created with `init(group:)`; empty when
  /// created with `init(path:)`. Storing them keeps the members referenced
  /// for as long as this object exists.
  let group: [ConversionDictionary]

  /// The underlying C dictionary handle.
  let dict: CCDictRef

  /// Loads a dictionary from the file at `path`.
  ///
  /// - Throws: A `ConversionError` built from `ccErrorno` when
  ///   `CCDictCreateMarisaWithPath` returns `nil`.
  init(path: String) throws {
    guard let dict = CCDictCreateMarisaWithPath(path) else {
      throw ConversionError(ccErrorno)
    }
    self.group = []
    self.dict = dict
  }

  /// Builds a dictionary from the raw handles of `group`.
  init(group: [ConversionDictionary]) {
    // `var` because the array is passed to the C function by pointer (`&`).
    var rawGroup = group.map { $0.dict }
    self.group = group
    self.dict = CCDictCreateWithGroup(&rawGroup, rawGroup.count)
  }
}

View File

@ -10,28 +10,28 @@ import copencc
/// Errors surfaced by the converter; each case other than `unknown` mirrors
/// the `CCErrorCode` case of the same name.
public enum ConversionError: Error {

  case fileNotFound

  case invalidFormat

  case invalidTextDictionary

  case invalidUTF8

  case unknown

  /// Maps a C-side error code onto the Swift error.
  init(_ code: CCErrorCode) {
    switch code {
    case .fileNotFound:
      self = .fileNotFound
    case .invalidFormat:
      self = .invalidFormat
    case .invalidTextDictionary:
      self = .invalidTextDictionary
    case .invalidUTF8:
      self = .invalidUTF8
    default:
      // Covers `.unknown` as well as any code not listed above.
      self = .unknown
    }
  }
}

View File

@ -10,46 +10,50 @@ import copencc
extension ChineseConverter {

  /// Locates `.ocd2` dictionary files in a bundle and loads them through a
  /// cache shared by all loaders.
  struct DictionaryLoader {

    /// Bundle subdirectory that holds the dictionary files.
    private static let subdirectory = "Dictionary"

    /// Loaded dictionaries keyed by file path. Values are held weakly, so an
    /// entry is only reused while something else still retains the dictionary.
    private static let dictCache = WeakValueCache<String, ConversionDictionary>()

    private let bundle: Bundle

    init(bundle: Bundle) {
      self.bundle = bundle
    }

    /// Returns the dictionary called `name`, loading it on a cache miss.
    ///
    /// - Throws: `ConversionError.fileNotFound` when the bundle has no such
    ///   resource; otherwise whatever `ConversionDictionary(path:)` throws.
    func dict(_ name: ChineseConverter.DictionaryName) throws -> ConversionDictionary {
      guard
        let path = bundle.path(
          forResource: name.description, ofType: "ocd2",
          inDirectory: DictionaryLoader.subdirectory)
      else {
        throw ConversionError.fileNotFound
      }
      return try DictionaryLoader.dictCache.value(for: path) {
        return try ConversionDictionary(path: path)
      }
    }
  }
}
extension ChineseConverter.DictionaryLoader {

  /// Loads the segmentation dictionary selected by `options`.
  func segmentation(options: ChineseConverter.Options) throws -> ConversionDictionary {
    return try dict(options.segmentationDictName)
  }

  /// Loads one dictionary per step of `options.conversionChain`.
  ///
  /// A step that names a single dictionary yields that dictionary directly; a
  /// step that names several yields a group dictionary built from them; an
  /// empty step is dropped.
  func conversionChain(options: ChineseConverter.Options) throws -> [ConversionDictionary] {
    return try options.conversionChain.compactMap { names in
      switch names.count {
      case 0:
        return nil
      case 1:
        return try dict(names[0])
      default:
        let members = try names.map(dict)
        return ConversionDictionary(group: members)
      }
    }
  }
}

View File

@ -9,98 +9,98 @@ import Foundation
extension ChineseConverter {

  /// Identifies a dictionary; `description` is the resource name used when
  /// the dictionary is looked up in the bundle.
  enum DictionaryName: CustomStringConvertible {

    case hkVariants
    case hkVariantsRev
    case hkVariantsRevPhrases
    case jpVariants
    case stCharacters
    case stPhrases
    case tsCharacters
    case tsPhrases
    case twPhrases
    case twPhrasesRev
    case twVariants
    case twVariantsRev
    case twVariantsRevPhrases

    var description: String {
      switch self {
      case .hkVariants: return "HKVariants"
      case .hkVariantsRev: return "HKVariantsRev"
      case .hkVariantsRevPhrases: return "HKVariantsRevPhrases"
      case .jpVariants: return "JPVariants"
      case .stCharacters: return "STCharacters"
      case .stPhrases: return "STPhrases"
      case .tsCharacters: return "TSCharacters"
      case .tsPhrases: return "TSPhrases"
      case .twPhrases: return "TWPhrases"
      case .twPhrasesRev: return "TWPhrasesRev"
      case .twVariants: return "TWVariants"
      case .twVariantsRev: return "TWVariantsRev"
      case .twVariantsRevPhrases: return "TWVariantsRevPhrases"
      }
    }
  }
}
extension ChineseConverter.Options {

  /// The dictionary used for segmentation.
  ///
  /// The first matching option wins, checked in this order: `traditionalize`,
  /// `simplify`, `hkStandard`, `twStandard`, `hkStandardRev`, `twStandardRev`.
  /// With none of them set the result is `.stPhrases`.
  var segmentationDictName: ChineseConverter.DictionaryName {
    if contains(.traditionalize) {
      return .stPhrases
    } else if contains(.simplify) {
      return .tsPhrases
    } else if contains(.hkStandard) {
      return .hkVariants
    } else if contains(.twStandard) {
      return .twVariants
    } else if contains(.hkStandardRev) {
      return .hkVariantsRev
    } else if contains(.twStandardRev) {
      return .twVariantsRev
    } else {
      return .stPhrases
    }
  }

  /// The ordered conversion steps; each inner array lists the dictionaries
  /// that make up one step.
  ///
  /// `traditionalize` takes precedence over `simplify` when both are set, and
  /// `hkStandard` takes precedence over `twStandard`. If no option adds a
  /// step, the chain falls back to `[[.stPhrases, .stCharacters]]`.
  var conversionChain: [[ChineseConverter.DictionaryName]] {
    var result: [[ChineseConverter.DictionaryName]] = []
    if contains(.traditionalize) {
      // Base step first, then the idiom step, then the regional variant step.
      result.append([.stPhrases, .stCharacters])
      if contains(.twIdiom) {
        result.append([.twPhrases])
      }
      if contains(.hkStandard) {
        result.append([.hkVariants])
      } else if contains(.twStandard) {
        result.append([.twVariants])
      }
    } else if contains(.simplify) {
      // Reverse order: regional variant step, then idiom step, then the base step.
      if contains(.hkStandard) {
        result.append([.hkVariantsRevPhrases, .hkVariantsRev])
      } else if contains(.twStandard) {
        result.append([.twVariantsRevPhrases, .twVariantsRev])
      }
      if contains(.twIdiom) {
        result.append([.twPhrasesRev])
      }
      result.append([.tsPhrases, .tsCharacters])
    } else {
      // Neither direction requested: at most one variant-only step.
      if contains(.hkStandard) {
        result.append([.hkVariants])
      } else if contains(.twStandard) {
        result.append([.twVariants])
      } else if contains(.hkStandardRev) {
        result.append([.hkVariantsRev])
      } else if contains(.twStandardRev) {
        result.append([.twVariantsRev])
      }
    }
    if result.isEmpty {
      return [[.stPhrases, .stCharacters]]
    }
    return result
  }
}

View File

@ -9,34 +9,34 @@ import Foundation
/// Holds a weak reference to `Value`, so the box does not keep it alive.
class WeakBox<Value: AnyObject> {

  /// The boxed object, or `nil` once it has been deallocated.
  private(set) weak var value: Value?

  init(_ value: Value) {
    self.value = value
  }
}

/// A keyed cache that references its values weakly: an entry can be returned
/// only while some other owner still retains the value.
///
/// Every access to `storage` goes through `lock`, because reading the
/// dictionary while another thread writes to it is a data race.
class WeakValueCache<Key: Hashable, Value: AnyObject> {

  private var storage: [Key: WeakBox<Value>] = [:]

  // Recursive so that a `make` closure may still read from this cache on the
  // same thread without deadlocking.
  private let lock = NSRecursiveLock()

  /// Returns the live value cached for `key`, or `nil` if there is none.
  func value(for key: Key) -> Value? {
    lock.lock()
    defer { lock.unlock() }
    return storage[key]?.value
  }

  /// Returns the live value cached for `key`; on a miss, builds one with
  /// `make`, caches it weakly and returns it.
  ///
  /// The lock is held while `make` runs, so concurrent callers wait for it
  /// instead of building a second value. If `make` throws, nothing is cached.
  ///
  /// - Parameters:
  ///   - key: The cache key.
  ///   - make: Builds the value on a miss.
  /// - Returns: The cached or newly built value.
  /// - Throws: Rethrows any error thrown by `make`.
  func value(for key: Key, make: () throws -> Value) rethrows -> Value {
    lock.lock()
    defer { lock.unlock() }
    if let cached = storage[key]?.value {
      return cached
    }
    let fresh = try make()
    storage[key] = WeakBox(fresh)
    return fresh
  }
}

View File

@ -1,64 +1,67 @@
import XCTest
@testable import OpenCC
// Fixture base name paired with the options under test; `testConversion`
// reads `<name>.in` and `<name>.ans` from the `testcases` resources.
let testCases: [(String, ChineseConverter.Options)] = [
  ("s2t", [.traditionalize]),
  ("t2s", [.simplify]),
  ("s2hk", [.traditionalize, .hkStandard]),
  ("hk2s", [.simplify, .hkStandard]),
  ("s2tw", [.traditionalize, .twStandard]),
  ("tw2s", [.simplify, .twStandard]),
  ("s2twp", [.traditionalize, .twStandard, .twIdiom]),
  ("tw2sp", [.simplify, .twStandard, .twIdiom]),
]
class OpenCCTests: XCTestCase {

  func converter(option: ChineseConverter.Options) throws -> ChineseConverter {
    return try ChineseConverter(options: option)
  }

  /// Converts each `<name>.in` fixture and compares it with `<name>.ans`.
  func testConversion() throws {
    // Throws instead of trapping, so a missing or unreadable fixture is
    // reported as a test failure rather than crashing the test run.
    func testCase(name: String, ext: String) throws -> String {
      let url = try XCTUnwrap(
        Bundle.module.url(
          forResource: name, withExtension: ext, subdirectory: "testcases"),
        "Missing fixture testcases/\(name).\(ext)")
      return try String(contentsOf: url)
    }
    for (name, opt) in testCases {
      let conv = try ChineseConverter(options: opt)
      let input = try testCase(name: name, ext: "in")
      let converted = conv.convert(input)
      let output = try testCase(name: name, ext: "ans")
      XCTAssertEqual(converted, output, "Conversion \(name) fails")
    }
  }

  /// Measures converter creation when nothing retains the created converters.
  func testConverterCreationPerformance() {
    let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
    measure {
      for _ in 0..<10 {
        // The `measure` closure cannot throw, hence `try!`.
        _ = try! ChineseConverter(options: options)
      }
    }
  }

  /// Measures converter creation while `holder` retains the dictionaries,
  /// so lookups in the weak dictionary cache can hit.
  func testDictionaryCache() {
    let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
    let holder = try! ChineseConverter(options: options)
    measure {
      for _ in 0..<1_000 {
        _ = try! ChineseConverter(options: options)
      }
    }
    // Use `holder` after the measured block so it stays alive throughout.
    _ = holder.convert("foo")
  }

  /// Measures converting the benchmark text with one prepared converter.
  func testConversionPerformance() throws {
    let cov = try converter(option: [.traditionalize, .twStandard, .twIdiom])
    let url = try XCTUnwrap(
      Bundle.module.url(
        forResource: "zuozhuan", withExtension: "txt", subdirectory: "benchmark"),
      "Missing fixture benchmark/zuozhuan.txt")
    // 1.9 MB, 624k word
    let str = try String(contentsOf: url)
    measure {
      _ = cov.convert(str)
    }
  }
}