OpenCC // Swift Clang-Format.

This commit is contained in:
ShikiSuen 2022-04-03 12:50:51 +08:00
parent 9aa781ecfd
commit 5fba88cfeb
8 changed files with 408 additions and 401 deletions

View File

@ -3,83 +3,83 @@
import PackageDescription
// Package manifest: a Swift library target (`OpenCC`) built on top of a C++
// target (`copencc`), plus a test target with its fixture directories.
let package = Package(
  name: "SwiftyOpenCC",
  products: [
    .library(
      name: "OpenCC",
      targets: ["OpenCC"])
  ],
  targets: [
    // Swift wrapper; the `Dictionary` directory is copied into the bundle as-is.
    .target(
      name: "OpenCC",
      dependencies: ["copencc"],
      resources: [
        .copy("Dictionary")
      ]),
    // Tests; `benchmark` and `testcases` are copied into the test bundle as-is.
    .testTarget(
      name: "OpenCCTests",
      dependencies: ["OpenCC"],
      resources: [
        .copy("benchmark"),
        .copy("testcases"),
      ]),
    // C++ sources compiled from `source.cpp`, `src` and `deps/marisa-0.2.6`,
    // minus everything listed under `exclude`.
    .target(
      name: "copencc",
      exclude: [
        "src/benchmark",
        "src/tools",
        "src/BinaryDictTest.cpp",
        "src/Config.cpp",
        "src/ConfigTest.cpp",
        "src/ConversionChainTest.cpp",
        "src/ConversionTest.cpp",
        "src/DartsDictTest.cpp",
        "src/DictGroupTest.cpp",
        "src/MarisaDictTest.cpp",
        "src/MaxMatchSegmentationTest.cpp",
        "src/PhraseExtractTest.cpp",
        "src/SerializedValuesTest.cpp",
        "src/SimpleConverter.cpp",
        "src/SimpleConverterTest.cpp",
        "src/TextDictTest.cpp",
        "src/UTF8StringSliceTest.cpp",
        "src/UTF8UtilTest.cpp",
        "deps/google-benchmark",
        "deps/gtest-1.11.0",
        "deps/pybind11-2.5.0",
        "deps/rapidjson-1.1.0",
        "deps/tclap-1.2.2",
        "src/CmdLineOutput.hpp",
        "src/Config.hpp",
        "src/ConfigTestBase.hpp",
        "src/DictGroupTestBase.hpp",
        "src/SimpleConverter.hpp",
        "src/TestUtils.hpp",
        "src/TestUtilsUTF8.hpp",
        "src/TextDictTestBase.hpp",
        "src/py_opencc.cpp",
        // NOTE(review): non-source files that live inside the source
        // directories. Presumably listed so SwiftPM does not report them as
        // unhandled files; confirm before removing.
        "src/README.md",
        "src/CMakeLists.txt",
        "deps/marisa-0.2.6/AUTHORS",
        "deps/marisa-0.2.6/CMakeLists.txt",
        "deps/marisa-0.2.6/COPYING.md",
        "deps/marisa-0.2.6/README.md",
      ],
      sources: [
        "source.cpp",
        "src",
        "deps/marisa-0.2.6",
      ],
      cxxSettings: [
        .headerSearchPath("src"),
        .headerSearchPath("deps/darts-clone"),
        .headerSearchPath("deps/marisa-0.2.6/include"),
        .headerSearchPath("deps/marisa-0.2.6/lib"),
        .define("ENABLE_DARTS"),
      ]),
  ],
  cxxLanguageStandard: .cxx14
)

View File

@ -22,68 +22,68 @@ import copencc
/// However, the string on which it is operating should not be mutated
/// during the course of a conversion.
public class ChineseConverter {

  /// These constants define the ChineseConverter options.
  public struct Options: OptionSet {
    public let rawValue: Int

    public init(rawValue: Int) {
      self.rawValue = rawValue
    }

    /// Convert to Traditional Chinese. (default)
    public static let traditionalize = Options(rawValue: 1 << 0)
    /// Convert to Simplified Chinese.
    public static let simplify = Options(rawValue: 1 << 1)
    /// Use Taiwan standard.
    public static let twStandard = Options(rawValue: 1 << 5)
    /// Use HongKong standard.
    public static let hkStandard = Options(rawValue: 1 << 6)
    /// Cancel Taiwan standard.
    public static let twStandardRev = Options(rawValue: 1 << 15)
    /// Cancel HongKong standard.
    public static let hkStandardRev = Options(rawValue: 1 << 16)
    /// Taiwanese idiom conversion.
    public static let twIdiom = Options(rawValue: 1 << 10)
  }

  /// Segmentation dictionary whose raw handle was passed to `CCConverterCreate`.
  private let seg: ConversionDictionary
  /// Conversion dictionaries, in order, whose raw handles were passed to
  /// `CCConverterCreate`.
  private let chain: [ConversionDictionary]
  /// Underlying converter handle created from `seg` and `chain`.
  private let converter: CCConverterRef

  /// Loads the segmentation dictionary and the conversion chain through
  /// `loader`, then creates the underlying converter from their raw handles.
  ///
  /// - Throws: Whatever the loader throws while resolving the dictionaries.
  private init(loader: DictionaryLoader, options: Options) throws {
    seg = try loader.segmentation(options: options)
    chain = try loader.conversionChain(options: options)
    // A mutable copy is required because the handles are passed as `&rawChain`.
    var rawChain = chain.map { $0.dict }
    converter = CCConverterCreate("SwiftyOpenCC", seg.dict, &rawChain, rawChain.count)
  }

  /// Returns an initialized `ChineseConverter` instance with the specified
  /// conversion options.
  ///
  /// - Parameter options: The conversion options.
  /// - Throws: Throws `ConversionError` if failed.
  public convenience init(options: Options) throws {
    let loader = DictionaryLoader(bundle: .module)
    try self.init(loader: loader, options: options)
  }

  /// Returns a converted string using the converter's current options.
  ///
  /// Traps if the underlying call returns no string, or if the returned bytes
  /// cannot be decoded by `String(utf8String:)`.
  ///
  /// - Parameter text: The string to convert.
  /// - Returns: The converted string.
  public func convert(_ text: String) -> String {
    let stlStr = CCConverterCreateConvertedStringFromString(converter, text)!
    // The intermediate string is destroyed on every exit path.
    defer { STLStringDestroy(stlStr) }
    return String(utf8String: STLStringGetUTF8String(stlStr))!
  }
}

View File

@ -9,22 +9,22 @@ import Foundation
import copencc
/// Wraps a raw `CCDictRef` that is either loaded from a file or composed from
/// a group of other dictionaries.
class ConversionDictionary {
  /// The dictionaries this one was composed from; empty when loaded from a path.
  let group: [ConversionDictionary]
  /// The raw dictionary handle.
  let dict: CCDictRef

  /// Creates a dictionary from the file at `path` using
  /// `CCDictCreateMarisaWithPath`.
  ///
  /// - Parameter path: File-system path of the dictionary file.
  /// - Throws: A `ConversionError` built from `ccErrorno` when the underlying
  ///   call returns `nil`.
  init(path: String) throws {
    guard let dict = CCDictCreateMarisaWithPath(path) else {
      throw ConversionError(ccErrorno)
    }
    self.group = []
    self.dict = dict
  }

  /// Creates a dictionary that combines `group` using `CCDictCreateWithGroup`.
  ///
  /// - Parameter group: The member dictionaries; they are stored in `group`.
  init(group: [ConversionDictionary]) {
    // A mutable copy is required because the handles are passed as `&rawGroup`.
    var rawGroup = group.map { $0.dict }
    self.group = group
    self.dict = CCDictCreateWithGroup(&rawGroup, rawGroup.count)
  }
}

View File

@ -9,29 +9,29 @@ import Foundation
import copencc
/// Errors thrown by the converter, mirroring the cases of `CCErrorCode`.
public enum ConversionError: Error {
  case fileNotFound
  case invalidFormat
  case invalidTextDictionary
  case invalidUTF8
  case unknown

  /// Maps a `CCErrorCode` to the case of the same name.
  ///
  /// Any code without a dedicated case maps to `.unknown`.
  init(_ code: CCErrorCode) {
    switch code {
    case .fileNotFound:
      self = .fileNotFound
    case .invalidFormat:
      self = .invalidFormat
    case .invalidTextDictionary:
      self = .invalidTextDictionary
    case .invalidUTF8:
      self = .invalidUTF8
    case .unknown, _:
      self = .unknown
    }
  }
}

View File

@ -9,47 +9,51 @@ import Foundation
import copencc
extension ChineseConverter {

  /// Resolves dictionary names to `.ocd2` files inside a bundle and loads
  /// them through a cache shared by all loaders.
  struct DictionaryLoader {
    /// Bundle subdirectory that is searched for dictionary files.
    private static let subdirectory = "Dictionary"
    /// Loaded dictionaries keyed by file path; values are held weakly.
    private static let dictCache = WeakValueCache<String, ConversionDictionary>()
    private let bundle: Bundle

    /// - Parameter bundle: The bundle searched for dictionary files.
    init(bundle: Bundle) {
      self.bundle = bundle
    }

    /// Returns the dictionary called `name`, reusing a cached instance for the
    /// same file path when one is available.
    ///
    /// - Throws: `ConversionError.fileNotFound` when the bundle has no matching
    ///   `.ocd2` resource, or the error thrown by
    ///   `ConversionDictionary(path:)`.
    func dict(_ name: ChineseConverter.DictionaryName) throws -> ConversionDictionary {
      guard
        let path = bundle.path(
          forResource: name.description, ofType: "ocd2",
          inDirectory: DictionaryLoader.subdirectory)
      else {
        throw ConversionError.fileNotFound
      }
      return try DictionaryLoader.dictCache.value(for: path) {
        return try ConversionDictionary(path: path)
      }
    }
  }
}
extension ChineseConverter.DictionaryLoader {

  /// Loads the segmentation dictionary selected by `options`.
  ///
  /// - Throws: The error thrown by `dict(_:)`.
  func segmentation(options: ChineseConverter.Options) throws -> ConversionDictionary {
    let dictName = options.segmentationDictName
    return try dict(dictName)
  }

  /// Loads the conversion chain selected by `options`.
  ///
  /// Each step of `options.conversionChain` becomes one dictionary: a step
  /// with a single name is loaded directly, a step with several names is
  /// wrapped in a group dictionary, and an empty step is skipped.
  ///
  /// - Throws: The error thrown by `dict(_:)`.
  func conversionChain(options: ChineseConverter.Options) throws -> [ConversionDictionary] {
    return try options.conversionChain.compactMap { names -> ConversionDictionary? in
      switch names.count {
      case 0:
        return nil
      case 1:
        // Exactly one element, so index 0 is always valid here.
        return try dict(names[0])
      default:
        let dicts = try names.map(dict)
        return ConversionDictionary(group: dicts)
      }
    }
  }
}

View File

@ -8,99 +8,99 @@
import Foundation
extension ChineseConverter {

  /// Identifies one dictionary; `description` is the name used to look the
  /// dictionary up.
  enum DictionaryName: CustomStringConvertible {
    case hkVariants
    case hkVariantsRev
    case hkVariantsRevPhrases
    case jpVariants
    case stCharacters
    case stPhrases
    case tsCharacters
    case tsPhrases
    case twPhrases
    case twPhrasesRev
    case twVariants
    case twVariantsRev
    case twVariantsRevPhrases

    /// The dictionary's name as a string, e.g. `"HKVariants"` for `.hkVariants`.
    var description: String {
      switch self {
      case .hkVariants: return "HKVariants"
      case .hkVariantsRev: return "HKVariantsRev"
      case .hkVariantsRevPhrases: return "HKVariantsRevPhrases"
      case .jpVariants: return "JPVariants"
      case .stCharacters: return "STCharacters"
      case .stPhrases: return "STPhrases"
      case .tsCharacters: return "TSCharacters"
      case .tsPhrases: return "TSPhrases"
      case .twPhrases: return "TWPhrases"
      case .twPhrasesRev: return "TWPhrasesRev"
      case .twVariants: return "TWVariants"
      case .twVariantsRev: return "TWVariantsRev"
      case .twVariantsRevPhrases: return "TWVariantsRevPhrases"
      }
    }
  }
}
extension ChineseConverter.Options {

  /// The dictionary used for segmentation.
  ///
  /// The first option found wins, checked in this order: `.traditionalize`,
  /// `.simplify`, `.hkStandard`, `.twStandard`, `.hkStandardRev`,
  /// `.twStandardRev`. When none is set the result is `.stPhrases`.
  var segmentationDictName: ChineseConverter.DictionaryName {
    if contains(.traditionalize) {
      return .stPhrases
    } else if contains(.simplify) {
      return .tsPhrases
    } else if contains(.hkStandard) {
      return .hkVariants
    } else if contains(.twStandard) {
      return .twVariants
    } else if contains(.hkStandardRev) {
      return .hkVariantsRev
    } else if contains(.twStandardRev) {
      return .twVariantsRev
    } else {
      return .stPhrases
    }
  }

  /// The ordered conversion steps; each step lists the dictionaries that are
  /// combined for that step.
  ///
  /// `.traditionalize` takes precedence over `.simplify`, and `.hkStandard`
  /// takes precedence over `.twStandard`. If no step is selected, the result
  /// is the single step `[.stPhrases, .stCharacters]`.
  var conversionChain: [[ChineseConverter.DictionaryName]] {
    var result: [[ChineseConverter.DictionaryName]] = []
    if contains(.traditionalize) {
      // Base conversion first, then the optional idiom and regional steps.
      result.append([.stPhrases, .stCharacters])
      if contains(.twIdiom) {
        result.append([.twPhrases])
      }
      if contains(.hkStandard) {
        result.append([.hkVariants])
      } else if contains(.twStandard) {
        result.append([.twVariants])
      }
    } else if contains(.simplify) {
      // Regional and idiom steps run before the base conversion here.
      if contains(.hkStandard) {
        result.append([.hkVariantsRevPhrases, .hkVariantsRev])
      } else if contains(.twStandard) {
        result.append([.twVariantsRevPhrases, .twVariantsRev])
      }
      if contains(.twIdiom) {
        result.append([.twPhrasesRev])
      }
      result.append([.tsPhrases, .tsCharacters])
    } else {
      // Neither base direction is set: at most one variant step is applied.
      if contains(.hkStandard) {
        result.append([.hkVariants])
      } else if contains(.twStandard) {
        result.append([.twVariants])
      } else if contains(.hkStandardRev) {
        result.append([.hkVariantsRev])
      } else if contains(.twStandardRev) {
        result.append([.twVariantsRev])
      }
    }
    if result.isEmpty {
      return [[.stPhrases, .stCharacters]]
    }
    return result
  }
}

View File

@ -8,35 +8,35 @@
import Foundation
/// Holds a weak reference to an object, so that storing the box does not keep
/// the object alive.
class WeakBox<Value: AnyObject> {
  /// The boxed object, or `nil` once it has been deallocated.
  private(set) weak var value: Value?

  init(_ value: Value) {
    self.value = value
  }
}

/// A keyed cache that references its values weakly: a lookup succeeds only
/// while the value is still strongly referenced somewhere else.
///
/// Every access to the backing dictionary is performed while holding `lock`.
/// The lock is recursive, so the `make` closure may call back into the same
/// cache on the same thread.
class WeakValueCache<Key: Hashable, Value: AnyObject> {
  private var storage: [Key: WeakBox<Value>] = [:]
  private let lock = NSRecursiveLock()

  /// Returns the live value stored for `key`, or `nil` when there is no entry
  /// or the stored value has already been deallocated.
  func value(for key: Key) -> Value? {
    lock.lock()
    defer { lock.unlock() }
    return liveValue(for: key)
  }

  /// Returns the live value stored for `key`, creating and storing one with
  /// `make` when there is none.
  ///
  /// `make` runs while the lock is held, so concurrent callers asking for the
  /// same key receive the same instance.
  ///
  /// - Parameters:
  ///   - key: The key to look up.
  ///   - make: Builds the value on a cache miss.
  /// - Returns: The cached or newly created value.
  /// - Throws: Rethrows the error thrown by `make`; nothing is stored then.
  func value(for key: Key, make: () throws -> Value) rethrows -> Value {
    lock.lock()
    defer { lock.unlock() }
    if let cached = liveValue(for: key) {
      return cached
    }
    let fresh = try make()
    storage[key] = WeakBox(fresh)
    return fresh
  }

  /// Looks up `key` and drops the entry if its value is gone.
  /// Must be called with `lock` held.
  private func liveValue(for key: Key) -> Value? {
    guard let box = storage[key] else {
      return nil
    }
    if let value = box.value {
      return value
    }
    // The boxed object was deallocated; remove the empty box.
    storage[key] = nil
    return nil
  }
}

View File

@ -1,64 +1,67 @@
import XCTest
@testable import OpenCC
/// Conversion fixtures: each entry pairs a fixture base name with the options
/// used to build the converter for it.
let testCases: [(String, ChineseConverter.Options)] = [
  ("s2t", [.traditionalize]),
  ("t2s", [.simplify]),
  ("s2hk", [.traditionalize, .hkStandard]),
  ("hk2s", [.simplify, .hkStandard]),
  ("s2tw", [.traditionalize, .twStandard]),
  ("tw2s", [.simplify, .twStandard]),
  ("s2twp", [.traditionalize, .twStandard, .twIdiom]),
  ("tw2sp", [.simplify, .twStandard, .twIdiom]),
]
/// Correctness and performance tests for `ChineseConverter`.
class OpenCCTests: XCTestCase {

  /// Builds a converter for `option`.
  func converter(option: ChineseConverter.Options) throws -> ChineseConverter {
    return try ChineseConverter(options: option)
  }

  /// Converts every `<name>.in` fixture and compares the result with the
  /// matching `<name>.ans` fixture.
  func testConversion() throws {
    // Reads a fixture from the `testcases` resource directory. A missing or
    // unreadable fixture fails the test instead of crashing the test run.
    func testCase(name: String, ext: String) throws -> String {
      let url = try XCTUnwrap(
        Bundle.module.url(
          forResource: name, withExtension: ext, subdirectory: "testcases"),
        "Missing fixture \(name).\(ext)")
      return try String(contentsOf: url)
    }

    for (name, opt) in testCases {
      let subject = try converter(option: opt)
      let input = try testCase(name: name, ext: "in")
      let converted = subject.convert(input)
      let output = try testCase(name: name, ext: "ans")
      XCTAssertEqual(converted, output, "Conversion \(name) fails")
    }
  }

  /// Measures creating ten converters per iteration.
  func testConverterCreationPerformance() {
    let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
    measure {
      for _ in 0..<10 {
        _ = try! ChineseConverter(options: options)
      }
    }
  }

  /// Measures creating converters while `holder`, built with the same
  /// options, is still alive.
  func testDictionaryCache() {
    let options: ChineseConverter.Options = [.traditionalize, .twStandard, .twIdiom]
    let holder = try! ChineseConverter(options: options)
    measure {
      for _ in 0..<1_000 {
        _ = try! ChineseConverter(options: options)
      }
    }
    // Uses `holder` after the measured block so it stays alive throughout.
    _ = holder.convert("foo")
  }

  /// Measures converting the `zuozhuan.txt` benchmark text.
  func testConversionPerformance() throws {
    let cov = try converter(option: [.traditionalize, .twStandard, .twIdiom])
    let url = try XCTUnwrap(
      Bundle.module.url(
        forResource: "zuozhuan", withExtension: "txt", subdirectory: "benchmark"),
      "Missing benchmark resource zuozhuan.txt")
    // 1.9 MB, 624k word
    let str = try String(contentsOf: url)
    measure {
      _ = cov.convert(str)
    }
  }
}