IME // Completely deprecate OpenVanilla Suite.

This commit is contained in:
ShikiSuen 2022-02-02 19:00:41 +08:00
parent 766c00ff84
commit 7babbb130c
8 changed files with 75 additions and 456 deletions

View File

@ -1,174 +0,0 @@
/*
* OVUTF8Helper.h
*
* Copyright 2011-2022 OpenVanilla Project (MIT License).
* All rights reserved. See "LICENSE.TXT" for details.
*/
#ifndef OVUTF8Helper_h
#define OVUTF8Helper_h
#include <cstddef>
#include <string>
#include <vector>
#include <iostream>
namespace OpenVanilla {
using namespace std;
/// Stateless helpers for splitting, joining and transcoding UTF-8 and UTF-16
/// text. Every member is static; the class only serves as a scope.
class OVUTF8Helper {
public:
    /// Splits a UTF-8 string into one std::string per encoded code point.
    ///
    /// The length of each piece is taken from the exact bit pattern of its
    /// lead byte (0xxxxxxx, 110xxxxx, 1110xxxx, 11110xxx). Bytes that cannot
    /// start a sequence (stray continuation bytes, 0xF8 and above) are skipped
    /// one at a time so they never swallow the valid bytes that follow them.
    /// A sequence truncated by the end of the input is returned clipped.
    static const std::vector<std::string> SplitStringByCodePoint(const std::string& utf8String)
    {
        const size_t length = utf8String.length();
        std::vector<std::string> result;
        size_t index = 0;
        while (index < length) {
            const unsigned char c = static_cast<unsigned char>(utf8String[index]);
            size_t sequenceLength = 0;
            if ((c & 0x80) == 0x00) {
                sequenceLength = 1;
            }
            else if ((c & 0xE0) == 0xC0) {
                sequenceLength = 2;
            }
            else if ((c & 0xF0) == 0xE0) {
                sequenceLength = 3;
            }
            else if ((c & 0xF8) == 0xF0) {
                sequenceLength = 4;
            }

            if (sequenceLength == 0) {
                // Not a valid lead byte: drop it and resynchronise on the next byte.
                ++index;
                continue;
            }
            // substr() clips at the end of the string, so a truncated tail is kept as-is.
            result.push_back(utf8String.substr(index, sequenceLength));
            index += sequenceLength;
        }
        return result;
    }

    /// Concatenates the given pieces, in order, into a single string.
    static const std::string CombineCodePoints(const std::vector<std::string>& codePoints)
    {
        std::string result;
        for (std::vector<std::string>::const_iterator iter = codePoints.begin(); iter != codePoints.end(); ++iter)
            result += *iter;
        return result;
    }

    /// Converts a UTF-8 string into UTF-16 code units stored in a wstring.
    /// Code points at or above U+10000 are emitted as a surrogate pair.
    static const std::wstring ConvertStringToUTF16WideString(const std::string& utf8String)
    {
        std::wstring wideString;
        const std::vector<std::string> charVector = OVUTF8Helper::SplitStringByCodePoint(utf8String);
        const size_t length = charVector.size();
        for (size_t index = 0; index < length; index++) {
            const unsigned int codePoint = OVUTF8Helper::CodePointFromSingleUTF8String(charVector[index]);
            if (codePoint < 0x10000) {
                wideString += static_cast<wchar_t>(codePoint);
            }
            else {
                const unsigned int leadOffset = 0xD800 - (0x10000 >> 10);
                const unsigned int lead = leadOffset + (codePoint >> 10);
                const unsigned int trail = 0xdc00 + (codePoint & 0x3ff);
                wideString += static_cast<wchar_t>(lead);
                wideString += static_cast<wchar_t>(trail);
            }
        }
        return wideString;
    }

    /// Converts UTF-16 code units stored in a wstring into a UTF-8 string.
    /// A lead surrogate followed by a trail surrogate is combined into one
    /// code point; unpaired surrogates of either kind produce no output.
    static const std::string StringFromUTF16WideString(const std::wstring& utf16WideString)
    {
        std::string utf8String;
        const size_t length = utf16WideString.length();
        for (size_t index = 0; index < length; index++) {
            const unsigned int u16 = utf16WideString[index];
            if (u16 >= 0xd800 && u16 <= 0xdbff) {
                if (index + 1 < length) {
                    const unsigned int trailing = utf16WideString[index + 1];
                    if (trailing >= 0xdc00 && trailing <= 0xdfff) {
                        index++;
                        // The constant is negative as an int; the unsigned wrap-around
                        // is intentional and cancels out in the sum below.
                        const unsigned int surrogateOffset = 0x10000 - (0xD800 << 10) - 0xDC00;
                        const unsigned int codePoint = (u16 << 10) + trailing + surrogateOffset;
                        utf8String += OVUTF8Helper::SingleUTF8StringFromCodePoint(codePoint);
                    }
                }
            }
            else if (!(u16 >= 0xdc00 && u16 <= 0xdfff)) {
                utf8String += OVUTF8Helper::SingleUTF8StringFromCodePoint(u16);
            }
        }
        return utf8String;
    }

    /// Encodes one code point as a 1- to 4-byte UTF-8 sequence.
    static const std::string SingleUTF8StringFromCodePoint(unsigned int codePoint)
    {
        std::string utf8String;
        if (codePoint < 0x80) {
            utf8String += static_cast<unsigned char>(codePoint);
        }
        else if (codePoint < 0x800) {
            utf8String += static_cast<unsigned char>((codePoint >> 6) | 0xc0);
            utf8String += static_cast<unsigned char>((codePoint & 0x3f) | 0x80);
        }
        else if (codePoint < 0x10000) {
            utf8String += static_cast<unsigned char>((codePoint >> 12) | 0xe0);
            utf8String += static_cast<unsigned char>(((codePoint >> 6) & 0x3f) | 0x80);
            utf8String += static_cast<unsigned char>((codePoint & 0x3f) | 0x80);
        }
        else {
            utf8String += static_cast<unsigned char>((codePoint >> 18) | 0xf0);
            // Every continuation byte carries exactly six payload bits. Without the
            // mask, bits 18 and above leak into this byte for code points >= U+40000.
            utf8String += static_cast<unsigned char>(((codePoint >> 12) & 0x3f) | 0x80);
            utf8String += static_cast<unsigned char>(((codePoint >> 6) & 0x3f) | 0x80);
            utf8String += static_cast<unsigned char>((codePoint & 0x3f) | 0x80);
        }
        return utf8String;
    }

    /// Decodes a string that holds exactly one UTF-8 sequence. The sequence
    /// length is taken from the string length; any length other than 1-4
    /// yields 0.
    static unsigned int CodePointFromSingleUTF8String(const std::string& utf8String)
    {
        switch (utf8String.length()) {
        case 1:
            // Go through unsigned char so a byte >= 0x80 is not sign-extended.
            return static_cast<unsigned char>(utf8String[0]);
        case 2:
            return ((static_cast<unsigned char>(utf8String[0]) << 6) & 0x7ff)
                + (static_cast<unsigned char>(utf8String[1]) & 0x3f);
        case 3:
            return ((static_cast<unsigned char>(utf8String[0]) << 12) & 0xffff)
                + ((static_cast<unsigned char>(utf8String[1]) << 6) & 0xfff)
                + (static_cast<unsigned char>(utf8String[2]) & 0x3f);
        case 4:
            return ((static_cast<unsigned char>(utf8String[0]) << 18) & 0x1fffff)
                + ((static_cast<unsigned char>(utf8String[1]) << 12) & 0x3ffff)
                + ((static_cast<unsigned char>(utf8String[2]) << 6) & 0xfff)
                + (static_cast<unsigned char>(utf8String[3]) & 0x3f);
        }
        return 0;
    }
};
class OVUTF8 {
public:
static const string FromUTF16(const wstring& utf16String)
{
return OVUTF8Helper::StringFromUTF16WideString(utf16String);
}
};
class OVUTF16 {
public:
static const wstring FromUTF8(const string& utf8String)
{
return OVUTF8Helper::ConvertStringToUTF16WideString(utf8String);
}
};
}
#endif

View File

@ -1,228 +0,0 @@
/*
* OVWildcard.h
*
* Copyright 2011-2022 OpenVanilla Project (MIT License).
* All rights reserved. See "LICENSE.TXT" for details.
*/
#ifndef OVWildcard_h
#define OVWildcard_h
#include <iostream>
#include <string>
#include <vector>
#include <cctype>
namespace OpenVanilla {
using namespace std;
/// A pre-compiled wildcard expression.
///
/// The expression is compiled once into a list of states:
///   - Exact(c):    the next target character must equal c;
///   - AnyOne:      the next target character may be anything;
///   - AnyUntil(n): skip characters up to and including the literal n that
///                  followed the run of "zero or more" wildcards in the
///                  expression. n == 0 means the wildcard was the last thing
///                  in the expression; n == -1 means it was followed by the
///                  "match one" wildcard.
/// Matching works on bytes; case-insensitive comparison uses tolower().
class OVWildcard {
public:
    /// Compiles `expression`.
    ///
    /// @param expression          the wildcard pattern
    /// @param matchOneChar        character standing for exactly one character
    /// @param matchZeroOrMoreChar character standing for zero or more characters
    /// @param matchEndOfLine      when true the whole target must be consumed
    /// @param caseSensitive       when false characters are compared via tolower()
    OVWildcard(const std::string& expression, char matchOneChar = '?', char matchZeroOrMoreChar = '*', bool matchEndOfLine = true, bool caseSensitive = false)
        // Initializers are listed in member declaration order.
        : m_caseSensitive(caseSensitive)
        , m_matchEndOfLine(matchEndOfLine)
        , m_matchOneChar(matchOneChar)
        , m_matchZeroOrMoreChar(matchZeroOrMoreChar)
        , m_expression(expression)
    {
        // The literal prefix is everything before the first wildcard character.
        std::string::size_type index;
        for (index = 0; index < expression.length(); index++) {
            if (expression[index] == matchOneChar || expression[index] == matchZeroOrMoreChar) break;
        }
        m_longestHeadMatchString = expression.substr(0, index);

        for (std::string::size_type i = 0; i < expression.length(); i++) {
            const char c = expression[i];
            if (c == matchOneChar) {
                m_states.push_back(State(AnyOne, 0));
            }
            else if (c == matchZeroOrMoreChar) {
                // Collapse a run of "zero or more" wildcards and look at what follows it.
                // The sentinel is kept in an int so that -1 survives where plain char is unsigned.
                int nextChar = 0;
                std::string::size_type j;
                for (j = i + 1; j < expression.length(); j++) {
                    const char k = expression[j];
                    if (k != matchZeroOrMoreChar) {
                        nextChar = (k == matchOneChar) ? -1 : k;
                        break;
                    }
                }
                // The following character is folded into this state, so skip it too.
                i = j;
                m_states.push_back(State(AnyUntil, nextChar));
            }
            else {
                m_states.push_back(State(Exact, c));
            }
        }
    }

    /// Returns true when `target` matches the compiled expression, starting
    /// from state `fromState` (used by the internal recursion).
    bool match(const std::string& target, size_t fromState = 0) const
    {
        std::string::size_type i = 0, slength = target.length();
        std::vector<State>::size_type j, vlength = m_states.size();
        for (j = fromState; j < vlength; j++) {
            const State& state = m_states[j];
            const Directive d = state.first;
            const int k = state.second;

            if (i >= slength) {
                // Target exhausted: only a trailing "match anything" state can still succeed.
                if (d == AnyUntil && !k) return true;
                return false;
            }

            switch (d) {
            case Exact:
                if (!equalChars(target[i], static_cast<char>(k))) return false;
                i++;
                break;
            case AnyOne:
                i++;
                break;
            case AnyUntil:
                if (k == -1) {
                    // means *?, equals ?, so just advance one character
                    i++;
                }
                else if (k == 0) {
                    // until end, always true
                    return true;
                }
                else {
                    // Try the right-most occurrence of k first and fall back towards i.
                    bool found = false;
                    std::string::size_type backIndex;
                    for (backIndex = slength - 1; backIndex >= i; backIndex--) {
                        if (equalChars(target[backIndex], static_cast<char>(k))) {
                            const std::string substring = target.substr(backIndex + 1, slength - (backIndex + 1));
                            if (match(substring, j + 1)) {
                                found = true;
                                i = backIndex + 1;
                                break;
                            }
                        }
                        // backIndex is unsigned: stop before it wraps around below zero.
                        if (!backIndex)
                            break;
                    }
                    if (!found)
                        return false;
                }
                break;
            }
        }
        if (m_matchEndOfLine && i != slength)
            return false;
        return true;
    }

    /// The literal prefix of the expression before its first wildcard.
    const std::string longestHeadMatchString() const
    {
        return m_longestHeadMatchString;
    }

    /// The expression this object was compiled from.
    const std::string expression() const
    {
        return m_expression;
    }

    bool isCaseSensitive() const
    {
        return m_caseSensitive;
    }

    char matchOneChar() const
    {
        return m_matchOneChar;
    }

    char matchZeroOrMoreChar() const
    {
        return m_matchZeroOrMoreChar;
    }

    /// Dumps the compiled states, one per line.
    friend std::ostream& operator<<(std::ostream& stream, const OVWildcard& wildcard);

protected:
    enum Directive {
        Exact,
        AnyOne,
        AnyUntil
    };

    /// (directive, argument) - see the class comment for the argument's meaning.
    typedef std::pair<Directive, int> State;

    /// Compares two characters, honouring the case-sensitivity setting.
    bool equalChars(char a, char b) const
    {
        if (m_caseSensitive)
            return a == b;
        // tolower() is only defined for values representable as unsigned char (or EOF);
        // a negative plain char, i.e. any non-ASCII byte, would be undefined behaviour.
        return std::tolower(static_cast<unsigned char>(a)) == std::tolower(static_cast<unsigned char>(b));
    }

    bool m_caseSensitive;
    bool m_matchEndOfLine;
    char m_matchOneChar;
    char m_matchZeroOrMoreChar;
    std::vector<State> m_states;
    std::string m_expression;
    std::string m_longestHeadMatchString;

public:
    /// One-shot helper: compiles `expression` and matches `text` against it.
    static const bool Match(const std::string& text, const std::string& expression, char matchOneChar = '?', char matchZeroOrMoreChar = '*', bool matchEndOfLine = true, bool caseSensitive = false)
    {
        OVWildcard exp(expression, matchOneChar, matchZeroOrMoreChar, matchEndOfLine, caseSensitive);
        return exp.match(text);
    }

    /// Compiles every expression in `expressions` with the same settings.
    static const std::vector<OVWildcard> WildcardsFromStrings(const std::vector<std::string>& expressions, char matchOneChar = '?', char matchZeroOrMoreChar = '*', bool matchEndOfLine = true, bool caseSensitive = false)
    {
        std::vector<OVWildcard> result;
        std::vector<std::string>::const_iterator iter = expressions.begin();
        for ( ; iter != expressions.end(); ++iter)
            result.push_back(OVWildcard(*iter, matchOneChar, matchZeroOrMoreChar, matchEndOfLine, caseSensitive));
        return result;
    }

    /// Returns true when `target` matches at least one of the expressions.
    static bool MultiWildcardMatchAny(const std::string& target, const std::vector<std::string>& expressions, char matchOneChar = '?', char matchZeroOrMoreChar = '*', bool matchEndOfLine = true, bool caseSensitive = false)
    {
        return MultiWildcardMatchAny(target, WildcardsFromStrings(expressions, matchOneChar, matchZeroOrMoreChar, matchEndOfLine, caseSensitive));
    }

    /// Returns true when `target` matches at least one pre-compiled wildcard.
    static bool MultiWildcardMatchAny(const std::string& target, const std::vector<OVWildcard>& expressions)
    {
        std::vector<OVWildcard>::const_iterator iter = expressions.begin();
        for ( ; iter != expressions.end(); ++iter) {
            if ((*iter).match(target))
                return true;
        }
        return false;
    }
};
inline ostream& operator<<(ostream& stream, const OVWildcard& wildcard)
{
vector<OVWildcard::State>::size_type i, size = wildcard.m_states.size();
for (i = 0; i < size; i++) {
const OVWildcard::State& state = wildcard.m_states[i];
stream << "State " << i << ": " << state.first << ", " << state.second << endl;
}
return stream;
}
}
#endif

View File

@ -267,37 +267,3 @@ class InputPhrase: NSObject {
super.init()
}
}
/// Helpers for translating between UTF-16 offsets and Swift Character offsets.
class StringUtils: NSObject {

    /// Returns the Character offset in `string` that corresponds to the given
    /// UTF-16 offset. An offset past the end yields `string.count`.
    static func convertToCharIndex(from utf16Index: Int, in string: String) -> Int {
        var consumedUnits = 0
        var charIndex = 0
        for character in string {
            guard consumedUnits < utf16Index else {
                return charIndex
            }
            consumedUnits += character.utf16.count
            charIndex += 1
        }
        return string.count
    }

    /// Returns the UTF-16 offset just after the Character that follows the
    /// given UTF-16 offset, or the end of the string when there is none.
    @objc (nextUtf16PositionForIndex:in:)
    static func nextUtf16Position(for index: Int, in string: String) -> Int {
        let current = convertToCharIndex(from: index, in: string)
        let target = current < string.count ? current + 1 : current
        return string.prefix(target).utf16.count
    }

    /// Returns the UTF-16 offset of the Character that precedes the given
    /// UTF-16 offset, or 0 when there is none.
    @objc (previousUtf16PositionForIndex:in:)
    static func previousUtf16Position(for index: Int, in string: String) -> Int {
        let current = convertToCharIndex(from: index, in: string)
        let target = current > 0 ? current - 1 : current
        return string.prefix(target).utf16.count
    }
}

View File

@ -11,7 +11,6 @@
#import "vChewingLM.h"
#import "UserOverrideModel.h"
#import "LanguageModelManager.h"
#import "OVUTF8Helper.h"
#import "KeyHandler.h"
#import "vChewing-Swift.h"
@ -19,7 +18,6 @@ using namespace std;
using namespace Taiyan::Mandarin;
using namespace Taiyan::Gramambular;
using namespace vChewing;
using namespace OpenVanilla;
NSString *const kBopomofoModeIdentifierCHT = @"org.atelierInmu.inputmethod.vChewing.TradBopomofo";
NSString *const kBopomofoModeIdentifierCHS = @"org.atelierInmu.inputmethod.vChewing.SimpBopomofo";
@ -1070,11 +1068,12 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot";
for (vector<NodeAnchor>::iterator wi = _walkedNodes.begin(), we = _walkedNodes.end(); wi != we; ++wi) {
if ((*wi).node) {
string nodeStr = (*wi).node->currentKeyValue().value;
vector<string> codepoints = OVUTF8Helper::SplitStringByCodePoint(nodeStr);
size_t codepointCount = codepoints.size();
NSString *valueString = [NSString stringWithUTF8String:nodeStr.c_str()];
[composingBuffer appendString:valueString];
NSArray *splited = [valueString split];
NSInteger codepointCount = splited.count;
NSString *readingString = [NSString stringWithUTF8String:(*wi).node->currentKeyValue().key.c_str()];
InputPhrase *phrase = [[InputPhrase alloc] initWithText:valueString reading:readingString];
@ -1092,7 +1091,7 @@ static NSString *const kGraphVizOutputfile = @"/tmp/vChewing-visualization.dot";
readingCursorIndex += spanningLength;
} else {
for (size_t i = 0; i < codepointCount && readingCursorIndex < builderCursorIndex; i++) {
composedStringCursorIndex += [[NSString stringWithUTF8String:codepoints[i].c_str()] length];
composedStringCursorIndex += [splited[i] length];
readingCursorIndex++;
}
}

View File

@ -0,0 +1,64 @@
/*
* StringUtils.swift
*
* Copyright 2021-2022 vChewing Project (3-Clause BSD License).
* Derived from 2011-2022 OpenVanilla Project (MIT License).
* Some rights reserved. See "LICENSE.TXT" for details.
*/
import Foundation
/// Utilities to convert the length of an NSString and a Swift string.
class StringUtils: NSObject {

    /// Maps an NSString (UTF-16) offset to the matching Swift Character offset.
    ///
    /// A single Character such as an emoji may be composed of more than one
    /// UTF-16 code unit, while the length of an NSString is the number of
    /// UTF-16 code units. The NSString and Swift String views of the same text
    /// therefore have different lengths once such a Character is present.
    /// This method walks the string and returns the offset of the first
    /// Character that starts at or after `utf16Index`, or `string.count` when
    /// the offset lies past the end.
    static func convertToCharIndex(from utf16Index: Int, in string: String) -> Int {
        var unitsSeen = 0
        var position = 0
        var iterator = string.makeIterator()
        while let character = iterator.next() {
            if unitsSeen >= utf16Index {
                return position
            }
            unitsSeen += character.utf16.count
            position += 1
        }
        return string.count
    }

    /// Returns the UTF-16 offset one Character forward from the given UTF-16
    /// offset; stays at the end of the string when already there.
    @objc (nextUtf16PositionForIndex:in:)
    static func nextUtf16Position(for index: Int, in string: String) -> Int {
        let charIndex = convertToCharIndex(from: index, in: string)
        // charIndex never exceeds string.count, so min() only clamps the +1 at the end.
        let advanced = min(charIndex + 1, string.count)
        let boundary = string.index(string.startIndex, offsetBy: advanced)
        return string[..<boundary].utf16.count
    }

    /// Returns the UTF-16 offset one Character backward from the given UTF-16
    /// offset; stays at 0 when already at the start.
    @objc (previousUtf16PositionForIndex:in:)
    static func previousUtf16Position(for index: Int, in string: String) -> Int {
        let charIndex = convertToCharIndex(from: index, in: string)
        // charIndex is never negative, so max() only clamps the -1 at the start.
        let retreated = max(charIndex - 1, 0)
        let boundary = string.index(string.startIndex, offsetBy: retreated)
        return string[..<boundary].utf16.count
    }
}
extension NSString {

    /// Number of Swift Characters in the string (as opposed to `length`,
    /// which counts UTF-16 code units).
    @objc var count: Int {
        let bridged = self as String
        return bridged.count
    }

    /// The string broken into one NSString per Swift Character, in order.
    @objc var split: [NSString] {
        var pieces: [NSString] = []
        for character in self as String {
            pieces.append(NSString(string: String(character)))
        }
        return pieces
    }
}

View File

@ -14,6 +14,7 @@
5B21711E279B9AD900F91A2B /* ArchiveUtil.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B21711C279B9AD700F91A2B /* ArchiveUtil.swift */; };
5B217126279BA37500F91A2B /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B217124279BA37300F91A2B /* AppDelegate.swift */; };
5B217128279BB22700F91A2B /* frmAboutWindow.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B217127279BB22700F91A2B /* frmAboutWindow.swift */; };
5B36562327AA9A2D0003DFDD /* StringUtils.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5B36562227AA9A2D0003DFDD /* StringUtils.swift */; };
5B42B64027876FDC00BB9B9F /* UserOverrideModel.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B42B63E27876FDC00BB9B9F /* UserOverrideModel.mm */; };
5B58E87F278413E7003EA2AD /* BSDLicense.txt in Resources */ = {isa = PBXBuildFile; fileRef = 5B58E87D278413E7003EA2AD /* BSDLicense.txt */; };
5B5F4F8E27928F9300922DC2 /* vChewingLM.mm in Sources */ = {isa = PBXBuildFile; fileRef = 5B5F4F8D27928F9300922DC2 /* vChewingLM.mm */; };
@ -106,6 +107,7 @@
5B21711C279B9AD700F91A2B /* ArchiveUtil.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ArchiveUtil.swift; sourceTree = "<group>"; };
5B217124279BA37300F91A2B /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
5B217127279BB22700F91A2B /* frmAboutWindow.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = frmAboutWindow.swift; sourceTree = "<group>"; };
5B36562227AA9A2D0003DFDD /* StringUtils.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StringUtils.swift; sourceTree = "<group>"; };
5B42B63E27876FDC00BB9B9F /* UserOverrideModel.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = UserOverrideModel.mm; sourceTree = "<group>"; };
5B42B63F27876FDC00BB9B9F /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserOverrideModel.h; sourceTree = "<group>"; };
5B42B64127877D6500BB9B9F /* zh-Hans */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hans"; path = "Source/zh-Hans.lproj/preferences.strings"; sourceTree = "<group>"; };
@ -205,8 +207,6 @@
6A0D4F1E15FC0EB100ABF4B3 /* Walker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Walker.h; sourceTree = "<group>"; };
6A0D4F2015FC0EB100ABF4B3 /* Mandarin.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = Mandarin.mm; sourceTree = "<group>"; };
6A0D4F2115FC0EB100ABF4B3 /* Mandarin.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Mandarin.hh; sourceTree = "<group>"; };
6A0D4F4115FC0EB100ABF4B3 /* OVUTF8Helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = OVUTF8Helper.h; sourceTree = "<group>"; };
6A0D4F4215FC0EB100ABF4B3 /* OVWildcard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = OVWildcard.h; sourceTree = "<group>"; };
6A0D4F4915FC0EE100ABF4B3 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = Source/en.lproj/InfoPlist.strings; sourceTree = "<group>"; };
6A0D4F4B15FC0EE100ABF4B3 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = Source/en.lproj/Localizable.strings; sourceTree = "<group>"; };
6A0D4F5415FC0EF900ABF4B3 /* zh-Hant */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = "zh-Hant"; path = "Source/zh-Hant.lproj/InfoPlist.strings"; sourceTree = "<group>"; };
@ -285,6 +285,7 @@
5BC2D2842793B434002C0BEC /* KeyValueBlobReader.h */,
5BC2D2862793B434002C0BEC /* KeyValueBlobReader.cpp */,
5BC772A927A5A1E800CA8391 /* InputState.swift */,
5B36562227AA9A2D0003DFDD /* StringUtils.swift */,
);
path = ControllerModules;
sourceTree = "<group>";
@ -300,7 +301,7 @@
5BDD25E0279D64FB00AA18F8 /* 3rdParty */ = {
isa = PBXGroup;
children = (
6A0D4F2215FC0EB100ABF4B3 /* OpenVanilla */,
6A0D4F1F15FC0EB100ABF4B3 /* OVMandarin */,
5BDD25E1279D64FB00AA18F8 /* AWFileHash */,
5BDD25E4279D64FB00AA18F8 /* SSZipArchive */,
);
@ -478,23 +479,13 @@
path = Gramambular;
sourceTree = "<group>";
};
6A0D4F1F15FC0EB100ABF4B3 /* Mandarin */ = {
6A0D4F1F15FC0EB100ABF4B3 /* OVMandarin */ = {
isa = PBXGroup;
children = (
6A0D4F2015FC0EB100ABF4B3 /* Mandarin.mm */,
6A0D4F2115FC0EB100ABF4B3 /* Mandarin.hh */,
);
path = Mandarin;
sourceTree = "<group>";
};
6A0D4F2215FC0EB100ABF4B3 /* OpenVanilla */ = {
isa = PBXGroup;
children = (
6A0D4F1F15FC0EB100ABF4B3 /* Mandarin */,
6A0D4F4115FC0EB100ABF4B3 /* OVUTF8Helper.h */,
6A0D4F4215FC0EB100ABF4B3 /* OVWildcard.h */,
);
path = OpenVanilla;
path = OVMandarin;
sourceTree = "<group>";
};
6A0D4F4715FC0EB900ABF4B3 /* Resources */ = {
@ -741,6 +732,7 @@
5BD13F482794F0A6000E429F /* PhraseReplacementMap.mm in Sources */,
5BDD25FA279D6D1200AA18F8 /* mztools.m in Sources */,
5B810D9F27A3A5E50032C1A9 /* LMConsolidator.mm in Sources */,
5B36562327AA9A2D0003DFDD /* StringUtils.swift in Sources */,
5BC3EE1C278FC48C00F5E44C /* VTCandidateController.swift in Sources */,
5BDF2D032791C71200838ADB /* NonModalAlertWindowController.swift in Sources */,
5BC3EE1D278FC48C00F5E44C /* HorizontalCandidateController.swift in Sources */,