實作簡單的用戶選字記憶模型
這個模型基本上只是根據游標前的兩個 unigram 記憶當前的用戶選字。當有超過一個以上的用戶選字時,則要給每個選字評分。評分標準是選字頻率乘上一個透過半衰期遞減的最近選字經歷時間。如此一來我們在「少用但最近選過」及「常用但最近少選」之間取得一個平衡。半衰期透過經驗法則決定。目前這個簡易模型並不存入磁碟,因此下一次重開機後就會洗掉重來。目前這樣選擇純粹是因為模型有半衰期,因此長時間存放後還是會遺忘。這個模型的好處是對既有詞庫提供詞的影響很小,對於連續單字詞的 override 有還不錯的幫助。如此對於人名、地名、公司名等專有名詞,應該可以減少選字的頻率。這個模型應用起來的缺點是,如果用戶修改的字詞原來是個雙字詞,例如先前的兩個 unigram 分別是 A, BB ,而用戶想改的是 BB 的第二個字,使選完後的三個字分別是 A, B', C,這個 C 往往是記不起來的,但如果一開始用戶逐字選取,亦即在 BB 只出現 B 時就選取 B' 然後再打 C ,則 A, B', C 這個組合往往能被正確記憶。實際發生原因在此不討論,但跟底層所用的組字網架的架構有關。確實要改進的話得要從底層重新架構來下手,但至少目前這個模型給的建議偏保守,不至干擾原有的預設選字。衡諸得失,這個模型提供一些邊際上的改善,應該還是值得採用的。
This commit is contained in:
parent
a17438b67a
commit
d672136843
|
@ -49,6 +49,7 @@
|
||||||
6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */ = {isa = PBXBuildFile; fileRef = 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */; };
|
6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */ = {isa = PBXBuildFile; fileRef = 6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */; };
|
||||||
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; };
|
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */ = {isa = PBXBuildFile; fileRef = D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */; };
|
||||||
D48550A325EBE689006A204C /* OpenCC in Frameworks */ = {isa = PBXBuildFile; productRef = D48550A225EBE689006A204C /* OpenCC */; };
|
D48550A325EBE689006A204C /* OpenCC in Frameworks */ = {isa = PBXBuildFile; productRef = D48550A225EBE689006A204C /* OpenCC */; };
|
||||||
|
6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */; };
|
||||||
/* End PBXBuildFile section */
|
/* End PBXBuildFile section */
|
||||||
|
|
||||||
/* Begin PBXContainerItemProxy section */
|
/* Begin PBXContainerItemProxy section */
|
||||||
|
@ -211,6 +212,8 @@
|
||||||
6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = OVNonModalAlertWindowController.m; sourceTree = "<group>"; };
|
6AFF97F1253B299E007F1C49 /* OVNonModalAlertWindowController.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = OVNonModalAlertWindowController.m; sourceTree = "<group>"; };
|
||||||
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; };
|
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "McBopomofo-Bridging-Header.h"; sourceTree = "<group>"; };
|
||||||
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; };
|
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenCCBridge.swift; sourceTree = "<group>"; };
|
||||||
|
6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = UserOverrideModel.cpp; sourceTree = "<group>"; };
|
||||||
|
6AE30A481F7F40B7008735BD /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserOverrideModel.h; sourceTree = "<group>"; };
|
||||||
/* End PBXFileReference section */
|
/* End PBXFileReference section */
|
||||||
|
|
||||||
/* Begin PBXFrameworksBuildPhase section */
|
/* Begin PBXFrameworksBuildPhase section */
|
||||||
|
@ -286,6 +289,10 @@
|
||||||
6A0D4ECC15FC0D6400ABF4B3 /* PreferencesWindowController.m */,
|
6A0D4ECC15FC0D6400ABF4B3 /* PreferencesWindowController.m */,
|
||||||
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */,
|
D427A9C025ED28CC005D43E0 /* OpenCCBridge.swift */,
|
||||||
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */,
|
D427A9BF25ED28CC005D43E0 /* McBopomofo-Bridging-Header.h */,
|
||||||
|
6A0D4ECD15FC0D6400ABF4B3 /* UpdateNotificationController.h */,
|
||||||
|
6A0D4ECE15FC0D6400ABF4B3 /* UpdateNotificationController.m */,
|
||||||
|
6AE30A471F7F40B7008735BD /* UserOverrideModel.cpp */,
|
||||||
|
6AE30A481F7F40B7008735BD /* UserOverrideModel.h */,
|
||||||
);
|
);
|
||||||
path = Source;
|
path = Source;
|
||||||
sourceTree = "<group>";
|
sourceTree = "<group>";
|
||||||
|
@ -647,6 +654,7 @@
|
||||||
6A0D4F0015FC0DA600ABF4B3 /* VTHorizontalCandidateView.m in Sources */,
|
6A0D4F0015FC0DA600ABF4B3 /* VTHorizontalCandidateView.m in Sources */,
|
||||||
6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */,
|
6AFF97F3253B299E007F1C49 /* OVNonModalAlertWindowController.m in Sources */,
|
||||||
6A0D4F0115FC0DA600ABF4B3 /* VTVerticalCandidateController.m in Sources */,
|
6A0D4F0115FC0DA600ABF4B3 /* VTVerticalCandidateController.m in Sources */,
|
||||||
|
6AE30A491F7F40B7008735BD /* UserOverrideModel.cpp in Sources */,
|
||||||
6A0D4F0215FC0DA600ABF4B3 /* VTVerticalCandidateTableView.m in Sources */,
|
6A0D4F0215FC0DA600ABF4B3 /* VTVerticalCandidateTableView.m in Sources */,
|
||||||
6A0D4F0315FC0DA600ABF4B3 /* VTVerticalKeyLabelStripView.m in Sources */,
|
6A0D4F0315FC0DA600ABF4B3 /* VTVerticalKeyLabelStripView.m in Sources */,
|
||||||
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */,
|
D427A9C125ED28CC005D43E0 /* OpenCCBridge.swift in Sources */,
|
||||||
|
|
|
@ -0,0 +1,202 @@
|
||||||
|
//
|
||||||
|
// UserOverrideModel.cpp
|
||||||
|
//
|
||||||
|
// Copyright (c) 2017 The McBopomofo Project.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person
|
||||||
|
// obtaining a copy of this software and associated documentation
|
||||||
|
// files (the "Software"), to deal in the Software without
|
||||||
|
// restriction, including without limitation the rights to use,
|
||||||
|
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the
|
||||||
|
// Software is furnished to do so, subject to the following
|
||||||
|
// conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be
|
||||||
|
// included in all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
// OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "UserOverrideModel.h"
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <cmath>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
using namespace McBopomofo;
|
||||||
|
|
||||||
|
// About 20 generations.
|
||||||
|
static const double DecayThreshould = 1.0 / 1048576.0;
|
||||||
|
|
||||||
|
static double Score(size_t eventCount,
|
||||||
|
size_t totalCount,
|
||||||
|
double eventTimestamp,
|
||||||
|
double timestamp,
|
||||||
|
double lambda);
|
||||||
|
static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
|
||||||
|
size_t cursorIndex);
|
||||||
|
|
||||||
|
// Constructs a model that keeps at most `capacity` context keys.
//
// `decayConstant` acts as a half-life: the stored exponent is
// ln(0.5) / decayConstant, and Score() later evaluates
// exp(elapsed * exponent), which equals 0.5 ^ (elapsed / decayConstant).
// It must therefore be expressed in the same unit as the timestamps passed to
// observe() and suggest().
//
// Both arguments must be strictly positive. A zero decayConstant would yield
// an infinite exponent (and a NaN score at zero elapsed time); a negative one
// would make scores grow with age instead of decaying.
UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant)
    : m_capacity(capacity),
      m_decayExponent(std::log(0.5) / decayConstant) {
    assert(m_capacity > 0);
    assert(decayConstant > 0.0);
}
|
||||||
|
|
||||||
|
void UserOverrideModel::observe(const std::vector<NodeAnchor>& walkedNodes,
|
||||||
|
size_t cursorIndex,
|
||||||
|
const string& candidate,
|
||||||
|
double timestamp) {
|
||||||
|
string key = WalkedNodesToKey(walkedNodes, cursorIndex);
|
||||||
|
auto mapIter = m_lruMap.find(key);
|
||||||
|
if (mapIter == m_lruMap.end()) {
|
||||||
|
auto keyValuePair = KeyObservationPair(key, Observation());
|
||||||
|
Observation& observation = keyValuePair.second;
|
||||||
|
observation.update(candidate, timestamp);
|
||||||
|
|
||||||
|
m_lruList.push_front(keyValuePair);
|
||||||
|
auto listIter = m_lruList.begin();
|
||||||
|
auto lruKeyValue = std::pair<std::string,
|
||||||
|
std::list<KeyObservationPair>::iterator>(key, listIter);
|
||||||
|
m_lruMap.insert(lruKeyValue);
|
||||||
|
|
||||||
|
if (m_lruList.size() > m_capacity) {
|
||||||
|
auto lastKeyValuePair = m_lruList.end();
|
||||||
|
--lastKeyValuePair;
|
||||||
|
m_lruMap.erase(lastKeyValuePair->first);
|
||||||
|
m_lruList.pop_back();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
auto listIter = mapIter->second;
|
||||||
|
m_lruList.splice(m_lruList.begin(), m_lruList, listIter);
|
||||||
|
|
||||||
|
auto& keyValuePair = *listIter;
|
||||||
|
Observation& observation = keyValuePair.second;
|
||||||
|
observation.update(candidate, timestamp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the remembered candidate with the highest score for the context
// identified by WalkedNodesToKey(walkedNodes, cursorIndex), evaluated at time
// `timestamp`.
//
// Returns an empty string when the context is unknown or when every
// remembered candidate scores zero. This lookup does not change the
// recency order of the entries.
string UserOverrideModel::suggest(const std::vector<NodeAnchor>& walkedNodes,
                                  size_t cursorIndex,
                                  double timestamp) {
    const string key = WalkedNodesToKey(walkedNodes, cursorIndex);
    const auto found = m_lruMap.find(key);
    if (found == m_lruMap.end()) {
        return string();
    }

    const Observation& observation = found->second->second;

    string best;
    double bestScore = 0.0;
    for (const auto& entry : observation.overrides) {
        const Override& chosen = entry.second;
        const double candidateScore = Score(chosen.count,
                                            observation.count,
                                            chosen.timestamp,
                                            timestamp,
                                            m_decayExponent);
        // Zero means "decayed away". The strict comparison keeps the
        // earlier candidate (in map order) when two scores tie.
        if (candidateScore != 0.0 && candidateScore > bestScore) {
            best = entry.first;
            bestScore = candidateScore;
        }
    }
    return best;
}
|
||||||
|
|
||||||
|
void UserOverrideModel::Observation::update(const string& candidate,
|
||||||
|
double timestamp) {
|
||||||
|
count++;
|
||||||
|
auto& o = overrides[candidate];
|
||||||
|
o.timestamp = timestamp;
|
||||||
|
o.count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
static double Score(size_t eventCount,
|
||||||
|
size_t totalCount,
|
||||||
|
double eventTimestamp,
|
||||||
|
double timestamp,
|
||||||
|
double lambda) {
|
||||||
|
double decay = exp((timestamp - eventTimestamp) * lambda);
|
||||||
|
if (decay < DecayThreshould) {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double prob = (double)eventCount / (double)totalCount;
|
||||||
|
return prob * decay;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the context key used to index remembered user choices.
//
// Nodes are taken from the front of `walkedNodes` until their accumulated
// spanningLength reaches `cursorIndex` (at least one node is always taken when
// the vector is non-empty). The last node taken is the "current" one; the two
// nodes before it, when present, provide the context.
//
// The resulting key has the form
//     ((anteriorKey,anteriorValue),(prevKey,prevValue),currentKey)
// where a missing context node is written as "()". Only the key of the
// current node is included, not its value.
//
// Returns an empty string when `walkedNodes` is empty.
//
// NOTE(review): every visited anchor's `node` pointer is dereferenced without
// a null check, as in the original code - presumably walked nodes are never
// null; confirm against the caller.
static string WalkedNodesToKey(const std::vector<NodeAnchor>& walkedNodes,
                               size_t cursorIndex) {
    typedef std::vector<NodeAnchor>::const_iterator NodeIter;
    typedef std::vector<NodeAnchor>::const_reverse_iterator ReverseNodeIter;

    // Locate the position one past the node that reaches the cursor. This
    // walks the input directly rather than copying the anchors first.
    NodeIter pastCursor = walkedNodes.begin();
    size_t coveredLength = 0;
    while (pastCursor != walkedNodes.end()) {
        coveredLength += pastCursor->spanningLength;
        ++pastCursor;
        if (coveredLength >= cursorIndex) {
            break;
        }
    }

    // Walk backwards from the current node towards the start.
    ReverseNodeIter r(pastCursor);
    const ReverseNodeIter rend = walkedNodes.rend();
    if (r == rend) {
        return "";
    }

    const string current = r->node->currentKeyValue().key;
    ++r;

    // Formats the node at `r` as "(key,value)" and steps past it, or yields
    // "()" when there are no more nodes.
    auto consumeContext = [&r, &rend]() -> string {
        if (r == rend) {
            return "()";
        }
        std::stringstream part;
        part << "("
             << r->node->currentKeyValue().key
             << ","
             << r->node->currentKeyValue().value
             << ")";
        ++r;
        return part.str();
    };

    // Order matters: the nearer node is consumed first.
    const string prev = consumeContext();
    const string anterior = consumeContext();

    std::stringstream key;
    key << "(" << anterior << "," << prev << "," << current << ")";
    return key.str();
}
|
|
@ -0,0 +1,81 @@
|
||||||
|
//
|
||||||
|
// UserOverrideModel.h
|
||||||
|
//
|
||||||
|
// Copyright (c) 2017 The McBopomofo Project.
|
||||||
|
//
|
||||||
|
// Permission is hereby granted, free of charge, to any person
|
||||||
|
// obtaining a copy of this software and associated documentation
|
||||||
|
// files (the "Software"), to deal in the Software without
|
||||||
|
// restriction, including without limitation the rights to use,
|
||||||
|
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
// copies of the Software, and to permit persons to whom the
|
||||||
|
// Software is furnished to do so, subject to the following
|
||||||
|
// conditions:
|
||||||
|
//
|
||||||
|
// The above copyright notice and this permission notice shall be
|
||||||
|
// included in all copies or substantial portions of the Software.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
// OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef USEROVERRIDEMODEL_H
|
||||||
|
#define USEROVERRIDEMODEL_H
|
||||||
|
|
||||||
|
#include <list>
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "Gramambular.h"
|
||||||
|
|
||||||
|
namespace McBopomofo {
|
||||||
|
|
||||||
|
using namespace Formosa::Gramambular;
|
||||||
|
|
||||||
|
class UserOverrideModel {
|
||||||
|
public:
|
||||||
|
UserOverrideModel(size_t capacity, double decayConstant);
|
||||||
|
|
||||||
|
void observe(const std::vector<NodeAnchor>& walkedNodes,
|
||||||
|
size_t cursorIndex,
|
||||||
|
const string& candidate,
|
||||||
|
double timestamp);
|
||||||
|
|
||||||
|
string suggest(const std::vector<NodeAnchor>& walkedNodes,
|
||||||
|
size_t cursorIndex,
|
||||||
|
double timestamp);
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct Override {
|
||||||
|
size_t count;
|
||||||
|
double timestamp;
|
||||||
|
|
||||||
|
Override() : count(0), timestamp(0.0) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Observation {
|
||||||
|
size_t count;
|
||||||
|
std::map<std::string, Override> overrides;
|
||||||
|
|
||||||
|
Observation() : count(0) {}
|
||||||
|
void update(const string& candidate, double timestamp);
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef std::pair<std::string, Observation> KeyObservationPair;
|
||||||
|
|
||||||
|
size_t m_capacity;
|
||||||
|
double m_decayExponent;
|
||||||
|
std::list<KeyObservationPair> m_lruList;
|
||||||
|
std::map<std::string, std::list<KeyObservationPair>::iterator> m_lruMap;
|
||||||
|
};
|
||||||
|
|
||||||
|
}; // namespace McBopomofo
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue