diff --git a/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.cpp b/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.cpp deleted file mode 100644 index 8b4fb8ac..00000000 --- a/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.cpp +++ /dev/null @@ -1,220 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include "UserOverrideModel.h" - -#include -#include -#include - -namespace vChewing -{ - -// About 20 generations. -static const double DecayThreshold = 1.0 / 1048576.0; - -static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda); -static bool IsEndingPunctuation(const std::string &value); -static std::string WalkedNodesToKey(const std::vector &walkedNodes, size_t cursorIndex); - -UserOverrideModel::UserOverrideModel(size_t capacity, double decayConstant) : m_capacity(capacity) -{ - assert(m_capacity > 0); - m_decayExponent = log(0.5) / decayConstant; -} - -void UserOverrideModel::observe(const std::vector &walkedNodes, size_t cursorIndex, - const std::string &candidate, double timestamp) -{ - std::string key = WalkedNodesToKey(walkedNodes, cursorIndex); - auto mapIter = m_lruMap.find(key); - if (mapIter == m_lruMap.end()) - { - auto keyValuePair = KeyObservationPair(key, Observation()); - Observation &observation = keyValuePair.second; - observation.update(candidate, timestamp); - - m_lruList.push_front(keyValuePair); - auto listIter = m_lruList.begin(); - auto lruKeyValue = std::pair::iterator>(key, listIter); - m_lruMap.insert(lruKeyValue); - - if (m_lruList.size() > m_capacity) - { - auto lastKeyValuePair = m_lruList.end(); - --lastKeyValuePair; - m_lruMap.erase(lastKeyValuePair->first); - m_lruList.pop_back(); - } - } - else - { - auto listIter = mapIter->second; - m_lruList.splice(m_lruList.begin(), m_lruList, listIter); - - auto &keyValuePair = *listIter; - Observation &observation = keyValuePair.second; - observation.update(candidate, timestamp); - } -} - -std::string UserOverrideModel::suggest(const std::vector &walkedNodes, size_t cursorIndex, - double timestamp) -{ - std::string key = WalkedNodesToKey(walkedNodes, cursorIndex); - auto mapIter = m_lruMap.find(key); - if (mapIter == m_lruMap.end()) - { - return std::string(); - } - - auto listIter = mapIter->second; - auto &keyValuePair = *listIter; - const Observation &observation = keyValuePair.second; - - std::string candidate; - double score = 0.0; - for (auto i = observation.overrides.begin(); i != observation.overrides.end(); ++i) - { - const Override &o = i->second; - double overrideScore = Score(o.count, observation.count, o.timestamp, timestamp, m_decayExponent); - if (overrideScore == 0.0) - { - continue; - } - - if (overrideScore > score) - { - candidate = i->first; - score = overrideScore; - } - } - return candidate; -} - -void UserOverrideModel::Observation::update(const std::string &candidate, double timestamp) -{ - count++; - auto &o = overrides[candidate]; - o.timestamp = timestamp; - o.count++; -} - -static double Score(size_t eventCount, size_t totalCount, double eventTimestamp, double timestamp, double lambda) -{ - double decay = exp((timestamp - eventTimestamp) * lambda); - if (decay < DecayThreshold) - { - return 0.0; - } - - double prob = (double)eventCount / (double)totalCount; - return prob * decay; -} - -static bool IsEndingPunctuation(const std::string &value) -{ - return value == "," || value == "。" || value == "!" || value == "?" || value == "」" || value == "』" || - value == "”" || value == "’"; -} -static std::string WalkedNodesToKey(const std::vector &walkedNodes, size_t cursorIndex) -{ - std::stringstream s; - std::vector n; - size_t ll = 0; - for (std::vector::const_iterator i = walkedNodes.begin(); i != walkedNodes.end(); ++i) - { - const auto &nn = *i; - n.push_back(nn); - ll += nn.spanningLength; - if (ll >= cursorIndex) - { - break; - } - } - - std::vector::const_reverse_iterator r = n.rbegin(); - - if (r == n.rend()) - { - return ""; - } - - std::string current = (*r).node->currentKeyValue().key; - ++r; - - s.clear(); - s.str(std::string()); - if (r != n.rend()) - { - std::string value = (*r).node->currentKeyValue().value; - if (IsEndingPunctuation(value)) - { - s << "()"; - r = n.rend(); - } - else - { - s << "(" << (*r).node->currentKeyValue().key << "," << value << ")"; - ++r; - } - } - else - { - s << "()"; - } - std::string prev = s.str(); - - s.clear(); - s.str(std::string()); - if (r != n.rend()) - { - std::string value = (*r).node->currentKeyValue().value; - if (IsEndingPunctuation(value)) - { - s << "()"; - r = n.rend(); - } - else - { - s << "(" << (*r).node->currentKeyValue().key << "," << value << ")"; - ++r; - } - } - else - { - s << "()"; - } - std::string anterior = s.str(); - - s.clear(); - s.str(std::string()); - s << "(" << anterior << "," << prev << "," << current << ")"; - - return s.str(); -} - -} // namespace vChewing diff --git a/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.h b/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.h deleted file mode 100644 index 6479c2d6..00000000 --- a/Source/Modules/LangModelRelated/OldFileReferences/UserOverrideModel.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2011 and onwards The OpenVanilla Project (MIT License). -// All possible vChewing-specific modifications are of: -// (c) 2021 and onwards The vChewing Project (MIT-NTL License). -/* -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -1. The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -2. No trademark license is granted to use the trade names, trademarks, service -marks, or product names of Contributor, except as required to fulfill notice -requirements above. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#ifndef USEROVERRIDEMODEL_H -#define USEROVERRIDEMODEL_H - -#include -#include - -#include "Gramambular.h" - -namespace vChewing -{ - -using namespace Gramambular; - -class UserOverrideModel -{ - public: - UserOverrideModel(size_t capacity, double decayConstant); - - void observe(const std::vector &walkedNodes, size_t cursorIndex, - const std::string &candidate, double timestamp); - - std::string suggest(const std::vector &walkedNodes, size_t cursorIndex, double timestamp); - - private: - struct Override - { - size_t count; - double timestamp; - - Override() : count(0), timestamp(0.0) - { - } - }; - - struct Observation - { - size_t count; - std::map overrides; - - Observation() : count(0) - { - } - void update(const std::string &candidate, double timestamp); - }; - - typedef std::pair KeyObservationPair; - - size_t m_capacity; - double m_decayExponent; - std::list m_lruList; - std::map::iterator> m_lruMap; -}; - -}; // namespace vChewing - -#endif diff --git a/vChewing.xcodeproj/project.pbxproj b/vChewing.xcodeproj/project.pbxproj index dea262d5..504beb88 100644 --- a/vChewing.xcodeproj/project.pbxproj +++ b/vChewing.xcodeproj/project.pbxproj @@ -318,8 +318,6 @@ D47B92BF27972AC800458394 /* main.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = main.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; D47F7DCD278BFB57002F9DD7 /* ctlPrefWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlPrefWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; D47F7DCF278C0897002F9DD7 /* ctlNonModalAlertWindow.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlNonModalAlertWindow.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; - D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = UserOverrideModel.h; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; - D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; lineEnding = 0; path = UserOverrideModel.cpp; sourceTree = ""; tabWidth = 4; usesTabs = 0; }; D4A13D5927A59D5C003BE359 /* ctlInputMethod.swift */ = {isa = PBXFileReference; explicitFileType = sourcecode.swift; fileEncoding = 4; indentWidth = 2; lineEnding = 0; path = ctlInputMethod.swift; sourceTree = ""; tabWidth = 2; usesTabs = 0; }; D4E33D8927A838CF006DB1CF /* Base */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = Base; path = Base.lproj/Localizable.strings; sourceTree = ""; }; D4E33D8E27A838F0006DB1CF /* Base */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = Base; path = Base.lproj/InfoPlist.strings; sourceTree = ""; }; @@ -470,7 +468,6 @@ 5B62A32427AE757300A19448 /* LangModelRelated */ = { isa = PBXGroup; children = ( - 5B62A32527AE758000A19448 /* OldFileReferences */, 5B407308281672610023DFFF /* SubLMs */, 5B949BDA2816DDBC00D87B5D /* LMConsolidator.swift */, 5BD0113A28180D6100609769 /* LMInstantiator.swift */, @@ -479,15 +476,6 @@ path = LangModelRelated; sourceTree = ""; }; - 5B62A32527AE758000A19448 /* OldFileReferences */ = { - isa = PBXGroup; - children = ( - D47F7DD2278C1263002F9DD7 /* UserOverrideModel.cpp */, - D47F7DD1278C1263002F9DD7 /* UserOverrideModel.h */, - ); - path = OldFileReferences; - sourceTree = ""; - }; 5B62A33027AE78E500A19448 /* Resources */ = { isa = PBXGroup; children = (