From 3eecdca587535866dee252fcfaebd45fe1836dfd Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Sat, 29 Jan 2022 17:51:21 +0800 Subject: [PATCH] LMConsolidator // Regex Patch for proper match. - Previously it doesn't match non-break whitespace. --- Source/Engine/LanguageModel/LMConsolidator.mm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Source/Engine/LanguageModel/LMConsolidator.mm b/Source/Engine/LanguageModel/LMConsolidator.mm index bfba1f80..8e6593c1 100644 --- a/Source/Engine/LanguageModel/LMConsolidator.mm +++ b/Source/Engine/LanguageModel/LMConsolidator.mm @@ -49,11 +49,13 @@ bool LMConsolidator::ConsolidateContent(const char *path, bool shouldsort) { vecEntry.push_back(zfdBuffer); } // 第一遍 for 用來統整每行內的內容。 - regex sedCJKWhiteSpace(" "), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。 + // regex sedCJKWhiteSpace("\\x{3000}"), sedNonBreakWhiteSpace("\\x{A0}"), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // 這樣寫會導致輸入法敲不了任何字,推測 Xcode 13 支援的 cpp / objCpp 可能對某些 Regex 寫法有相容性問題。 + regex sedCJKWhiteSpace(" "), sedNonBreakWhiteSpace(" "), sedWhiteSpace("\\s+"), sedLeadingSpace("^\\s"), sedTrailingSpace("\\s$"); // RegEx 先定義好。 for(int i=0;i