From 41b8693430ec02f49083ab35efdc5e4f0f2fb60e Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Wed, 10 Jul 2024 19:46:09 +0800 Subject: [PATCH] use regex cache --- include/util/taoserror.h | 1 + include/util/tcompare.h | 3 +- source/dnode/mgmt/node_mgmt/src/dmEnv.c | 3 + source/libs/parser/src/parUtil.c | 4 +- source/libs/scalar/src/sclvector.c | 2 + source/util/src/tcompare.c | 138 +++++++++++++++++++----- source/util/src/terror.c | 1 + source/util/src/tworker.c | 3 - 8 files changed, 125 insertions(+), 30 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 24f9d041fc..8efd67f745 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -836,6 +836,7 @@ int32_t taosGetErrSize(); #define TSDB_CODE_PAR_TBNAME_DUPLICATED TAOS_DEF_ERROR_CODE(0, 0x267E) #define TSDB_CODE_PAR_TAG_NAME_DUPLICATED TAOS_DEF_ERROR_CODE(0, 0x267F) #define TSDB_CODE_PAR_NOT_ALLOWED_DIFFERENT_BY_ROW_FUNC TAOS_DEF_ERROR_CODE(0, 0x2680) +#define TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR TAOS_DEF_ERROR_CODE(0, 0x2681) #define TSDB_CODE_PAR_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x26FF) //planner diff --git a/include/util/tcompare.h b/include/util/tcompare.h index 9694bee92d..4f574a9b79 100644 --- a/include/util/tcompare.h +++ b/include/util/tcompare.h @@ -45,6 +45,8 @@ typedef struct SPatternCompareInfo { TdUcs4 umatchOne; // unicode version matchOne } SPatternCompareInfo; +int32_t InitRegexCache(); +void DestroyRegexCache(); int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t ssize, const SPatternCompareInfo *pInfo); int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, size_t ssize, const SPatternCompareInfo *pInfo); @@ -83,7 +85,6 @@ int32_t compareLenBinaryVal(const void *pLeft, const void *pRight); int32_t comparestrRegexMatch(const void *pLeft, const void *pRight); int32_t comparestrRegexNMatch(const void *pLeft, const void *pRight); -void DestoryThreadLocalRegComp(); int32_t 
comparewcsRegexMatch(const void *pLeft, const void *pRight); int32_t comparewcsRegexNMatch(const void *pLeft, const void *pRight); diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index 4be1af30b5..46f9965d1a 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -18,6 +18,7 @@ #include "audit.h" #include "libs/function/tudf.h" #include "tgrant.h" +#include "tcompare.h" #define DM_INIT_AUDIT() \ do { \ @@ -163,6 +164,7 @@ int32_t dmInit() { if (dmInitMonitor() != 0) return -1; if (dmInitAudit() != 0) return -1; if (dmInitDnode(dmInstance()) != 0) return -1; + if (InitRegexCache() != 0) return -1; #if defined(USE_S3) if (s3Begin() != 0) return -1; #endif @@ -192,6 +194,7 @@ void dmCleanup() { udfStopUdfd(); taosStopCacheRefreshWorker(); dmDiskClose(); + DestroyRegexCache(); #if defined(USE_S3) s3End(); diff --git a/source/libs/parser/src/parUtil.c b/source/libs/parser/src/parUtil.c index d67c7d306f..e6b6bcc903 100644 --- a/source/libs/parser/src/parUtil.c +++ b/source/libs/parser/src/parUtil.c @@ -223,7 +223,9 @@ static char* getSyntaxErrFormat(int32_t errCode) { return "Tag name:%s duplicated"; case TSDB_CODE_PAR_NOT_ALLOWED_DIFFERENT_BY_ROW_FUNC: return "Some functions cannot appear in the select list at the same time"; - default: + case TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR: + return "Syntax error in regular expression"; + default: return "Unknown error"; } } diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index c5789a65ca..673919b2f5 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -1667,6 +1667,7 @@ int32_t doVectorCompareImpl(SScalarParam *pLeft, SScalarParam *pRight, SScalarPa } } else { for (int32_t i = startIndex; i < numOfRows && i >= 0; i += step) { + if (terrno != TSDB_CODE_SUCCESS) break; int32_t leftIndex = (i >= pLeft->numOfRows) ? 
0 : i; int32_t rightIndex = (i >= pRight->numOfRows) ? 0 : i; @@ -1688,6 +1689,7 @@ int32_t doVectorCompareImpl(SScalarParam *pLeft, SScalarParam *pRight, SScalarPa } else { // if (GET_PARAM_TYPE(pLeft) == TSDB_DATA_TYPE_JSON || GET_PARAM_TYPE(pRight) == TSDB_DATA_TYPE_JSON) { for (int32_t i = startIndex; i < numOfRows && i >= startIndex; i += step) { + if (terrno != TSDB_CODE_SUCCESS) break; int32_t leftIndex = (i >= pLeft->numOfRows) ? 0 : i; int32_t rightIndex = (i >= pRight->numOfRows) ? 0 : i; diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index 26122a4a29..7f14f3a1e0 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -1203,54 +1203,142 @@ int32_t comparestrRegexNMatch(const void *pLeft, const void *pRight) { return comparestrRegexMatch(pLeft, pRight) ? 0 : 1; } -static threadlocal regex_t pRegex; -static threadlocal char *pOldPattern = NULL; -static regex_t *threadGetRegComp(const char *pPattern) { - if (NULL != pOldPattern) { - if( strcmp(pOldPattern, pPattern) == 0) { - return &pRegex; - } else { - DestoryThreadLocalRegComp(); +/* One compiled pattern held in the shared regex cache. */ +typedef struct UsingRegex { + regex_t pRegex; /* filled in by regcomp() */ + int32_t usingCount; /* callers currently holding the entry; eviction requires 0 */ + int32_t lastUsedTime; /* taosGetTimestampSec() value stored by recycleRegex() */ +} UsingRegex; + +/* Process-wide cache mapping pattern text to a UsingRegex pointer. */ +typedef struct RegexCache { + SHashObj *regexHash; + int32_t regexCaheSize; /* entry count at which clearOlderRegex() starts evicting */ + TdThreadRwlock regexLock; + int32_t lastClearTime; /* taosGetTimestampSec() of the most recent eviction */ +} RegexCache; +static RegexCache sRegexCache; +#define MAX_REGEX_CACHE_SIZE 20 +#define REGEX_CACHE_CLEAR_TIME 30 /* compared against taosGetTimestampSec() deltas */ + +/* Set up the cache lock and hash table. Returns 0 on success, -1 on failure. */ +int32_t InitRegexCache() { + if (taosThreadRwlockInit(&sRegexCache.regexLock, NULL) != 0) { + uError("failed to create RegexCache lock"); + return -1; + } + sRegexCache.regexHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); + if (sRegexCache.regexHash == NULL) { + uError("failed to create RegexCache"); + taosThreadRwlockDestroy(&sRegexCache.regexLock); /* do not leak the lock initialised above */ + return -1; + } + sRegexCache.regexCaheSize = MAX_REGEX_CACHE_SIZE; + sRegexCache.lastClearTime = taosGetTimestampSec(); + return 0; +} + +/* Free every cached regex, then the hash table and the lock. */ +void DestroyRegexCache(){ + UsingRegex
**ppUsingRegex = taosHashIterate(sRegexCache.regexHash, NULL); + while ((ppUsingRegex != NULL)) { + regfree(&(*ppUsingRegex)->pRegex); + taosMemoryFree(*ppUsingRegex); + ppUsingRegex = taosHashIterate(sRegexCache.regexHash, ppUsingRegex); + } + taosHashCleanup(sRegexCache.regexHash); + taosThreadRwlockDestroy(&sRegexCache.regexLock); +} + +/* Evict idle entries once the cache is full and REGEX_CACHE_CLEAR_TIME has passed since the last eviction. */ +static void clearOlderRegex() { + if (taosGetTimestampSec() - sRegexCache.lastClearTime < REGEX_CACHE_CLEAR_TIME || + taosHashGetSize(sRegexCache.regexHash) < sRegexCache.regexCaheSize) { + return; + } + taosThreadRwlockWrlock(&sRegexCache.regexLock); + if (taosHashGetSize(sRegexCache.regexHash) >= sRegexCache.regexCaheSize) { + UsingRegex **ppUsingRegex = taosHashIterate(sRegexCache.regexHash, NULL); + while ((ppUsingRegex != NULL)) { + if ((*ppUsingRegex)->usingCount == 0 && + taosGetTimestampSec() - (*ppUsingRegex)->lastUsedTime > REGEX_CACHE_CLEAR_TIME) { + /* Remove the hash entry itself; freeing only the value would leave a dangling pointer for the next lookup. */ + UsingRegex *pStale = *ppUsingRegex; + size_t keyLen = 0; + void *pKey = taosHashGetKey(ppUsingRegex, &keyLen); + if (taosHashRemove(sRegexCache.regexHash, pKey, keyLen) == 0) { + regfree(&pStale->pRegex); + taosMemoryFree(pStale); + sRegexCache.lastClearTime = taosGetTimestampSec(); + } + } + ppUsingRegex = taosHashIterate(sRegexCache.regexHash, ppUsingRegex); } } - pOldPattern = taosMemoryMalloc(strlen(pPattern) + 1); - if (NULL == pOldPattern) { + taosThreadRwlockUnlock(&sRegexCache.regexLock); +} + +/* Return the cached entry for pPattern with its usingCount raised, compiling and inserting it on a miss; NULL on failure. */ +static UsingRegex *getRegComp(const char *pPattern) { + taosThreadRwlockRdlock(&sRegexCache.regexLock); + UsingRegex **ppUsingRegex = (UsingRegex **)taosHashGet(sRegexCache.regexHash, pPattern, strlen(pPattern)); + if (ppUsingRegex != NULL) { + atomic_add_fetch_32(&(*ppUsingRegex)->usingCount, 1); /* several readers may hold the read lock at once */ + taosThreadRwlockUnlock(&sRegexCache.regexLock); + return *ppUsingRegex; + } + taosThreadRwlockUnlock(&sRegexCache.regexLock); + + UsingRegex *pUsingRegex = taosMemoryCalloc(1, sizeof(UsingRegex)); /* zeroed so usingCount and lastUsedTime start at 0 */ + if (pUsingRegex == NULL) { uError("Failed to Malloc when compile regex pattern %s.", pPattern); return NULL; } - strcpy(pOldPattern, pPattern); int32_t cflags = REG_EXTENDED; - int32_t ret = regcomp(&pRegex, pPattern,
cflags); + int32_t ret = regcomp(&pUsingRegex->pRegex, pPattern, cflags); if (ret != 0) { char msgbuf[256] = {0}; - regerror(ret, &pRegex, msgbuf, tListLen(msgbuf)); + regerror(ret, &pUsingRegex->pRegex, msgbuf, tListLen(msgbuf)); uError("Failed to compile regex pattern %s. reason %s", pPattern, msgbuf); - DestoryThreadLocalRegComp(); + taosMemoryFree(pUsingRegex); + terrno = TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR; return NULL; } - return &pRegex; + + taosThreadRwlockWrlock(&sRegexCache.regexLock); + int code = taosHashPut(sRegexCache.regexHash, pPattern, strlen(pPattern), &pUsingRegex, sizeof(UsingRegex *)); + if (code != 0) { + if( terrno == TSDB_CODE_DUP_KEY) { + /* The key is already present: drop our copy and use the cached entry instead. */ + regfree(&pUsingRegex->pRegex); + taosMemoryFree(pUsingRegex); + + UsingRegex **ppUsingRegex = (UsingRegex **)taosHashGet(sRegexCache.regexHash, pPattern, strlen(pPattern)); + if(ppUsingRegex) { + pUsingRegex = (*ppUsingRegex); + } else { + uError("Failed to get regex pattern %s from cache, exception internal error.", pPattern); + taosThreadRwlockUnlock(&sRegexCache.regexLock); + return NULL; + } + } else { + uError("Failed to put regex pattern %s into cache, exception internal error.", pPattern); + regfree(&pUsingRegex->pRegex); /* the entry never reached the cache, so release it here */ + taosMemoryFree(pUsingRegex); + taosThreadRwlockUnlock(&sRegexCache.regexLock); + return NULL; + } + } + atomic_add_fetch_32(&pUsingRegex->usingCount, 1); + taosThreadRwlockUnlock(&sRegexCache.regexLock); + + clearOlderRegex(); + return pUsingRegex; } -void DestoryThreadLocalRegComp() { - if (NULL != pOldPattern) { - regfree(&pRegex); - taosMemoryFree(pOldPattern); - pOldPattern = NULL; - } +/* Hand an entry obtained from getRegComp() back to the cache. */ +void recycleRegex(UsingRegex *regex){ + regex->lastUsedTime = taosGetTimestampSec(); /* stamp first: once the count reaches 0 the entry may be evicted and freed */ + atomic_add_fetch_32(&regex->usingCount, -1); } static int32_t doExecRegexMatch(const char *pString, const char *pPattern) { int32_t ret = 0; char msgbuf[256] = {0}; - regex_t *regex = threadGetRegComp(pPattern); - if (regex == NULL) { + UsingRegex *pUsingRegex = getRegComp(pPattern); + if (pUsingRegex == NULL) { return 1; } regmatch_t pmatch[1]; - ret = regexec(regex,
pString, 1, pmatch, 0); + ret = regexec(&pUsingRegex->pRegex, pString, 1, pmatch, 0); if (ret != 0 && ret != REG_NOMATCH) { - regerror(ret, regex, msgbuf, sizeof(msgbuf)); + regerror(ret, &pUsingRegex->pRegex, msgbuf, sizeof(msgbuf)); uDebug("Failed to match %s with pattern %s, reason %s", pString, pPattern, msgbuf) } + recycleRegex(pUsingRegex); /* released only after the last use of pUsingRegex->pRegex above */ diff --git a/source/util/src/terror.c b/source/util/src/terror.c index c7fd6f65c5..c3f3a07b86 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -683,6 +683,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_PAR_TBNAME_ERROR, "Pseudo tag tbname n TAOS_DEFINE_ERROR(TSDB_CODE_PAR_TBNAME_DUPLICATED, "Table name duplicated") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_TAG_NAME_DUPLICATED, "Tag name duplicated") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_NOT_ALLOWED_DIFFERENT_BY_ROW_FUNC, "Some functions cannot appear in the select list at the same time") +TAOS_DEFINE_ERROR(TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR, "Syntax error in regular expression") TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTERNAL_ERROR, "Parser internal error") //planner diff --git a/source/util/src/tworker.c b/source/util/src/tworker.c index 4a8a0823b7..7a97dc3527 100644 --- a/source/util/src/tworker.c +++ b/source/util/src/tworker.c @@ -104,7 +104,6 @@ static void *tQWorkerThreadFp(SQueueWorker *worker) { } destroyThreadLocalGeosCtx(); - DestoryThreadLocalRegComp(); return NULL; } @@ -224,7 +223,6 @@ static void *tAutoQWorkerThreadFp(SQueueWorker *worker) { taosUpdateItemSize(qinfo.queue, 1); } - DestoryThreadLocalRegComp(); return NULL; } @@ -636,7 +634,6 @@ static void *tQueryAutoQWorkerThreadFp(SQueryAutoQWorker *worker) { } destroyThreadLocalGeosCtx(); - DestoryThreadLocalRegComp(); return NULL; }