Merge pull request #26512 from taosdata/enh/TD-29679

use regex cache
This commit is contained in:
dapan1121 2024-07-15 19:01:50 +08:00 committed by GitHub
commit c10106a4da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 310 additions and 33 deletions

View File

@ -626,6 +626,7 @@ bool nodesIsArithmeticOp(const SOperatorNode* pOp);
bool nodesIsComparisonOp(const SOperatorNode* pOp);
bool nodesIsJsonOp(const SOperatorNode* pOp);
bool nodesIsRegularOp(const SOperatorNode* pOp);
bool nodesIsMatchRegularOp(const SOperatorNode* pOp);
bool nodesIsBitwiseOp(const SOperatorNode* pOp);
bool nodesExprHasColumn(SNode* pNode);

View File

@ -837,6 +837,7 @@ int32_t taosGetErrSize();
#define TSDB_CODE_PAR_TBNAME_DUPLICATED TAOS_DEF_ERROR_CODE(0, 0x267E)
#define TSDB_CODE_PAR_TAG_NAME_DUPLICATED TAOS_DEF_ERROR_CODE(0, 0x267F)
#define TSDB_CODE_PAR_NOT_ALLOWED_DIFFERENT_BY_ROW_FUNC TAOS_DEF_ERROR_CODE(0, 0x2680)
#define TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR TAOS_DEF_ERROR_CODE(0, 0x2681)
#define TSDB_CODE_PAR_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x26FF)
//planner

View File

@ -45,7 +45,10 @@ typedef struct SPatternCompareInfo {
TdUcs4 umatchOne; // unicode version matchOne
} SPatternCompareInfo;
int32_t InitRegexCache();
void DestroyRegexCache();
int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t ssize, const SPatternCompareInfo *pInfo);
int32_t checkRegexPattern(const char *pPattern);
int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, size_t ssize, const SPatternCompareInfo *pInfo);
@ -83,7 +86,6 @@ int32_t compareLenBinaryVal(const void *pLeft, const void *pRight);
int32_t comparestrRegexMatch(const void *pLeft, const void *pRight);
int32_t comparestrRegexNMatch(const void *pLeft, const void *pRight);
void DestoryThreadLocalRegComp();
int32_t comparewcsRegexMatch(const void *pLeft, const void *pRight);
int32_t comparewcsRegexNMatch(const void *pLeft, const void *pRight);

View File

@ -18,6 +18,7 @@
#include "audit.h"
#include "libs/function/tudf.h"
#include "tgrant.h"
#include "tcompare.h"
#define DM_INIT_AUDIT() \
do { \
@ -163,6 +164,7 @@ int32_t dmInit() {
if (dmInitMonitor() != 0) return -1;
if (dmInitAudit() != 0) return -1;
if (dmInitDnode(dmInstance()) != 0) return -1;
if (InitRegexCache() != 0) return -1;
#if defined(USE_S3)
if (s3Begin() != 0) return -1;
#endif
@ -192,6 +194,7 @@ void dmCleanup() {
udfStopUdfd();
taosStopCacheRefreshWorker();
dmDiskClose();
DestroyRegexCache();
#if defined(USE_S3)
s3End();

View File

@ -2203,6 +2203,17 @@ bool nodesIsRegularOp(const SOperatorNode* pOp) {
return false;
}
bool nodesIsMatchRegularOp(const SOperatorNode* pOp) {
switch (pOp->opType) {
case OP_TYPE_MATCH:
case OP_TYPE_NMATCH:
return true;
default:
break;
}
return false;
}
bool nodesIsBitwiseOp(const SOperatorNode* pOp) {
switch (pOp->opType) {
case OP_TYPE_BIT_AND:

View File

@ -223,6 +223,8 @@ static char* getSyntaxErrFormat(int32_t errCode) {
return "Tag name:%s duplicated";
case TSDB_CODE_PAR_NOT_ALLOWED_DIFFERENT_BY_ROW_FUNC:
return "Some functions cannot appear in the select list at the same time";
case TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR:
return "Syntax error in regular expression";
default:
return "Unknown error";
}

View File

@ -1654,6 +1654,12 @@ static int32_t sclGetCompOperatorResType(SOperatorNode *pOp) {
(!IS_STR_DATA_TYPE(rdt.type) && (rdt.type != TSDB_DATA_TYPE_NULL))) {
return TSDB_CODE_TSC_INVALID_OPERATION;
}
if (nodesIsMatchRegularOp(pOp)) {
SValueNode* node = (SValueNode*)(pOp->pRight);
if(checkRegexPattern(node->literal) != TSDB_CODE_SUCCESS){
return TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR;
}
}
}
pOp->node.resType.type = TSDB_DATA_TYPE_BOOL;
pOp->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BOOL].bytes;

View File

@ -1675,10 +1675,8 @@ int32_t doVectorCompareImpl(SScalarParam *pLeft, SScalarParam *pRight, SScalarPa
pRes[i] = false;
continue;
}
char *pLeftData = colDataGetData(pLeft->columnData, leftIndex);
char *pRightData = colDataGetData(pRight->columnData, rightIndex);
pRes[i] = filterDoCompare(fp, optr, pLeftData, pRightData);
if (pRes[i]) {
++num;
@ -1714,7 +1712,6 @@ int32_t doVectorCompareImpl(SScalarParam *pLeft, SScalarParam *pRight, SScalarPa
if (!pLeftData || !pRightData) {
result = false;
}
if (!result) {
colDataSetInt8(pOut->columnData, i, (int8_t *)&result);
} else {

View File

@ -43,6 +43,7 @@
#include "tglobal.h"
#include "tlog.h"
#include "tvariant.h"
#include "tcompare.h"
#define _DEBUG_PRINT_ 0
@ -52,6 +53,12 @@
#define PRINTF(...)
#endif
class constantTest {
public:
constantTest() { InitRegexCache(); }
~constantTest() { DestroyRegexCache(); }
};
static constantTest test;
namespace {
SColumnInfo createColumnInfo(int32_t colId, int32_t type, int32_t bytes) {

View File

@ -24,6 +24,7 @@
#include "tutil.h"
#include "types.h"
#include "osString.h"
#include "ttimer.h"
int32_t setChkInBytes1(const void *pLeft, const void *pRight) {
return NULL != taosHashGet((SHashObj *)pRight, pLeft, 1) ? 1 : 0;
@ -1203,54 +1204,153 @@ int32_t comparestrRegexNMatch(const void *pLeft, const void *pRight) {
return comparestrRegexMatch(pLeft, pRight) ? 0 : 1;
}
static threadlocal regex_t pRegex;
static threadlocal char *pOldPattern = NULL;
static regex_t *threadGetRegComp(const char *pPattern) {
if (NULL != pOldPattern) {
if( strcmp(pOldPattern, pPattern) == 0) {
return &pRegex;
} else {
DestoryThreadLocalRegComp();
typedef struct UsingRegex {
regex_t pRegex;
int32_t lastUsedTime;
} UsingRegex;
typedef struct RegexCache {
SHashObj *regexHash;
void *regexCacheTmr;
void *timer;
} RegexCache;
static RegexCache sRegexCache;
#define MAX_REGEX_CACHE_SIZE 20
#define REGEX_CACHE_CLEAR_TIME 30
static void checkRegexCache(void* param, void* tmrId) {
taosTmrReset(checkRegexCache, REGEX_CACHE_CLEAR_TIME * 1000, param, sRegexCache.regexCacheTmr, &tmrId);
if (taosHashGetSize(sRegexCache.regexHash) < MAX_REGEX_CACHE_SIZE) {
return;
}
if (taosHashGetSize(sRegexCache.regexHash) >= MAX_REGEX_CACHE_SIZE) {
UsingRegex **ppUsingRegex = taosHashIterate(sRegexCache.regexHash, NULL);
while ((ppUsingRegex != NULL)) {
if (taosGetTimestampSec() - (*ppUsingRegex)->lastUsedTime > REGEX_CACHE_CLEAR_TIME) {
size_t len = 0;
char* key = (char*)taosHashGetKey(ppUsingRegex, &len);
taosHashRemove(sRegexCache.regexHash, key, len);
}
ppUsingRegex = taosHashIterate(sRegexCache.regexHash, ppUsingRegex);
}
}
pOldPattern = taosMemoryMalloc(strlen(pPattern) + 1);
if (NULL == pOldPattern) {
}
void regexCacheFree(void *ppUsingRegex) {
regfree(&(*(UsingRegex **)ppUsingRegex)->pRegex);
taosMemoryFree(*(UsingRegex **)ppUsingRegex);
}
int32_t InitRegexCache() {
sRegexCache.regexHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
if (sRegexCache.regexHash == NULL) {
uError("failed to create RegexCache");
return -1;
}
taosHashSetFreeFp(sRegexCache.regexHash, regexCacheFree);
sRegexCache.regexCacheTmr = taosTmrInit(0, 0, 0, "REGEXCACHE");
if (sRegexCache.regexCacheTmr == NULL) {
uError("failed to create regex cache check timer");
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
sRegexCache.timer = taosTmrStart(checkRegexCache, REGEX_CACHE_CLEAR_TIME * 1000, NULL, sRegexCache.regexCacheTmr);
if (sRegexCache.timer == NULL) {
uError("failed to start regex cache timer");
return -1;
}
return 0;
}
void DestroyRegexCache(){
uInfo("[regex cache] destory regex cache");
taosTmrStopA(&sRegexCache.timer);
taosHashCleanup(sRegexCache.regexHash);
taosTmrCleanUp(sRegexCache.regexCacheTmr);
}
int32_t checkRegexPattern(const char *pPattern) {
if (pPattern == NULL) {
return TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR;
}
regex_t regex;
int32_t cflags = REG_EXTENDED;
int32_t ret = regcomp(&regex, pPattern, cflags);
if (ret != 0) {
char msgbuf[256] = {0};
regerror(ret, &regex, msgbuf, tListLen(msgbuf));
uError("Failed to compile regex pattern %s. reason %s", pPattern, msgbuf);
return TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR;
}
regfree(&regex);
return TSDB_CODE_SUCCESS;
}
static UsingRegex **getRegComp(const char *pPattern) {
UsingRegex **ppUsingRegex = (UsingRegex **)taosHashAcquire(sRegexCache.regexHash, pPattern, strlen(pPattern));
if (ppUsingRegex != NULL) {
(*ppUsingRegex)->lastUsedTime = taosGetTimestampSec();
return ppUsingRegex;
}
UsingRegex *pUsingRegex = taosMemoryMalloc(sizeof(UsingRegex));
if (pUsingRegex == NULL) {
uError("Failed to Malloc when compile regex pattern %s.", pPattern);
return NULL;
}
strcpy(pOldPattern, pPattern);
int32_t cflags = REG_EXTENDED;
int32_t ret = regcomp(&pRegex, pPattern, cflags);
int32_t ret = regcomp(&pUsingRegex->pRegex, pPattern, cflags);
if (ret != 0) {
char msgbuf[256] = {0};
regerror(ret, &pRegex, msgbuf, tListLen(msgbuf));
regerror(ret, &pUsingRegex->pRegex, msgbuf, tListLen(msgbuf));
uError("Failed to compile regex pattern %s. reason %s", pPattern, msgbuf);
DestoryThreadLocalRegComp();
taosMemoryFree(pUsingRegex);
return NULL;
}
return &pRegex;
while (true) {
int code = taosHashPut(sRegexCache.regexHash, pPattern, strlen(pPattern), &pUsingRegex, sizeof(UsingRegex *));
if (code != 0 && code != TSDB_CODE_DUP_KEY) {
regexCacheFree(&pUsingRegex);
uError("Failed to put regex pattern %s into cache, exception internal error.", pPattern);
return NULL;
}
ppUsingRegex = (UsingRegex **)taosHashAcquire(sRegexCache.regexHash, pPattern, strlen(pPattern));
if (ppUsingRegex) {
if (*ppUsingRegex != pUsingRegex) {
regexCacheFree(&pUsingRegex);
}
pUsingRegex = (*ppUsingRegex);
break;
} else {
continue;
}
}
pUsingRegex->lastUsedTime = taosGetTimestampSec();
return ppUsingRegex;
}
void DestoryThreadLocalRegComp() {
if (NULL != pOldPattern) {
regfree(&pRegex);
taosMemoryFree(pOldPattern);
pOldPattern = NULL;
}
void releaseRegComp(UsingRegex **regex){
taosHashRelease(sRegexCache.regexHash, regex);
}
static int32_t doExecRegexMatch(const char *pString, const char *pPattern) {
int32_t ret = 0;
char msgbuf[256] = {0};
regex_t *regex = threadGetRegComp(pPattern);
if (regex == NULL) {
UsingRegex **pUsingRegex = getRegComp(pPattern);
if (pUsingRegex == NULL) {
return 1;
}
regmatch_t pmatch[1];
ret = regexec(regex, pString, 1, pmatch, 0);
ret = regexec(&(*pUsingRegex)->pRegex, pString, 1, pmatch, 0);
releaseRegComp(pUsingRegex);
if (ret != 0 && ret != REG_NOMATCH) {
regerror(ret, regex, msgbuf, sizeof(msgbuf));
regerror(ret, &(*pUsingRegex)->pRegex, msgbuf, sizeof(msgbuf));
uDebug("Failed to match %s with pattern %s, reason %s", pString, pPattern, msgbuf)
}

View File

@ -684,6 +684,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_PAR_TBNAME_ERROR, "Pseudo tag tbname n
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_TBNAME_DUPLICATED, "Table name duplicated")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_TAG_NAME_DUPLICATED, "Tag name duplicated")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_NOT_ALLOWED_DIFFERENT_BY_ROW_FUNC, "Some functions cannot appear in the select list at the same time")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR, "Syntax error in regular expression")
TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INTERNAL_ERROR, "Parser internal error")
//planner

View File

@ -104,7 +104,6 @@ static void *tQWorkerThreadFp(SQueueWorker *worker) {
}
destroyThreadLocalGeosCtx();
DestoryThreadLocalRegComp();
return NULL;
}
@ -224,7 +223,6 @@ static void *tAutoQWorkerThreadFp(SQueueWorker *worker) {
taosUpdateItemSize(qinfo.queue, 1);
}
DestoryThreadLocalRegComp();
return NULL;
}
@ -636,7 +634,6 @@ static void *tQueryAutoQWorkerThreadFp(SQueryAutoQWorker *worker) {
}
destroyThreadLocalGeosCtx();
DestoryThreadLocalRegComp();
return NULL;
}

View File

@ -193,6 +193,10 @@
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/like.py -Q 2
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/like.py -Q 3
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/like.py -Q 4
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/match.py
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/match.py -Q 2
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/match.py -Q 3
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/match.py -Q 4
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/td-28068.py
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/td-28068.py -Q 2
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/td-28068.py -Q 3

View File

@ -0,0 +1,141 @@
import taos
import sys
import datetime
import inspect
import threading
import time
from util.log import *
from util.sql import *
from util.cases import *
from util.common import tdCom
class TDTestCase:
def init(self, conn, logSql, replicaVar=1):
self.replicaVar = int(replicaVar)
tdLog.debug(f"start to excute {__file__}")
tdSql.init(conn.cursor(), True)
def initConnection(self):
self.records = 10000000
self.numOfTherads = 50
self.ts = 1537146000000
self.host = "127.0.0.1"
self.user = "root"
self.password = "taosdata"
self.config = "/home/xp/git/TDengine/sim/dnode1/cfg"
self.conn = taos.connect(
self.host,
self.user,
self.password,
self.config)
def initDB(self):
tdSql.execute("drop database if exists db")
tdSql.execute("create database if not exists db")
def stopTest(self):
tdSql.execute("drop database if exists db")
def threadTest(self, threadID):
print(f"Thread {threadID} starting...")
tdsqln = tdCom.newTdSql()
for i in range(2, 50):
tdsqln.query(f"select distinct table_name from information_schema.ins_columns where table_name match 't.*{i}dx'")
tdsqln.checkRows(0)
for i in range(100):
tdsqln.query(f"select distinct table_name from information_schema.ins_columns where table_name match 't.*1x'")
tdsqln.checkRows(2)
tdsqln.query("select * from db.t1x")
tdsqln.checkRows(5)
tdsqln.query("select * from db.t1x where c1 match '_c'")
tdsqln.checkRows(2)
tdsqln.query("select * from db.t1x where c1 match '%__c'")
tdsqln.checkRows(0)
tdsqln.error("select * from db.t1x where c1 match '*d'")
print(f"Thread {threadID} finished.")
def match_test(self):
tdSql.execute("create table db.t1x (ts timestamp, c1 varchar(100))")
tdSql.execute("create table db.t_1x (ts timestamp, c1 varchar(100))")
tdSql.query(f"select distinct table_name from information_schema.ins_columns where table_name match 't.*1x'")
tdSql.checkRows(2)
for i in range(2, 50):
tdSql.query(f"select distinct table_name from information_schema.ins_columns where table_name match 't.*{i}x'")
tdSql.checkRows(0)
tdSql.error("select * from db.t1x where c1 match '*d'")
tdSql.query("insert into db.t1x values(now, 'abc'), (now+1s, 'a%c'),(now+2s, 'a_c'),(now+3s, '_c'),(now+4s, '%c')")
tdSql.query("select * from db.t1x")
tdSql.checkRows(5)
tdSql.query("select * from db.t1x where c1 match '_c'")
tdSql.checkRows(2)
tdSql.query("select * from db.t1x where c1 match '%__c'")
tdSql.checkRows(0)
tdSql.error("select * from db.t1x where c1 match '*d'")
threads = []
for i in range(10):
t = threading.Thread(target=self.threadTest, args=(i,))
threads.append(t)
t.start()
time.sleep(31)
tdSql.query(f"select distinct table_name from information_schema.ins_columns where table_name match 't.*1x'")
tdSql.checkRows(2)
for i in range(2, 50):
tdSql.query(f"select distinct table_name from information_schema.ins_columns where table_name match 't.*{i}x'")
tdSql.checkRows(0)
tdSql.query("select * from db.t1x")
tdSql.checkRows(5)
tdSql.query("select * from db.t1x where c1 match '_c'")
tdSql.checkRows(2)
tdSql.query("select * from db.t1x where c1 match '%__c'")
tdSql.checkRows(0)
tdSql.execute("create table db.t3x (ts timestamp, c1 varchar(100))")
tdSql.execute("insert into db.t3x values(now, '我是中文'), (now+1s, '我是_中文'), (now+2s, '我是%中文'), (now+3s, '%中文'),(now+4s, '_中文')")
tdSql.query("select * from db.t3x where c1 match '%中文'")
tdSql.checkRows(2)
tdSql.query("select * from db.t3x where c1 match '中文'")
tdSql.checkRows(5)
tdSql.error("select * from db.t1x where c1 match '*d'")
for thread in threads:
print(f"Thread waitting for finish...")
thread.join()
print(f"Mutithread test finished.")
def run(self):
tdLog.printNoPrefix("==========start match_test run ...............")
tdSql.prepare(replica = self.replicaVar)
self.initConnection()
self.initDB()
self.match_test()
self.stopTest()
def stop(self):
tdSql.close()
tdLog.success(f"{__file__} successfully executed")
tdCases.addLinux(__file__, TDTestCase())
tdCases.addWindows(__file__, TDTestCase())

View File

@ -104,6 +104,10 @@ python3 ./test.py -f 2-query/like.py
python3 ./test.py -f 2-query/like.py -Q 2
python3 ./test.py -f 2-query/like.py -Q 3
python3 ./test.py -f 2-query/like.py -Q 4
python3 ./test.py -f 2-query/match.py
python3 ./test.py -f 2-query/match.py -Q 2
python3 ./test.py -f 2-query/match.py -Q 3
python3 ./test.py -f 2-query/match.py -Q 4
python3 ./test.py -f 3-enterprise/restore/restoreDnode.py -N 5 -M 3 -i False
python3 ./test.py -f 3-enterprise/restore/restoreVnode.py -N 5 -M 3 -i False
python3 ./test.py -f 3-enterprise/restore/restoreMnode.py -N 5 -M 3 -i False