checkout regex pattern at client

This commit is contained in:
factosea 2024-07-12 11:48:48 +08:00
parent cfda747613
commit 57f826608a
8 changed files with 42 additions and 26 deletions

View File

@ -618,6 +618,7 @@ bool nodesIsArithmeticOp(const SOperatorNode* pOp);
bool nodesIsComparisonOp(const SOperatorNode* pOp);
bool nodesIsJsonOp(const SOperatorNode* pOp);
bool nodesIsRegularOp(const SOperatorNode* pOp);
bool nodesIsMatchRegularOp(const SOperatorNode* pOp);
bool nodesIsBitwiseOp(const SOperatorNode* pOp);
bool nodesExprHasColumn(SNode* pNode);

View File

@ -48,6 +48,7 @@ typedef struct SPatternCompareInfo {
int32_t InitRegexCache();
void DestroyRegexCache();
int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t ssize, const SPatternCompareInfo *pInfo);
int32_t checkRegexPattern(const char *pPattern);
int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, size_t ssize, const SPatternCompareInfo *pInfo);

View File

@ -2205,6 +2205,17 @@ bool nodesIsRegularOp(const SOperatorNode* pOp) {
return false;
}
/**
 * Tell whether an operator node is a regular-expression match operator.
 *
 * @param pOp operator node to inspect; it is dereferenced unconditionally.
 * @return true when pOp->opType is OP_TYPE_MATCH or OP_TYPE_NMATCH,
 *         false for every other operator type.
 */
bool nodesIsMatchRegularOp(const SOperatorNode* pOp) {
  return OP_TYPE_MATCH == pOp->opType || OP_TYPE_NMATCH == pOp->opType;
}
bool nodesIsBitwiseOp(const SOperatorNode* pOp) {
switch (pOp->opType) {
case OP_TYPE_BIT_AND:

View File

@ -3217,7 +3217,6 @@ bool filterExecuteImplMisc(void *pinfo, int32_t numOfRows, SColumnInfoData *pRes
continue;
}
terrno = TSDB_CODE_SUCCESS;
void *colData = colDataGetData((SColumnInfoData *)info->cunits[uidx].colData, i);
// match/nmatch for nchar type need convert from ucs4 to mbs
if (info->cunits[uidx].dataType == TSDB_DATA_TYPE_NCHAR &&
@ -3236,7 +3235,6 @@ bool filterExecuteImplMisc(void *pinfo, int32_t numOfRows, SColumnInfoData *pRes
p[i] = filterDoCompare(gDataCompare[info->cunits[uidx].func], info->cunits[uidx].optr, colData,
info->cunits[uidx].valData);
}
if (terrno != TSDB_CODE_SUCCESS) break;
if (p[i] == 0) {
all = false;
@ -3360,9 +3358,8 @@ int32_t filterSetExecFunc(SFilterInfo *info) {
return TSDB_CODE_SUCCESS;
}
terrno = TSDB_CODE_SUCCESS;
info->func = filterExecuteImplMisc;
return terrno;
return TSDB_CODE_SUCCESS;
}
int32_t filterPreprocess(SFilterInfo *info) {
@ -4747,7 +4744,6 @@ int32_t filterExecute(SFilterInfo *info, SSDataBlock *pSrc, SColumnInfoData **p,
}
bool keepAll = (*info->func)(info, pSrc->info.rows, *p, statis, numOfCols, &output.numOfQualified);
if (terrno != TSDB_CODE_SUCCESS) return terrno;
// todo this should be return during filter procedure
if (keepAll) {

View File

@ -1654,6 +1654,12 @@ static int32_t sclGetCompOperatorResType(SOperatorNode *pOp) {
(rdt.type != TSDB_DATA_TYPE_NCHAR && rdt.type != TSDB_DATA_TYPE_VARCHAR && rdt.type != TSDB_DATA_TYPE_NULL)) {
return TSDB_CODE_TSC_INVALID_OPERATION;
}
if (nodesIsMatchRegularOp(pOp)) {
SValueNode* node = (SValueNode*)(pOp->pRight);
if(checkRegexPattern(node->literal) != TSDB_CODE_SUCCESS){
return TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR;
}
}
}
pOp->node.resType.type = TSDB_DATA_TYPE_BOOL;
pOp->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BOOL].bytes;

View File

@ -1677,9 +1677,7 @@ int32_t doVectorCompareImpl(SScalarParam *pLeft, SScalarParam *pRight, SScalarPa
}
char *pLeftData = colDataGetData(pLeft->columnData, leftIndex);
char *pRightData = colDataGetData(pRight->columnData, rightIndex);
terrno = TSDB_CODE_SUCCESS;
pRes[i] = filterDoCompare(fp, optr, pLeftData, pRightData);
if (terrno != TSDB_CODE_SUCCESS) break;
if (pRes[i]) {
++num;
}
@ -1714,7 +1712,6 @@ int32_t doVectorCompareImpl(SScalarParam *pLeft, SScalarParam *pRight, SScalarPa
if (!pLeftData || !pRightData) {
result = false;
}
terrno = TSDB_CODE_SUCCESS;
if (!result) {
colDataSetInt8(pOut->columnData, i, (int8_t *)&result);
} else {
@ -1724,7 +1721,6 @@ int32_t doVectorCompareImpl(SScalarParam *pLeft, SScalarParam *pRight, SScalarPa
++num;
}
}
if (terrno != TSDB_CODE_SUCCESS) break;
if (freeLeft) {
taosMemoryFreeClear(pLeftData);

View File

@ -1211,27 +1211,23 @@ typedef struct UsingRegex {
// Bookkeeping for the compiled-regex cache; the file keeps one static instance named sRegexCache.
// NOTE(review): this listing comes from a diff view, so some fields below may belong to only
// one side of the change — confirm against the real header before relying on them.
typedef struct RegexCache {
// Hash table looked up by pattern string (key length strlen(pattern)); stores UsingRegex* values.
SHashObj *regexHash;
// Size threshold compared against taosHashGetSize(regexHash) in checkRegexCache.
int32_t regexCaheSize;
// Handle assigned from taosTmrInit(..., "REGEXCACHE") in InitRegexCache.
void *regexCacheTimer;
// Timer id whose address is handed to taosTmrStopA in DestroyRegexCache.
void *timer;
// Value of taosGetTimestampSec() recorded when the cache was initialised or last swept.
int32_t lastClearTime;
} RegexCache;
static RegexCache sRegexCache;
#define MAX_REGEX_CACHE_SIZE 20
#define REGEX_CACHE_CLEAR_TIME 30
static void checkRegexCache(void* param, void* tmrId) {
if (taosGetTimestampSec() - sRegexCache.lastClearTime < REGEX_CACHE_CLEAR_TIME ||
taosHashGetSize(sRegexCache.regexHash) < sRegexCache.regexCaheSize) {
if (taosHashGetSize(sRegexCache.regexHash) < MAX_REGEX_CACHE_SIZE) {
return;
}
if (taosHashGetSize(sRegexCache.regexHash) >= sRegexCache.regexCaheSize) {
if (taosHashGetSize(sRegexCache.regexHash) >= MAX_REGEX_CACHE_SIZE) {
UsingRegex **ppUsingRegex = taosHashIterate(sRegexCache.regexHash, NULL);
while ((ppUsingRegex != NULL)) {
if (taosGetTimestampSec() - (*ppUsingRegex)->lastUsedTime > REGEX_CACHE_CLEAR_TIME) {
taosHashRelease(sRegexCache.regexHash, ppUsingRegex);
sRegexCache.lastClearTime = taosGetTimestampSec();
}
ppUsingRegex = taosHashIterate(sRegexCache.regexHash, ppUsingRegex);
}
@ -1252,9 +1248,6 @@ int32_t InitRegexCache() {
return -1;
}
taosHashSetFreeFp(sRegexCache.regexHash, regexCacheFree);
sRegexCache.regexCaheSize = MAX_REGEX_CACHE_SIZE;
sRegexCache.lastClearTime = taosGetTimestampSec();
sRegexCache.regexCacheTimer = taosTmrInit(0, 0, 0, "REGEXCACHE");
if (sRegexCache.regexCacheTimer == NULL) {
uError("failed to create regex cache check timer");
@ -1272,15 +1265,28 @@ int32_t InitRegexCache() {
}
// Tears down the file-scope regex cache (sRegexCache).
// In the order written: fetch the first hash entry, stop the timer with taosTmrStopA,
// call regexCacheFree on each entry reached by iterating regexHash, and finally
// release the hash table itself with taosHashCleanup.
// NOTE(review): InitRegexCache registers regexCacheFree as the hash's free callback
// (taosHashSetFreeFp); confirm that taosHashCleanup does not free the same entries a
// second time after the manual loop below.
void DestroyRegexCache(){
UsingRegex **ppUsingRegex = taosHashIterate(sRegexCache.regexHash, NULL);
taosTmrStopA(&sRegexCache.timer);
// Walk every cached entry and free it explicitly.
while ((ppUsingRegex != NULL)) {
regexCacheFree(ppUsingRegex);
ppUsingRegex = taosHashIterate(sRegexCache.regexHash, ppUsingRegex);
}
taosHashCleanup(sRegexCache.regexHash);
}
/**
 * Check that a pattern compiles as a POSIX extended regular expression.
 *
 * The pattern is compiled with REG_EXTENDED purely for validation; the
 * compiled form is released immediately and nothing is cached.
 *
 * @param pPattern NUL-terminated pattern text; NULL is rejected.
 * @return TSDB_CODE_SUCCESS when regcomp accepts the pattern,
 *         TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR when pPattern is NULL or
 *         regcomp fails (the regerror text is logged through uError).
 */
int32_t checkRegexPattern(const char *pPattern) {
  if (NULL == pPattern) {
    return TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR;
  }

  regex_t compiled;
  int32_t rc = regcomp(&compiled, pPattern, REG_EXTENDED);
  if (0 == rc) {
    // Only the verdict matters here, so drop the compiled program right away.
    regfree(&compiled);
    return TSDB_CODE_SUCCESS;
  }

  // Compilation failed: turn the error code into readable text for the log.
  char reason[256] = {0};
  regerror(rc, &compiled, reason, tListLen(reason));
  uError("Failed to compile regex pattern %s. reason %s", pPattern, reason);
  return TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR;
}
static UsingRegex **getRegComp(const char *pPattern) {
UsingRegex **ppUsingRegex = (UsingRegex **)taosHashAcquire(sRegexCache.regexHash, pPattern, strlen(pPattern));
if (ppUsingRegex != NULL) {
@ -1291,7 +1297,6 @@ static UsingRegex **getRegComp(const char *pPattern) {
UsingRegex *pUsingRegex = taosMemoryMalloc(sizeof(UsingRegex));
if (pUsingRegex == NULL) {
uError("Failed to Malloc when compile regex pattern %s.", pPattern);
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
int32_t cflags = REG_EXTENDED;
@ -1301,7 +1306,6 @@ static UsingRegex **getRegComp(const char *pPattern) {
regerror(ret, &pUsingRegex->pRegex, msgbuf, tListLen(msgbuf));
uError("Failed to compile regex pattern %s. reason %s", pPattern, msgbuf);
taosMemoryFree(pUsingRegex);
terrno = TSDB_CODE_PAR_REGULAR_EXPRESSION_ERROR;
return NULL;
}
@ -1309,6 +1313,7 @@ static UsingRegex **getRegComp(const char *pPattern) {
int code = taosHashPut(sRegexCache.regexHash, pPattern, strlen(pPattern), &pUsingRegex, sizeof(UsingRegex *));
if (code != 0) {
if (terrno == TSDB_CODE_DUP_KEY) {
terrno = TSDB_CODE_SUCCESS;
ppUsingRegex = (UsingRegex **)taosHashAcquire(sRegexCache.regexHash, pPattern, strlen(pPattern));
if (ppUsingRegex) {
if (*ppUsingRegex != pUsingRegex) {
@ -1322,7 +1327,6 @@ static UsingRegex **getRegComp(const char *pPattern) {
} else {
regexCacheFree(&pUsingRegex);
uError("Failed to put regex pattern %s into cache, exception internal error.", pPattern);
terrno = TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR;
return NULL;
}
}

View File

@ -71,6 +71,7 @@ class TDTestCase:
tdSql.query(f"select distinct table_name from information_schema.ins_columns where table_name match 't.*{i}x'")
tdSql.checkRows(0)
tdSql.error("select * from db.t1x where c1 match '*d'")
tdSql.query("insert into db.t1x values(now, 'abc'), (now+1s, 'a%c'),(now+2s, 'a_c'),(now+3s, '_c'),(now+4s, '%c')")
tdSql.query("select * from db.t1x")