From 1e69538c379a88c5222471ca6ceb81e61caad3a5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 27 Dec 2022 16:55:38 +0800 Subject: [PATCH 1/8] fix(query): opt filter perf. --- source/libs/scalar/inc/sclInt.h | 3 -- source/libs/scalar/src/filter.c | 2 +- source/libs/scalar/src/sclvector.c | 67 ++++++++++++++++++++++-------- source/util/src/tcompare.c | 10 ++--- 4 files changed, 56 insertions(+), 26 deletions(-) diff --git a/source/libs/scalar/inc/sclInt.h b/source/libs/scalar/inc/sclInt.h index d3f29c0e49..d3da73abef 100644 --- a/source/libs/scalar/inc/sclInt.h +++ b/source/libs/scalar/inc/sclInt.h @@ -50,9 +50,6 @@ typedef struct SScalarCtx { #define SCL_IS_COMPARISON_OPERATOR(_opType) ((_opType) >= OP_TYPE_GREATER_THAN && (_opType) < OP_TYPE_IS_NOT_UNKNOWN) #define SCL_DOWNGRADE_DATETYPE(_type) \ ((_type) == TSDB_DATA_TYPE_BIGINT || TSDB_DATA_TYPE_DOUBLE == (_type) || (_type) == TSDB_DATA_TYPE_UBIGINT) -#define SCL_NO_NEED_CONVERT_COMPARISION(_ltype, _rtype, _optr) \ - (IS_NUMERIC_TYPE(_ltype) && IS_NUMERIC_TYPE(_rtype) && \ - ((_optr) >= OP_TYPE_GREATER_THAN && (_optr) <= OP_TYPE_NOT_EQUAL)) #define sclFatal(...) qFatal(__VA_ARGS__) #define sclError(...) 
qError(__VA_ARGS__) diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 59e39e3f6f..8fad010524 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -4082,7 +4082,7 @@ bool filterExecute(SFilterInfo *info, SSDataBlock *pSrc, SColumnInfoData **p, SC SArray *pList = taosArrayInit(1, POINTER_BYTES); taosArrayPush(pList, &pSrc); - int32_t code = scalarCalculate(info->sclCtx.node, pList, &output); + code = scalarCalculate(info->sclCtx.node, pList, &output); taosArrayDestroy(pList); FLT_ERR_RET(code); diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index a1995bdf50..2eb12c8269 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -37,6 +37,11 @@ #define IS_HELPER_NULL(col, i) colDataIsNull_s(col, i) || IS_JSON_NULL(col->info.type, colDataGetVarData(col, i)) +bool noConvertBeforeCompare(int32_t leftType, int32_t rightType, int32_t optr) { + return IS_NUMERIC_TYPE(leftType) && IS_NUMERIC_TYPE(rightType) && + (optr >= OP_TYPE_GREATER_THAN && optr <= OP_TYPE_NOT_EQUAL); +} + void convertNumberToNumber(const void *inData, void *outData, int8_t inType, int8_t outType) { switch (outType) { case TSDB_DATA_TYPE_BOOL: { @@ -338,6 +343,7 @@ static FORCE_INLINE void varToBool(char *buf, SScalarParam *pOut, int32_t rowInd colDataAppendInt8(pOut->columnData, rowIndex, (int8_t *)&v); } +// todo remove this malloc static FORCE_INLINE void varToNchar(char *buf, SScalarParam *pOut, int32_t rowIndex, int32_t *overflow) { int32_t len = 0; int32_t inputLen = varDataLen(buf); @@ -399,6 +405,8 @@ int32_t vectorConvertFromVarData(SSclVectorConvCtx *pCtx, int32_t *overflow) { } pCtx->pOut->numOfRows = pCtx->pIn->numOfRows; + char* tmp = NULL; + for (int32_t i = pCtx->startIndex; i <= pCtx->endIndex; ++i) { if (IS_HELPER_NULL(pCtx->pIn->columnData, i)) { colDataAppendNULL(pCtx->pOut->columnData, i); @@ -421,12 +429,16 @@ int32_t 
vectorConvertFromVarData(SSclVectorConvCtx *pCtx, int32_t *overflow) { continue; } } + int32_t bufSize = pCtx->pIn->columnData->info.bytes; - char *tmp = taosMemoryMalloc(varDataTLen(data)); - if (!tmp) { - sclError("out of memory in vectorConvertFromVarData"); - return TSDB_CODE_OUT_OF_MEMORY; + if (tmp == NULL) { + tmp = taosMemoryMalloc(bufSize); + if (tmp == NULL) { + sclError("out of memory in vectorConvertFromVarData"); + return TSDB_CODE_OUT_OF_MEMORY; + } } + if (vton) { memcpy(tmp, data, varDataTLen(data)); } else { @@ -434,6 +446,7 @@ int32_t vectorConvertFromVarData(SSclVectorConvCtx *pCtx, int32_t *overflow) { memcpy(tmp, varDataVal(data), varDataLen(data)); tmp[varDataLen(data)] = 0; } else if (TSDB_DATA_TYPE_NCHAR == convertType) { + // we need to convert it to native char string, and then perform the string to numeric data ASSERT(varDataLen(data) <= bufSize); int len = taosUcs4ToMbs((TdUcs4 *)varDataVal(data), varDataLen(data), tmp); @@ -448,9 +461,11 @@ int32_t vectorConvertFromVarData(SSclVectorConvCtx *pCtx, int32_t *overflow) { } (*func)(tmp, pCtx->pOut, i, overflow); - taosMemoryFreeClear(tmp); } + if (tmp != NULL) { + taosMemoryFreeClear(tmp); + } return TSDB_CODE_SUCCESS; } @@ -925,25 +940,43 @@ int32_t vectorConvertCols(SScalarParam *pLeft, SScalarParam *pRight, SScalarPara return TSDB_CODE_SUCCESS; } + int8_t type = 0; + int32_t code = 0; + SScalarParam *param1 = NULL, *paramOut1 = NULL; SScalarParam *param2 = NULL, *paramOut2 = NULL; - int32_t code = 0; - if (leftType < rightType) { + // always convert least data + if (IS_VAR_DATA_TYPE(leftType) && IS_VAR_DATA_TYPE(rightType) && (pLeft->numOfRows != pRight->numOfRows) && + leftType != TSDB_DATA_TYPE_JSON && rightType != TSDB_DATA_TYPE_JSON) { param1 = pLeft; param2 = pRight; paramOut1 = pLeftOut; paramOut2 = pRightOut; - } else { - param1 = pRight; - param2 = pLeft; - paramOut1 = pRightOut; - paramOut2 = pLeftOut; - } - int8_t type = vectorGetConvertType(GET_PARAM_TYPE(param1), 
GET_PARAM_TYPE(param2)); - if (0 == type) { - return TSDB_CODE_SUCCESS; + if (pLeft->numOfRows > pRight->numOfRows) { + type = leftType; + } else { + type = rightType; + } + } else { + // we only define half value in the convert-matrix, so make sure param1 always less equal than param2 + if (leftType < rightType) { + param1 = pLeft; + param2 = pRight; + paramOut1 = pLeftOut; + paramOut2 = pRightOut; + } else { + param1 = pRight; + param2 = pLeft; + paramOut1 = pRightOut; + paramOut2 = pLeftOut; + } + + type = vectorGetConvertType(GET_PARAM_TYPE(param1), GET_PARAM_TYPE(param2)); + if (0 == type) { + return TSDB_CODE_SUCCESS; + } } if (type != GET_PARAM_TYPE(param1)) { @@ -1683,7 +1716,7 @@ void vectorCompareImpl(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam * SScalarParam *param1 = NULL; SScalarParam *param2 = NULL; - if (SCL_NO_NEED_CONVERT_COMPARISION(GET_PARAM_TYPE(pLeft), GET_PARAM_TYPE(pRight), optr)) { + if (noConvertBeforeCompare(GET_PARAM_TYPE(pLeft), GET_PARAM_TYPE(pRight), optr)) { param1 = pLeft; param2 = pRight; } else { diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index d84a3d25c6..32e27f886b 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -1071,7 +1071,7 @@ int32_t patternMatch(const char *patterStr, const char *str, size_t size, const return (str[j] == 0 || j >= size) ? 
TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; } -int32_t WCSPatternMatch(const TdUcs4 *patterStr, const TdUcs4 *str, size_t size, const SPatternCompareInfo *pInfo) { +int32_t WCSPatternMatch(const TdUcs4 *pattern, const TdUcs4 *str, size_t size, const SPatternCompareInfo *pInfo) { TdUcs4 c, c1; TdUcs4 matchOne = L'_'; // "_" TdUcs4 matchAll = L'%'; // "%" @@ -1079,10 +1079,10 @@ int32_t WCSPatternMatch(const TdUcs4 *patterStr, const TdUcs4 *str, size_t size, int32_t i = 0; int32_t j = 0; - while ((c = patterStr[i++]) != 0) { + while ((c = pattern[i++]) != 0) { if (c == matchAll) { /* Match "%" */ - while ((c = patterStr[i++]) == matchAll || c == matchOne) { + while ((c = pattern[i++]) == matchAll || c == matchOne) { if (c == matchOne && (j >= size || str[j++] == 0)) { return TSDB_PATTERN_NOWILDCARDMATCH; } @@ -1100,7 +1100,7 @@ int32_t WCSPatternMatch(const TdUcs4 *patterStr, const TdUcs4 *str, size_t size, break; } - int32_t ret = WCSPatternMatch(&patterStr[i], ++str, size - n - 1, pInfo); + int32_t ret = WCSPatternMatch(&pattern[i], ++str, size - n - 1, pInfo); if (ret != TSDB_PATTERN_NOMATCH) { return ret; } @@ -1198,7 +1198,7 @@ int32_t compareStrPatternNotMatch(const void *pLeft, const void *pRight) { int32_t compareWStrPatternMatch(const void *pLeft, const void *pRight) { SPatternCompareInfo pInfo = {'%', '_'}; - assert(varDataLen(pRight) <= TSDB_MAX_FIELD_LEN * TSDB_NCHAR_SIZE); + ASSERT(varDataLen(pRight) <= TSDB_MAX_FIELD_LEN * TSDB_NCHAR_SIZE); char *pattern = taosMemoryCalloc(varDataLen(pRight) + TSDB_NCHAR_SIZE, 1); memcpy(pattern, varDataVal(pRight), varDataLen(pRight)); From 024a7a58a1ce5c099f23c83f554df8924ecdd9e2 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 27 Dec 2022 18:46:32 +0800 Subject: [PATCH 2/8] fix: add some logs to detect vnodes.json emptyed --- source/dnode/mgmt/mgmt_vnode/src/vmFile.c | 18 +++++++++++++----- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 7 ++++--- source/dnode/mgmt/mgmt_vnode/src/vmInt.c | 10 +++++----- 3 files 
changed, 22 insertions(+), 13 deletions(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c index a49e855e39..215185c555 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c @@ -16,7 +16,7 @@ #define _DEFAULT_SOURCE #include "vmInt.h" -#define MAX_CONTENT_LEN 1024 * 1024 +#define MAX_CONTENT_LEN 2 * 1024 * 1024 SVnodeObj **vmGetVnodeListFromHash(SVnodeMgmt *pMgmt, int32_t *numOfVnodes) { taosThreadRwlockRdlock(&pMgmt->lock); @@ -60,7 +60,7 @@ int32_t vmGetVnodeListFromFile(SVnodeMgmt *pMgmt, SWrapperCfg **ppCfgs, int32_t pFile = taosOpenFile(file, TD_FILE_READ); if (pFile == NULL) { - dDebug("file %s not exist", file); + dInfo("file %s not exist", file); code = 0; goto _OVER; } @@ -133,7 +133,7 @@ int32_t vmGetVnodeListFromFile(SVnodeMgmt *pMgmt, SWrapperCfg **ppCfgs, int32_t *numOfVnodes = vnodesNum; code = 0; - dDebug("succcessed to read file %s, numOfVnodes:%d", file, vnodesNum); + dInfo("succcessed to read file %s, numOfVnodes:%d", file, vnodesNum); _OVER: if (content != NULL) taosMemoryFree(content); @@ -163,6 +163,7 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { if (ppVnodes == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; code = -1; + dError("failed to write %s while get vnodelist", file); goto _OVER; } @@ -172,6 +173,7 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { if (content == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; code = -1; + dError("failed to write %s while malloc content", file); goto _OVER; } @@ -213,6 +215,12 @@ _OVER: if (code != 0) return -1; - dDebug("successed to write %s, numOfVnodes:%d", realfile, numOfVnodes); - return taosRenameFile(file, realfile); + dInfo("successed to write %s, numOfVnodes:%d", realfile, numOfVnodes); + code = taosRenameFile(file, realfile); + + if (code != 0) { + dError("failed to rename %s to %s", file, realfile); + } + + return code; } \ No newline at end of file diff --git 
a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index bc46772858..082fe7341b 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -213,7 +213,7 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); if (pVnode != NULL) { - dDebug("vgId:%d, already exist", req.vgId); + dInfo("vgId:%d, already exist", req.vgId); tFreeSCreateVnodeReq(&req); vmReleaseVnode(pMgmt, pVnode); terrno = TSDB_CODE_VND_ALREADY_EXIST; @@ -358,7 +358,7 @@ int32_t vmProcessDropVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } int32_t vgId = dropReq.vgId; - dDebug("vgId:%d, start to drop vnode", vgId); + dInfo("vgId:%d, start to drop vnode", vgId); if (dropReq.dnodeId != pMgmt->pData->dnodeId) { terrno = TSDB_CODE_INVALID_MSG; @@ -368,7 +368,7 @@ int32_t vmProcessDropVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { SVnodeObj *pVnode = vmAcquireVnode(pMgmt, vgId); if (pVnode == NULL) { - dDebug("vgId:%d, failed to drop since %s", vgId, terrstr()); + dInfo("vgId:%d, failed to drop since %s", vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; return -1; } @@ -383,6 +383,7 @@ int32_t vmProcessDropVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { vmCloseVnode(pMgmt, pVnode); vmWriteVnodeListToFile(pMgmt); + dInfo("vgId:%d, is dropped", vgId); return 0; } diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 313a88fc5c..f808c67ef6 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -138,7 +138,7 @@ static void *vmOpenVnodeInThread(void *param) { SVnodeMgmt *pMgmt = pThread->pMgmt; char path[TSDB_FILENAME_LEN]; - dDebug("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum); + dInfo("thread:%d, start to open %d vnodes", pThread->threadIndex, pThread->vnodeNum); setThreadName("open-vnodes"); for (int32_t v = 0; v < 
pThread->vnodeNum; ++v) { @@ -156,14 +156,14 @@ static void *vmOpenVnodeInThread(void *param) { pThread->failed++; } else { vmOpenVnode(pMgmt, pCfg, pImpl); - dDebug("vgId:%d, is opened by thread:%d", pCfg->vgId, pThread->threadIndex); + dInfo("vgId:%d, is opened by thread:%d", pCfg->vgId, pThread->threadIndex); pThread->opened++; atomic_add_fetch_32(&pMgmt->state.openVnodes, 1); } } - dDebug("thread:%d, numOfVnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened, - pThread->failed); + dInfo("thread:%d, numOfVnodes:%d, opened:%d failed:%d", pThread->threadIndex, pThread->vnodeNum, pThread->opened, + pThread->failed); return NULL; } @@ -496,7 +496,7 @@ static void *vmRestoreVnodeInThread(void *param) { dError("vgId:%d, failed to restore vnode by thread:%d", pVnode->vgId, pThread->threadIndex); pThread->failed++; } else { - dDebug("vgId:%d, is restored by thread:%d", pVnode->vgId, pThread->threadIndex); + dInfo("vgId:%d, is restored by thread:%d", pVnode->vgId, pThread->threadIndex); pThread->opened++; atomic_add_fetch_32(&pMgmt->state.openVnodes, 1); } From 011c83956b5ffb6f0f1fd29d81ebb56cfdee6a55 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 28 Dec 2022 19:12:36 +0800 Subject: [PATCH 3/8] refactor: do some internal refactor. 
--- include/util/tcompare.h | 29 ++-- source/libs/scalar/src/filter.c | 18 +- source/libs/scalar/src/sclvector.c | 4 +- source/util/src/tcompare.c | 267 +++++++++++++++++------------ source/util/test/utilTests.cpp | 235 +++++++++++++++++++++++++ 5 files changed, 425 insertions(+), 128 deletions(-) diff --git a/include/util/tcompare.h b/include/util/tcompare.h index c7a3ca20f2..f92e1c3970 100644 --- a/include/util/tcompare.h +++ b/include/util/tcompare.h @@ -36,17 +36,18 @@ extern "C" { #define FLT_GREATEREQUAL(_x, _y) (FLT_EQUAL((_x), (_y)) || ((_x) > (_y))) #define FLT_LESSEQUAL(_x, _y) (FLT_EQUAL((_x), (_y)) || ((_x) < (_y))) -#define PATTERN_COMPARE_INFO_INITIALIZER \ - { '%', '_' } +#define PATTERN_COMPARE_INFO_INITIALIZER { '%', '_', L'%', L'_' } typedef struct SPatternCompareInfo { - char matchAll; // symbol for match all wildcard, default: '%' - char matchOne; // symbol for match one wildcard, default: '_' + char matchAll; // symbol for match all wildcard, default: '%' + char matchOne; // symbol for match one wildcard, default: '_' + TdUcs4 umatchAll; // unicode version matchAll + TdUcs4 umatchOne; // unicode version matchOne } SPatternCompareInfo; -int32_t patternMatch(const char *pattern, const char *str, size_t size, const SPatternCompareInfo *pInfo); +int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t ssize, const SPatternCompareInfo *pInfo); -int32_t WCSPatternMatch(const TdUcs4 *pattern, const TdUcs4 *str, size_t size, const SPatternCompareInfo *pInfo); +int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, size_t ssize, const SPatternCompareInfo *pInfo); int32_t taosArrayCompareString(const void *a, const void *b); @@ -79,9 +80,11 @@ int32_t compareDoubleVal(const void *pLeft, const void *pRight); int32_t compareLenPrefixedStr(const void *pLeft, const void *pRight); int32_t compareLenPrefixedWStr(const void *pLeft, const void *pRight); -int32_t compareStrRegexComp(const void *pLeft, const void 
*pRight); -int32_t compareStrRegexCompMatch(const void *pLeft, const void *pRight); -int32_t compareStrRegexCompNMatch(const void *pLeft, const void *pRight); +int32_t comparestrRegexMatch(const void *pLeft, const void *pRight); +int32_t comparestrRegexNMatch(const void *pLeft, const void *pRight); + +int32_t comparewcsRegexMatch(const void *pLeft, const void *pRight); +int32_t comparewcsRegexNMatch(const void *pLeft, const void *pRight); int32_t compareInt8ValDesc(const void *pLeft, const void *pRight); int32_t compareInt16ValDesc(const void *pLeft, const void *pRight); @@ -99,11 +102,11 @@ int32_t compareUint64ValDesc(const void *pLeft, const void *pRight); int32_t compareLenPrefixedStrDesc(const void *pLeft, const void *pRight); int32_t compareLenPrefixedWStrDesc(const void *pLeft, const void *pRight); -int32_t compareStrPatternMatch(const void *pLeft, const void *pRight); -int32_t compareStrPatternNotMatch(const void *pLeft, const void *pRight); +int32_t comparestrPatternMatch(const void *pLeft, const void *pRight); +int32_t comparestrPatternNMatch(const void *pLeft, const void *pRight); -int32_t compareWStrPatternMatch(const void *pLeft, const void *pRight); -int32_t compareWStrPatternNotMatch(const void *pLeft, const void *pRight); +int32_t comparewcsPatternMatch(const void *pLeft, const void *pRight); +int32_t comparewcsPatternNMatch(const void *pLeft, const void *pRight); int32_t compareInt8Int16(const void *pLeft, const void *pRight); int32_t compareInt8Int32(const void *pLeft, const void *pRight); diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 063699e8e4..8b7651765b 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -130,9 +130,9 @@ __compar_fn_t gDataCompare[] = {compareInt32Val, compareFloatVal, compareDoubleVal, compareLenPrefixedStr, - compareStrPatternMatch, + comparestrPatternMatch, compareChkInString, - compareWStrPatternMatch, + comparewcsPatternMatch, compareLenPrefixedWStr, 
compareUint8Val, compareUint16Val, @@ -142,15 +142,17 @@ __compar_fn_t gDataCompare[] = {compareInt32Val, setChkInBytes2, setChkInBytes4, setChkInBytes8, - compareStrRegexCompMatch, - compareStrRegexCompNMatch, + comparestrRegexMatch, + comparestrRegexNMatch, setChkNotInBytes1, setChkNotInBytes2, setChkNotInBytes4, setChkNotInBytes8, compareChkNotInString, - compareStrPatternNotMatch, - compareWStrPatternNotMatch}; + comparestrPatternNMatch, + comparewcsPatternNMatch, + comparewcsRegexMatch, + comparewcsRegexNMatch,}; __compar_fn_t gInt8SignCompare[] = {compareInt8Val, compareInt8Int16, compareInt8Int32, compareInt8Int64, compareInt8Float, compareInt8Double}; @@ -295,9 +297,9 @@ int8_t filterGetCompFuncIdx(int32_t type, int32_t optr) { case TSDB_DATA_TYPE_NCHAR: { if (optr == OP_TYPE_MATCH) { - comparFn = 19; + comparFn = 28; } else if (optr == OP_TYPE_NMATCH) { - comparFn = 20; + comparFn = 29; } else if (optr == OP_TYPE_LIKE) { comparFn = 9; } else if (optr == OP_TYPE_NOT_LIKE) { diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index 2eb12c8269..24ac5be845 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -916,9 +916,11 @@ int32_t vectorGetConvertType(int32_t type1, int32_t type2) { int32_t vectorConvertSingleCol(SScalarParam *input, SScalarParam *output, int32_t type, int32_t startIndex, int32_t numOfRows) { - SDataType t = {.type = type, .bytes = tDataTypes[type].bytes}; output->numOfRows = input->numOfRows; + SDataType t = {.type = type}; + t.bytes = IS_VAR_DATA_TYPE(t.type)? 
input->columnData->info.bytes:tDataTypes[type].bytes; + int32_t code = sclCreateColumnInfoData(&t, input->numOfRows, output); if (code != TSDB_CODE_SUCCESS) { return TSDB_CODE_OUT_OF_MEMORY; diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index 32e27f886b..21e7d9e0cd 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -1007,59 +1007,66 @@ int32_t compareJsonValDesc(const void *pLeft, const void *pRight) { return compa * '_': Matches one character * */ -int32_t patternMatch(const char *patterStr, const char *str, size_t size, const SPatternCompareInfo *pInfo) { +int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t ssize, const SPatternCompareInfo *pInfo) { char c, c1; int32_t i = 0; int32_t j = 0; - int32_t o = 0; - int32_t m = 0; + int32_t nMatchChar = 0; - while ((c = patterStr[i++]) != 0) { + while ((c = pattern[i++]) != 0 && (i <= psize)) { if (c == pInfo->matchAll) { /* Match "*" */ - while ((c = patterStr[i++]) == pInfo->matchAll || c == pInfo->matchOne) { + while ((c = pattern[i++]) == pInfo->matchAll || c == pInfo->matchOne) { + if (i > psize) { // overflow check + break; + } + if (c == pInfo->matchOne) { - if (j > size || str[j++] == 0) { - // empty string, return not match + if (j > ssize || str[j++] == 0) { // empty string, return not match return TSDB_PATTERN_NOWILDCARDMATCH; } else { - ++o; + ++nMatchChar; } } } - if (c == 0) { + if (c == 0 || i > psize) { return TSDB_PATTERN_MATCH; /* "*" at the end of the pattern matches */ } - char next[3] = {toupper(c), tolower(c), 0}; - m = o; - while (1) { - size_t n = strcspn(str + m, next); - str += m + n; + char acceptArray[3] = {toupper(c), tolower(c), 0}; - if (str[0] == 0 || (n >= size)) { + str += nMatchChar; + int32_t remain = ssize - nMatchChar; + while (1) { + size_t n = strcspn(str, acceptArray); + + str += n; + remain -= n; + + if (str[0] == 0 || (remain <= 0)) { break; } - int32_t ret = patternMatch(&patterStr[i], ++str, size - n 
- 1, pInfo); + int32_t ret = patternMatch(&pattern[i], psize - i, ++str, --remain, pInfo); if (ret != TSDB_PATTERN_NOMATCH) { return ret; } - m = 0; } + return TSDB_PATTERN_NOWILDCARDMATCH; } c1 = str[j++]; - ++o; + ++nMatchChar; - if (j <= size) { - if (c == '\\' && patterStr[i] == '_' && c1 == '_') { + if (j <= ssize) { + if (c == '\\' && pattern[i] == '_' && c1 == '_') { i++; continue; } + if (c == c1 || tolower(c) == tolower(c1) || (c == pInfo->matchOne && c1 != 0)) { continue; } @@ -1068,39 +1075,52 @@ int32_t patternMatch(const char *patterStr, const char *str, size_t size, const return TSDB_PATTERN_NOMATCH; } - return (str[j] == 0 || j >= size) ? TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; + return (str[j] == 0 || j >= ssize) ? TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; } -int32_t WCSPatternMatch(const TdUcs4 *pattern, const TdUcs4 *str, size_t size, const SPatternCompareInfo *pInfo) { +int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, size_t ssize, const SPatternCompareInfo *pInfo) { TdUcs4 c, c1; - TdUcs4 matchOne = L'_'; // "_" - TdUcs4 matchAll = L'%'; // "%" int32_t i = 0; int32_t j = 0; + int32_t nMatchChar = 0; - while ((c = pattern[i++]) != 0) { - if (c == matchAll) { /* Match "%" */ - - while ((c = pattern[i++]) == matchAll || c == matchOne) { - if (c == matchOne && (j >= size || str[j++] == 0)) { - return TSDB_PATTERN_NOWILDCARDMATCH; - } - } - if (c == 0) { - return TSDB_PATTERN_MATCH; - } - - TdUcs4 accept[3] = {towupper(c), towlower(c), 0}; - while (1) { - size_t n = wcscspn(str, accept); - - str += n; - if (str[0] == 0 || (n >= size)) { + while ((c = pattern[i++]) != 0 && (i <= psize)) { + /* Match "%" */ + if (c == pInfo->umatchAll) { + while ((c = pattern[i++]) == pInfo->umatchAll || c == pInfo->umatchOne) { + if (i > psize) { break; } - int32_t ret = WCSPatternMatch(&pattern[i], ++str, size - n - 1, pInfo); + if (c == pInfo->umatchOne) { + if (j >= ssize || str[j++] == 0) { + return TSDB_PATTERN_NOWILDCARDMATCH; 
+ } else { + ++nMatchChar; + } + } + } + + if (c == 0 || i > psize) { + return TSDB_PATTERN_MATCH; + } + + TdUcs4 acceptArray[3] = {towupper(c), towlower(c), 0}; + + str += nMatchChar; + int32_t remain = ssize - nMatchChar; + while (1) { + size_t n = wcscspn(str, acceptArray); + + str += n; + remain -= n; + + if (str[0] == 0 || (remain <= 0)) { + break; + } + + int32_t ret = wcsPatternMatch(&pattern[i], psize-i, ++str, --remain, pInfo); if (ret != TSDB_PATTERN_NOMATCH) { return ret; } @@ -1110,9 +1130,15 @@ int32_t WCSPatternMatch(const TdUcs4 *pattern, const TdUcs4 *str, size_t size, c } c1 = str[j++]; + nMatchChar++; - if (j <= size) { - if (c == c1 || towlower(c) == towlower(c1) || (c == matchOne && c1 != 0)) { + if (j <= ssize) { + if (c == L'\\' && pattern[i] == L'_' && c1 == L'_') { + i++; + continue; + } + + if (c == c1 || towlower(c) == towlower(c1) || (c == pInfo->umatchOne && c1 != 0)) { continue; } } @@ -1120,16 +1146,38 @@ int32_t WCSPatternMatch(const TdUcs4 *pattern, const TdUcs4 *str, size_t size, c return TSDB_PATTERN_NOMATCH; } - return (str[j] == 0 || j >= size) ? TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; + return (str[j] == 0 || j >= ssize) ? TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; } -int32_t compareStrRegexCompMatch(const void *pLeft, const void *pRight) { return compareStrRegexComp(pLeft, pRight); } - -int32_t compareStrRegexCompNMatch(const void *pLeft, const void *pRight) { - return compareStrRegexComp(pLeft, pRight) ? 0 : 1; +int32_t comparestrRegexNMatch(const void *pLeft, const void *pRight) { + return comparestrRegexMatch(pLeft, pRight) ? 0 : 1; } -int32_t compareStrRegexComp(const void *pLeft, const void *pRight) { +static int32_t doExecRegexMatch(const char *pString, const char *pPattern) { + int32_t ret = 0; + regex_t regex; + char msgbuf[256] = {0}; + + int32_t cflags = REG_EXTENDED; + if ((ret = regcomp(&regex, pPattern, cflags)) != 0) { + regerror(ret, &regex, msgbuf, tListLen(msgbuf)); + + uError("Failed to compile regex pattern %s. 
reason %s", pPattern, msgbuf); + regfree(&regex); + return 1; + } + + ret = regexec(&regex, pString, 0, NULL, 0); + if (ret != 0 && ret != REG_NOMATCH) { + regerror(ret, &regex, msgbuf, sizeof(msgbuf)); + uDebug("Failed to match %s with pattern %s, reason %s", pString, pPattern, msgbuf) + } + + regfree(&regex); + return (ret == 0) ? 0 : 1; +} + +int32_t comparestrRegexMatch(const void *pLeft, const void *pRight) { size_t sz = varDataLen(pRight); char *pattern = taosMemoryMalloc(sz + 1); memcpy(pattern, varDataVal(pRight), varDataLen(pRight)); @@ -1140,30 +1188,48 @@ int32_t compareStrRegexComp(const void *pLeft, const void *pRight) { memcpy(str, varDataVal(pLeft), sz); str[sz] = 0; - int32_t errCode = 0; - regex_t regex; - char msgbuf[256] = {0}; + int32_t ret = doExecRegexMatch(str, pattern); - int32_t cflags = REG_EXTENDED; - if ((errCode = regcomp(&regex, pattern, cflags)) != 0) { - regerror(errCode, &regex, msgbuf, sizeof(msgbuf)); - uError("Failed to compile regex pattern %s. reason %s", pattern, msgbuf); - regfree(&regex); - taosMemoryFree(str); - taosMemoryFree(pattern); - return 1; - } - - errCode = regexec(&regex, str, 0, NULL, 0); - if (errCode != 0 && errCode != REG_NOMATCH) { - regerror(errCode, &regex, msgbuf, sizeof(msgbuf)); - uDebug("Failed to match %s with pattern %s, reason %s", str, pattern, msgbuf) - } - int32_t result = (errCode == 0) ? 0 : 1; - regfree(&regex); taosMemoryFree(str); taosMemoryFree(pattern); - return result; + + return (ret == 0) ? 
0 : 1;; +} + +int32_t comparewcsRegexMatch(const void* pString, const void* pPattern) { + size_t len = varDataLen(pPattern); + char *pattern = taosMemoryMalloc(len + 1); + + int convertLen = taosUcs4ToMbs((TdUcs4 *)varDataVal(pPattern), len, pattern); + if (convertLen < 0) { + taosMemoryFree(pattern); + return TSDB_CODE_APP_ERROR; + } + + pattern[len] = 0; + + len = varDataLen(pString); + char *str = taosMemoryMalloc(len + 1); + convertLen = taosUcs4ToMbs((TdUcs4 *)varDataVal(pString), len, str); + if (convertLen < 0) { + taosMemoryFree(str); + taosMemoryFree(pattern); + + return TSDB_CODE_APP_ERROR; + } + + str[len] = 0; + + int32_t ret = doExecRegexMatch(str, pattern); + + taosMemoryFree(str); + taosMemoryFree(pattern); + + return (ret == 0) ? 0 : 1; +} + +int32_t comparewcsRegexNMatch(const void *pLeft, const void *pRight) { + return comparewcsRegexMatch(pLeft, pRight) ? 0 : 1; } int32_t taosArrayCompareString(const void *a, const void *b) { @@ -1173,46 +1239,35 @@ int32_t taosArrayCompareString(const void *a, const void *b) { return compareLenPrefixedStr(x, y); } -int32_t compareStrPatternMatch(const void *pLeft, const void *pRight) { - SPatternCompareInfo pInfo = {'%', '_'}; - - assert(varDataLen(pRight) <= TSDB_MAX_FIELD_LEN); - char *pattern = taosMemoryCalloc(varDataLen(pRight) + 1, sizeof(char)); - memcpy(pattern, varDataVal(pRight), varDataLen(pRight)); +int32_t comparestrPatternMatch(const void *pLeft, const void *pRight) { + SPatternCompareInfo pInfo = PATTERN_COMPARE_INFO_INITIALIZER; + ASSERT(varDataLen(pRight) <= TSDB_MAX_FIELD_LEN); + size_t pLen = varDataLen(pRight); size_t sz = varDataLen(pLeft); - char *buf = taosMemoryMalloc(sz + 1); - memcpy(buf, varDataVal(pLeft), sz); - buf[sz] = 0; - int32_t ret = patternMatch(pattern, buf, sz, &pInfo); - taosMemoryFree(buf); - taosMemoryFree(pattern); + int32_t ret = patternMatch(varDataVal(pRight), pLen, varDataVal(pLeft), sz, &pInfo); return (ret == TSDB_PATTERN_MATCH) ? 
0 : 1; } -int32_t compareStrPatternNotMatch(const void *pLeft, const void *pRight) { - return compareStrPatternMatch(pLeft, pRight) ? 0 : 1; +int32_t comparestrPatternNMatch(const void *pLeft, const void *pRight) { + return comparestrPatternMatch(pLeft, pRight) ? 0 : 1; } -int32_t compareWStrPatternMatch(const void *pLeft, const void *pRight) { - SPatternCompareInfo pInfo = {'%', '_'}; +int32_t comparewcsPatternMatch(const void *pLeft, const void *pRight) { + SPatternCompareInfo pInfo = PATTERN_COMPARE_INFO_INITIALIZER; - ASSERT(varDataLen(pRight) <= TSDB_MAX_FIELD_LEN * TSDB_NCHAR_SIZE); - - char *pattern = taosMemoryCalloc(varDataLen(pRight) + TSDB_NCHAR_SIZE, 1); - memcpy(pattern, varDataVal(pRight), varDataLen(pRight)); - - int32_t ret = - WCSPatternMatch((TdUcs4 *)pattern, (TdUcs4 *)varDataVal(pLeft), varDataLen(pLeft) / TSDB_NCHAR_SIZE, &pInfo); - taosMemoryFree(pattern); + size_t psize = varDataLen(pRight); + int32_t ret = wcsPatternMatch((TdUcs4 *)varDataVal(pRight), psize / TSDB_NCHAR_SIZE, (TdUcs4 *)varDataVal(pLeft), + varDataLen(pLeft) / TSDB_NCHAR_SIZE, &pInfo); return (ret == TSDB_PATTERN_MATCH) ? 0 : 1; } -int32_t compareWStrPatternNotMatch(const void *pLeft, const void *pRight) { - return compareWStrPatternMatch(pLeft, pRight) ? 0 : 1; +int32_t comparewcsPatternNMatch(const void *pLeft, const void *pRight) { + return comparewcsPatternMatch(pLeft, pRight) ? 
0 : 1; } + __compar_fn_t getComparFunc(int32_t type, int32_t optr) { __compar_fn_t comparFn = NULL; @@ -1285,13 +1340,13 @@ __compar_fn_t getComparFunc(int32_t type, int32_t optr) { break; case TSDB_DATA_TYPE_BINARY: { if (optr == OP_TYPE_MATCH) { - comparFn = compareStrRegexCompMatch; + comparFn = comparestrRegexMatch; } else if (optr == OP_TYPE_NMATCH) { - comparFn = compareStrRegexCompNMatch; + comparFn = comparestrRegexNMatch; } else if (optr == OP_TYPE_LIKE) { /* wildcard query using like operator */ - comparFn = compareStrPatternMatch; + comparFn = comparestrPatternMatch; } else if (optr == OP_TYPE_NOT_LIKE) { /* wildcard query using like operator */ - comparFn = compareStrPatternNotMatch; + comparFn = comparestrPatternNMatch; } else if (optr == OP_TYPE_IN) { comparFn = compareChkInString; } else if (optr == OP_TYPE_NOT_IN) { @@ -1305,13 +1360,13 @@ __compar_fn_t getComparFunc(int32_t type, int32_t optr) { case TSDB_DATA_TYPE_NCHAR: { if (optr == OP_TYPE_MATCH) { - comparFn = compareStrRegexCompMatch; + comparFn = comparewcsRegexMatch; } else if (optr == OP_TYPE_NMATCH) { - comparFn = compareStrRegexCompNMatch; + comparFn = comparewcsRegexNMatch; } else if (optr == OP_TYPE_LIKE) { - comparFn = compareWStrPatternMatch; + comparFn = comparewcsPatternMatch; } else if (optr == OP_TYPE_NOT_LIKE) { - comparFn = compareWStrPatternNotMatch; + comparFn = comparewcsPatternNMatch; } else if (optr == OP_TYPE_IN) { comparFn = compareChkInString; } else if (optr == OP_TYPE_NOT_IN) { diff --git a/source/util/test/utilTests.cpp b/source/util/test/utilTests.cpp index e69de29bb2..27496ff9b9 100644 --- a/source/util/test/utilTests.cpp +++ b/source/util/test/utilTests.cpp @@ -0,0 +1,235 @@ +#include +#include +#include + +#include "tarray.h" +#include "tcompare.h" + +namespace { +} // namespace + +TEST(utilTest, wchar_pattern_match_test) { + const TdWchar* pattern = L"%1"; + + int32_t ret = 0; + SPatternCompareInfo pInfo = PATTERN_COMPARE_INFO_INITIALIZER; + + const TdWchar* 
str0 = L"14"; + ret = wcsPatternMatch(reinterpret_cast(pattern), 2, reinterpret_cast(str0), wcslen(str0), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const TdWchar* str1 = L"11"; + ret = wcsPatternMatch(reinterpret_cast(pattern), 2, reinterpret_cast(str1), wcslen(str1), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* str2 = L"41"; + ret = wcsPatternMatch(reinterpret_cast(pattern), 2, reinterpret_cast(str2), wcslen(str2), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern3 = L"%_"; + const TdWchar* str3 = L"88"; + ret = wcsPatternMatch(reinterpret_cast(pattern3), 2, reinterpret_cast(str3), wcslen(str3), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern4 = L"%___"; + const TdWchar* str4 = L"88"; + ret = wcsPatternMatch(reinterpret_cast(pattern4), 4, reinterpret_cast(str4), wcslen(str4), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const TdWchar* pattern5 = L"%___"; + const TdWchar* str5 = L"883391"; + ret = wcsPatternMatch(reinterpret_cast(pattern5), 4, reinterpret_cast(str5), wcslen(str5), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern6 = L"%___66"; + const TdWchar* str6 = L"88339166"; + ret = wcsPatternMatch(reinterpret_cast(pattern6), 6, reinterpret_cast(str6), wcslen(str6), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern7 = L"%____66"; + const TdWchar* str7 = L"66166"; + ret = wcsPatternMatch(reinterpret_cast(pattern7), 7, reinterpret_cast(str7), wcslen(str7), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const TdWchar* pattern8 = L"6%____66"; + const TdWchar* str8 = L"666166"; + ret = wcsPatternMatch(reinterpret_cast(pattern8), 8, reinterpret_cast(str8), wcslen(str8), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const TdWchar* pattern9 = L"6\\__6"; + const TdWchar* str9 = L"6_66"; + ret = wcsPatternMatch(reinterpret_cast(pattern9), 6, reinterpret_cast(str9), wcslen(str9), &pInfo); + 
ASSERT_EQ(ret, TSDB_PATTERN_MATCH); +} + +TEST(utilTest, wchar_pattern_match_no_terminated) { + const TdWchar* pattern = L"%1 "; + + int32_t ret = 0; + SPatternCompareInfo pInfo = PATTERN_COMPARE_INFO_INITIALIZER; + + const TdWchar* str0 = L"14 "; + ret = wcsPatternMatch(reinterpret_cast(pattern), 2, reinterpret_cast(str0), 2, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const TdWchar* str1 = L"11 "; + ret = wcsPatternMatch(reinterpret_cast(pattern), 2, reinterpret_cast(str1), 2, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* str2 = L"41 "; + ret = wcsPatternMatch(reinterpret_cast(pattern), 2, reinterpret_cast(str2), 2, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern3 = L"%_ "; + const TdWchar* str3 = L"88 "; + ret = wcsPatternMatch(reinterpret_cast(pattern3), 2, reinterpret_cast(str3), 2, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern4 = L"%___ "; + const TdWchar* str4 = L"88 "; + ret = wcsPatternMatch(reinterpret_cast(pattern4), 4, reinterpret_cast(str4), 2, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const TdWchar* pattern5 = L"%___ "; + const TdWchar* str5 = L"883391 "; + ret = wcsPatternMatch(reinterpret_cast(pattern5), 4, reinterpret_cast(str5), 6, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern6 = L"%___66 "; + const TdWchar* str6 = L"88339166 "; + ret = wcsPatternMatch(reinterpret_cast(pattern6), 6, reinterpret_cast(str6), 8, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern7 = L"%____66 "; + const TdWchar* str7 = L"66166 "; + ret = wcsPatternMatch(reinterpret_cast(pattern7), 7, reinterpret_cast(str7), 5, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const TdWchar* pattern8 = L"6%____66 "; + const TdWchar* str8 = L"666166 "; + ret = wcsPatternMatch(reinterpret_cast(pattern8), 8, reinterpret_cast(str8), 6, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const TdWchar* pattern9 = 
L"6\\_6 "; + const TdWchar* str9 = L"6_6 "; + ret = wcsPatternMatch(reinterpret_cast(pattern9), 4, reinterpret_cast(str9), 3, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern10 = L"% "; + const TdWchar* str10 = L"6_6 "; + ret = wcsPatternMatch(reinterpret_cast(pattern10), 1, reinterpret_cast(str10), 3, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); +} + +TEST(utilTest, char_pattern_match_test) { + const char* pattern = "%1"; + + int32_t ret = 0; + SPatternCompareInfo pInfo = PATTERN_COMPARE_INFO_INITIALIZER; + + const char* str0 = "14"; + ret = patternMatch(pattern, 2, str0, strlen(str0), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const char* str1 = "11"; + ret = patternMatch(pattern, 2, str1, strlen(str1), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* str2 = "41"; + ret = patternMatch(pattern, 2, str2, strlen(str2), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern3 = "%_"; + const char* str3 = "88"; + ret = patternMatch(pattern3, 2, str3, strlen(str3), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern4 = "%___"; + const char* str4 = "88"; + ret = patternMatch(pattern4, 4, str4, strlen(str4), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const char* pattern5 = "%___"; + const char* str5 = "883391"; + ret = patternMatch(pattern5, 4, str5, strlen(str5), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern6 = "%___66"; + const char* str6 = "88339166"; + ret = patternMatch(pattern6, 6, str6, strlen(str6), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern7 = "%____66"; + const char* str7 = "66166"; + ret = patternMatch(pattern7, 7, str7, strlen(str7), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const char* pattern8 = "6%____66"; + const char* str8 = "666166"; + ret = patternMatch(pattern8, 8, str8, strlen(str8), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const char* pattern9 = 
"6\\_6"; + const char* str9 = "6_6"; + ret = patternMatch(pattern9, 5, str9, strlen(str9), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); +} + +TEST(utilTest, char_pattern_match_no_terminated) { + const char* pattern = "%1 "; + + int32_t ret = 0; + SPatternCompareInfo pInfo = PATTERN_COMPARE_INFO_INITIALIZER; + + const char* str0 = "14"; + ret = patternMatch(pattern, 2, str0, strlen(str0), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const char* str1 = "11"; + ret = patternMatch(pattern, 2, str1, strlen(str1), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* str2 = "41"; + ret = patternMatch(pattern, 2, str2, strlen(str2), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern3 = "%_ "; + const char* str3 = "88"; + ret = patternMatch(pattern3, 2, str3, strlen(str3), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern4 = "%___ "; + const char* str4 = "88"; + ret = patternMatch(pattern4, 4, str4, strlen(str4), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const char* pattern5 = "%___ "; + const char* str5 = "883391"; + ret = patternMatch(pattern5, 4, str5, strlen(str5), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern6 = "%___66 "; + const char* str6 = "88339166"; + ret = patternMatch(pattern6, 6, str6, strlen(str6), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern7 = "%____66 "; + const char* str7 = "66166"; + ret = patternMatch(pattern7, 7, str7, strlen(str7), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const char* pattern8 = "6%____66 "; + const char* str8 = "666166"; + ret = patternMatch(pattern8, 8, str8, strlen(str8), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOWILDCARDMATCH); + + const char* pattern9 = "6\\_6 "; + const char* str9 = "6_6"; + ret = patternMatch(pattern9, 4, str9, strlen(str9), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern10 = "% "; + const char* str10 = "6_6"; + ret = 
patternMatch(pattern10, 1, str10, strlen(str10), &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); +} \ No newline at end of file From 0e471afc03b500f16e367b20f603d78ad39d525b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 29 Dec 2022 14:48:57 +0800 Subject: [PATCH 4/8] fix(query): fix the invalid access. --- include/util/tutil.h | 2 ++ source/util/src/tcompare.c | 45 ++++++++++++--------------- source/util/src/tutil.c | 56 ++++++++++++++++++++++++++++++++++ source/util/test/utilTests.cpp | 32 +++++++++++++++++++ 4 files changed, 109 insertions(+), 26 deletions(-) diff --git a/include/util/tutil.h b/include/util/tutil.h index de96300155..9f36cdba7c 100644 --- a/include/util/tutil.h +++ b/include/util/tutil.h @@ -34,6 +34,8 @@ char *strtolower(char *dst, const char *src); char *strntolower(char *dst, const char *src, int32_t n); char *strntolower_s(char *dst, const char *src, int32_t n); int64_t strnatoi(char *num, int32_t len); +size_t tstrncspn(const char *str, size_t ssize, const char *reject, size_t rsize); + char *strbetween(char *string, char *begin, char *end); char *paGetToken(char *src, char **token, int32_t *tokenLen); diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index 21e7d9e0cd..54b6d6d265 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -17,6 +17,7 @@ #define _XOPEN_SOURCE #define _DEFAULT_SOURCE #include "tcompare.h" +#include "tutil.h" #include "regex.h" #include "tdef.h" #include "thash.h" @@ -1014,16 +1015,12 @@ int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t int32_t j = 0; int32_t nMatchChar = 0; - while ((c = pattern[i++]) != 0 && (i <= psize)) { + while ((i < psize) && ((c = pattern[i++]) != 0)) { if (c == pInfo->matchAll) { /* Match "*" */ - while ((c = pattern[i++]) == pInfo->matchAll || c == pInfo->matchOne) { - if (i > psize) { // overflow check - break; - } - + while ((i < psize) && ((c = pattern[i++]) == pInfo->matchAll || c == pInfo->matchOne)) { if (c 
== pInfo->matchOne) { - if (j > ssize || str[j++] == 0) { // empty string, return not match + if (j >= ssize || str[j++] == 0) { // empty string, return not match return TSDB_PATTERN_NOWILDCARDMATCH; } else { ++nMatchChar; @@ -1031,21 +1028,21 @@ int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t } } - if (c == 0 || i > psize) { + if (i >= psize && (c == pInfo->umatchOne || c == pInfo->umatchAll)) { return TSDB_PATTERN_MATCH; /* "*" at the end of the pattern matches */ } - char acceptArray[3] = {toupper(c), tolower(c), 0}; + char rejectList[2] = {toupper(c), tolower(c)}; str += nMatchChar; int32_t remain = ssize - nMatchChar; while (1) { - size_t n = strcspn(str, acceptArray); + size_t n = tstrncspn(str, remain, rejectList, 2); str += n; remain -= n; - if (str[0] == 0 || (remain <= 0)) { + if ((remain <= 0) || str[0] == 0) { break; } @@ -1075,7 +1072,7 @@ int32_t patternMatch(const char *pattern, size_t psize, const char *str, size_t return TSDB_PATTERN_NOMATCH; } - return (str[j] == 0 || j >= ssize) ? TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; + return (j >= ssize || str[j] == 0) ? 
TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; } int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, size_t ssize, const SPatternCompareInfo *pInfo) { @@ -1085,14 +1082,10 @@ int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, int32_t j = 0; int32_t nMatchChar = 0; - while ((c = pattern[i++]) != 0 && (i <= psize)) { - /* Match "%" */ - if (c == pInfo->umatchAll) { - while ((c = pattern[i++]) == pInfo->umatchAll || c == pInfo->umatchOne) { - if (i > psize) { - break; - } + while ((i < psize) && ((c = pattern[i++]) != 0)) { + if (c == pInfo->umatchAll) { /* Match "%" */ + while ((i < psize) && ((c = pattern[i++]) == pInfo->umatchAll || c == pInfo->umatchOne)) { if (c == pInfo->umatchOne) { if (j >= ssize || str[j++] == 0) { return TSDB_PATTERN_NOWILDCARDMATCH; @@ -1102,7 +1095,7 @@ int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, } } - if (c == 0 || i > psize) { + if (i >= psize && (c == pInfo->umatchOne || c == pInfo->umatchAll)) { return TSDB_PATTERN_MATCH; } @@ -1116,11 +1109,11 @@ int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, str += n; remain -= n; - if (str[0] == 0 || (remain <= 0)) { + if ((remain <= 0) || str[0] == 0) { break; } - int32_t ret = wcsPatternMatch(&pattern[i], psize-i, ++str, --remain, pInfo); + int32_t ret = wcsPatternMatch(&pattern[i], psize - i, ++str, --remain, pInfo); if (ret != TSDB_PATTERN_NOMATCH) { return ret; } @@ -1146,7 +1139,7 @@ int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, return TSDB_PATTERN_NOMATCH; } - return (str[j] == 0 || j >= ssize) ? TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; + return (j >= ssize || str[j] == 0) ? 
TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; } int32_t comparestrRegexNMatch(const void *pLeft, const void *pRight) { @@ -1198,7 +1191,7 @@ int32_t comparestrRegexMatch(const void *pLeft, const void *pRight) { int32_t comparewcsRegexMatch(const void* pString, const void* pPattern) { size_t len = varDataLen(pPattern); - char *pattern = taosMemoryMalloc(len + 1); + char *pattern = taosMemoryMalloc(len + TSDB_NCHAR_SIZE); int convertLen = taosUcs4ToMbs((TdUcs4 *)varDataVal(pPattern), len, pattern); if (convertLen < 0) { @@ -1206,7 +1199,7 @@ int32_t comparewcsRegexMatch(const void* pString, const void* pPattern) { return TSDB_CODE_APP_ERROR; } - pattern[len] = 0; + pattern[convertLen] = 0; len = varDataLen(pString); char *str = taosMemoryMalloc(len + 1); @@ -1218,7 +1211,7 @@ int32_t comparewcsRegexMatch(const void* pString, const void* pPattern) { return TSDB_CODE_APP_ERROR; } - str[len] = 0; + str[convertLen] = 0; int32_t ret = doExecRegexMatch(str, pattern); diff --git a/source/util/src/tutil.c b/source/util/src/tutil.c index addb9f55ba..7297849870 100644 --- a/source/util/src/tutil.c +++ b/source/util/src/tutil.c @@ -376,3 +376,59 @@ void taosIp2String(uint32_t ip, char *str) { void taosIpPort2String(uint32_t ip, uint16_t port, char *str) { sprintf(str, "%u.%u.%u.%u:%u", ip & 0xFF, (ip >> 8) & 0xFF, (ip >> 16) & 0xFF, (uint8_t)(ip >> 24), port); } + +size_t tstrncspn(const char *str, size_t size, const char *reject, size_t rsize) { + if (rsize == 0 || rsize == 1) { + char* p = strnchr(str, reject[0], size, false); + return (p == NULL)? size:(p-str); + } + + /* Use multiple small memsets to enable inlining on most targets. 
*/ + unsigned char table[256]; + unsigned char *p = memset(table, 0, 64); + memset(p + 64, 0, 64); + memset(p + 128, 0, 64); + memset(p + 192, 0, 64); + + unsigned char *s = (unsigned char *)reject; + int32_t index = 0; + do { + p[s[index++]] = 1; + } while (index < rsize); + + s = (unsigned char*) str; + int32_t times = size >> 2; + if (times == 0) { + for(int32_t i = 0; i < size; ++i) { + if (p[s[i]]) { + return i; + } + } + + return size; + } + + index = 0; + uint32_t c0, c1, c2, c3; + for(int32_t i = 0; i < times; ++i, index += 4) { + int32_t j = index; + c0 = p[s[j]]; + c1 = p[s[j + 1]]; + c2 = p[s[j + 2]]; + c3 = p[s[j + 3]]; + + if ((c0 | c1 | c2 | c3) != 0) { + size_t count = ((i + 1) >> 2); + return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3; + } + } + + int32_t offset = times * 4; + for(int32_t i = offset; i < size; ++i) { + if (p[s[i]]) { + return i; + } + } + + return size; +} diff --git a/source/util/test/utilTests.cpp b/source/util/test/utilTests.cpp index 27496ff9b9..5ab93bedd3 100644 --- a/source/util/test/utilTests.cpp +++ b/source/util/test/utilTests.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include "tarray.h" @@ -232,4 +233,35 @@ TEST(utilTest, char_pattern_match_no_terminated) { const char* str10 = "6_6"; ret = patternMatch(pattern10, 1, str10, strlen(str10), &pInfo); ASSERT_EQ(ret, TSDB_PATTERN_MATCH); +} + +TEST(utilTest, tstrncspn) { + const char* p1 = "abc"; + const char* reject = "d"; + size_t v = tstrncspn(p1, strlen(p1), reject, 1); + ASSERT_EQ(v, 3); + + const char* reject1 = "a"; + v = tstrncspn(p1, strlen(p1), reject1, 1); + ASSERT_EQ(v, 0); + + const char* reject2 = "de"; + v = tstrncspn(p1, strlen(p1), reject2, 2); + ASSERT_EQ(v, 3); + + const char* p2 = "abcdefghijklmn"; + v = tstrncspn(p2, strlen(p2), reject2, 2); + ASSERT_EQ(v, 3); + + const char* reject3 = "12345n"; + v = tstrncspn(p2, strlen(p2), reject3, 6); + ASSERT_EQ(v, 13); + + const char* reject4 = ""; + v = tstrncspn(p2, strlen(p2), reject4, 0); + 
ASSERT_EQ(v, 14); + + const char* reject5 = "911"; + v = tstrncspn(p2, strlen(p2), reject5, 0); + ASSERT_EQ(v, 14); } \ No newline at end of file From f970dd24fce3afce8a30d7db1201c7461ac30f97 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 29 Dec 2022 16:34:14 +0800 Subject: [PATCH 5/8] fix(query): fix the invalid access. --- include/util/tutil.h | 4 ++++ source/util/src/tcompare.c | 17 +++++++++-------- source/util/src/tutil.c | 24 ++++++++++++++++++++++++ source/util/test/utilTests.cpp | 30 ++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 8 deletions(-) diff --git a/include/util/tutil.h b/include/util/tutil.h index 9f36cdba7c..9fb68aebdc 100644 --- a/include/util/tutil.h +++ b/include/util/tutil.h @@ -29,12 +29,16 @@ extern "C" { int32_t strdequote(char *src); size_t strtrim(char *src); char *strnchr(const char *haystack, char needle, int32_t len, bool skipquote); +TdUcs4* wcsnchr(const TdUcs4* haystack, TdUcs4 needle, size_t len); + char **strsplit(char *src, const char *delim, int32_t *num); char *strtolower(char *dst, const char *src); char *strntolower(char *dst, const char *src, int32_t n); char *strntolower_s(char *dst, const char *src, int32_t n); int64_t strnatoi(char *num, int32_t len); + size_t tstrncspn(const char *str, size_t ssize, const char *reject, size_t rsize); +size_t twcsncspn(const TdUcs4 *wcs, size_t size, const TdUcs4 *reject, size_t rsize); char *strbetween(char *string, char *begin, char *end); char *paGetToken(char *src, char **token, int32_t *tokenLen); diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index 54b6d6d265..62ccd2c50f 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -998,6 +998,7 @@ int32_t compareUint64Uint32(const void *pLeft, const void *pRight) { } int32_t compareJsonValDesc(const void *pLeft, const void *pRight) { return compareJsonVal(pRight, pLeft); } + /* * Compare two strings * TSDB_MATCH: Match @@ -1055,10 +1056,10 @@ int32_t patternMatch(const 
char *pattern, size_t psize, const char *str, size_t return TSDB_PATTERN_NOWILDCARDMATCH; } - c1 = str[j++]; - ++nMatchChar; + if (j < ssize) { + c1 = str[j++]; + ++nMatchChar; - if (j <= ssize) { if (c == '\\' && pattern[i] == '_' && c1 == '_') { i++; continue; @@ -1099,12 +1100,12 @@ int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, return TSDB_PATTERN_MATCH; } - TdUcs4 acceptArray[3] = {towupper(c), towlower(c), 0}; + TdUcs4 rejectList[2] = {towupper(c), towlower(c)}; str += nMatchChar; int32_t remain = ssize - nMatchChar; while (1) { - size_t n = wcscspn(str, acceptArray); + size_t n = twcsncspn(str, remain, rejectList, 2); str += n; remain -= n; @@ -1122,10 +1123,10 @@ int32_t wcsPatternMatch(const TdUcs4 *pattern, size_t psize, const TdUcs4 *str, return TSDB_PATTERN_NOWILDCARDMATCH; } - c1 = str[j++]; - nMatchChar++; + if (j < ssize) { + c1 = str[j++]; + nMatchChar++; - if (j <= ssize) { if (c == L'\\' && pattern[i] == L'_' && c1 == L'_') { i++; continue; diff --git a/source/util/src/tutil.c b/source/util/src/tutil.c index 7297849870..cf1d3be3a6 100644 --- a/source/util/src/tutil.c +++ b/source/util/src/tutil.c @@ -144,6 +144,16 @@ char *strnchr(const char *haystack, char needle, int32_t len, bool skipquote) { return NULL; } +TdUcs4* wcsnchr(const TdUcs4* haystack, TdUcs4 needle, size_t len) { + for(int32_t i = 0; i < len; ++i) { + if (haystack[i] == needle) { + return (TdUcs4*) &haystack[i]; + } + } + + return NULL; +} + char *strtolower(char *dst, const char *src) { int32_t esc = 0; char quote = 0, *p = dst, c; @@ -432,3 +442,17 @@ size_t tstrncspn(const char *str, size_t size, const char *reject, size_t rsize) return size; } + +size_t twcsncspn(const TdUcs4 *wcs, size_t size, const TdUcs4 *reject, size_t rsize) { + if (rsize == 0 || rsize == 1) { + TdUcs4* p = wcsnchr(wcs, reject[0], size); + return (p == NULL)? 
size:(p-wcs); + } + + size_t index = 0; + while ((index < size) && (wcsnchr(reject, wcs[index], rsize) == NULL)) { + ++index; + } + + return index; +} diff --git a/source/util/test/utilTests.cpp b/source/util/test/utilTests.cpp index 5ab93bedd3..c56ef348cc 100644 --- a/source/util/test/utilTests.cpp +++ b/source/util/test/utilTests.cpp @@ -61,6 +61,21 @@ TEST(utilTest, wchar_pattern_match_test) { const TdWchar* str9 = L"6_66"; ret = wcsPatternMatch(reinterpret_cast(pattern9), 6, reinterpret_cast(str9), wcslen(str9), &pInfo); ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern10 = L"%"; + const TdWchar* str10 = L""; + ret = wcsPatternMatch(reinterpret_cast(pattern10), 1, reinterpret_cast(str10), 0, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const TdWchar* pattern11 = L"china%"; + const TdWchar* str11 = L"CHI "; + ret = wcsPatternMatch(reinterpret_cast(pattern11), 6, reinterpret_cast(str11), 3, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOMATCH); + + const TdWchar* pattern12 = L"abc%"; + const TdWchar* str12 = L""; + ret = wcsPatternMatch(reinterpret_cast(pattern12), 4, reinterpret_cast(str12), 0, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOMATCH); } TEST(utilTest, wchar_pattern_match_no_terminated) { @@ -174,6 +189,21 @@ TEST(utilTest, char_pattern_match_test) { const char* str9 = "6_6"; ret = patternMatch(pattern9, 5, str9, strlen(str9), &pInfo); ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern10 = "%"; + const char* str10 = " "; + ret = patternMatch(pattern10, 1, str10, 0, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_MATCH); + + const char* pattern11 = "china%"; + const char* str11 = "abc "; + ret = patternMatch(pattern11, 6, str11, 3, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOMATCH); + + const char* pattern12 = "abc%"; + const char* str12 = NULL; + ret = patternMatch(pattern12, 4, str12, 0, &pInfo); + ASSERT_EQ(ret, TSDB_PATTERN_NOMATCH); } TEST(utilTest, char_pattern_match_no_terminated) { From 7eeea8a29c3d03b1dd3eac93732c38b9df4c5595 Mon Sep 17 
00:00:00 2001 From: Haojun Liao Date: Thu, 29 Dec 2022 17:56:20 +0800 Subject: [PATCH 6/8] fix(query): fix the invalid access, and do some internal refactor. --- source/common/src/ttypes.c | 5 ++++- source/libs/scalar/src/filter.c | 2 +- source/libs/scalar/src/sclvector.c | 14 ++------------ source/libs/scalar/test/scalar/scalarTests.cpp | 10 +++++++--- source/util/src/tcompare.c | 5 +++-- 5 files changed, 17 insertions(+), 19 deletions(-) diff --git a/source/common/src/ttypes.c b/source/common/src/ttypes.c index 7b5d0a8805..d412fd89da 100644 --- a/source/common/src/ttypes.c +++ b/source/common/src/ttypes.c @@ -131,7 +131,10 @@ void assignVal(char *val, const char *src, int32_t len, int32_t type) { varDataCopy(val, src); break; default: { - memcpy(val, src, len); + if (len > 0) { + memcpy(val, src, len); + } + break; } } diff --git a/source/libs/scalar/src/filter.c b/source/libs/scalar/src/filter.c index 8b7651765b..d0c27560ca 100644 --- a/source/libs/scalar/src/filter.c +++ b/source/libs/scalar/src/filter.c @@ -338,7 +338,7 @@ int8_t filterGetCompFuncIdx(int32_t type, int32_t optr) { __compar_fn_t filterGetCompFunc(int32_t type, int32_t optr) { return gDataCompare[filterGetCompFuncIdx(type, optr)]; } __compar_fn_t filterGetCompFuncEx(int32_t lType, int32_t rType, int32_t optr) { - if (TSDB_DATA_TYPE_NULL == rType) { + if (TSDB_DATA_TYPE_NULL == rType || TSDB_DATA_TYPE_JSON == rType) { return NULL; } diff --git a/source/libs/scalar/src/sclvector.c b/source/libs/scalar/src/sclvector.c index 24ac5be845..8f2fe87a53 100644 --- a/source/libs/scalar/src/sclvector.c +++ b/source/libs/scalar/src/sclvector.c @@ -1723,18 +1723,8 @@ void vectorCompareImpl(SScalarParam *pLeft, SScalarParam *pRight, SScalarParam * param2 = pRight; } else { vectorConvertCols(pLeft, pRight, &pLeftOut, &pRightOut, startIndex, numOfRows); - - if (pLeftOut.columnData != NULL) { - param1 = &pLeftOut; - } else { - param1 = pLeft; - } - - if (pRightOut.columnData != NULL) { - param2 = &pRightOut; - } 
else { - param2 = pRight; - } + param1 = (pLeftOut.columnData != NULL) ? &pLeftOut : pLeft; + param2 = (pRightOut.columnData != NULL) ? &pRightOut : pRight; } doVectorCompare(param1, param2, pOut, startIndex, numOfRows, _ord, optr); diff --git a/source/libs/scalar/test/scalar/scalarTests.cpp b/source/libs/scalar/test/scalar/scalarTests.cpp index f5a40c9a87..97002ed9bf 100644 --- a/source/libs/scalar/test/scalar/scalarTests.cpp +++ b/source/libs/scalar/test/scalar/scalarTests.cpp @@ -344,7 +344,7 @@ TEST(constantTest, int_or_binary) { ASSERT_EQ(nodeType(res), QUERY_NODE_VALUE); SValueNode *v = (SValueNode *)res; ASSERT_EQ(v->node.resType.type, TSDB_DATA_TYPE_BIGINT); - ASSERT_EQ(v->datum.b, scltLeftV | scltRightV); + ASSERT_EQ(v->datum.i, scltLeftV | scltRightV); nodesDestroyNode(res); } @@ -1101,7 +1101,8 @@ void makeCalculate(void *json, void *key, int32_t rightType, void *rightData, do opType == OP_TYPE_LIKE || opType == OP_TYPE_NOT_LIKE || opType == OP_TYPE_MATCH || opType == OP_TYPE_NMATCH) { printf("op:%s,3result:%d,except:%f\n", operatorTypeStr(opType), *((bool *)colDataGetData(column, 0)), exceptValue); - ASSERT_EQ(*((bool *)colDataGetData(column, 0)), exceptValue); + assert(*(bool *)colDataGetData(column, 0) == exceptValue); +// ASSERT_EQ((int) *((bool *)colDataGetData(column, 0)), (int)exceptValue); } taosArrayDestroyEx(blockList, scltFreeDataBlock); @@ -1426,7 +1427,7 @@ TEST(columnTest, json_column_logic_op) { printf("--------------------json string-- 6.6hello {1, 8, 2, 2, 3, 0, 0, 0, 0}-------------------\n"); key = "k9"; - bool eRes8[len + len1] = {false, false, false, false, false, false, false, true, true, false, true, false, true}; + bool eRes8[len + len1] = {false, false, false, false, false, false, false, true, true, false, true, true, true}; for (int i = 0; i < len; i++) { makeCalculate(row, key, TSDB_DATA_TYPE_INT, &input[i], eRes8[i], op[i], false); } @@ -1437,6 +1438,9 @@ TEST(columnTest, json_column_logic_op) { for (int i = len; i < len + 
len1; i++) { void *rightData = prepareNchar(inputNchar[i - len]); + if (i == 11) { + printf("abc\n"); + } makeCalculate(row, key, TSDB_DATA_TYPE_NCHAR, rightData, eRes8[i], op[i], false); taosMemoryFree(rightData); } diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index 62ccd2c50f..ca8b64fe1e 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -1161,7 +1161,8 @@ static int32_t doExecRegexMatch(const char *pString, const char *pPattern) { return 1; } - ret = regexec(®ex, pString, 0, NULL, 0); + regmatch_t pmatch[1]; + ret = regexec(®ex, pString, 1, pmatch, 0); if (ret != 0 && ret != REG_NOMATCH) { regerror(ret, ®ex, msgbuf, sizeof(msgbuf)); uDebug("Failed to match %s with pattern %s, reason %s", pString, pPattern, msgbuf) @@ -1192,7 +1193,7 @@ int32_t comparestrRegexMatch(const void *pLeft, const void *pRight) { int32_t comparewcsRegexMatch(const void* pString, const void* pPattern) { size_t len = varDataLen(pPattern); - char *pattern = taosMemoryMalloc(len + TSDB_NCHAR_SIZE); + char *pattern = taosMemoryMalloc(len + 1); int convertLen = taosUcs4ToMbs((TdUcs4 *)varDataVal(pPattern), len, pattern); if (convertLen < 0) { From adbc11f8e4638792080147a100a6b0ace9a88d5c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 29 Dec 2022 23:05:14 +0800 Subject: [PATCH 7/8] fix(query): set simd conf --- source/common/src/tglobal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 98b9b566ec..deefa65595 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -715,6 +715,8 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsNumOfSnodeWriteThreads = cfgGetItem(pCfg, "numOfSnodeUniqueThreads")->i32; tsRpcQueueMemoryAllowed = cfgGetItem(pCfg, "rpcQueueMemoryAllowed")->i64; + tsSIMDBuiltins = (bool) cfgGetItem(pCfg, "SIMD-builtins")->bval; + tsEnableMonitor = cfgGetItem(pCfg, "monitor")->bval; tsMonitorInterval = cfgGetItem(pCfg, 
"monitorInterval")->i32; tstrncpy(tsMonitorFqdn, cfgGetItem(pCfg, "monitorFqdn")->str, TSDB_FQDN_LEN); From 2cbbc88937df037126ab442493d0287dd9ddd462 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 30 Dec 2022 11:14:31 +0800 Subject: [PATCH 8/8] enh: speed assert in release mode --- include/util/tlog.h | 11 ++++++++--- source/util/src/tlog.c | 24 ++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/include/util/tlog.h b/include/util/tlog.h index e6ef7f388f..c7158def29 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -83,9 +83,14 @@ void taosPrintLongString(const char *flags, ELogLevel level, int32_t dflag, cons #endif ; -bool taosAssert(bool condition, const char *file, int32_t line, const char *format, ...); -#define ASSERTS(condition, ...) taosAssert(condition, __FILE__, __LINE__, __VA_ARGS__) -#define ASSERT(condition) ASSERTS(condition, "assert info not provided") +bool taosAssertDebug(bool condition, const char *file, int32_t line, const char *format, ...); +bool taosAssertRelease(bool condition); +#define ASSERTS(condition, ...) taosAssertDebug(condition, __FILE__, __LINE__, __VA_ARGS__) +#ifdef NDEBUG +#define ASSERT(condition) taosAssertRelease(condition) +#else +#define ASSERT(condition) taosAssertDebug(condition, __FILE__, __LINE__, "assert info not provided") +#endif // clang-format off #define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index f6f814d82b..53d0cad5ea 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -790,7 +790,7 @@ cmp_end: return ret; } -bool taosAssert(bool condition, const char *file, int32_t line, const char *format, ...) { +bool taosAssertDebug(bool condition, const char *file, int32_t line, const char *format, ...) 
{ if (condition) return false; const char *flags = "UTL FATAL "; @@ -822,4 +822,24 @@ bool taosAssert(bool condition, const char *file, int32_t line, const char *form } return true; -} \ No newline at end of file +} + +#ifdef NDEBUG +bool taosAssertRelease(bool condition) { + if (condition) return false; + + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; // tsLogEmbedded ? 255 : uDebugFlag + + taosPrintLog(flags, level, dflag, "tAssert called in release mode, exit:%d", tsAssert); + taosPrintTrace(flags, level, dflag); + + if (tsAssert) { + taosMsleep(300); + abort(); + } + + return true; +} +#endif \ No newline at end of file