From aa8b12e29bbf6b04a6f819e42103e91c460c8a5c Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 6 Aug 2021 01:01:08 +0800 Subject: [PATCH 1/9] [TD-5797] support multi distinct --- src/client/src/tscSQLParser.c | 8 +- src/query/inc/qExecutor.h | 10 ++- src/query/src/qExecutor.c | 141 +++++++++++++++++++++------------- 3 files changed, 101 insertions(+), 58 deletions(-) diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index 5739333886..a6db366643 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -1948,10 +1948,10 @@ bool isValidDistinctSql(SQueryInfo* pQueryInfo) { && (pQueryInfo->type & TSDB_QUERY_TYPE_TABLE_QUERY) != TSDB_QUERY_TYPE_TABLE_QUERY) { return false; } - if (tscNumOfExprs(pQueryInfo) == 1){ - return true; - } - return false; + //if (tscNumOfExprs(pQueryInfo) == 1){ + // return true; + //} + return true; } static bool hasNoneUserDefineExpr(SQueryInfo* pQueryInfo) { diff --git a/src/query/inc/qExecutor.h b/src/query/inc/qExecutor.h index ce70a9ba4a..8249a84e7f 100644 --- a/src/query/inc/qExecutor.h +++ b/src/query/inc/qExecutor.h @@ -508,13 +508,21 @@ typedef struct SStateWindowOperatorInfo { bool reptScan; } SStateWindowOperatorInfo ; +typedef struct SDistinctDataInfo { + int32_t index; + int32_t type; + int32_t bytes; +} SDistinctDataInfo; + typedef struct SDistinctOperatorInfo { SHashObj *pSet; SSDataBlock *pRes; bool recordNullVal; //has already record the null value, no need to try again int64_t threshold; int64_t outputCapacity; - int32_t colIndex; + int32_t totalBytes; + char* buf; + SArray* pDistinctDataInfo; } SDistinctOperatorInfo; struct SGlobalMerger; diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 5323b4306f..fa54e9dc6b 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -44,6 +44,10 @@ #define SDATA_BLOCK_INITIALIZER (SDataBlockInfo) {{0}, 0} +#define MULTI_KEY_DELIM "-" + +#define HASH_CAPACITY_LIMIT 10000000 + #define 
TIME_WINDOW_COPY(_dst, _src) do {\ (_dst).skey = (_src).skey;\ (_dst).ekey = (_src).ekey;\ @@ -6109,6 +6113,8 @@ static void destroyConditionOperatorInfo(void* param, int32_t numOfOutput) { static void destroyDistinctOperatorInfo(void* param, int32_t numOfOutput) { SDistinctOperatorInfo* pInfo = (SDistinctOperatorInfo*) param; taosHashCleanup(pInfo->pSet); + tfree(pInfo->buf); + taosArrayDestroy(pInfo->pDistinctDataInfo); pInfo->pRes = destroyOutputBuf(pInfo->pRes); } @@ -6600,20 +6606,65 @@ SOperatorInfo* createTagScanOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SExprInf return pOperator; } +static bool initMultiDistinctInfo(SDistinctOperatorInfo *pInfo, SOperatorInfo* pOperator, SSDataBlock *pBlock) { + if (taosArrayGetSize(pInfo->pDistinctDataInfo) == pOperator->numOfOutput) { + // distinct info already inited + return true; + } + for (int i = 0; i < pOperator->numOfOutput; i++) { + pInfo->totalBytes += pOperator->pExpr[i].base.colBytes; + } + for (int i = 0; i < pOperator->numOfOutput; i++) { + int numOfBlock = taosArrayGetSize(pBlock->pDataBlock); + assert(i < numOfBlock); + for (int j = 0; j < numOfBlock; j++) { + SColumnInfoData* pColDataInfo = taosArrayGet(pBlock->pDataBlock, j); + if (pColDataInfo->info.colId == pOperator->pExpr[i].base.resColId) { + SDistinctDataInfo item = {.index = j, .type = pColDataInfo->info.type, .bytes = pColDataInfo->info.bytes}; + taosArrayInsert(pInfo->pDistinctDataInfo, i, &item); + } + } + } + pInfo->totalBytes += strlen(MULTI_KEY_DELIM) * (pOperator->numOfOutput); + pInfo->buf = calloc(1, pInfo->totalBytes); + return taosArrayGetSize(pInfo->pDistinctDataInfo) == pOperator->numOfOutput ? 
true : false; +} +static void buildMultiDistinctKey(SDistinctOperatorInfo *pInfo, SSDataBlock *pBlock, int32_t rowId) { + char *p = pInfo->buf; + memset(p, 0, pInfo->totalBytes); + + for (int i = 0; i < taosArrayGetSize(pInfo->pDistinctDataInfo); i++) { + SDistinctDataInfo* pDistDataInfo = (SDistinctDataInfo *)taosArrayGet(pInfo->pDistinctDataInfo, i); + SColumnInfoData* pColDataInfo = taosArrayGet(pBlock->pDataBlock, pDistDataInfo->index); + char *val = ((char *)pColDataInfo->pData) + pColDataInfo->info.bytes * rowId; + if (isNull(val, pDistDataInfo->type)) { + p += pDistDataInfo->bytes; + continue; + } + if (IS_VAR_DATA_TYPE(pDistDataInfo->type)) { + memcpy(p, varDataVal(val), varDataLen(val)); + p += varDataLen(val); + } else { + memcpy(p, val, pDistDataInfo->bytes); + p += pDistDataInfo->bytes; + } + memcpy(p, MULTI_KEY_DELIM, strlen(MULTI_KEY_DELIM)); + p += strlen(MULTI_KEY_DELIM); + } +} static SSDataBlock* hashDistinct(void* param, bool* newgroup) { SOperatorInfo* pOperator = (SOperatorInfo*) param; if (pOperator->status == OP_EXEC_DONE) { return NULL; } - - + SDistinctOperatorInfo* pInfo = pOperator->info; SSDataBlock* pRes = pInfo->pRes; - pRes->info.rows = 0; SSDataBlock* pBlock = NULL; + while(1) { publishOperatorProfEvent(pOperator->upstream[0], QUERY_PROF_BEFORE_OPERATOR_EXEC); pBlock = pOperator->upstream[0]->exec(pOperator->upstream[0], newgroup); @@ -6624,63 +6675,44 @@ static SSDataBlock* hashDistinct(void* param, bool* newgroup) { pOperator->status = OP_EXEC_DONE; break; } - if (pInfo->colIndex == -1) { - for (int i = 0; i < taosArrayGetSize(pBlock->pDataBlock); i++) { - SColumnInfoData* pColDataInfo = taosArrayGet(pBlock->pDataBlock, i); - if (pColDataInfo->info.colId == pOperator->pExpr[0].base.resColId) { - pInfo->colIndex = i; - break; - } - } - } - if (pInfo->colIndex == -1) { + if (!initMultiDistinctInfo(pInfo, pOperator, pBlock)) { setQueryStatus(pOperator->pRuntimeEnv, QUERY_COMPLETED); pOperator->status = OP_EXEC_DONE; - return NULL; + 
break; } - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pInfo->colIndex); - - int16_t bytes = pColInfoData->info.bytes; - int16_t type = pColInfoData->info.type; - - // ensure the output buffer size - SColumnInfoData* pResultColInfoData = taosArrayGet(pRes->pDataBlock, 0); + // ensure result output buf if (pRes->info.rows + pBlock->info.rows > pInfo->outputCapacity) { int32_t newSize = pRes->info.rows + pBlock->info.rows; - char* tmp = realloc(pResultColInfoData->pData, newSize * bytes); - if (tmp == NULL) { - return NULL; - } else { - pResultColInfoData->pData = tmp; - pInfo->outputCapacity = newSize; + for (int i = 0; i < taosArrayGetSize(pRes->pDataBlock); i++) { + SColumnInfoData* pResultColInfoData = taosArrayGet(pRes->pDataBlock, i); + SDistinctDataInfo* pDistDataInfo = taosArrayGet(pInfo->pDistinctDataInfo, i); + char* tmp = realloc(pResultColInfoData->pData, newSize * pDistDataInfo->bytes); + if (tmp == NULL) { + return NULL; + } else { + pResultColInfoData->pData = tmp; + } } + pInfo->outputCapacity = newSize; } - for(int32_t i = 0; i < pBlock->info.rows; ++i) { - char* val = ((char*)pColInfoData->pData) + bytes * i; - if (isNull(val, type)) { - continue; - } - char* p = val; - size_t keyLen = 0; - if (IS_VAR_DATA_TYPE(pOperator->pExpr->base.colType)) { - tstr* var = (tstr*)(val); - p = var->data; - keyLen = varDataLen(var); - } else { - keyLen = bytes; - } + for (int32_t i = 0; i < pBlock->info.rows; i++) { + buildMultiDistinctKey(pInfo, pBlock, i); + if (taosHashGet(pInfo->pSet, pInfo->buf, pInfo->totalBytes) == NULL) { + int32_t dummy; + taosHashPut(pInfo->pSet, pInfo->buf, pInfo->totalBytes, &dummy, sizeof(dummy)); + for (int j = 0; j < taosArrayGetSize(pRes->pDataBlock); j++) { + SDistinctDataInfo* pDistDataInfo = taosArrayGet(pInfo->pDistinctDataInfo, j); // distinct meta info + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, pDistDataInfo->index); //src + SColumnInfoData* pResultColInfoData = 
taosArrayGet(pRes->pDataBlock, j); // dist - int dummy; - void* res = taosHashGet(pInfo->pSet, p, keyLen); - if (res == NULL) { - taosHashPut(pInfo->pSet, p, keyLen, &dummy, sizeof(dummy)); - char* start = pResultColInfoData->pData + bytes * pInfo->pRes->info.rows; - memcpy(start, val, bytes); + char* val = ((char*)pColInfoData->pData) + pDistDataInfo->bytes * i; + char *start = pResultColInfoData->pData + pDistDataInfo->bytes * pInfo->pRes->info.rows; + memcpy(start, val, pDistDataInfo->bytes); + } pRes->info.rows += 1; - } + } } - if (pRes->info.rows >= pInfo->threshold) { break; } @@ -6691,11 +6723,14 @@ static SSDataBlock* hashDistinct(void* param, bool* newgroup) { SOperatorInfo* createDistinctOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperatorInfo* upstream, SExprInfo* pExpr, int32_t numOfOutput) { SDistinctOperatorInfo* pInfo = calloc(1, sizeof(SDistinctOperatorInfo)); - pInfo->colIndex = -1; - pInfo->threshold = 10000000; // distinct result threshold - pInfo->outputCapacity = 4096; - pInfo->pSet = taosHashInit(64, taosGetDefaultHashFunction(pExpr->base.colType), false, HASH_NO_LOCK); + pInfo->totalBytes = 0; + pInfo->buf = NULL; + pInfo->threshold = HASH_CAPACITY_LIMIT; // distinct result threshold + pInfo->outputCapacity = 4096; + pInfo->pDistinctDataInfo = taosArrayInit(numOfOutput, sizeof(SDistinctDataInfo)); + pInfo->pSet = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK); pInfo->pRes = createOutputBuf(pExpr, numOfOutput, (int32_t) pInfo->outputCapacity); + SOperatorInfo* pOperator = calloc(1, sizeof(SOperatorInfo)); pOperator->name = "DistinctOperator"; From 0fc9f7e3f30b7c6b2528b1d15fd7116212f04d30 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 7 Aug 2021 16:33:34 +0800 Subject: [PATCH 2/9] [TD-5797] support distict multi column --- src/common/src/tglobal.c | 14 ++++++++++++++ src/query/src/qExecutor.c | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/common/src/tglobal.c 
b/src/common/src/tglobal.c index a58303e9fc..b8d12f7b01 100644 --- a/src/common/src/tglobal.c +++ b/src/common/src/tglobal.c @@ -84,6 +84,9 @@ int32_t tsMaxNumOfOrderedResults = 100000; // 10 ms for sliding time, the value will changed in case of time precision changed int32_t tsMinSlidingTime = 10; +// the maximum number of distinct query results +int32_t tsMaxNumOfDistinctResults = 1000 * 10000; + // 1 us for interval time range, changed accordingly int32_t tsMinIntervalTime = 1; @@ -541,6 +544,17 @@ static void doInitGlobalConfig(void) { cfg.unitType = TAOS_CFG_UTYPE_NONE; taosInitConfigOption(cfg); + cfg.option = "maxNumOfDistinctRes"; + cfg.ptr = &tsMaxNumOfDistinctResults; + cfg.valType = TAOS_CFG_VTYPE_INT32; + cfg.cfgType = TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_SHOW | TSDB_CFG_CTYPE_B_CLIENT; + cfg.minValue = 10*10000; + cfg.maxValue = 10000*10000; + cfg.ptrLength = 0; + cfg.unitType = TAOS_CFG_UTYPE_NONE; + taosInitConfigOption(cfg); + + cfg.option = "numOfMnodes"; cfg.ptr = &tsNumOfMnodes; cfg.valType = TAOS_CFG_VTYPE_INT32; diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index fa54e9dc6b..219e9ad1e7 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -3394,6 +3394,7 @@ void setDefaultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SOptrBasicInfo *pInfo, i SResultRowInfo* pResultRowInfo = &pInfo->resultRowInfo; int64_t tid = 0; + pRuntimeEnv->keyBuf = realloc(pRuntimeEnv->keyBuf, sizeof(tid) + sizeof(int64_t) + POINTER_BYTES); SResultRow* pRow = doSetResultOutBufByKey(pRuntimeEnv, pResultRowInfo, tid, (char *)&tid, sizeof(tid), true, uid); for (int32_t i = 0; i < pDataBlock->info.numOfCols; ++i) { @@ -6725,7 +6726,7 @@ SOperatorInfo* createDistinctOperatorInfo(SQueryRuntimeEnv* pRuntimeEnv, SOperat SDistinctOperatorInfo* pInfo = calloc(1, sizeof(SDistinctOperatorInfo)); pInfo->totalBytes = 0; pInfo->buf = NULL; - pInfo->threshold = HASH_CAPACITY_LIMIT; // distinct result threshold + pInfo->threshold =
tsMaxNumOfDistinctResults; // distinct result threshold pInfo->outputCapacity = 4096; pInfo->pDistinctDataInfo = taosArrayInit(numOfOutput, sizeof(SDistinctDataInfo)); pInfo->pSet = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_NO_LOCK); From 9c9aae1632cb72487bb55868a27f59079a9aefa6 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 7 Aug 2021 17:20:41 +0800 Subject: [PATCH 3/9] [TD-5797] support distict multi column --- src/common/inc/tglobal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/inc/tglobal.h b/src/common/inc/tglobal.h index 7290db6ec9..fcdfeafefe 100644 --- a/src/common/inc/tglobal.h +++ b/src/common/inc/tglobal.h @@ -59,6 +59,7 @@ extern char tsLocale[]; extern char tsCharset[]; // default encode string extern int8_t tsEnableCoreFile; extern int32_t tsCompressMsgSize; +extern int32_t tsMaxNumOfDistinctResults; extern char tsTempDir[]; //query buffer management From c68ae361b2eb7875f3cab6de112e9295bf4ae99b Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sun, 8 Aug 2021 04:16:18 +0800 Subject: [PATCH 4/9] [TD-5799] self test distinct --- tests/script/general/parser/distinct.sim | 88 ++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tests/script/general/parser/distinct.sim diff --git a/tests/script/general/parser/distinct.sim b/tests/script/general/parser/distinct.sim new file mode 100644 index 0000000000..e0eb74b5a5 --- /dev/null +++ b/tests/script/general/parser/distinct.sim @@ -0,0 +1,88 @@ +system sh/stop_dnodes.sh + +system sh/deploy.sh -n dnode1 -i 1 +system sh/cfg.sh -n dnode1 -c walLevel -v 1 +system sh/cfg.sh -n dnode1 -c maxtablesPerVnode -v 5 +system sh/exec.sh -n dnode1 -s start +sleep 100 +sql connect + +$dbPrefix = sav_db +$tbPrefix = sav_tb +$stbPrefix = sav_stb +$tbNum = 20 +$rowNum = 10 +$totalNum = $tbNum * $rowNum +$ts0 = 1537146000000 +$delta = 600000 +print ========== alter.sim +$i = 0 +$db = $dbPrefix +$stb = $stbPrefix + +sql drop database if exists $db 
+sql create database $db +sql use $db +print ====== create tables +sql create table $stb (ts timestamp, c1 int) tags(t1 int, t2 int) + +$i = 0 +$ts = $ts0 +while $i < $tbNum + $tb = $tbPrefix . $i + sql create table $tb using $stb tags( $i , 0 ) + $i = $i + 1 + sql insert into $tb values('2015-08-18 00:00:00', 1); + sql insert into $tb values('2015-08-18 00:06:00', 2); + sql insert into $tb values('2015-08-18 00:12:00', 3); + sql insert into $tb values('2015-08-18 00:18:00', 4); + sql insert into $tb values('2015-08-18 00:24:00', 5); + sql insert into $tb values('2015-08-18 00:30:00', 6); +endw +$i = 0 +$tb = $tbPrefix . $i + +print ====== table created + +#### select distinct tag +sql select distinct t1 from $stb +if $rows != $tbNum then + return -1 +endi + +#### select distinct tag +sql select distinct t2 from $stb +if $rows != 1 then + print $rows + return -1 +endi + +#### select multi normal column +sql select distinct ts, c1 from $stb +if $rows != 6 then + return -1 +endi + +#### select multi column +sql select distinct ts from $stb +if $rows != 6 then + return -1 +endi + +### select multi normal column +### select distinct multi column on sub table + +sql select distinct ts, c1 from $tb +if $rows != 6 then + return -1 +endi + + +### select distinct +sql drop database $db +sql show databases +if $rows != 0 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT From 8e48713565b78eecd8d6e7a323175b135f7f5c05 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sun, 8 Aug 2021 07:15:15 +0800 Subject: [PATCH 5/9] [TD-5799] fix compile error --- src/query/src/qExecutor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 219e9ad1e7..f0e75872f1 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -6616,7 +6616,7 @@ static bool initMultiDistinctInfo(SDistinctOperatorInfo *pInfo, SOperatorInfo* p pInfo->totalBytes += pOperator->pExpr[i].base.colBytes; } for 
(int i = 0; i < pOperator->numOfOutput; i++) { - int numOfBlock = taosArrayGetSize(pBlock->pDataBlock); + int numOfBlock = (int)(taosArrayGetSize(pBlock->pDataBlock)); assert(i < numOfBlock); for (int j = 0; j < numOfBlock; j++) { SColumnInfoData* pColDataInfo = taosArrayGet(pBlock->pDataBlock, j); @@ -6626,7 +6626,7 @@ static bool initMultiDistinctInfo(SDistinctOperatorInfo *pInfo, SOperatorInfo* p } } } - pInfo->totalBytes += strlen(MULTI_KEY_DELIM) * (pOperator->numOfOutput); + pInfo->totalBytes += (int32_t)strlen(MULTI_KEY_DELIM) * (pOperator->numOfOutput); pInfo->buf = calloc(1, pInfo->totalBytes); return taosArrayGetSize(pInfo->pDistinctDataInfo) == pOperator->numOfOutput ? true : false; } From f7d04ae567acc15e5a772a7b733700b5720eac24 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 12 Aug 2021 04:05:57 +0000 Subject: [PATCH 6/9] [TD-5797] function forbidden --- src/client/src/tscSQLParser.c | 26 ++++++++++++++++++++++++-- src/client/src/tscUtil.c | 1 - 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index a6db366643..fe5b6990ed 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -2043,8 +2043,12 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS const char* msg1 = "too many items in selection clause"; const char* msg2 = "functions or others can not be mixed up"; const char* msg3 = "not support query expression"; - const char* msg4 = "only support distinct one column or tag"; + const char* msg4 = "invalid distinct query"; const char* msg5 = "invalid function name"; + const char* msg6 = "not support distinct mixed with agg function"; + const char* msg7 = "not support distinct mixed with join"; + const char* msg8 = "not support distinct mixed with groupby"; + const char* msg9 = "not support distinct in nest query"; // too many result columns not support order by in query if (taosArrayGetSize(pSelNodeList) > 
TSDB_MAX_COLUMNS) { @@ -2056,6 +2060,7 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS } bool hasDistinct = false; + bool hasAgg = false; size_t numOfExpr = taosArrayGetSize(pSelNodeList); for (int32_t i = 0; i < numOfExpr; ++i) { int32_t outputIndex = (int32_t)tscNumOfExprs(pQueryInfo); @@ -2067,6 +2072,7 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS int32_t type = pItem->pNode->type; if (type == SQL_NODE_SQLFUNCTION) { + hasAgg = true; pItem->pNode->functionId = isValidFunction(pItem->pNode->Expr.operand.z, pItem->pNode->Expr.operand.n); SUdfInfo* pUdfInfo = NULL; if (pItem->pNode->functionId < 0) { @@ -2102,10 +2108,25 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS } } + + //TODO(dengyihao), refactor as function + //handle distinct func mixed with other func if (hasDistinct == true) { if (!isValidDistinctSql(pQueryInfo) ) { return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg4); } + if (hasAgg) { + return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg6); + } + if (joinQuery) { + return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg7); + } + if (pQueryInfo->groupbyExpr.numOfGroupCols != 0) { + return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg8); + } + if (pQueryInfo->pDownstream != NULL) { + return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg9); + } pQueryInfo->distinct = true; } @@ -8395,12 +8416,12 @@ static int32_t doValidateSubquery(SSqlNode* pSqlNode, int32_t index, SSqlObj* pS pSub->pUdfInfo = pUdfInfo; pSub->udfCopy = true; + pSub->pDownstream = pQueryInfo; int32_t code = validateSqlNode(pSql, p, pSub); if (code != TSDB_CODE_SUCCESS) { return code; } - pSub->pDownstream = pQueryInfo; // create dummy table meta info STableMetaInfo* pTableMetaInfo1 = calloc(1, sizeof(STableMetaInfo)); @@ -8509,6 +8530,7 @@ int32_t validateSqlNode(SSqlObj* pSql, SSqlNode* pSqlNode, SQueryInfo* pQueryInf if 
(validateGroupbyNode(pQueryInfo, pSqlNode->pGroupby, pCmd) != TSDB_CODE_SUCCESS) { return TSDB_CODE_TSC_INVALID_OPERATION; } + if (validateSelectNodeList(pCmd, pQueryInfo, pSqlNode->pSelNodeList, false, timeWindowQuery, true) != TSDB_CODE_SUCCESS) { diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 5a724af7bf..008c5c0a43 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -1502,7 +1502,6 @@ void tscFreeSqlObj(SSqlObj* pSql) { tscFreeSqlResult(pSql); tscResetSqlCmd(pCmd, false); - memset(pCmd->payload, 0, (size_t)pCmd->allocSize); tfree(pCmd->payload); pCmd->allocSize = 0; From ee3afee2be09e7bec5cc8a83e5ec03485c3a273a Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 12 Aug 2021 08:23:24 +0000 Subject: [PATCH 7/9] [TD-5797] fix invalid table bug --- src/client/src/tscServer.c | 2 +- src/client/src/tscUtil.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 3e8dfac1da..752e60b3b0 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -409,7 +409,7 @@ static void doProcessMsgFromServer(SSchedMsg* pSchedMsg) { if ((TSDB_QUERY_HAS_TYPE(pQueryInfo->type, (TSDB_QUERY_TYPE_STABLE_SUBQUERY | TSDB_QUERY_TYPE_SUBQUERY | TSDB_QUERY_TYPE_TAG_FILTER_QUERY)) && !TSDB_QUERY_HAS_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_PROJECTION_QUERY)) || - (TSDB_QUERY_HAS_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_NEST_SUBQUERY))) { + (TSDB_QUERY_HAS_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_NEST_SUBQUERY)) || (TSDB_QUERY_HAS_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_STABLE_SUBQUERY) && pQueryInfo->distinct)) { // do nothing in case of super table subquery } else { pSql->retry += 1; diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 008c5c0a43..6ef9cbae5c 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -3620,7 +3620,8 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, __async_cb_func_t 
pNewQueryInfo->prjOffset = pQueryInfo->prjOffset; pNewQueryInfo->numOfTables = 0; pNewQueryInfo->pTableMetaInfo = NULL; - pNewQueryInfo->bufLen = pQueryInfo->bufLen; + pNewQueryInfo->bufLen = pQueryInfo->bufLen; + pNewQueryInfo->distinct = pQueryInfo->distinct; pNewQueryInfo->buf = malloc(pQueryInfo->bufLen); if (pNewQueryInfo->buf == NULL) { From 7abcd83812cee62cb71f594904c11a204586e4af Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 12 Aug 2021 12:03:05 +0000 Subject: [PATCH 8/9] [TD-5797] fix invalid table bug --- src/client/src/tscSQLParser.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index fe5b6990ed..90ad811e55 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -1940,20 +1940,6 @@ static void addPrimaryTsColIntoResult(SQueryInfo* pQueryInfo, SSqlCmd* pCmd) { pQueryInfo->type |= TSDB_QUERY_TYPE_PROJECTION_QUERY; } -bool isValidDistinctSql(SQueryInfo* pQueryInfo) { - if (pQueryInfo == NULL) { - return false; - } - if ((pQueryInfo->type & TSDB_QUERY_TYPE_STABLE_QUERY) != TSDB_QUERY_TYPE_STABLE_QUERY - && (pQueryInfo->type & TSDB_QUERY_TYPE_TABLE_QUERY) != TSDB_QUERY_TYPE_TABLE_QUERY) { - return false; - } - //if (tscNumOfExprs(pQueryInfo) == 1){ - // return true; - //} - return true; -} - static bool hasNoneUserDefineExpr(SQueryInfo* pQueryInfo) { size_t numOfExprs = taosArrayGetSize(pQueryInfo->exprList); for (int32_t i = 0; i < numOfExprs; ++i) { @@ -2043,7 +2029,7 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS const char* msg1 = "too many items in selection clause"; const char* msg2 = "functions or others can not be mixed up"; const char* msg3 = "not support query expression"; - const char* msg4 = "invalid distinct query"; + const char* msg4 = "not support distinct mixed with proj"; const char* msg5 = "invalid function name"; const char* msg6 = "not support distinct mixed with agg 
function"; const char* msg7 = "not support distinct mixed with join"; @@ -2062,12 +2048,14 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS bool hasDistinct = false; bool hasAgg = false; size_t numOfExpr = taosArrayGetSize(pSelNodeList); + int32_t distIdx = -1; for (int32_t i = 0; i < numOfExpr; ++i) { int32_t outputIndex = (int32_t)tscNumOfExprs(pQueryInfo); tSqlExprItem* pItem = taosArrayGet(pSelNodeList, i); if (hasDistinct == false) { hasDistinct = (pItem->distinct == true); + distIdx = i; } int32_t type = pItem->pNode->type; @@ -2112,9 +2100,9 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS //TODO(dengyihao), refactor as function //handle distinct func mixed with other func if (hasDistinct == true) { - if (!isValidDistinctSql(pQueryInfo) ) { + if (distIdx != 0) { return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg4); - } + } if (hasAgg) { return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg6); } From 649b740c18d8ae33a00c930acb337ee762327d0e Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 12 Aug 2021 18:19:38 +0000 Subject: [PATCH 9/9] [TD-5797] refactor errmsg --- src/client/src/tscSQLParser.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index 90ad811e55..277c9a527b 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -2029,12 +2029,11 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS const char* msg1 = "too many items in selection clause"; const char* msg2 = "functions or others can not be mixed up"; const char* msg3 = "not support query expression"; - const char* msg4 = "not support distinct mixed with proj"; + const char* msg4 = "not support distinct mixed with proj/agg func"; const char* msg5 = "invalid function name"; - const char* msg6 = "not support distinct mixed with agg function"; - 
const char* msg7 = "not support distinct mixed with join"; - const char* msg8 = "not support distinct mixed with groupby"; - const char* msg9 = "not support distinct in nest query"; + const char* msg6 = "not support distinct mixed with join"; + const char* msg7 = "not support distinct mixed with groupby"; + const char* msg8 = "not support distinct in nest query"; // too many result columns not support order by in query if (taosArrayGetSize(pSelNodeList) > TSDB_MAX_COLUMNS) { @@ -2047,20 +2046,21 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS bool hasDistinct = false; bool hasAgg = false; - size_t numOfExpr = taosArrayGetSize(pSelNodeList); + size_t numOfExpr = taosArrayGetSize(pSelNodeList); int32_t distIdx = -1; for (int32_t i = 0; i < numOfExpr; ++i) { int32_t outputIndex = (int32_t)tscNumOfExprs(pQueryInfo); tSqlExprItem* pItem = taosArrayGet(pSelNodeList, i); - if (hasDistinct == false) { - hasDistinct = (pItem->distinct == true); - distIdx = i; - } + hasDistinct = (pItem->distinct == true); + distIdx = hasDistinct ? 
i : -1; + } int32_t type = pItem->pNode->type; if (type == SQL_NODE_SQLFUNCTION) { - hasAgg = true; + hasAgg = true; + if (hasDistinct) break; + pItem->pNode->functionId = isValidFunction(pItem->pNode->Expr.operand.z, pItem->pNode->Expr.operand.n); SUdfInfo* pUdfInfo = NULL; if (pItem->pNode->functionId < 0) { @@ -2100,20 +2100,17 @@ int32_t validateSelectNodeList(SSqlCmd* pCmd, SQueryInfo* pQueryInfo, SArray* pS //TODO(dengyihao), refactor as function //handle distinct func mixed with other func if (hasDistinct == true) { - if (distIdx != 0) { + if (distIdx != 0 || hasAgg) { return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg4); } - if (hasAgg) { + if (joinQuery) { return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg6); } - if (joinQuery) { + if (pQueryInfo->groupbyExpr.numOfGroupCols != 0) { return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg7); } - if (pQueryInfo->groupbyExpr.numOfGroupCols != 0) { - return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg8); - } if (pQueryInfo->pDownstream != NULL) { - return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg9); + return invalidOperationMsg(tscGetErrorMsgPayload(pCmd), msg8); } pQueryInfo->distinct = true; }