diff --git a/include/client/taos.h b/include/client/taos.h index 09327154e6..f75a84baa8 100644 --- a/include/client/taos.h +++ b/include/client/taos.h @@ -319,6 +319,7 @@ DLL_EXPORT int32_t tmq_offset_seek(tmq_t *tmq, const char *pTopicName, int32_t DLL_EXPORT int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId); // The current offset is the offset of the last consumed message + 1 DLL_EXPORT int64_t tmq_committed(tmq_t *tmq, const char *pTopicName, int32_t vgId); +DLL_EXPORT TAOS *tmq_get_connect(tmq_t *tmq); DLL_EXPORT const char *tmq_get_table_name(TAOS_RES *res); DLL_EXPORT tmq_res_t tmq_get_res_type(TAOS_RES *res); DLL_EXPORT const char *tmq_get_topic_name(TAOS_RES *res); diff --git a/source/client/src/clientRawBlockWrite.c b/source/client/src/clientRawBlockWrite.c index f5cdfeadad..b7e92d2e65 100644 --- a/source/client/src/clientRawBlockWrite.c +++ b/source/client/src/clientRawBlockWrite.c @@ -1411,7 +1411,7 @@ int taos_write_raw_block_with_fields(TAOS* taos, int rows, char* pData, const ch code = smlBuildOutput(pQuery, pVgHash); if (code != TSDB_CODE_SUCCESS) { uError("smlBuildOutput failed"); - return code; + goto end; } launchQueryImpl(pRequest, pQuery, true, NULL); @@ -1496,7 +1496,7 @@ int taos_write_raw_block(TAOS* taos, int rows, char* pData, const char* tbname) code = smlBuildOutput(pQuery, pVgHash); if (code != TSDB_CODE_SUCCESS) { uError("smlBuildOutput failed"); - return code; + goto end; } launchQueryImpl(pRequest, pQuery, true, NULL); diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 6241c089e9..6ee5508048 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1768,7 +1768,7 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout) { tscError("consumer:0x%" PRIx64 " msg from vgId:%d discarded, since %s", tmq->consumerId, pollRspWrapper->vgId, tstrerror(pRspWrapper->code)); taosWLockLatch(&tmq->lock); SMqClientVg* pVg = getVgInfo(tmq, pollRspWrapper->topicName, pollRspWrapper->vgId); - pVg->emptyBlockReceiveTs = taosGetTimestampMs(); + if(pVg) pVg->emptyBlockReceiveTs = taosGetTimestampMs(); taosWUnLockLatch(&tmq->lock); } setVgIdle(tmq, pollRspWrapper->topicName, pollRspWrapper->vgId); @@ -3106,3 +3106,10 @@ int32_t tmq_offset_seek(tmq_t* tmq, const char* pTopicName, int32_t vgId, int64_ return code; } + +TAOS *tmq_get_connect(tmq_t *tmq){ + if (tmq && tmq->pTscObj) { + return (TAOS *)(&(tmq->pTscObj->id)); + } + return NULL; +} diff --git a/source/dnode/mnode/impl/inc/mndSubscribe.h b/source/dnode/mnode/impl/inc/mndSubscribe.h index ba4328b8fe..10864da5fb 100644 --- a/source/dnode/mnode/impl/inc/mndSubscribe.h +++ b/source/dnode/mnode/impl/inc/mndSubscribe.h @@ -32,11 +32,11 @@ void mndReleaseSubscribe(SMnode *pMnode, SMqSubscribeObj *pSub); int32_t mndMakeSubscribeKey(char *key, const char *cgroup, const char *topicName); -static FORCE_INLINE int32_t mndMakePartitionKey(char *key, const char *cgroup, const char *topicName, int32_t vgId) { - return snprintf(key, TSDB_PARTITION_KEY_LEN, "%d:%s:%s", vgId, cgroup, topicName); -} +//static FORCE_INLINE int32_t mndMakePartitionKey(char *key, const char *cgroup, const char *topicName, int32_t vgId) { +// return snprintf(key, TSDB_PARTITION_KEY_LEN, "%d:%s:%s", vgId, cgroup, topicName); +//} -int32_t mndDropSubByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); +//int32_t mndDropSubByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); int32_t mndDropSubByTopic(SMnode *pMnode, STrans *pTrans, const char *topic); int32_t mndSetDropSubCommitLogs(SMnode *pMnode, STrans *pTrans, SMqSubscribeObj *pSub); diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index c1494fd0d0..7273e13317 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -311,6 +311,34 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId); } taosRUnLockLatch(&pConsumer->lock); + }else{ + int32_t newTopicNum = taosArrayGetSize(pConsumer->currentTopics); + for (int32_t i = 0; i < newTopicNum; i++) { + char * topic = taosArrayGetP(pConsumer->currentTopics, i); + SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, pConsumer->cgroup, topic); + if (pSub == NULL) { + continue; + } + taosRLockLatch(&pSub->lock); + + // 2.2 iterate all vg assigned to the consumer of that topic + SMqConsumerEp *pConsumerEp = taosHashGet(pSub->consumerHash, &pConsumer->consumerId, sizeof(int64_t)); + int32_t vgNum = taosArrayGetSize(pConsumerEp->vgs); + + for (int32_t j = 0; j < vgNum; j++) { + SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j); + SVgObj * pVgroup = mndAcquireVgroup(pMnode, pVgEp->vgId); + if (!pVgroup) { + char key[TSDB_SUBSCRIBE_KEY_LEN]; + mndMakeSubscribeKey(key, pConsumer->cgroup, topic); + mndGetOrCreateRebSub(pRebMsg->rebSubHash, key); + mInfo("vnode splitted, vgId:%d rebalance will be triggered", pVgEp->vgId); + } + mndReleaseVgroup(pMnode, pVgroup); + } + taosRUnLockLatch(&pSub->lock); + mndReleaseSubscribe(pMnode, pSub); + } } } else if (status == MQ_CONSUMER_STATUS_LOST) { if (hbStatus > MND_CONSUMER_LOST_CLEAR_THRESHOLD) { // clear consumer if lost a day @@ -343,7 +371,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { } if (taosHashGetSize(pRebMsg->rebSubHash) != 0) { - mInfo("mq rebalance will be triggered"); + mInfo("mq rebalance will be triggered"); SRpcMsg rpcMsg = { .msgType = TDMT_MND_TMQ_DO_REBALANCE, .pCont = pRebMsg, @@ -548,8 +576,8 @@ static int32_t mndProcessAskEpReq(SRpcMsg *pMsg) { for (int32_t j = 0; j < vgNum; j++) { SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j); - char offsetKey[TSDB_PARTITION_KEY_LEN]; - mndMakePartitionKey(offsetKey, pConsumer->cgroup, topic, pVgEp->vgId); +// char offsetKey[TSDB_PARTITION_KEY_LEN]; +// mndMakePartitionKey(offsetKey, pConsumer->cgroup, topic, pVgEp->vgId); if(epoch == -1){ SVgObj *pVgroup = mndAcquireVgroup(pMnode, pVgEp->vgId); diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 93bafb34e1..408b664e50 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -361,8 +361,71 @@ static void transferVgroupsForConsumers(SMqRebOutputObj *pOutput, SHashObj *pHas } } +static int32_t processRemoveAddVgs(SMnode *pMnode, SMqRebOutputObj *pOutput){ + int32_t totalVgNum = 0; + SVgObj* pVgroup = NULL; + void* pIter = NULL; + SArray* newVgs = taosArrayInit(0, POINTER_BYTES); + while (1) { + pIter = sdbFetch(pMnode->pSdb, SDB_VGROUP, pIter, (void**)&pVgroup); + if (pIter == NULL) { + break; + } + + if (!mndVgroupInDb(pVgroup, pOutput->pSub->dbUid)) { + sdbRelease(pMnode->pSdb, pVgroup); + continue; + } + + totalVgNum++; + SMqVgEp* pVgEp = taosMemoryMalloc(sizeof(SMqVgEp)); + pVgEp->epSet = mndGetVgroupEpset(pMnode, pVgroup); + pVgEp->vgId = pVgroup->vgId; + taosArrayPush(newVgs, &pVgEp); + sdbRelease(pMnode->pSdb, pVgroup); + } + + pIter = NULL; + while (1) { + pIter = taosHashIterate(pOutput->pSub->consumerHash, pIter); + if (pIter == NULL) break; + SMqConsumerEp *pConsumerEp = (SMqConsumerEp *)pIter; + + int32_t j = 0; + while (j < taosArrayGetSize(pConsumerEp->vgs)) { + SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j); + bool find = false; + for(int32_t k = 0; k < taosArrayGetSize(newVgs); k++){ + SMqVgEp *pnewVgEp = taosArrayGetP(newVgs, k); + if(pVgEp->vgId == pnewVgEp->vgId){ + tDeleteSMqVgEp(pnewVgEp); + taosArrayRemove(newVgs, k); + find = true; + break; + } + } + if(!find){ + mInfo("processRemoveAddVgs old vgId:%d", pVgEp->vgId); + tDeleteSMqVgEp(pVgEp); + taosArrayRemove(pConsumerEp->vgs, j); + continue; + } + j++; + } + } + + if(taosArrayGetSize(pOutput->pSub->unassignedVgs) == 0 && taosArrayGetSize(newVgs) != 0){ + taosArrayAddAll(pOutput->pSub->unassignedVgs, newVgs); + mInfo("processRemoveAddVgs add new vg num:%d", (int)taosArrayGetSize(newVgs)); + taosArrayDestroy(newVgs); + }else{ + taosArrayDestroyP(newVgs, (FDelete)tDeleteSMqVgEp); + } + return totalVgNum; +} + static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqRebOutputObj *pOutput) { - int32_t totalVgNum = pOutput->pSub->vgNum; + int32_t totalVgNum = processRemoveAddVgs(pMnode, pOutput); const char *pSubKey = pOutput->pSub->key; int32_t numOfRemoved = taosArrayGetSize(pInput->pRebInfo->removedConsumers); @@ -1093,33 +1156,33 @@ int32_t mndSetDropSubCommitLogs(SMnode *pMnode, STrans *pTrans, SMqSubscribeObj return 0; } -int32_t mndDropSubByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { - int32_t code = 0; - SSdb *pSdb = pMnode->pSdb; - - void *pIter = NULL; - SMqSubscribeObj *pSub = NULL; - while (1) { - pIter = sdbFetch(pSdb, SDB_SUBSCRIBE, pIter, (void **)&pSub); - if (pIter == NULL) break; - - if (pSub->dbUid != pDb->uid) { - sdbRelease(pSdb, pSub); - continue; - } - - if (mndSetDropSubCommitLogs(pMnode, pTrans, pSub) < 0) { - sdbRelease(pSdb, pSub); - sdbCancelFetch(pSdb, pIter); - code = -1; - break; - } - - sdbRelease(pSdb, pSub); - } - - return code; -} +//int32_t mndDropSubByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { +// int32_t code = 0; +// SSdb *pSdb = pMnode->pSdb; +// +// void *pIter = NULL; +// SMqSubscribeObj *pSub = NULL; +// while (1) { +// pIter = sdbFetch(pSdb, SDB_SUBSCRIBE, pIter, (void **)&pSub); +// if (pIter == NULL) break; +// +// if (pSub->dbUid != pDb->uid) { +// sdbRelease(pSdb, pSub); +// continue; +// } +// +// if (mndSetDropSubCommitLogs(pMnode, pTrans, pSub) < 0) { +// sdbRelease(pSdb, pSub); +// sdbCancelFetch(pSdb, pIter); +// code = -1; +// break; +// } +// +// sdbRelease(pSdb, pSub); +// } +// +// return code; +//} int32_t mndDropSubByTopic(SMnode *pMnode, STrans *pTrans, const char *topicName) { SSdb *pSdb = pMnode->pSdb; diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index d819b71f18..9d27a27365 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2672,14 +2672,14 @@ int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgro SDbObj dbObj = {0}; SArray *pArray = mndBuildDnodesArray(pMnode, 0); - int32_t numOfTopics = 0; - if (mndGetNumOfTopics(pMnode, pDb->name, &numOfTopics) != 0) { - goto _OVER; - } - if (numOfTopics > 0) { - terrno = TSDB_CODE_MND_TOPIC_MUST_BE_DELETED; - goto _OVER; - } +// int32_t numOfTopics = 0; +// if (mndGetNumOfTopics(pMnode, pDb->name, &numOfTopics) != 0) { +// goto _OVER; +// } +// if (numOfTopics > 0) { +// terrno = TSDB_CODE_MND_TOPIC_MUST_BE_DELETED; +// goto _OVER; +// } int32_t numOfStreams = 0; if (mndGetNumOfStreams(pMnode, pDb->name, &numOfStreams) != 0) { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 56ea636234..adf3abe4d9 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -712,7 +712,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg continue; } if (pHandle->consumerId == req.newConsumerId) { // do nothing - tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains, because redo wal log", req.vgId, req.newConsumerId); + tqInfo("vgId:%d no switch consumer:0x%" PRIx64 " remains", req.vgId, req.newConsumerId); } else { tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); diff --git a/source/dnode/vnode/src/tq/tqHandleSnapshot.c b/source/dnode/vnode/src/tq/tqHandleSnapshot.c index 7d3e2f7837..3ce838ce8b 100644 --- a/source/dnode/vnode/src/tq/tqHandleSnapshot.c +++ b/source/dnode/vnode/src/tq/tqHandleSnapshot.c @@ -174,21 +174,18 @@ int32_t tqSnapWrite(STqSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { STQ* pTq = pWriter->pTq; SDecoder decoder = {0}; SDecoder* pDecoder = &decoder; - STqHandle handle; + STqHandle handle = {0}; tDecoderInit(pDecoder, pData + sizeof(SSnapDataHdr), nData - sizeof(SSnapDataHdr)); code = tDecodeSTqHandle(pDecoder, &handle); - if (code) goto _err; + if (code) goto end; taosWLockLatch(&pTq->lock); code = tqMetaSaveHandle(pTq, handle.subKey, &handle); taosWUnLockLatch(&pTq->lock); - if (code < 0) goto _err; - tDecoderClear(pDecoder); - return code; - -_err: +end: tDecoderClear(pDecoder); - tqError("vgId:%d, vnode snapshot tq write failed since %s", TD_VID(pTq->pVnode), tstrerror(code)); + tqDestroyTqHandle(&handle); + tqInfo("vgId:%d, vnode snapshot tq write result:%d", TD_VID(pTq->pVnode), code); return code; } diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index bea63fccb9..4c403dc18f 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -198,58 +198,51 @@ int32_t tqMetaRestoreCheckInfo(STQ* pTq) { int32_t tqMetaSaveHandle(STQ* pTq, const char* key, const STqHandle* pHandle) { int32_t code; int32_t vlen; + void* buf = NULL; + SEncoder encoder; tEncodeSize(tEncodeSTqHandle, pHandle, vlen, code); if (code < 0) { - return -1; + goto end; } tqDebug("tq save %s(%d) handle consumer:0x%" PRIx64 " epoch:%d vgId:%d", pHandle->subKey, (int32_t)strlen(pHandle->subKey), pHandle->consumerId, pHandle->epoch, TD_VID(pTq->pVnode)); - void* buf = taosMemoryCalloc(1, vlen); + buf = taosMemoryCalloc(1, vlen); if (buf == NULL) { - return -1; + code = TSDB_CODE_OUT_OF_MEMORY; + goto end; } - SEncoder encoder; + tEncoderInit(&encoder, buf, vlen); - if (tEncodeSTqHandle(&encoder, pHandle) < 0) { - tEncoderClear(&encoder); - taosMemoryFree(buf); - return -1; + code = tEncodeSTqHandle(&encoder, pHandle); + if (code < 0) { + goto end; } - TXN* txn; - - if (tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < - 0) { - tEncoderClear(&encoder); - taosMemoryFree(buf); - return -1; + TXN* txn = NULL; + code = tdbBegin(pTq->pMetaDB, &txn, tdbDefaultMalloc, tdbDefaultFree, NULL, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + if (code < 0) { + goto end; } - if (tdbTbUpsert(pTq->pExecStore, key, (int)strlen(key), buf, vlen, txn) < 0) { - tEncoderClear(&encoder); - taosMemoryFree(buf); - return -1; + code = tdbTbUpsert(pTq->pExecStore, key, (int)strlen(key), buf, vlen, txn); + if (code < 0) { + goto end; } - if (tdbCommit(pTq->pMetaDB, txn) < 0) { - tEncoderClear(&encoder); - taosMemoryFree(buf); - return -1; - } - - if (tdbPostCommit(pTq->pMetaDB, txn) < 0) { - tEncoderClear(&encoder); - taosMemoryFree(buf); - return -1; + code = tdbCommit(pTq->pMetaDB, txn); + if (code < 0) { + goto end; } + code = tdbPostCommit(pTq->pMetaDB, txn); +end: tEncoderClear(&encoder); taosMemoryFree(buf); - return 0; + return code; } int32_t tqMetaDeleteHandle(STQ* pTq, const char* key) { @@ -349,15 +342,18 @@ static int buildHandle(STQ* pTq, STqHandle* handle){ static int restoreHandle(STQ* pTq, void* pVal, int vLen, STqHandle* handle){ int32_t vgId = TD_VID(pTq->pVnode); SDecoder decoder; + int32_t code = 0; tDecoderInit(&decoder, (uint8_t*)pVal, vLen); - tDecodeSTqHandle(&decoder, handle); - tDecoderClear(&decoder); - - if(buildHandle(pTq, handle) < 0){ - return -1; - } + code = tDecodeSTqHandle(&decoder, handle); + if (code) goto end; + code = buildHandle(pTq, handle); + if (code) goto end; tqInfo("restoreHandle %s consumer 0x%" PRIx64 " vgId:%d", handle->subKey, handle->consumerId, vgId); - return taosHashPut(pTq->pHandle, handle->subKey, strlen(handle->subKey), handle, sizeof(STqHandle)); + code = taosHashPut(pTq->pHandle, handle->subKey, strlen(handle->subKey), handle, sizeof(STqHandle)); + +end: + tDecoderClear(&decoder); + return code; } int32_t tqCreateHandle(STQ* pTq, SMqRebVgReq* req, STqHandle* handle){ diff --git a/source/dnode/vnode/src/tq/tqOffsetSnapshot.c b/source/dnode/vnode/src/tq/tqOffsetSnapshot.c index 85d4dc6c0f..8a7f672e5d 100644 --- a/source/dnode/vnode/src/tq/tqOffsetSnapshot.c +++ b/source/dnode/vnode/src/tq/tqOffsetSnapshot.c @@ -85,6 +85,7 @@ int32_t tqOffsetSnapRead(STqOffsetReader* pReader, uint8_t** ppData) { *ppData = (uint8_t*)buf; pReader->readEnd = 1; + taosCloseFile(&pFile); return 0; } diff --git a/source/dnode/vnode/src/tq/tqStreamStateSnap.c b/source/dnode/vnode/src/tq/tqStreamStateSnap.c index 4a1b3961cd..c4ddaa9e54 100644 --- a/source/dnode/vnode/src/tq/tqStreamStateSnap.c +++ b/source/dnode/vnode/src/tq/tqStreamStateSnap.c @@ -104,6 +104,7 @@ int32_t streamStateSnapRead(SStreamStateReader* pReader, uint8_t** ppData) { pHdr->type = SNAP_DATA_STREAM_STATE_BACKEND; pHdr->size = len; memcpy(pHdr->data, rowData, len); + taosMemoryFree(rowData); tqDebug("vgId:%d, vnode stream-state snapshot read data success", TD_VID(pReader->pTq->pVnode)); return code; diff --git a/source/libs/command/inc/commandInt.h b/source/libs/command/inc/commandInt.h index 996891c77a..f74c61ea78 100644 --- a/source/libs/command/inc/commandInt.h +++ b/source/libs/command/inc/commandInt.h @@ -104,6 +104,7 @@ extern "C" { #define EXPLAIN_VGROUP_SLOT_FORMAT "vgroup_slot=%d,%d" #define EXPLAIN_UID_SLOT_FORMAT "uid_slot=%d,%d" #define EXPLAIN_SRC_SCAN_FORMAT "src_scan=%d,%d" +#define EXPLAIN_PLAN_BLOCKING "blocking=%d" #define COMMAND_RESET_LOG "resetLog" #define COMMAND_SCHEDULE_POLICY "schedulePolicy" diff --git a/source/libs/command/src/explain.c b/source/libs/command/src/explain.c index 655775e224..27cfaab3cf 100644 --- a/source/libs/command/src/explain.c +++ b/source/libs/command/src/explain.c @@ -629,6 +629,8 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND(EXPLAIN_WIDTH_FORMAT, pAggNode->node.pOutputDataBlockDesc->outputRowSize); EXPLAIN_ROW_APPEND_LIMIT(pAggNode->node.pLimit); EXPLAIN_ROW_APPEND_SLIMIT(pAggNode->node.pSlimit); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_PLAN_BLOCKING, !pAggNode->node.forceCreateNonBlockingOptr); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index d798353140..28e07fe98d 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2843,10 +2843,26 @@ static bool pushDownLimitTo(SLogicNode* pNodeWithLimit, SLogicNode* pNodeLimitPu case QUERY_NODE_LOGIC_PLAN_AGG: { if (nodeType(pNodeWithLimit) == QUERY_NODE_LOGIC_PLAN_PROJECT && (isPartTagAgg((SAggLogicNode*)pNodeLimitPushTo) || isPartTableAgg((SAggLogicNode*)pNodeLimitPushTo))) { - // when part by tag, slimit will be cloned to agg, and it will be pipelined. + // when part by tag/tbname, slimit will be cloned to agg, and it will be pipelined. // The scan below will do scanning with group order return cloneLimit(pNodeWithLimit, pNodeLimitPushTo, CLONE_SLIMIT); } + // else if not part by tag and tbname, the partition node below indicates that results are sorted, the agg node can + // be pipelined. + if (nodeType(pNodeWithLimit) == QUERY_NODE_LOGIC_PLAN_PROJECT && LIST_LENGTH(pNodeLimitPushTo->pChildren) == 1) { + SLogicNode* pChild = (SLogicNode*)nodesListGetNode(pNodeLimitPushTo->pChildren, 0); + if (nodeType(pChild) == QUERY_NODE_LOGIC_PLAN_PARTITION) { + pNodeLimitPushTo->forceCreateNonBlockingOptr = true; + return cloneLimit(pNodeWithLimit, pNodeLimitPushTo, CLONE_SLIMIT); + } + // Currently, partColOpt is executed after pushDownLimitOpt, and partColOpt will replace partition node with + // sort node. + // To avoid dependencies between these two optimizations, we add sort node too. + if (nodeType(pChild) == QUERY_NODE_LOGIC_PLAN_SORT && ((SSortLogicNode*)pChild)->calcGroupId) { + pNodeLimitPushTo->forceCreateNonBlockingOptr = true; + return cloneLimit(pNodeWithLimit, pNodeLimitPushTo, CLONE_SLIMIT); + } + } break; } case QUERY_NODE_LOGIC_PLAN_SCAN: @@ -3593,6 +3609,7 @@ static SSortLogicNode* partColOptCreateSort(SPartitionLogicNode* pPartition) { nodesListMakeAppend(&pSort->pSortKeys, (SNode*)pOrder); pOrder->order = ORDER_ASC; pOrder->pExpr = nodesCloneNode(node); + pOrder->nullOrder = NULL_ORDER_FIRST; if (!pOrder->pExpr) code = TSDB_CODE_OUT_OF_MEMORY; } } diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index 881cc02062..33d6a329ee 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -41,6 +41,8 @@ typedef struct SSplitRule { typedef bool (*FSplFindSplitNode)(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pNode, void* pInfo); +static int32_t stbSplCreateMergeKeys(SNodeList* pSortKeys, SNodeList* pTargets, SNodeList** pOutput); + static void splSetSubplanVgroups(SLogicSubplan* pSubplan, SLogicNode* pNode) { if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { TSWAP(pSubplan->pVgroupList, ((SScanLogicNode*)pNode)->pVgroupList); @@ -883,15 +885,119 @@ static int32_t stbSplSplitAggNodeForPartTable(SSplitContext* pCxt, SStableSplitI return code; } +static SFunctionNode* createGroupKeyAggFunc(SColumnNode* pGroupCol) { + SFunctionNode* pFunc = (SFunctionNode*)nodesMakeNode(QUERY_NODE_FUNCTION); + if (pFunc) { + strcpy(pFunc->functionName, "_group_key"); + strcpy(pFunc->node.aliasName, pGroupCol->node.aliasName); + strcpy(pFunc->node.userAlias, pGroupCol->node.userAlias); + int32_t code = nodesListMakeStrictAppend(&pFunc->pParameterList, nodesCloneNode((SNode*)pGroupCol)); + if (code == TSDB_CODE_SUCCESS) { + code = fmGetFuncInfo(pFunc, NULL, 0); + } + if (TSDB_CODE_SUCCESS != code) { + nodesDestroyNode((SNode*)pFunc); + pFunc = NULL; + } + char name[TSDB_FUNC_NAME_LEN + TSDB_NAME_DELIMITER_LEN + TSDB_POINTER_PRINT_BYTES + 1] = {0}; + int32_t len = snprintf(name, sizeof(name) - 1, "%s.%p", pFunc->functionName, pFunc); + taosCreateMD5Hash(name, len); + strncpy(pFunc->node.aliasName, name, TSDB_COL_NAME_LEN - 1); + } + return pFunc; +} + +/** + * @brief For pipelined agg node, add a SortMergeNode to merge result from vnodes. + * For agg + partition, results are sorted by group id, use group sort. + * For agg + sort for group, results are sorted by partition keys, not group id, merges keys should be the same + * as partition keys + */ +static int32_t stbSplAggNodeCreateMerge(SSplitContext* pCtx, SStableSplitInfo* pInfo, SLogicNode* pChildAgg) { + bool groupSort = true; + SNodeList* pMergeKeys = NULL; + int32_t code = TSDB_CODE_SUCCESS; + bool sortForGroup = false; + + if (pChildAgg->pChildren->length != 1) return TSDB_CODE_TSC_INTERNAL_ERROR; + + SLogicNode* pChild = (SLogicNode*)nodesListGetNode(pChildAgg->pChildren, 0); + if (nodeType(pChild) == QUERY_NODE_LOGIC_PLAN_SORT) { + SSortLogicNode* pSort = (SSortLogicNode*)pChild; + if (pSort->calcGroupId) { + SNode *node, *node2; + groupSort = false; + sortForGroup = true; + SNodeList* extraAggFuncs = NULL; + uint32_t originalLen = LIST_LENGTH(pSort->node.pTargets), idx = 0; + code = stbSplCreateMergeKeys(pSort->pSortKeys, pSort->node.pTargets, &pMergeKeys); + if (TSDB_CODE_SUCCESS != code) return code; + + // Create group_key func for all sort keys. + // We only need newly added nodes in pSort.node.pTargets when stbSplCreateMergeKeys + FOREACH(node, pSort->node.pTargets) { + if (idx++ < originalLen) continue; + SFunctionNode* pGroupKeyFunc = createGroupKeyAggFunc((SColumnNode*)node); + if (!pGroupKeyFunc) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } + code = nodesListMakeStrictAppend(&extraAggFuncs, (SNode*)pGroupKeyFunc); + if (code != TSDB_CODE_SUCCESS) { + nodesDestroyNode((SNode*)pGroupKeyFunc); + } + } + + if (TSDB_CODE_SUCCESS == code) { + // add these extra group_key funcs into targets + code = createColumnByRewriteExprs(extraAggFuncs, &pChildAgg->pTargets); + } + if (code == TSDB_CODE_SUCCESS) { + nodesListAppendList(((SAggLogicNode*)pChildAgg)->pAggFuncs, extraAggFuncs); + extraAggFuncs = NULL; + } + + if (code == TSDB_CODE_SUCCESS) { + FOREACH(node, pMergeKeys) { + SOrderByExprNode* pOrder = (SOrderByExprNode*)node; + SColumnNode* pCol = (SColumnNode*)pOrder->pExpr; + FOREACH(node2, ((SAggLogicNode*)pChildAgg)->pAggFuncs) { + SFunctionNode* pFunc = (SFunctionNode*)node2; + if (0 != strcmp(pFunc->functionName, "_group_key")) continue; + SNode* pParam = nodesListGetNode(pFunc->pParameterList, 0); + if (!nodesEqualNode(pParam, (SNode*)pCol)) continue; + + // use the colName of group_key func to make sure finding the right slot id for merge keys. + strcpy(pCol->colName, pFunc->node.aliasName); + strcpy(pCol->node.aliasName, pFunc->node.aliasName); + memset(pCol->tableAlias, 0, TSDB_TABLE_NAME_LEN); + break; + } + } + } + if (TSDB_CODE_SUCCESS != code) { + nodesDestroyList(pMergeKeys); + nodesDestroyList(extraAggFuncs); + } + } + } + code = stbSplCreateMergeNode(pCtx, NULL, pInfo->pSplitNode, pMergeKeys, pChildAgg, groupSort); + if (TSDB_CODE_SUCCESS == code && sortForGroup) { + SMergeLogicNode* pMerge = + (SMergeLogicNode*)nodesListGetNode(pInfo->pSplitNode->pChildren, LIST_LENGTH(pInfo->pSplitNode->pChildren) - 1); + pMerge->inputWithGroupId = true; + } + return code; +} + static int32_t stbSplSplitAggNodeForCrossTable(SSplitContext* pCxt, SStableSplitInfo* pInfo) { SLogicNode* pPartAgg = NULL; int32_t code = stbSplCreatePartAggNode((SAggLogicNode*)pInfo->pSplitNode, &pPartAgg); - if (TSDB_CODE_SUCCESS == code) { // if slimit was pushed down to agg, agg will be pipelined mode, add sort merge before parent agg - if ((SAggLogicNode*)pInfo->pSplitNode->pSlimit) - code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, NULL, pPartAgg, true); + if (pInfo->pSplitNode->forceCreateNonBlockingOptr) + code = stbSplAggNodeCreateMerge(pCxt, pInfo, pPartAgg); else code = stbSplCreateExchangeNode(pCxt, pInfo->pSplitNode, pPartAgg); } else { diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 01fbdcf193..c040d15a74 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -905,6 +905,7 @@ int32_t streamBackendTriggerChkp(void* arg, char* dst) { } _ERROR: + taosMemoryFree(ppCf); taosReleaseRef(streamBackendId, backendRid); taosArrayDestroy(refs); return code; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 652ef7cde7..70371c4add 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -238,6 +238,8 @@ int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { return -1; } } + taosMemoryFree(defaultPath); + taosMemoryFree(newPath); pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); while (pMeta->streamBackend == NULL) { diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 8a4500dd86..1e27aec03b 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -341,11 +341,15 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si qDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER, item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize); + if(buf == NULL){ + return TSDB_CODE_OUT_OF_MEMORY; + } int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); if (nread == -1) { code = TAOS_SYSTEM_ERROR(terrno); qError("%s snap failed to read snap, file name:%s, type:%d,reason:%s", STREAM_STATE_TRANSFER, item->name, item->type, tstrerror(code)); + taosMemoryFree(buf); return -1; } else if (nread > 0 && nread <= kBlockSize) { // left bytes less than kBlockSize @@ -368,6 +372,7 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si // finish *ppData = NULL; *size = 0; + taosMemoryFree(buf); return 0; } item = taosArrayGet(pHandle->pFileList, pHandle->currFileIdx); diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 1d90cb12ad..c6432092d3 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -53,6 +53,10 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/partition_by_col.py -Q 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/partition_by_col.py -Q 2 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/partition_by_col.py +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/partition_by_col_agg.py +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 2 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 3 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/partition_by_col_agg.py -Q 4 ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqShow.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDropStb.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeStb0.py @@ -164,6 +168,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -i True ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmq3mnodeSwitch.py -N 6 -M 3 -n 3 -i True ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeTransform.py -N 2 -n 1 +,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeSplit.py -N 2 -n 1 ,,y,system-test,./pytest.sh python3 test.py -f 7-tmq/tmqVnodeReplicate.py -M 3 -N 3 -n 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 99-TDcase/TD-19201.py ,,y,system-test,./pytest.sh python3 ./test.py -f 99-TDcase/TD-21561.py diff --git a/tests/system-test/2-query/partition_by_col_agg.py b/tests/system-test/2-query/partition_by_col_agg.py new file mode 100644 index 0000000000..c522eb1e2b --- /dev/null +++ b/tests/system-test/2-query/partition_by_col_agg.py @@ -0,0 +1,252 @@ +import taos +import sys +import time +import socket +import os +import threading +import math +from datetime import datetime + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +# from tmqCommon import * + +COMPARE_DATA = 0 +COMPARE_LEN = 1 + +class TDTestCase: + def __init__(self): + self.vgroups = 4 + self.ctbNum = 10 + self.rowsPerTbl = 10000 + self.duraion = '1h' + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def create_database(self,tsql, dbName,dropFlag=1,vgroups=2,replica=1, duration:str='1d'): + if dropFlag == 1: + tsql.execute("drop database if exists %s"%(dbName)) + + tsql.execute("create database if not exists %s vgroups %d replica %d duration %s"%(dbName, vgroups, replica, duration)) + tdLog.debug("complete to create database %s"%(dbName)) + return + + def create_stable(self,tsql, paraDict): + colString = tdCom.gen_column_type_str(colname_prefix=paraDict["colPrefix"], column_elm_list=paraDict["colSchema"]) + tagString = tdCom.gen_tag_type_str(tagname_prefix=paraDict["tagPrefix"], tag_elm_list=paraDict["tagSchema"]) + sqlString = f"create table if not exists %s.%s (%s) tags (%s)"%(paraDict["dbName"], paraDict["stbName"], colString, tagString) + tdLog.debug("%s"%(sqlString)) + tsql.execute(sqlString) + return + + def create_ctable(self,tsql=None, dbName='dbx',stbName='stb',ctbPrefix='ctb',ctbNum=1,ctbStartIdx=0): + for i in range(ctbNum): + sqlString = "create table %s.%s%d using %s.%s tags(%d, 'tb%d', 'tb%d', %d, %d, %d)" % \ + (dbName,ctbPrefix,i+ctbStartIdx,dbName,stbName,(i+ctbStartIdx) % 5,i+ctbStartIdx,i+ctbStartIdx,i+ctbStartIdx,i+ctbStartIdx,i+ctbStartIdx) + tsql.execute(sqlString) + + tdLog.debug("complete to create %d child tables by %s.%s" %(ctbNum, dbName, stbName)) + return + + def insert_data(self,tsql,dbName,ctbPrefix,ctbNum,rowsPerTbl,batchNum,startTs,tsStep): + tdLog.debug("start to insert data ............") + tsql.execute("use %s" %dbName) + pre_insert = "insert into " + sql = pre_insert + + for i in range(ctbNum): + rowsBatched = 0 + sql += " %s%d values "%(ctbPrefix,i) + for j in range(rowsPerTbl): + if (i < ctbNum/2): + sql += "(%d, %d, %d, %d,%d,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%1000, j%500, j%1000, j%5000, j%5400, j%128, j%10000, j%1000) + else: + sql += "(%d, %d, NULL, %d,NULL,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%1000, j%500, j%1000, j%128, j%10000, j%1000) + rowsBatched += 1 + if ((rowsBatched == batchNum) or (j == rowsPerTbl - 1)): + tsql.execute(sql) + rowsBatched = 0 + if j < rowsPerTbl - 1: + sql = "insert into %s%d values " %(ctbPrefix,i) + else: + sql = "insert into " + if sql != pre_insert: + tsql.execute(sql) + tdLog.debug("insert data ............ [OK]") + return + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'test', + 'dropFlag': 1, + 'vgroups': 2, + 'stbName': 'meters', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1}, + {'type': 'BIGINT', 'count':1}, + {'type': 'FLOAT', 'count':1}, + {'type': 'DOUBLE', 'count':1}, + {'type': 'smallint', 'count':1}, + {'type': 'tinyint', 'count':1}, + {'type': 'bool', 'count':1}, + {'type': 'binary', 'len':10, 'count':1}, + {'type': 'nchar', 'len':10, 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'nchar', 'len':20, 'count':1},{'type': 'binary', 'len':20, 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'smallint', 'count':1},{'type': 'DOUBLE', 'count':1}], + 'ctbPrefix': 't', + 'ctbStartIdx': 0, + 'ctbNum': 100, + 'rowsPerTbl': 10000, + 'batchNum': 3000, + 'startTs': 1537146000000, + 'tsStep': 600000} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tdLog.info("create database") + self.create_database(tsql=tdSql, dbName=paraDict["dbName"], dropFlag=paraDict["dropFlag"], vgroups=paraDict["vgroups"], replica=self.replicaVar, duration=self.duraion) + + tdLog.info("create stb") + self.create_stable(tsql=tdSql, paraDict=paraDict) + + tdLog.info("create child tables") + self.create_ctable(tsql=tdSql, dbName=paraDict["dbName"], \ + stbName=paraDict["stbName"],ctbPrefix=paraDict["ctbPrefix"],\ + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict["ctbStartIdx"]) + self.insert_data(tsql=tdSql, dbName=paraDict["dbName"],\ + ctbPrefix=paraDict["ctbPrefix"],ctbNum=paraDict["ctbNum"],\ + rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"],\ + startTs=paraDict["startTs"],tsStep=paraDict["tsStep"]) + return + + def check_explain_res_has_row(self, plan_str_expect: str, rows): + plan_found = False + for row in rows: + if str(row).find(plan_str_expect) >= 0: + tdLog.debug("plan: [%s] found in: [%s]" % (plan_str_expect, str(row))) + plan_found = True + break + if not plan_found: + tdLog.exit("plan: %s not found in res: [%s]" % (plan_str_expect, str(rows))) + + + def test_sort_for_partition_hint(self): + pass + + def add_order_by(self, sql: str, order_by: str, select_list: str = "*") -> str: + return "select %s from (%s)t order by %s" % (select_list, sql, order_by) + + def add_hint(self, sql: str) -> str: + return "select /*+ sort_for_group() */ %s" % sql[6:] + + def query_with_time(self, sql): + start = datetime.now() + tdSql.query(sql, queryTimes=1) + return (datetime.now().timestamp() - start.timestamp()) * 1000 + + def explain_sql(self, sql: str): + sql = "explain verbose true " + sql + tdSql.query(sql, queryTimes=1) + return tdSql.queryResult + + def query_and_compare_res(self, sql1, sql2, compare_what: int = 0): + dur = self.query_with_time(sql1) + tdLog.debug("sql1 query with time: [%f]" % dur) + res1 = tdSql.queryResult + dur = self.query_with_time(sql2) + tdLog.debug("sql2 query with time: [%f]" % dur) + res2 = tdSql.queryResult + if res1 is None or res2 is None: + tdLog.exit("res1 or res2 is None") + if compare_what <= COMPARE_LEN: + if len(res1) != len(res2): + tdLog.exit("query and copare failed cause different rows, sql1: [%s], rows: [%d], sql2: [%s], rows: [%d]" % (sql1, len(res1), sql2, len(res2))) + if compare_what == COMPARE_DATA: + for i in range(0, len(res1)): + if res1[i] != res2[i]: + tdLog.exit("compare failed for row: [%d], sqls: [%s] res1: [%s], sql2 : [%s] res2: [%s]" % (i, sql1, res1[i], sql2, res2[i])) + tdLog.debug("sql: [%s] and sql: [%s] have same results, rows: [%d]" % (sql1, sql2, len(res1))) + + def query_and_compare_first_rows(self, sql1, sql2): + dur = self.query_with_time(sql1) + tdLog.debug("sql1 query with time: [%f]" % dur) + res1 = tdSql.queryResult + dur = self.query_with_time(sql2) + tdLog.debug("sql2 query with time: [%f]" % dur) + res2 = tdSql.queryResult + if res1 is None or res2 is None: + tdLog.exit("res1 or res2 is None") + for i in range(0, min(len(res1), len(res2))): + if res1[i] != res2[i]: + tdLog.exit("compare failed for row: [%d], sqls: [%s] res1: [%s], sql2 : [%s] res2: [%s]" % (i, sql1, res1[i], sql2, res2[i])) + tdLog.debug("sql: [%s] and sql: [%s] have same results, rows: [%d]" % (sql1, sql2, min(len(res1), len(res2)))) + + def prepare_and_query_and_compare(self, sqls: [], order_by: str, select_list: str = "*", compare_what: int = 0): + for sql in sqls: + sql_hint = self.add_order_by(self.add_hint(sql), order_by, select_list) + sql = self.add_order_by(sql, order_by, select_list) + self.check_explain_res_has_row("Sort", self.explain_sql(sql_hint)) + self.check_explain_res_has_row("Partition", self.explain_sql(sql)) + self.query_and_compare_res(sql, sql_hint, compare_what=compare_what) + + def check_explain(self, sql): + sql_hint = self.add_hint(sql) + explain_res = self.explain_sql(sql) + self.check_explain_res_has_row('SortMerge', explain_res) + self.check_explain_res_has_row("blocking=0", explain_res) + explain_res = self.explain_sql(sql_hint) + self.check_explain_res_has_row('SortMerge', explain_res) + self.check_explain_res_has_row('blocking=0', explain_res) + + def test_pipelined_agg_plan_with_slimit(self): + sql = 'select count(*), %s from meters partition by %s slimit 1' + self.check_explain(sql % ('c1','c1')) + self.check_explain(sql % ('c1,c2', 'c1,c2')) + + # should fail + # self.check_explain(sql % ('t1', 'c1,t1')) + # self.check_explain(sql % ('t1', 'c1,tbname')) + + def test_pipelined_agg_data_with_slimit(self): + sql_template = 'select %s from meters partition by %s %s' + + sql_elems = [ + ['count(*), min(c1), c2', 'c2', 'slimit 10', 'c2'], + ['count(*), c1, c2', 'c1, c2', 'slimit 100', 'c1,c2'], + ['count(*), c2, c1', 'c1, c2', 'slimit 1000', 'c1,c2'], + ['count(*), c4,c3', 'c3, c4', 'slimit 2000', 'c3,c4'], + ['count(*), c8,c6', 'c8, c6', 'slimit 3000', 'c8,c6'], + ['count(*), c1 +1 as a,c6', 'c1, c6', 'slimit 3000', 'a,c6'], + ['count(*), c1 +1 as a,c6', 'c1+1, c6', 'slimit 3000', 'a, c6'], + ] + + for ele in sql_elems: + sql = sql_template % (ele[0], ele[1], ele[2]) + sql_hint = self.add_hint(sql) + sql = self.add_order_by(sql, ele[3]) + sql_no_slimit = sql_template % (ele[0], ele[1], '') + sql_no_slimit = self.add_order_by(sql_no_slimit, ele[3]) + self.query_and_compare_first_rows(sql_hint, sql_no_slimit) + + def run(self): + self.prepareTestEnv() + #time.sleep(99999999) + self.test_pipelined_agg_plan_with_slimit() + self.test_pipelined_agg_data_with_slimit() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/7-tmq/tmqVnodeSplit.py b/tests/system-test/7-tmq/tmqVnodeSplit.py new file mode 100644 index 0000000000..c6cdc2bf83 --- /dev/null +++ b/tests/system-test/7-tmq/tmqVnodeSplit.py @@ -0,0 +1,210 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * +sys.path.append("./7-tmq") +from tmqCommon import * + +class TDTestCase: + def __init__(self): + self.vgroups = 1 + self.ctbNum = 10 + self.rowsPerTbl = 10000 + + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def getDataPath(self): + selfPath = tdCom.getBuildPath() + + return selfPath + '/../sim/dnode%d/data/vnode/vnode%d/wal/*'; + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 10, + 'rowsPerTbl': 10000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 60, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tdCom.drop_all_db(); + tmqCom.initConsumerTable() + tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], wal_retention_period=36000,vgroups=paraDict["vgroups"],replica=self.replicaVar) + tdLog.info("create stb") + tmqCom.create_stable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"]) + return + + def restartAndRemoveWal(self, deleteWal): + tdDnodes = cluster.dnodes + tdSql.query("select * from information_schema.ins_vnodes") + for result in tdSql.queryResult: + if result[2] == 'dbt': + tdLog.debug("dnode is %d"%(result[0])) + dnodeId = result[0] + vnodeId = result[1] + + tdDnodes[dnodeId - 1].stoptaosd() + time.sleep(1) + dataPath = self.getDataPath() + dataPath = dataPath%(dnodeId,vnodeId) + tdLog.debug("dataPath:%s"%dataPath) + if deleteWal: + if os.system('rm -rf ' + dataPath) != 0: + tdLog.exit("rm error") + + tdDnodes[dnodeId - 1].starttaosd() + time.sleep(1) + break + tdLog.debug("restart dnode ok") + + def splitVgroups(self): + tdSql.query("select * from information_schema.ins_vnodes") + vnodeId = 0 + for result in tdSql.queryResult: + if result[2] == 'dbt': + vnodeId = result[1] + tdLog.debug("vnode is %d"%(vnodeId)) + break + splitSql = "split vgroup %d" %(vnodeId) + tdLog.debug("splitSql:%s"%(splitSql)) + tdSql.query(splitSql) + tdLog.debug("splitSql ok") + + def tmqCase1(self, deleteWal=False): + tdLog.printNoPrefix("======== test case 1: ") + paraDict = {'dbName': 'dbt', + 'dropFlag': 1, + 'event': '', + 'vgroups': 1, + 'stbName': 'stb', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1},{'type': 'TIMESTAMP', 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'DOUBLE', 'count':1},{'type': 'BINARY', 'len':32, 'count':1},{'type': 'NCHAR', 'len':32, 'count':1}], + 'ctbPrefix': 'ctb', + 'ctbStartIdx': 0, + 'ctbNum': 10, + 'rowsPerTbl': 10000, + 'batchNum': 10, + 'startTs': 1640966400000, # 2022-01-01 00:00:00.000 + 'pollDelay': 60, + 'showMsg': 1, + 'showRow': 1, + 'snapshot': 0} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + topicNameList = ['topic1'] + # expectRowsList = [] + tmqCom.initConsumerTable() + + tdLog.info("create topics from stb with filter") + queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) + # sqlString = "create topic %s as stable %s" %(topicNameList[0], paraDict['stbName']) + sqlString = "create topic %s as %s" %(topicNameList[0], queryString) + tdLog.info("create topic sql: %s"%sqlString) + tdSql.execute(sqlString) + # tdSql.query(queryString) + # expectRowsList.append(tdSql.getRows()) + + # init consume info, and start tmq_sim, then check consume result + tdLog.info("insert consume info to consume processor") + consumerId = 0 + expectrowcnt = paraDict["rowsPerTbl"] * paraDict["ctbNum"] * 2 + topicList = topicNameList[0] + ifcheckdata = 1 + ifManualCommit = 1 + keyList = 'group.id:cgrp1, enable.auto.commit:true, auto.commit.interval.ms:200, auto.offset.reset:earliest' + tmqCom.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit) + + tdLog.info("start consume processor") + tmqCom.startTmqSimProcess(pollDelay=paraDict['pollDelay'],dbName=paraDict["dbName"],showMsg=paraDict['showMsg'], showRow=paraDict['showRow'],snapshot=paraDict['snapshot']) + tdLog.info("wait the consume result") + + tdLog.info("create ctb1") + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + tdLog.info("insert data") + pInsertThread = tmqCom.asyncInsertDataByInterlace(paraDict) + + tmqCom.getStartConsumeNotifyFromTmqsim() + tmqCom.getStartCommitNotifyFromTmqsim() + + #restart dnode & remove wal + self.restartAndRemoveWal(deleteWal) + + # split vgroup + self.splitVgroups() + + tdLog.info("create ctb2") + paraDict['ctbPrefix'] = "ctbn" + tmqCom.create_ctable(tdSql, dbName=paraDict["dbName"],stbName=paraDict["stbName"],ctbPrefix=paraDict['ctbPrefix'], + ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict['ctbStartIdx']) + tdLog.info("insert data") + pInsertThread1 = tmqCom.asyncInsertDataByInterlace(paraDict) + pInsertThread.join() + pInsertThread1.join() + + expectRows = 1 + resultList = tmqCom.selectConsumeResult(expectRows) + + if expectrowcnt / 2 >= resultList[0]: + tdLog.info("expect consume rows: %d, act consume rows: %d"%(expectrowcnt / 2, resultList[0])) + tdLog.exit("%d tmq consume rows error!"%consumerId) + + # tmqCom.checkFileContent(consumerId, queryString) + + time.sleep(2) + for i in range(len(topicNameList)): + tdSql.query("drop topic %s"%topicNameList[i]) + + tdLog.printNoPrefix("======== test case 1 end ...... ") + + def run(self): + tdSql.prepare() + self.prepareTestEnv() + self.tmqCase1(True) + self.prepareTestEnv() + self.tmqCase1(False) + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/7-tmq/tmqVnodeTransform.py b/tests/system-test/7-tmq/tmqVnodeTransform.py index 8db9ce0e13..fa50e46853 100644 --- a/tests/system-test/7-tmq/tmqVnodeTransform.py +++ b/tests/system-test/7-tmq/tmqVnodeTransform.py @@ -28,9 +28,9 @@ class TDTestCase: tdSql.init(conn.cursor(), False) def getDataPath(self): - selfPath = os.path.dirname(os.path.realpath(__file__)) + selfPath = tdCom.getBuildPath() - return selfPath + '/../../../sim/dnode%d/data/vnode/vnode%d/wal/*'; + return selfPath + '/../sim/dnode%d/data/vnode/vnode%d/wal/*'; def prepareTestEnv(self): tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") @@ -58,6 +58,7 @@ class TDTestCase: paraDict['ctbNum'] = self.ctbNum paraDict['rowsPerTbl'] = self.rowsPerTbl + tdCom.drop_all_db(); tmqCom.initConsumerTable() tdCom.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], wal_retention_period=36000,vgroups=paraDict["vgroups"],replica=self.replicaVar) tdLog.info("create stb") @@ -324,8 +325,9 @@ class TDTestCase: tdSql.prepare() self.prepareTestEnv() self.tmqCase1() - # self.tmqCase2() - # self.tmqCase3() + self.tmqCase2() + self.prepareTestEnv() + self.tmqCase3() def stop(self): tdSql.close()