From 25542c26c7998641bf62039a35a8edbac318944f Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 19 Apr 2023 10:43:13 +0800 Subject: [PATCH 001/110] fix:remove tmPushMsg in consumer --- source/dnode/vnode/src/tq/tq.c | 2 +- source/dnode/vnode/src/tq/tqUtil.c | 2 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 8f26d5868c..aca2729731 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -535,7 +535,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg atomic_store_32(&pHandle->epoch, -1); // remove if it has been register in the push manager, and return one empty block to consumer - tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); + //tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 5ac747947f..81dd8abc3e 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -265,7 +265,7 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, // till now, all data has been transferred to consumer, new data needs to push client once arrived. 
if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { - code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); + //code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); taosWUnLockLatch(&pTq->lock); return code; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index b62bf27def..3c8687fa4d 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -447,11 +447,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp walApplyVer(pVnode->pWal, version); - if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { + //if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { /*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/ - vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); - return -1; - } + //vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); + //return -1; + //} // commit if need if (needCommit) { From 1061eef1441c56058b7fc4d31df51c7d9e0d157a Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 19 Apr 2023 10:49:18 +0800 Subject: [PATCH 002/110] fix:move consumer msg from fetch thread to query thread --- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 2 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index d61eb3ec03..fc724f2b45 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -519,7 +519,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_COMMIT_OFFSET, 
vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_ADD_CHECKINFO, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_DEL_CHECKINFO, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_CONSUME, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_TMQ_CONSUME, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DELETE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_BATCH_DEL, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_COMMIT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 3c8687fa4d..579ef8a952 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -487,11 +487,16 @@ int32_t vnodePreprocessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { vTrace("message in vnode query queue is processing"); - if ((pMsg->msgType == TDMT_SCH_QUERY) && !syncIsReadyForRead(pVnode->sync)) { + if ((pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_VND_TMQ_CONSUME) && !syncIsReadyForRead(pVnode->sync)) { vnodeRedirectRpcMsg(pVnode, pMsg, terrno); return 0; } + if (pMsg->msgType == TDMT_VND_TMQ_CONSUME && !pVnode->restored) { + vnodeRedirectRpcMsg(pVnode, pMsg, TSDB_CODE_SYN_RESTORING); + return 0; + } + SReadHandle handle = {.meta = pVnode->pMeta, .config = &pVnode->config, .vnode = pVnode, .pMsgCb = &pVnode->msgCb}; switch (pMsg->msgType) { case TDMT_SCH_QUERY: @@ -499,6 +504,8 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { return qWorkerProcessQueryMsg(&handle, pVnode->pQuery, pMsg, 0); case TDMT_SCH_QUERY_CONTINUE: return qWorkerProcessCQueryMsg(&handle, pVnode->pQuery, pMsg, 0); + case TDMT_VND_TMQ_CONSUME: + return 
tqProcessPollReq(pVnode->pTq, pMsg); default: vError("unknown msg type:%d in query queue", pMsg->msgType); return TSDB_CODE_APP_ERROR; @@ -508,17 +515,12 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { vTrace("vgId:%d, msg:%p in fetch queue is processing", pVnode->config.vgId, pMsg); if ((pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_VND_TABLE_META || pMsg->msgType == TDMT_VND_TABLE_CFG || - pMsg->msgType == TDMT_VND_BATCH_META || pMsg->msgType == TDMT_VND_TMQ_CONSUME) && + pMsg->msgType == TDMT_VND_BATCH_META) && !syncIsReadyForRead(pVnode->sync)) { vnodeRedirectRpcMsg(pVnode, pMsg, terrno); return 0; } - if (pMsg->msgType == TDMT_VND_TMQ_CONSUME && !pVnode->restored) { - vnodeRedirectRpcMsg(pVnode, pMsg, TSDB_CODE_SYN_RESTORING); - return 0; - } - switch (pMsg->msgType) { case TDMT_SCH_FETCH: case TDMT_SCH_MERGE_FETCH: @@ -537,8 +539,6 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return vnodeGetTableCfg(pVnode, pMsg, true); case TDMT_VND_BATCH_META: return vnodeGetBatchMeta(pVnode, pMsg); - case TDMT_VND_TMQ_CONSUME: - return tqProcessPollReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_RUN: return tqProcessTaskRunReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH: From 14fd2e790402c11f0c68d118f685ab6ee87fd626 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 19 Apr 2023 11:44:52 +0800 Subject: [PATCH 003/110] fix:remove lock for consume handler --- source/dnode/vnode/src/tq/tq.c | 22 +++++++++++----------- source/dnode/vnode/src/tq/tqUtil.c | 16 ++++++++-------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index aca2729731..bccfe2b9e1 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -325,25 +325,25 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } // 2. 
check re-balance status - taosRLockLatch(&pTq->lock); +// taosRLockLatch(&pTq->lock); if (pHandle->consumerId != consumerId) { tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; - taosRUnLockLatch(&pTq->lock); +// taosRUnLockLatch(&pTq->lock); return -1; } - taosRUnLockLatch(&pTq->lock); +// taosRUnLockLatch(&pTq->lock); // 3. update the epoch value - taosWLockLatch(&pTq->lock); +// taosWLockLatch(&pTq->lock); int32_t savedEpoch = pHandle->epoch; if (savedEpoch < reqEpoch) { tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); pHandle->epoch = reqEpoch; } - taosWUnLockLatch(&pTq->lock); +// taosWUnLockLatch(&pTq->lock); char buf[80]; tFormatOffset(buf, 80, &reqOffset); @@ -358,12 +358,12 @@ int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqDebug("vgId:%d, tq process delete sub req %s", pTq->pVnode->config.vgId, pReq->subKey); - taosWLockLatch(&pTq->lock); - int32_t code = taosHashRemove(pTq->pPushMgr, pReq->subKey, strlen(pReq->subKey)); - if (code != 0) { - tqDebug("vgId:%d, tq remove push handle %s", pTq->pVnode->config.vgId, pReq->subKey); - } - taosWUnLockLatch(&pTq->lock); +// taosWLockLatch(&pTq->lock); +// int32_t code = taosHashRemove(pTq->pPushMgr, pReq->subKey, strlen(pReq->subKey)); +// if (code != 0) { +// tqDebug("vgId:%d, tq remove push handle %s", pTq->pVnode->config.vgId, pReq->subKey); +// } +// taosWUnLockLatch(&pTq->lock); STqHandle* pHandle = taosHashGet(pTq->pHandle, pReq->subKey, strlen(pReq->subKey)); if (pHandle) { diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 81dd8abc3e..ab1be15271 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -254,7 +254,7 @@ static int32_t 
extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); // lock - taosWLockLatch(&pTq->lock); +// taosWLockLatch(&pTq->lock); qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId); int code = tqScanData(pTq, pHandle, &dataRsp, pOffset); @@ -263,12 +263,12 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, } // till now, all data has been transferred to consumer, new data needs to push client once arrived. - if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && - dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { - //code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); - taosWUnLockLatch(&pTq->lock); - return code; - } +// if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && +// dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { +// //code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); +// taosWUnLockLatch(&pTq->lock); +// return code; +// } code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP); @@ -281,7 +281,7 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, tFormatOffset(buf, 80, &dataRsp.rspOffset); tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, rsp offset type:%s, reqId:0x%" PRIx64 " code:%d", consumerId, pHandle->subKey, vgId, dataRsp.blockNum, buf, pRequest->reqId, code); - taosWUnLockLatch(&pTq->lock); +// taosWUnLockLatch(&pTq->lock); tDeleteSMqDataRsp(&dataRsp); } return code; From 594d68b8a4b86f116f489e927607e25687c356a1 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 19 Apr 2023 11:46:59 +0800 Subject: [PATCH 004/110] fix:remove lock for consume handler --- source/dnode/vnode/src/tq/tq.c | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index bccfe2b9e1..36fcb35791 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -357,7 +357,7 @@ int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg; tqDebug("vgId:%d, tq process delete sub req %s", pTq->pVnode->config.vgId, pReq->subKey); - + int32_t code = 0; // taosWLockLatch(&pTq->lock); // int32_t code = taosHashRemove(pTq->pPushMgr, pReq->subKey, strlen(pReq->subKey)); // if (code != 0) { From 6f94281ab7ebb76b3ccd1b26be87f7e12912bacf Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 19 Apr 2023 17:30:58 +0800 Subject: [PATCH 005/110] fix:add log for wal --- include/libs/wal/wal.h | 2 ++ source/libs/wal/src/walRead.c | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index b51289de5e..835f786d97 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -149,6 +149,8 @@ typedef struct SWalReader { int64_t capacity; // int8_t curInvalid; // int8_t curStopped; + int64_t bodyCnt; + int64_t bodyTotalSize; TdThreadMutex mutex; SWalFilterCond cond; // TODO remove it diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index dc3ff3e6de..9a46b4af2a 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -262,8 +262,8 @@ static int32_t walFetchBodyNew(SWalReader *pReader) { SWalCont *pReadHead = &pReader->pHead->head; int64_t ver = pReadHead->version; - wDebug("vgId:%d, wal starts to fetch body, ver:%" PRId64 " ,len:%d", pReader->pWal->cfg.vgId, ver, - pReadHead->bodyLen); + wInfo("vgId:%d, wal starts to fetch body, ver:%" PRId64 " ,len:%d, total cnt:%"PRId64 ", total size:%"PRId64, pReader->pWal->cfg.vgId, ver, + pReadHead->bodyLen, pReader->bodyCnt, pReader->bodyTotalSize); if (pReader->capacity < pReadHead->bodyLen) { 
SWalCkHead *ptr = (SWalCkHead *)taosMemoryRealloc(pReader->pHead, sizeof(SWalCkHead) + pReadHead->bodyLen); @@ -300,6 +300,8 @@ static int32_t walFetchBodyNew(SWalReader *pReader) { wDebug("vgId:%d, index:%" PRId64 " is fetched, cursor advance", pReader->pWal->cfg.vgId, ver); pReader->curVersion = ver + 1; + pReader->bodyCnt++; + pReader->bodyTotalSize += pReadHead->bodyLen; return 0; } From 4d9d1b520d95b55af544638e69a51affea055ed1 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 19 Apr 2023 18:16:57 +0800 Subject: [PATCH 006/110] fix:add log for wal --- source/libs/wal/src/walRead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 9a46b4af2a..09b6db6afe 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -262,7 +262,7 @@ static int32_t walFetchBodyNew(SWalReader *pReader) { SWalCont *pReadHead = &pReader->pHead->head; int64_t ver = pReadHead->version; - wInfo("vgId:%d, wal starts to fetch body, ver:%" PRId64 " ,len:%d, total cnt:%"PRId64 ", total size:%"PRId64, pReader->pWal->cfg.vgId, ver, + wDebug("vgId:%d, wal starts to fetch body, ver:%" PRId64 " ,len:%d, total cnt:%"PRId64 ", total size:%"PRId64, pReader->pWal->cfg.vgId, ver, pReadHead->bodyLen, pReader->bodyCnt, pReader->bodyTotalSize); if (pReader->capacity < pReadHead->bodyLen) { From 77e03bfd782eb5609e596ea69f90b44967cdc351 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sun, 23 Apr 2023 20:14:49 +0800 Subject: [PATCH 007/110] opti:change push mgr to consume msg for subscribe --- source/dnode/mnode/impl/src/mndConsumer.c | 2 - source/dnode/vnode/src/inc/tq.h | 2 + source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/tq/tq.c | 62 ++++++++++---- source/dnode/vnode/src/tq/tqPush.c | 100 +++++++++++----------- source/dnode/vnode/src/tq/tqUtil.c | 27 +++--- source/dnode/vnode/src/vnd/vnodeSvr.c | 10 +-- 7 files changed, 123 insertions(+), 81 deletions(-) diff --git 
a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 65a2fa72a2..ca71e17d7e 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -449,7 +449,6 @@ static int32_t mndProcessAskEpReq(SRpcMsg *pMsg) { // 1. check consumer status int32_t status = atomic_load_32(&pConsumer->status); -#if 1 if (status == MQ_CONSUMER_STATUS__LOST_REBD) { mInfo("try to recover consumer:0x%" PRIx64, consumerId); SMqConsumerRecoverMsg *pRecoverMsg = rpcMallocCont(sizeof(SMqConsumerRecoverMsg)); @@ -463,7 +462,6 @@ static int32_t mndProcessAskEpReq(SRpcMsg *pMsg) { tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &pRpcMsg); } -#endif if (status != MQ_CONSUMER_STATUS__READY) { mInfo("consumer:0x%" PRIx64 " not ready, status: %s", consumerId, mndConsumerStatusName(status)); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index acc0d29382..e1b1092c28 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -100,6 +100,7 @@ typedef struct { SWalRef* pRef; STqPushHandle pushHandle; // push STqExecHandle execHandle; // exec + SRpcMsg* msg; } STqHandle; typedef struct { @@ -114,6 +115,7 @@ struct STQ { int64_t walLogLastVer; SRWLatch lock; SHashObj* pPushMgr; // consumerId -> STqPushEntry + SArray * pPushArray; SHashObj* pHandle; // subKey -> STqHandle SHashObj* pCheckInfo; // topic -> SAlterCheckInfo STqOffsetStore* pOffsetStore; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 416bc6cdc7..b24cb7e136 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -214,6 +214,7 @@ int32_t tqProcessTaskDropReq(STQ* pTq, int64_t version, char* msg, int32_t msgLe int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit); 
+int32_t tqProcessSubmitReqForSubscribe(STQ* pTq); int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver); int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index a78239a4b5..ae4a7e1d61 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -71,6 +71,11 @@ static void destroyTqHandle(void* data) { walCloseReader(pData->pWalReader); tqCloseReader(pData->execHandle.pTqReader); } + if(pData->msg != NULL) { + rpcFreeCont(pData->msg->pCont); + taosMemoryFree(pData->msg); + pData->msg = NULL; + } } static void tqPushEntryFree(void* data) { @@ -104,6 +109,8 @@ STQ* tqOpen(const char* path, SVnode* pVnode) { pTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK); taosHashSetFreeFp(pTq->pHandle, destroyTqHandle); + pTq->pPushArray = taosArrayInit(8, POINTER_BYTES); + taosInitRWLatch(&pTq->lock); pTq->pPushMgr = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK); taosHashSetFreeFp(pTq->pPushMgr, tqPushEntryFree); @@ -152,6 +159,7 @@ void tqClose(STQ* pTq) { tqMetaClose(pTq); streamMetaClose(pTq->pStreamMeta); taosMemoryFree(pTq); + taosArrayDestroy(pTq->pPushArray); } void tqNotifyClose(STQ* pTq) { @@ -350,25 +358,15 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } // 2. check re-balance status -// taosRLockLatch(&pTq->lock); + taosRLockLatch(&pTq->lock); if (pHandle->consumerId != consumerId) { tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; -// taosRUnLockLatch(&pTq->lock); + taosRUnLockLatch(&pTq->lock); return -1; } -// taosRUnLockLatch(&pTq->lock); - - // 3. 
update the epoch value -// taosWLockLatch(&pTq->lock); - int32_t savedEpoch = pHandle->epoch; - if (savedEpoch < reqEpoch) { - tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, - reqEpoch); - pHandle->epoch = reqEpoch; - } -// taosWUnLockLatch(&pTq->lock); + taosRUnLockLatch(&pTq->lock); char buf[80]; tFormatOffset(buf, 80, &reqOffset); @@ -560,8 +558,20 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg atomic_store_32(&pHandle->epoch, -1); // remove if it has been register in the push manager, and return one empty block to consumer - //tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); - +// tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); + for(size_t i = 0; i < taosArrayGetSize(pTq->pPushArray); i++) { + void* handle = taosArrayGetP(pTq->pPushArray, i); + if(handle == pHandle) { + tqInfo("vgId:%d remove handle when switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); + taosArrayRemove(pTq->pPushArray, i); + break; + } + } + if(pHandle->msg != NULL) { + rpcFreeCont(pHandle->msg->pCont); + taosMemoryFree(pHandle->msg); + pHandle->msg = NULL; + } atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); @@ -1067,6 +1077,28 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { return 0; } +int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { + int32_t vgId = TD_VID(pTq->pVnode); + tqDebug("vgId:%d start set submit for subscribe", vgId); + + taosWLockLatch(&pTq->lock); + for(size_t i = 0; i < taosArrayGetSize(pTq->pPushArray); i++){ + STqHandle* pHandle = (STqHandle*)taosArrayGetP(pTq->pPushArray, i); + if(pHandle->msg == NULL){ + tqError("pHandle->msg should not be null"); + } + SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, .pCont = pHandle->msg->pCont, .contLen = 
pHandle->msg->contLen}; + tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &msg); + taosMemoryFree(pHandle->msg); + pHandle->msg = NULL; + } + taosArrayClear(pTq->pPushArray); + // unlock + taosWUnLockLatch(&pTq->lock); + + return 0; +} + int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { #if 0 void* pIter = NULL; diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 7a1a6b7454..d2d17792d3 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -268,59 +268,61 @@ static void doPushDataForEntry(void* pIter, STqExecHandle* pExec, STQ* pTq, int6 } } + int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) { - void* pReq = POINTER_SHIFT(msg, sizeof(SSubmitReq2Msg)); - int32_t len = msgLen - sizeof(SSubmitReq2Msg); - int32_t vgId = TD_VID(pTq->pVnode); +// void* pReq = POINTER_SHIFT(msg, sizeof(SSubmitReq2Msg)); +// int32_t len = msgLen - sizeof(SSubmitReq2Msg); +// int32_t vgId = TD_VID(pTq->pVnode); if (msgType == TDMT_VND_SUBMIT) { + tqProcessSubmitReqForSubscribe(pTq); // lock push mgr to avoid potential msg lost - taosWLockLatch(&pTq->lock); - - int32_t numOfRegisteredPush = taosHashGetSize(pTq->pPushMgr); - if (numOfRegisteredPush > 0) { - tqDebug("vgId:%d tq push msg version:%" PRId64 " type:%s, head:%p, body:%p len:%d, numOfPushed consumers:%d", - vgId, ver, TMSG_INFO(msgType), msg, pReq, len, numOfRegisteredPush); - - void* data = taosMemoryMalloc(len); - if (data == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tqError("failed to copy data for stream since out of memory, vgId:%d", vgId); - taosWUnLockLatch(&pTq->lock); - return -1; - } - - memcpy(data, pReq, len); - - SArray* cachedKey = taosArrayInit(0, sizeof(SItem)); - void* pIter = NULL; - - while (1) { - pIter = taosHashIterate(pTq->pPushMgr, pIter); - if (pIter == NULL) { - break; - } - - STqPushEntry* pPushEntry = *(STqPushEntry**)pIter; - - STqHandle* pHandle = taosHashGet(pTq->pHandle, 
pPushEntry->subKey, strlen(pPushEntry->subKey)); - if (pHandle == NULL) { - tqDebug("vgId:%d, failed to find handle %s in pushing data to consumer, ignore", pTq->pVnode->config.vgId, - pPushEntry->subKey); - continue; - } - - STqExecHandle* pExec = &pHandle->execHandle; - doPushDataForEntry(pIter, pExec, pTq, ver, vgId, data, len, cachedKey); - } - - doRemovePushedEntry(cachedKey, pTq); - taosArrayDestroyEx(cachedKey, freeItem); - taosMemoryFree(data); - } - - // unlock - taosWUnLockLatch(&pTq->lock); +// taosWLockLatch(&pTq->lock); +// +// int32_t numOfRegisteredPush = taosHashGetSize(pTq->pPushMgr); +// if (numOfRegisteredPush > 0) { +// tqDebug("vgId:%d tq push msg version:%" PRId64 " type:%s, head:%p, body:%p len:%d, numOfPushed consumers:%d", +// vgId, ver, TMSG_INFO(msgType), msg, pReq, len, numOfRegisteredPush); +// +// void* data = taosMemoryMalloc(len); +// if (data == NULL) { +// terrno = TSDB_CODE_OUT_OF_MEMORY; +// tqError("failed to copy data for stream since out of memory, vgId:%d", vgId); +// taosWUnLockLatch(&pTq->lock); +// return -1; +// } +// +// memcpy(data, pReq, len); +// +// SArray* cachedKey = taosArrayInit(0, sizeof(SItem)); +// void* pIter = NULL; +// +// while (1) { +// pIter = taosHashIterate(pTq->pPushMgr, pIter); +// if (pIter == NULL) { +// break; +// } +// +// STqPushEntry* pPushEntry = *(STqPushEntry**)pIter; +// +// STqHandle* pHandle = taosHashGet(pTq->pHandle, pPushEntry->subKey, strlen(pPushEntry->subKey)); +// if (pHandle == NULL) { +// tqDebug("vgId:%d, failed to find handle %s in pushing data to consumer, ignore", pTq->pVnode->config.vgId, +// pPushEntry->subKey); +// continue; +// } +// +// STqExecHandle* pExec = &pHandle->execHandle; +// doPushDataForEntry(pIter, pExec, pTq, ver, vgId, data, len, cachedKey); +// } +// +// doRemovePushedEntry(cachedKey, pTq); +// taosArrayDestroyEx(cachedKey, freeItem); +// taosMemoryFree(data); +// } +// +// // unlock +// taosWUnLockLatch(&pTq->lock); } tqDebug("handle submit, restore:%d, 
size:%d", pTq->pVnode->restored, (int)taosHashGetSize(pTq->pStreamMeta->pTasks)); diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 128ddedf6d..f76e641f2b 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -169,22 +169,29 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, SMqDataRsp dataRsp = {0}; tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); - // lock -// taosWLockLatch(&pTq->lock); - qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId); int code = tqScanData(pTq, pHandle, &dataRsp, pOffset); if(code != 0) { goto end; } - // till now, all data has been transferred to consumer, new data needs to push client once arrived. -// if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && -// dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { -// //code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); -// taosWUnLockLatch(&pTq->lock); -// return code; -// } +// till now, all data has been transferred to consumer, new data needs to push client once arrived. 
+ if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && + dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { +// code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); + // lock + taosWLockLatch(&pTq->lock); + if(pHandle->msg != NULL){ + tqError("pHandle->msg should be null"); + } + pHandle->msg = taosMemoryCalloc(1, sizeof(SRpcMsg)); + memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); + pHandle->msg->pCont = rpcMallocCont(pMsg->contLen); + memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); + taosArrayPush(pTq->pPushArray, &pHandle); + taosWUnLockLatch(&pTq->lock); + return code; + } code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 579ef8a952..b29081170d 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -447,11 +447,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp walApplyVer(pVnode->pWal, version); - //if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { - /*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/ - //vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); - //return -1; - //} + if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { +// /*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/ + vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); + return -1; + } // commit if need if (needCommit) { From 3678b2373ae7cf7aa05ad0c4e769c5c607be00d3 Mon Sep 17 00:00:00 2001 From: Ping Xiao Date: Sun, 23 Apr 2023 20:45:55 +0800 Subject: [PATCH 008/110] [TS-3140]: add test case for user privilege --- tests/parallel_test/cases.task | 1 + tests/system-test/0-others/user_privilege.py | 100 
+++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 tests/system-test/0-others/user_privilege.py diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 6e662a9a15..1403d18414 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -126,6 +126,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/sysinfo.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/user_control.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/user_manage.py +,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/user_privilege.py ,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/fsync.py ,,n,system-test,python3 ./test.py -f 0-others/compatibility.py ,,n,system-test,python3 ./test.py -f 0-others/tag_index_basic.py diff --git a/tests/system-test/0-others/user_privilege.py b/tests/system-test/0-others/user_privilege.py new file mode 100644 index 0000000000..b470026be1 --- /dev/null +++ b/tests/system-test/0-others/user_privilege.py @@ -0,0 +1,100 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. 
+# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import taos +from taos.tmq import * +from util.cases import * +from util.common import * +from util.log import * +from util.sql import * +from util.sqlset import * + + +class TDTestCase: + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor()) + self.setsql = TDSetSql() + self.stbname = 'stb' + self.binary_length = 20 # the length of binary for column_dict + self.nchar_length = 20 # the length of nchar for column_dict + self.column_dict = { + 'ts': 'timestamp', + 'col1': 'float', + 'col2': 'int', + 'col3': 'float', + } + + self.tag_dict = { + 't1': 'int', + 't2': f'binary({self.binary_length})' + } + + self.tag_list = [ + f'1, "Beijing"', + f'2, "Shanghai"', + f'3, "Guangzhou"', + f'4, "Shenzhen"' + ] + + self.values_list = [ + f'now, 9.1, 200, 0.3' + ] + + self.tbnum = 4 + + def create_user(self): + user_name = 'test' + tdSql.execute(f'create user {user_name} pass "test"') + tdSql.execute(f'grant read on db.stb with t2 = "Beijing" to {user_name}') + + def prepare_data(self): + tdSql.execute(self.setsql.set_create_stable_sql(self.stbname, self.column_dict, self.tag_dict)) + for i in range(self.tbnum): + tdSql.execute(f'create table {self.stbname}_{i} using {self.stbname} tags({self.tag_list[i]})') + for j in self.values_list: + tdSql.execute(f'insert into {self.stbname}_{i} values({j})') + + def user_privilege_check(self): + testconn = taos.connect(user='test', password='test') + expectErrNotOccured = False + + try: + sql = "select count(*) from db.stb where t2 = 'Beijing'" + res = testconn.query(sql) + data = res.fetch_all() + count = data[0][0] + except BaseException: + 
expectErrNotOccured = True + + if expectErrNotOccured: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + tdLog.exit(f"{caller.filename}({caller.lineno}) failed: sql:{sql}, expect error not occured") + elif count != 1: + tdLog.exit(f"{sql}, expect result doesn't match") + pass + + def run(self): + tdSql.prepare() + self.prepare_data() + self.create_user() + self.user_privilege_check() + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) \ No newline at end of file From 8572f4a32fa6db44b563d01be8c0ce3aeb271af3 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Mon, 24 Apr 2023 19:41:30 +0800 Subject: [PATCH 009/110] opti:change push mgr to consume msg for subscribe --- source/client/src/clientTmq.c | 4 ++-- source/common/src/tglobal.c | 8 ++++---- source/common/src/tmsg.c | 6 +++--- source/dnode/vnode/src/tq/tq.c | 12 +++++++----- source/dnode/vnode/src/tq/tqUtil.c | 7 ++++++- 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 16a4f55840..f05a314e44 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1702,7 +1702,7 @@ static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { for (int j = 0; j < numOfVg; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); if (taosGetTimestampMs() - pVg->emptyBlockReceiveTs < EMPTY_BLOCK_POLL_IDLE_DURATION) { // less than 100ms - tscTrace("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for 10ms before start next poll", tmq->consumerId, + tscDebug("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for 10ms before start next poll", tmq->consumerId, tmq->epoch, pVg->vgId); continue; } @@ -1710,7 +1710,7 @@ static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { int32_t vgStatus = atomic_val_compare_exchange_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE, TMQ_VG_STATUS__WAIT); if (vgStatus == 
TMQ_VG_STATUS__WAIT) { int32_t vgSkipCnt = atomic_add_fetch_32(&pVg->vgSkipCnt, 1); - tscTrace("consumer:0x%" PRIx64 " epoch %d wait poll-rsp, skip vgId:%d skip cnt %d", tmq->consumerId, tmq->epoch, + tscDebug("consumer:0x%" PRIx64 " epoch %d wait poll-rsp, skip vgId:%d skip cnt %d", tmq->consumerId, tmq->epoch, pVg->vgId, vgSkipCnt); continue; #if 0 diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index aa35b298e6..8cd3d7f5ab 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -1274,10 +1274,10 @@ int32_t taosCreateLog(const char *logname, int32_t logFileNum, const char *cfgDi taosSetAllDebugFlag(cfgGetItem(pCfg, "debugFlag")->i32, false); if (taosMulModeMkDir(tsLogDir, 0777) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - printf("failed to create dir:%s since %s", tsLogDir, terrstr()); - cfgCleanup(pCfg); - return -1; +// terrno = TAOS_SYSTEM_ERROR(errno); +// printf("failed to create dir:%s since %s", tsLogDir, terrstr()); +// cfgCleanup(pCfg); +// return -1; } if (taosInitLog(logname, logFileNum) != 0) { diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index d9802244b7..cd980d028c 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -5328,9 +5328,9 @@ int32_t tSerializeSMqPollReq(void *buf, int32_t bufLen, SMqPollReq *pReq) { int32_t tDeserializeSMqPollReq(void *buf, int32_t bufLen, SMqPollReq *pReq) { int32_t headLen = sizeof(SMsgHead); - SMsgHead *pHead = buf; - pHead->vgId = pReq->head.vgId; - pHead->contLen = pReq->head.contLen; +// SMsgHead *pHead = buf; +// pHead->vgId = pReq->head.vgId; +// pHead->contLen = pReq->head.contLen; SDecoder decoder = {0}; tDecoderInit(&decoder, (char *)buf + headLen, bufLen - headLen); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index ae4a7e1d61..73c7075d51 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1084,13 +1084,15 @@ int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { 
taosWLockLatch(&pTq->lock); for(size_t i = 0; i < taosArrayGetSize(pTq->pPushArray); i++){ STqHandle* pHandle = (STqHandle*)taosArrayGetP(pTq->pPushArray, i); - if(pHandle->msg == NULL){ + if(ASSERT(pHandle->msg != NULL)){ tqError("pHandle->msg should not be null"); + break; + }else{ + SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, .pCont = pHandle->msg->pCont, .contLen = pHandle->msg->contLen, .info = pHandle->msg->info}; + tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &msg); + taosMemoryFree(pHandle->msg); + pHandle->msg = NULL; } - SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, .pCont = pHandle->msg->pCont, .contLen = pHandle->msg->contLen}; - tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &msg); - taosMemoryFree(pHandle->msg); - pHandle->msg = NULL; } taosArrayClear(pTq->pPushArray); // unlock diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index f76e641f2b..3f92414c34 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -181,15 +181,20 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, // code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); // lock taosWLockLatch(&pTq->lock); - if(pHandle->msg != NULL){ + if(ASSERT(pHandle->msg == NULL)){ tqError("pHandle->msg should be null"); + taosWUnLockLatch(&pTq->lock); + goto end; } pHandle->msg = taosMemoryCalloc(1, sizeof(SRpcMsg)); memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); pHandle->msg->pCont = rpcMallocCont(pMsg->contLen); memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); + pHandle->msg->contLen = pMsg->contLen; + tqError("data is over, register to handle:%p, pCont:%p, len:%d", pHandle, pHandle->msg->pCont, pHandle->msg->contLen); taosArrayPush(pTq->pPushArray, &pHandle); taosWUnLockLatch(&pTq->lock); + tDeleteSMqDataRsp(&dataRsp); return code; } From 168e6f8936dca0586e2a7a4e57d50310f4bccc9c Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Mon, 24 Apr 
2023 19:44:10 +0800 Subject: [PATCH 010/110] opti:change push mgr to consume msg for subscribe --- source/common/src/tglobal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 8cd3d7f5ab..aa35b298e6 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -1274,10 +1274,10 @@ int32_t taosCreateLog(const char *logname, int32_t logFileNum, const char *cfgDi taosSetAllDebugFlag(cfgGetItem(pCfg, "debugFlag")->i32, false); if (taosMulModeMkDir(tsLogDir, 0777) != 0) { -// terrno = TAOS_SYSTEM_ERROR(errno); -// printf("failed to create dir:%s since %s", tsLogDir, terrstr()); -// cfgCleanup(pCfg); -// return -1; + terrno = TAOS_SYSTEM_ERROR(errno); + printf("failed to create dir:%s since %s", tsLogDir, terrstr()); + cfgCleanup(pCfg); + return -1; } if (taosInitLog(logname, logFileNum) != 0) { From 7b73130bd4e756fb4c9867b59ba00a4045852f9f Mon Sep 17 00:00:00 2001 From: Ping Xiao Date: Mon, 24 Apr 2023 19:53:50 +0800 Subject: [PATCH 011/110] udpate test case --- tests/system-test/0-others/user_privilege.py | 22 +++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/system-test/0-others/user_privilege.py b/tests/system-test/0-others/user_privilege.py index b470026be1..6d49ebfbfe 100644 --- a/tests/system-test/0-others/user_privilege.py +++ b/tests/system-test/0-others/user_privilege.py @@ -84,12 +84,32 @@ class TDTestCase: elif count != 1: tdLog.exit(f"{sql}, expect result doesn't match") pass + + def user_privilege_error_check(self): + testconn = taos.connect(user='test', password='test') + expectErrNotOccured = False + + sql_list = ["alter talbe db.stb_1 set t2 = 'Wuhan'", "drop table db.stb_1"] + + for sql in sql_list: + try: + res = testconn.execute(sql) + except BaseException: + expectErrNotOccured = True + + if expectErrNotOccured: + pass + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + 
tdLog.exit(f"{caller.filename}({caller.lineno}) failed: sql:{sql}, expect error not occured") + pass def run(self): tdSql.prepare() self.prepare_data() self.create_user() - self.user_privilege_check() + self.user_privilege_check() + self.user_privilege_error_check() def stop(self): tdSql.close() From 1c63408b3e85510e1a372286d7be8c92fc30fb89 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Mon, 24 Apr 2023 20:18:20 +0800 Subject: [PATCH 012/110] opti:change push mgr to consume msg for subscribe --- include/libs/wal/wal.h | 2 -- source/client/src/clientTmq.c | 7 ++++--- source/libs/wal/src/walRead.c | 6 ++---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index 835f786d97..b51289de5e 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -149,8 +149,6 @@ typedef struct SWalReader { int64_t capacity; // int8_t curInvalid; // int8_t curStopped; - int64_t bodyCnt; - int64_t bodyTotalSize; TdThreadMutex mutex; SWalFilterCond cond; // TODO remove it diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index f05a314e44..9e60f8b04d 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1702,7 +1702,7 @@ static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { for (int j = 0; j < numOfVg; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); if (taosGetTimestampMs() - pVg->emptyBlockReceiveTs < EMPTY_BLOCK_POLL_IDLE_DURATION) { // less than 100ms - tscDebug("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for 10ms before start next poll", tmq->consumerId, + tscTrace("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for 10ms before start next poll", tmq->consumerId, tmq->epoch, pVg->vgId); continue; } @@ -1710,7 +1710,7 @@ static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { int32_t vgStatus = atomic_val_compare_exchange_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE, TMQ_VG_STATUS__WAIT); if (vgStatus == TMQ_VG_STATUS__WAIT) { int32_t vgSkipCnt = 
atomic_add_fetch_32(&pVg->vgSkipCnt, 1); - tscDebug("consumer:0x%" PRIx64 " epoch %d wait poll-rsp, skip vgId:%d skip cnt %d", tmq->consumerId, tmq->epoch, + tscTrace("consumer:0x%" PRIx64 " epoch %d wait poll-rsp, skip vgId:%d skip cnt %d", tmq->consumerId, tmq->epoch, pVg->vgId, vgSkipCnt); continue; #if 0 @@ -1805,12 +1805,13 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { " total:%" PRId64 " reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, buf, pVg->numOfRows, tmq->totalRows, pollRspWrapper->reqId); pRspWrapper = tmqFreeRspWrapper(pRspWrapper); + pVg->emptyBlockReceiveTs = taosGetTimestampMs(); taosFreeQitem(pollRspWrapper); } else { // build rsp int64_t numOfRows = 0; SMqRspObj* pRsp = tmqBuildRspFromWrapper(pollRspWrapper, pVg, &numOfRows); tmq->totalRows += numOfRows; - + pVg->emptyBlockReceiveTs = 0; tscDebug("consumer:0x%" PRIx64 " process poll rsp, vgId:%d, offset:%s, blocks:%d, rows:%" PRId64 " vg total:%" PRId64 " total:%" PRId64 ", reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, buf, pDataRsp->blockNum, numOfRows, pVg->numOfRows, tmq->totalRows, diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 09b6db6afe..6154e30938 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -262,8 +262,8 @@ static int32_t walFetchBodyNew(SWalReader *pReader) { SWalCont *pReadHead = &pReader->pHead->head; int64_t ver = pReadHead->version; - wDebug("vgId:%d, wal starts to fetch body, ver:%" PRId64 " ,len:%d, total cnt:%"PRId64 ", total size:%"PRId64, pReader->pWal->cfg.vgId, ver, - pReadHead->bodyLen, pReader->bodyCnt, pReader->bodyTotalSize); + wDebug("vgId:%d, wal starts to fetch body, ver:%" PRId64 " ,len:%d, total", pReader->pWal->cfg.vgId, ver, + pReadHead->bodyLen); if (pReader->capacity < pReadHead->bodyLen) { SWalCkHead *ptr = (SWalCkHead *)taosMemoryRealloc(pReader->pHead, sizeof(SWalCkHead) + pReadHead->bodyLen); @@ -300,8 +300,6 @@ static int32_t walFetchBodyNew(SWalReader 
*pReader) { wDebug("vgId:%d, index:%" PRId64 " is fetched, cursor advance", pReader->pWal->cfg.vgId, ver); pReader->curVersion = ver + 1; - pReader->bodyCnt++; - pReader->bodyTotalSize += pReadHead->bodyLen; return 0; } From 41bec8560a2587edabe2dd8a55841883a5a77b74 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 25 Apr 2023 09:58:28 +0800 Subject: [PATCH 013/110] opti:change push mgr to consume msg for subscribe --- source/dnode/vnode/src/tq/tq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 73c7075d51..8e35183ebb 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -158,8 +158,8 @@ void tqClose(STQ* pTq) { taosMemoryFree(pTq->path); tqMetaClose(pTq); streamMetaClose(pTq->pStreamMeta); - taosMemoryFree(pTq); taosArrayDestroy(pTq->pPushArray); + taosMemoryFree(pTq); } void tqNotifyClose(STQ* pTq) { From d1e5d6e0f992d39986926a3e092f9cde1d67a7cf Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 25 Apr 2023 16:23:58 +0800 Subject: [PATCH 014/110] fix:pHandle->msg is not null if rebalance --- source/client/src/clientTmq.c | 5 +++-- source/dnode/vnode/src/tq/tq.c | 2 +- source/dnode/vnode/src/tq/tqUtil.c | 11 +++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 9e60f8b04d..54e929c9a4 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1363,6 +1363,7 @@ CREATE_MSG_FAIL: typedef struct SVgroupSaveInfo { STqOffsetVal offset; int64_t numOfRows; + int32_t vgStatus; } SVgroupSaveInfo; static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopicEp, SHashObj* pVgOffsetHashMap, @@ -1398,7 +1399,7 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic .currentOffset = offsetNew, .vgId = pVgEp->vgId, .epSet = pVgEp->epSet, - .vgStatus = TMQ_VG_STATUS__IDLE, + .vgStatus = pInfo != NULL ? 
pInfo->vgStatus : TMQ_VG_STATUS__IDLE, .vgSkipCnt = 0, .emptyBlockReceiveTs = 0, .numOfRows = numOfRows, @@ -1457,7 +1458,7 @@ static bool doUpdateLocalEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) tscDebug("consumer:0x%" PRIx64 ", epoch:%d vgId:%d vgKey:%s, offset:%s", tmq->consumerId, epoch, pVgCur->vgId, vgKey, buf); - SVgroupSaveInfo info = {.offset = pVgCur->currentOffset, .numOfRows = pVgCur->numOfRows}; + SVgroupSaveInfo info = {.offset = pVgCur->currentOffset, .numOfRows = pVgCur->numOfRows, .vgStatus = pVgCur->vgStatus}; taosHashPut(pVgOffsetHashMap, vgKey, strlen(vgKey), &info, sizeof(SVgroupSaveInfo)); } } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 8e35183ebb..0080feadbe 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1079,11 +1079,11 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); - tqDebug("vgId:%d start set submit for subscribe", vgId); taosWLockLatch(&pTq->lock); for(size_t i = 0; i < taosArrayGetSize(pTq->pPushArray); i++){ STqHandle* pHandle = (STqHandle*)taosArrayGetP(pTq->pPushArray, i); + tqDebug("vgId:%d start set submit for pHandle:%p", vgId, pHandle); if(ASSERT(pHandle->msg != NULL)){ tqError("pHandle->msg should not be null"); break; diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 3f92414c34..663dc8bbb9 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -181,17 +181,16 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, // code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); // lock taosWLockLatch(&pTq->lock); - if(ASSERT(pHandle->msg == NULL)){ - tqError("pHandle->msg should be null"); - taosWUnLockLatch(&pTq->lock); - goto end; +// tqDebug("data is over, register to handle:%p, msg:%p", 
pHandle, pHandle->msg); + if(pHandle->msg == NULL){ + pHandle->msg = taosMemoryCalloc(1, sizeof(SRpcMsg)); } - pHandle->msg = taosMemoryCalloc(1, sizeof(SRpcMsg)); + memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); pHandle->msg->pCont = rpcMallocCont(pMsg->contLen); memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); pHandle->msg->contLen = pMsg->contLen; - tqError("data is over, register to handle:%p, pCont:%p, len:%d", pHandle, pHandle->msg->pCont, pHandle->msg->contLen); + tqDebug("data is over, register to handle:%p, pCont:%p, len:%d", pHandle, pHandle->msg->pCont, pHandle->msg->contLen); taosArrayPush(pTq->pPushArray, &pHandle); taosWUnLockLatch(&pTq->lock); tDeleteSMqDataRsp(&dataRsp); From 224d87b1313e1474660918d3791b8146b41b76fd Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 26 Apr 2023 17:58:14 +0800 Subject: [PATCH 015/110] fix:cosume null if rebalance --- source/client/src/clientTmq.c | 5 ++--- source/dnode/vnode/src/tq/tq.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 54e929c9a4..9e60f8b04d 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1363,7 +1363,6 @@ CREATE_MSG_FAIL: typedef struct SVgroupSaveInfo { STqOffsetVal offset; int64_t numOfRows; - int32_t vgStatus; } SVgroupSaveInfo; static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopicEp, SHashObj* pVgOffsetHashMap, @@ -1399,7 +1398,7 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic .currentOffset = offsetNew, .vgId = pVgEp->vgId, .epSet = pVgEp->epSet, - .vgStatus = pInfo != NULL ? 
pInfo->vgStatus : TMQ_VG_STATUS__IDLE, + .vgStatus = TMQ_VG_STATUS__IDLE, .vgSkipCnt = 0, .emptyBlockReceiveTs = 0, .numOfRows = numOfRows, @@ -1458,7 +1457,7 @@ static bool doUpdateLocalEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) tscDebug("consumer:0x%" PRIx64 ", epoch:%d vgId:%d vgKey:%s, offset:%s", tmq->consumerId, epoch, pVgCur->vgId, vgKey, buf); - SVgroupSaveInfo info = {.offset = pVgCur->currentOffset, .numOfRows = pVgCur->numOfRows, .vgStatus = pVgCur->vgStatus}; + SVgroupSaveInfo info = {.offset = pVgCur->currentOffset, .numOfRows = pVgCur->numOfRows}; taosHashPut(pVgOffsetHashMap, vgKey, strlen(vgKey), &info, sizeof(SVgroupSaveInfo)); } } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 0080feadbe..7539761f4e 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -368,6 +368,16 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } taosRUnLockLatch(&pTq->lock); + // 3. update the epoch value + taosWLockLatch(&pTq->lock); + int32_t savedEpoch = pHandle->epoch; + if (savedEpoch < reqEpoch) { + tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, + reqEpoch); + pHandle->epoch = reqEpoch; + } + taosWUnLockLatch(&pTq->lock); + char buf[80]; tFormatOffset(buf, 80, &reqOffset); tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64, From 8677b56a4fe606f918d8579243efb8c12b1cc57a Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Apr 2023 15:44:44 +0800 Subject: [PATCH 016/110] fix:assert error in tqProcessSubmitReqForSubscribe if put pHandle to array twice --- source/client/src/clientSml.c | 2 +- source/dnode/vnode/src/inc/tq.h | 3 +- source/dnode/vnode/src/tq/tq.c | 36 ++------ source/dnode/vnode/src/tq/tqPush.c | 144 ----------------------------- source/dnode/vnode/src/tq/tqUtil.c | 2 +- 5 files changed, 11 insertions(+), 176 deletions(-) diff --git 
a/source/client/src/clientSml.c b/source/client/src/clientSml.c index cac559b0c1..f727715a54 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -534,7 +534,7 @@ static int32_t smlGenerateSchemaAction(SSchema *colField, SHashObj *colHash, SSm uint16_t *index = colHash ? (uint16_t *)taosHashGet(colHash, kv->key, kv->keyLen) : NULL; if (index) { if (colField[*index].type != kv->type) { - uError("SML:0x%" PRIx64 " point type and db type mismatch. point type: %d, db type: %d, key: %s", info->id, colField[*index].type, kv->type, kv->key); + uError("SML:0x%" PRIx64 " point type and db type mismatch. db type: %d, point type: %d, key: %s", info->id, colField[*index].type, kv->type, kv->key); return TSDB_CODE_SML_INVALID_DATA; } diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index e1b1092c28..080e72c504 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -114,8 +114,7 @@ struct STQ { char* path; int64_t walLogLastVer; SRWLatch lock; - SHashObj* pPushMgr; // consumerId -> STqPushEntry - SArray * pPushArray; + SHashObj* pPushMgr; // consumerId -> STqHandle SHashObj* pHandle; // subKey -> STqHandle SHashObj* pCheckInfo; // topic -> SAlterCheckInfo STqOffsetStore* pOffsetStore; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 7539761f4e..00684652f0 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -78,18 +78,6 @@ static void destroyTqHandle(void* data) { } } -static void tqPushEntryFree(void* data) { - STqPushEntry* p = *(void**)data; - if (p->pDataRsp->head.mqMsgType == TMQ_MSG_TYPE__POLL_RSP) { - tDeleteSMqDataRsp(p->pDataRsp); - } else if (p->pDataRsp->head.mqMsgType == TMQ_MSG_TYPE__TAOSX_RSP) { - tDeleteSTaosxRsp((STaosxRsp*)p->pDataRsp); - } - - taosMemoryFree(p->pDataRsp); - taosMemoryFree(p); -} - static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) { return pLeft->val.type == 
TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG && pLeft->val.version <= pRight->val.version; @@ -109,11 +97,8 @@ STQ* tqOpen(const char* path, SVnode* pVnode) { pTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK); taosHashSetFreeFp(pTq->pHandle, destroyTqHandle); - pTq->pPushArray = taosArrayInit(8, POINTER_BYTES); - taosInitRWLatch(&pTq->lock); - pTq->pPushMgr = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_NO_LOCK); - taosHashSetFreeFp(pTq->pPushMgr, tqPushEntryFree); + pTq->pPushMgr = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); pTq->pCheckInfo = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK); taosHashSetFreeFp(pTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo); @@ -158,7 +143,6 @@ void tqClose(STQ* pTq) { taosMemoryFree(pTq->path); tqMetaClose(pTq); streamMetaClose(pTq->pStreamMeta); - taosArrayDestroy(pTq->pPushArray); taosMemoryFree(pTq); } @@ -569,14 +553,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg // remove if it has been register in the push manager, and return one empty block to consumer // tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); - for(size_t i = 0; i < taosArrayGetSize(pTq->pPushArray); i++) { - void* handle = taosArrayGetP(pTq->pPushArray, i); - if(handle == pHandle) { - tqInfo("vgId:%d remove handle when switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); - taosArrayRemove(pTq->pPushArray, i); - break; - } - } + taosHashRemove(pTq->pPushMgr, &pHandle->consumerId, sizeof(int64_t)); + if(pHandle->msg != NULL) { rpcFreeCont(pHandle->msg->pCont); taosMemoryFree(pHandle->msg); @@ -1091,8 +1069,9 @@ int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); taosWLockLatch(&pTq->lock); - for(size_t i = 0; i < taosArrayGetSize(pTq->pPushArray); i++){ - STqHandle* pHandle = 
(STqHandle*)taosArrayGetP(pTq->pPushArray, i); + void *pIter = taosHashIterate(pTq->pPushMgr, NULL); + while(pIter){ + STqHandle* pHandle = *(STqHandle**)pIter; tqDebug("vgId:%d start set submit for pHandle:%p", vgId, pHandle); if(ASSERT(pHandle->msg != NULL)){ tqError("pHandle->msg should not be null"); @@ -1103,8 +1082,9 @@ int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { taosMemoryFree(pHandle->msg); pHandle->msg = NULL; } + pIter = taosHashIterate(pTq->pPushMgr, pIter); } - taosArrayClear(pTq->pPushArray); + taosHashClear(pTq->pPushMgr); // unlock taosWUnLockLatch(&pTq->lock); diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index d2d17792d3..dca988cbbd 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -206,69 +206,6 @@ int32_t tqPushMsgNew(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_ } #endif -typedef struct { - void* pKey; - int64_t keyLen; -} SItem; - -static void recordPushedEntry(SArray* cachedKey, void* pIter); -static void doRemovePushedEntry(SArray* pCachedKeys, STQ* pTq); - -static void freeItem(void* param) { - SItem* p = (SItem*)param; - taosMemoryFree(p->pKey); -} - -static void doPushDataForEntry(void* pIter, STqExecHandle* pExec, STQ* pTq, int64_t ver, int32_t vgId, char* pData, - int32_t dataLen, SArray* pCachedKey) { - STqPushEntry* pPushEntry = *(STqPushEntry**)pIter; - - SMqDataRsp* pRsp = pPushEntry->pDataRsp; - if (pRsp->reqOffset.version >= ver) { - tqDebug("vgId:%d, push entry req version %" PRId64 ", while push version %" PRId64 ", skip", vgId, - pRsp->reqOffset.version, ver); - return; - } - - qTaskInfo_t pTaskInfo = pExec->task; - - // prepare scan mem data - SPackedData submit = {.msgStr = pData, .msgLen = dataLen, .ver = ver}; - - if (qStreamSetScanMemData(pTaskInfo, submit) != 0) { - return; - } - qStreamSetOpen(pTaskInfo); - // here start to scan submit block to extract the subscribed data - int32_t totalRows = 0; - - while (1) { - 
SSDataBlock* pDataBlock = NULL; - uint64_t ts = 0; - if (qExecTask(pTaskInfo, &pDataBlock, &ts) < 0) { - tqDebug("vgId:%d, tq exec error since %s", vgId, terrstr()); - } - - if (pDataBlock == NULL) { - break; - } - - tqAddBlockDataToRsp(pDataBlock, pRsp, pExec->numOfCols, pTq->pVnode->config.tsdbCfg.precision); - pRsp->blockNum++; - totalRows += pDataBlock->info.rows; - } - - tqDebug("vgId:%d, tq handle push, subkey:%s, block num:%d, rows:%d", vgId, pPushEntry->subKey, pRsp->blockNum, - totalRows); - - if (pRsp->blockNum > 0) { - tqOffsetResetToLog(&pRsp->rspOffset, ver); - tqPushDataRsp(pTq, pPushEntry); - recordPushedEntry(pCachedKey, pIter); - } -} - - int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) { // void* pReq = POINTER_SHIFT(msg, sizeof(SSubmitReq2Msg)); // int32_t len = msgLen - sizeof(SSubmitReq2Msg); @@ -363,84 +300,3 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v return 0; } - -int32_t tqRegisterPushHandle(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, - int32_t type) { - uint64_t consumerId = pRequest->consumerId; - int32_t vgId = TD_VID(pTq->pVnode); - STqHandle* pTqHandle = pHandle; - - STqPushEntry* pPushEntry = taosMemoryCalloc(1, sizeof(STqPushEntry)); - if (pPushEntry == NULL) { - tqDebug("tmq poll: consumer:0x%" PRIx64 ", vgId:%d failed to malloc, size:%d", consumerId, vgId, - (int32_t)sizeof(STqPushEntry)); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - pPushEntry->info = pRpcMsg->info; - memcpy(pPushEntry->subKey, pTqHandle->subKey, TSDB_SUBSCRIBE_KEY_LEN); - - if (type == TMQ_MSG_TYPE__TAOSX_RSP) { - pPushEntry->pDataRsp = taosMemoryCalloc(1, sizeof(STaosxRsp)); - memcpy(pPushEntry->pDataRsp, pDataRsp, sizeof(STaosxRsp)); - } else if (type == TMQ_MSG_TYPE__POLL_RSP) { - pPushEntry->pDataRsp = taosMemoryCalloc(1, sizeof(SMqDataRsp)); - memcpy(pPushEntry->pDataRsp, pDataRsp, sizeof(SMqDataRsp)); - } - - SMqRspHead* 
pHead = &pPushEntry->pDataRsp->head; - pHead->consumerId = consumerId; - pHead->epoch = pRequest->epoch; - pHead->mqMsgType = type; - - taosHashPut(pTq->pPushMgr, pTqHandle->subKey, strlen(pTqHandle->subKey), &pPushEntry, sizeof(void*)); - - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s offset:%" PRId64 ", vgId:%d save handle to push mgr, total:%d", - consumerId, pTqHandle->subKey, pDataRsp->reqOffset.version, vgId, taosHashGetSize(pTq->pPushMgr)); - return 0; -} - -int32_t tqUnregisterPushHandle(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) { - int32_t vgId = TD_VID(pTq->pVnode); - STqPushEntry** pEntry = taosHashGet(pTq->pPushMgr, pKey, keyLen); - - if (pEntry != NULL) { - uint64_t cId = (*pEntry)->pDataRsp->head.consumerId; - ASSERT(consumerId == cId); - - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s vgId:%d remove from push mgr, remains:%d", consumerId, - (*pEntry)->subKey, vgId, taosHashGetSize(pTq->pPushMgr) - 1); - - if (rspConsumer) { // rsp the old consumer with empty block. 
- tqPushDataRsp(pTq, *pEntry); - } - - taosHashRemove(pTq->pPushMgr, pKey, keyLen); - } - - return 0; -} - -void recordPushedEntry(SArray* cachedKey, void* pIter) { - size_t kLen = 0; - void* key = taosHashGetKey(pIter, &kLen); - SItem item = {.pKey = strndup(key, kLen), .keyLen = kLen}; - taosArrayPush(cachedKey, &item); -} - -void doRemovePushedEntry(SArray* pCachedKeys, STQ* pTq) { - int32_t vgId = TD_VID(pTq->pVnode); - int32_t numOfKeys = (int32_t)taosArrayGetSize(pCachedKeys); - - for (int32_t i = 0; i < numOfKeys; i++) { - SItem* pItem = taosArrayGet(pCachedKeys, i); - if (taosHashRemove(pTq->pPushMgr, pItem->pKey, pItem->keyLen) != 0) { - tqError("vgId:%d, tq push hash remove key error, key: %s", vgId, (char*)pItem->pKey); - } - } - - if (numOfKeys > 0) { - tqDebug("vgId:%d, pushed %d items and remain:%d", vgId, numOfKeys, (int32_t)taosHashGetSize(pTq->pPushMgr)); - } -} diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 663dc8bbb9..2398ef41f4 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -191,7 +191,7 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); pHandle->msg->contLen = pMsg->contLen; tqDebug("data is over, register to handle:%p, pCont:%p, len:%d", pHandle, pHandle->msg->pCont, pHandle->msg->contLen); - taosArrayPush(pTq->pPushArray, &pHandle); + taosHashPut(pTq->pPushMgr, &pHandle->consumerId, sizeof(int64_t), &pHandle, POINTER_BYTES); taosWUnLockLatch(&pTq->lock); tDeleteSMqDataRsp(&dataRsp); return code; From db5b5c828e602ddc3211e2a10af5f5de72bf74b4 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Apr 2023 18:35:10 +0800 Subject: [PATCH 017/110] fix:add log for msg push --- source/client/src/clientTmq.c | 6 +++--- source/dnode/vnode/src/tq/tq.c | 34 ++++++++++++++++-------------- source/dnode/vnode/src/tq/tqUtil.c | 5 +++-- utils/test/c/tmqSim.c | 2 +- 4 files changed, 
25 insertions(+), 22 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 76384fbe6a..33324552dc 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1377,7 +1377,7 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic tstrncpy(pTopic->topicName, pTopicEp->topic, TSDB_TOPIC_FNAME_LEN); tstrncpy(pTopic->db, pTopicEp->db, TSDB_DB_FNAME_LEN); - tscDebug("consumer:0x%" PRIx64 ", update topic:%s, numOfVgs:%d", tmq->consumerId, pTopic->topicName, vgNumGet); + tscDebug("consumer:0x%" PRIx64 ", update topic:%s, new numOfVgs:%d", tmq->consumerId, pTopic->topicName, vgNumGet); pTopic->vgs = taosArrayInit(vgNumGet, sizeof(SMqClientVg)); for (int32_t j = 0; j < vgNumGet; j++) { @@ -1447,14 +1447,14 @@ static bool doUpdateLocalEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) SMqClientTopic* pTopicCur = taosArrayGet(tmq->clientTopics, i); if (pTopicCur->vgs) { int32_t vgNumCur = taosArrayGetSize(pTopicCur->vgs); - tscDebug("consumer:0x%" PRIx64 ", new vg num: %d", tmq->consumerId, vgNumCur); + tscDebug("consumer:0x%" PRIx64 ", current vg num: %d", tmq->consumerId, vgNumCur); for (int32_t j = 0; j < vgNumCur; j++) { SMqClientVg* pVgCur = taosArrayGet(pTopicCur->vgs, j); makeTopicVgroupKey(vgKey, pTopicCur->topicName, pVgCur->vgId); char buf[80]; tFormatOffset(buf, 80, &pVgCur->currentOffset); - tscDebug("consumer:0x%" PRIx64 ", epoch:%d vgId:%d vgKey:%s, offset:%s", tmq->consumerId, epoch, pVgCur->vgId, + tscDebug("consumer:0x%" PRIx64 ", doUpdateLocalEp current vg, epoch:%d vgId:%d vgKey:%s, offset:%s", tmq->consumerId, tmq->epoch, pVgCur->vgId, vgKey, buf); SVgroupSaveInfo info = {.offset = pVgCur->currentOffset, .numOfRows = pVgCur->numOfRows}; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 00684652f0..7f082c748b 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -553,8 +553,8 @@ int32_t 
tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg // remove if it has been register in the push manager, and return one empty block to consumer // tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); - taosHashRemove(pTq->pPushMgr, &pHandle->consumerId, sizeof(int64_t)); - + int32_t ret = taosHashRemove(pTq->pPushMgr, &pHandle->consumerId, sizeof(int64_t)); + tqError("vgId:%d remove pHandle:%p,ret:%d consumer Id:0x%" PRIx64, vgId, pHandle, ret, pHandle->consumerId); if(pHandle->msg != NULL) { rpcFreeCont(pHandle->msg->pCont); taosMemoryFree(pHandle->msg); @@ -1069,22 +1069,24 @@ int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); taosWLockLatch(&pTq->lock); - void *pIter = taosHashIterate(pTq->pPushMgr, NULL); - while(pIter){ - STqHandle* pHandle = *(STqHandle**)pIter; - tqDebug("vgId:%d start set submit for pHandle:%p", vgId, pHandle); - if(ASSERT(pHandle->msg != NULL)){ - tqError("pHandle->msg should not be null"); - break; - }else{ - SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, .pCont = pHandle->msg->pCont, .contLen = pHandle->msg->contLen, .info = pHandle->msg->info}; - tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &msg); - taosMemoryFree(pHandle->msg); - pHandle->msg = NULL; + if(taosHashGetSize(pTq->pPushMgr) > 0){ + void *pIter = taosHashIterate(pTq->pPushMgr, NULL); + while(pIter){ + STqHandle* pHandle = *(STqHandle**)pIter; + tqDebug("vgId:%d start set submit for pHandle:%p, consume id:0x%"PRIx64, vgId, pHandle, pHandle->consumerId); + if(ASSERT(pHandle->msg != NULL)){ + tqError("pHandle->msg should not be null"); + break; + }else{ + SRpcMsg msg = {.msgType = TDMT_VND_TMQ_CONSUME, .pCont = pHandle->msg->pCont, .contLen = pHandle->msg->contLen, .info = pHandle->msg->info}; + tmsgPutToQueue(&pTq->pVnode->msgCb, QUERY_QUEUE, &msg); + taosMemoryFree(pHandle->msg); + pHandle->msg = NULL; + } + pIter = taosHashIterate(pTq->pPushMgr, pIter); } - pIter = 
taosHashIterate(pTq->pPushMgr, pIter); + taosHashClear(pTq->pPushMgr); } - taosHashClear(pTq->pPushMgr); // unlock taosWUnLockLatch(&pTq->lock); diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 2398ef41f4..da8dc1d379 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -190,8 +190,9 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, pHandle->msg->pCont = rpcMallocCont(pMsg->contLen); memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); pHandle->msg->contLen = pMsg->contLen; - tqDebug("data is over, register to handle:%p, pCont:%p, len:%d", pHandle, pHandle->msg->pCont, pHandle->msg->contLen); - taosHashPut(pTq->pPushMgr, &pHandle->consumerId, sizeof(int64_t), &pHandle, POINTER_BYTES); + int32_t ret = taosHashPut(pTq->pPushMgr, &pHandle->consumerId, sizeof(int64_t), &pHandle, POINTER_BYTES); + tqDebug("vgId:%d data is over, ret:%d, consumerId:0x%" PRIx64", register to pHandle:%p, pCont:%p, len:%d", vgId, ret, pHandle->consumerId, pHandle, pHandle->msg->pCont, pHandle->msg->contLen); + taosWUnLockLatch(&pTq->lock); tDeleteSMqDataRsp(&dataRsp); return code; diff --git a/utils/test/c/tmqSim.c b/utils/test/c/tmqSim.c index f2de219f4e..d98a45f0d3 100644 --- a/utils/test/c/tmqSim.c +++ b/utils/test/c/tmqSim.c @@ -232,7 +232,7 @@ void saveConfigToLogFile() { taosFprintfFile(g_fp, "%s:%s, ", g_stConfInfo.stThreads[i].key[k], g_stConfInfo.stThreads[i].value[k]); } taosFprintfFile(g_fp, "\n"); - taosFprintfFile(g_fp, " expect rows: %" PRIx64 "\n", g_stConfInfo.stThreads[i].expectMsgCnt); + taosFprintfFile(g_fp, " expect rows: %" PRId64 "\n", g_stConfInfo.stThreads[i].expectMsgCnt); } char tmpString[128]; From 1ee1b0422c710f035f2e2dcfc9d00eac030b1e19 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Apr 2023 23:44:27 +0800 Subject: [PATCH 018/110] fix:change push mgr from SArray to Hash --- source/dnode/vnode/src/inc/tq.h | 2 +- 
source/dnode/vnode/src/inc/vnodeInt.h | 5 ++-- source/dnode/vnode/src/tq/tq.c | 28 ++++++++++------------- source/dnode/vnode/src/tq/tqPush.c | 33 +++++++++++++++++++++++++++ source/dnode/vnode/src/tq/tqUtil.c | 14 +----------- 5 files changed, 49 insertions(+), 33 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 080e72c504..1b29b34073 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -147,7 +147,7 @@ int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHea int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxRsp* pRsp, int32_t* totalRows); int32_t tqAddBlockDataToRsp(const SSDataBlock* pBlock, SMqDataRsp* pRsp, int32_t numOfCols, int8_t precision); int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type); -int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry); +int32_t tqPushDataRsp(STQ* pTq, STqHandle* pHandle); // tqMeta int32_t tqMetaOpen(STQ* pTq); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index b24cb7e136..7668d45108 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -193,9 +193,8 @@ STQ* tqOpen(const char* path, SVnode* pVnode); void tqNotifyClose(STQ*); void tqClose(STQ*); int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); -int tqRegisterPushHandle(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, - int32_t type); -int tqUnregisterPushHandle(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); +int tqRegisterPushEntry(STQ* pTq, void* handle, SRpcMsg* pMsg); +int tqUnregisterPushHandle(STQ* pTq, void* pHandle); int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. 
int tqCommit(STQ*); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 7f082c748b..53a40eb839 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -98,7 +98,7 @@ STQ* tqOpen(const char* path, SVnode* pVnode) { taosHashSetFreeFp(pTq->pHandle, destroyTqHandle); taosInitRWLatch(&pTq->lock); - pTq->pPushMgr = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); + pTq->pPushMgr = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK); pTq->pCheckInfo = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK); taosHashSetFreeFp(pTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo); @@ -220,17 +220,19 @@ static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqData return 0; } -int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry) { - SMqDataRsp* pRsp = pPushEntry->pDataRsp; - SMqRspHead* pHeader = &pPushEntry->pDataRsp->head; - doSendDataRsp(&pPushEntry->info, pRsp, pHeader->epoch, pHeader->consumerId, pHeader->mqMsgType); +int32_t tqPushDataRsp(STQ* pTq, STqHandle* pHandle) { + SMqDataRsp dataRsp = {0}; + dataRsp.head.consumerId = pHandle->consumerId; + dataRsp.head.epoch = pHandle->epoch; + dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP; + doSendDataRsp(&pHandle->msg->info, &dataRsp, pHandle->epoch, pHandle->consumerId, TMQ_MSG_TYPE__POLL_RSP); char buf1[80] = {0}; char buf2[80] = {0}; - tFormatOffset(buf1, tListLen(buf1), &pRsp->reqOffset); - tFormatOffset(buf2, tListLen(buf2), &pRsp->rspOffset); + tFormatOffset(buf1, tListLen(buf1), &dataRsp.reqOffset); + tFormatOffset(buf2, tListLen(buf2), &dataRsp.rspOffset); tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) push rsp, block num: %d, req:%s, rsp:%s", - TD_VID(pTq->pVnode), pRsp->head.consumerId, pRsp->head.epoch, pRsp->blockNum, buf1, buf2); + TD_VID(pTq->pVnode), dataRsp.head.consumerId, dataRsp.head.epoch, dataRsp.blockNum, buf1, buf2); return 0; } @@ -552,14 +554,8 @@ int32_t 
tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg atomic_store_32(&pHandle->epoch, -1); // remove if it has been register in the push manager, and return one empty block to consumer -// tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); - int32_t ret = taosHashRemove(pTq->pPushMgr, &pHandle->consumerId, sizeof(int64_t)); - tqError("vgId:%d remove pHandle:%p,ret:%d consumer Id:0x%" PRIx64, vgId, pHandle, ret, pHandle->consumerId); - if(pHandle->msg != NULL) { - rpcFreeCont(pHandle->msg->pCont); - taosMemoryFree(pHandle->msg); - pHandle->msg = NULL; - } + tqUnregisterPushHandle(pTq, pHandle); + atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index dca988cbbd..1ee19b7a7b 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -300,3 +300,36 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v return 0; } + + +int32_t tqRegisterPushEntry(STQ* pTq, void* handle, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + STqHandle* pHandle = (STqHandle*) handle; + if(pHandle->msg == NULL){ + pHandle->msg = taosMemoryCalloc(1, sizeof(SRpcMsg)); + } + + memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); + pHandle->msg->pCont = rpcMallocCont(pMsg->contLen); + memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); + pHandle->msg->contLen = pMsg->contLen; + int32_t ret = taosHashPut(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey), &pHandle, POINTER_BYTES); + tqDebug("vgId:%d data is over, ret:%d, consumerId:0x%" PRIx64", register to pHandle:%p, pCont:%p, len:%d", vgId, ret, pHandle->consumerId, pHandle, pHandle->msg->pCont, pHandle->msg->contLen); + return 0; +} + +int32_t tqUnregisterPushHandle(STQ* pTq, void *handle) { + STqHandle *pHandle = (STqHandle*)handle; + int32_t vgId = TD_VID(pTq->pVnode); + + 
int32_t ret = taosHashRemove(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey)); + tqError("vgId:%d remove pHandle:%p,ret:%d consumer Id:0x%" PRIx64, vgId, pHandle, ret, pHandle->consumerId); + if(pHandle->msg != NULL) { + tqPushDataRsp(pTq, pHandle); + + rpcFreeCont(pHandle->msg->pCont); + taosMemoryFree(pHandle->msg); + pHandle->msg = NULL; + } + return 0; +} diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index da8dc1d379..1f06132d2f 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -178,21 +178,9 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, // till now, all data has been transferred to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { -// code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); // lock taosWLockLatch(&pTq->lock); -// tqDebug("data is over, register to handle:%p, msg:%p", pHandle, pHandle->msg); - if(pHandle->msg == NULL){ - pHandle->msg = taosMemoryCalloc(1, sizeof(SRpcMsg)); - } - - memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); - pHandle->msg->pCont = rpcMallocCont(pMsg->contLen); - memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); - pHandle->msg->contLen = pMsg->contLen; - int32_t ret = taosHashPut(pTq->pPushMgr, &pHandle->consumerId, sizeof(int64_t), &pHandle, POINTER_BYTES); - tqDebug("vgId:%d data is over, ret:%d, consumerId:0x%" PRIx64", register to pHandle:%p, pCont:%p, len:%d", vgId, ret, pHandle->consumerId, pHandle, pHandle->msg->pCont, pHandle->msg->contLen); - + code = tqRegisterPushEntry(pTq, pHandle, pMsg); taosWUnLockLatch(&pTq->lock); tDeleteSMqDataRsp(&dataRsp); return code; From cce868d140249866698d80aa5e84c487299279aa Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 28 Apr 2023 
09:45:16 +0800 Subject: [PATCH 019/110] fix:offset encode assert error --- source/client/src/clientTmq.c | 5 +++-- source/common/src/tmsg.c | 8 ++------ source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/tq/tq.c | 3 +-- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 33324552dc..0a31eac09c 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1790,8 +1790,9 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { pVg->epSet = *pollRspWrapper->pEpset; } - // update the local offset value only for the returned values. - pVg->currentOffset = pDataRsp->rspOffset; + if(pDataRsp->rspOffset.type != 0){ // if offset is validate + pVg->currentOffset = pDataRsp->rspOffset; // update the local offset value only for the returned values. + } atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); char buf[80]; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index cbf856a799..2639b81c39 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -6839,10 +6839,8 @@ int32_t tEncodeSTqOffsetVal(SEncoder *pEncoder, const STqOffsetVal *pOffsetVal) if (tEncodeI64(pEncoder, pOffsetVal->ts) < 0) return -1; } else if (pOffsetVal->type == TMQ_OFFSET__LOG) { if (tEncodeI64(pEncoder, pOffsetVal->version) < 0) return -1; - } else if (pOffsetVal->type < 0) { - // do nothing } else { - ASSERT(0); + // do nothing } return 0; } @@ -6854,10 +6852,8 @@ int32_t tDecodeSTqOffsetVal(SDecoder *pDecoder, STqOffsetVal *pOffsetVal) { if (tDecodeI64(pDecoder, &pOffsetVal->ts) < 0) return -1; } else if (pOffsetVal->type == TMQ_OFFSET__LOG) { if (tDecodeI64(pDecoder, &pOffsetVal->version) < 0) return -1; - } else if (pOffsetVal->type < 0) { - // do nothing } else { - ASSERT(0); + // do nothing } return 0; } diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 1b29b34073..30b2fb74ca 100644 --- 
a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -114,7 +114,7 @@ struct STQ { char* path; int64_t walLogLastVer; SRWLatch lock; - SHashObj* pPushMgr; // consumerId -> STqHandle + SHashObj* pPushMgr; // subKey -> STqHandle SHashObj* pHandle; // subKey -> STqHandle SHashObj* pCheckInfo; // topic -> SAlterCheckInfo STqOffsetStore* pOffsetStore; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 53a40eb839..bd29c8019d 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -551,13 +551,12 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } taosWLockLatch(&pTq->lock); - atomic_store_32(&pHandle->epoch, -1); + atomic_store_32(&pHandle->epoch, 0); // remove if it has been register in the push manager, and return one empty block to consumer tqUnregisterPushHandle(pTq, pHandle); atomic_store_64(&pHandle->consumerId, req.newConsumerId); - atomic_add_fetch_32(&pHandle->epoch, 1); if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { qStreamCloseTsdbReader(pTaskInfo); From 18d05ff69fc159e5979695c9477720232ef39fc4 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 28 Apr 2023 15:53:56 +0800 Subject: [PATCH 020/110] fix:memory leak --- source/dnode/vnode/src/tq/tqPush.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 1ee19b7a7b..68240195d7 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -307,10 +307,14 @@ int32_t tqRegisterPushEntry(STQ* pTq, void* handle, SRpcMsg* pMsg) { STqHandle* pHandle = (STqHandle*) handle; if(pHandle->msg == NULL){ pHandle->msg = taosMemoryCalloc(1, sizeof(SRpcMsg)); + memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); + pHandle->msg->pCont = rpcMallocCont(pMsg->contLen); + }else{ + void *tmp = pHandle->msg->pCont; + memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); + pHandle->msg->pCont = 
tmp; } - memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); - pHandle->msg->pCont = rpcMallocCont(pMsg->contLen); memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); pHandle->msg->contLen = pMsg->contLen; int32_t ret = taosHashPut(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey), &pHandle, POINTER_BYTES); From 8d84e8f8a54e69e45848ed66c20703204d9350ab Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 28 Apr 2023 19:29:17 +0800 Subject: [PATCH 021/110] fix:[TD-23788] client wait if task status error in taosx transform data --- source/client/src/clientTmq.c | 8 ++++++-- source/dnode/vnode/src/tq/tqUtil.c | 6 ++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 0a31eac09c..b5ae9116ef 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1830,7 +1830,9 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { if (pollRspWrapper->metaRsp.head.epoch == consumerEpoch) { SMqClientVg* pVg = pollRspWrapper->vgHandle; - pVg->currentOffset = pollRspWrapper->metaRsp.rspOffset; + if(pollRspWrapper->metaRsp.rspOffset.type != 0){ // if offset is validate + pVg->currentOffset = pollRspWrapper->metaRsp.rspOffset; + } atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); // build rsp SMqMetaRspObj* pRsp = tmqBuildMetaRspFromWrapper(pollRspWrapper); @@ -1848,7 +1850,9 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { if (pollRspWrapper->taosxRsp.head.epoch == consumerEpoch) { SMqClientVg* pVg = pollRspWrapper->vgHandle; - pVg->currentOffset = pollRspWrapper->taosxRsp.rspOffset; + if(pollRspWrapper->taosxRsp.rspOffset.type != 0){ // if offset is validate + pVg->currentOffset = pollRspWrapper->taosxRsp.rspOffset; + } atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); if (pollRspWrapper->taosxRsp.blockNum == 0) { diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 
1f06132d2f..1b5e498a7f 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -211,6 +211,12 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, SMqMetaRsp metaRsp = {0}; STaosxRsp taosxRsp = {0}; tqInitTaosxRsp(&taosxRsp, pRequest); + qTaskInfo_t task = pHandle->execHandle.task; + if(qTaskIsExecuting(task)){ + code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP, vgId); + tDeleteSTaosxRsp(&taosxRsp); + return code; + } if (offset->type != TMQ_OFFSET__LOG) { if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, offset) < 0) { From 3b66e63444f1f9076ed54b23078ba2ae50b44857 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 28 Apr 2023 20:07:54 +0800 Subject: [PATCH 022/110] fix:[TD-23788] client wait if task status error in taosx transform data --- source/dnode/vnode/src/tq/tqUtil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 1b5e498a7f..d186c63871 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -213,7 +213,7 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, tqInitTaosxRsp(&taosxRsp, pRequest); qTaskInfo_t task = pHandle->execHandle.task; if(qTaskIsExecuting(task)){ - code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP, vgId); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); tDeleteSTaosxRsp(&taosxRsp); return code; } From d8e0d02e6ef82dad5244133e4d8bdf5ccae7a025 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 30 Apr 2023 16:32:51 +0800 Subject: [PATCH 023/110] refactor: do some internal refactor. 
--- source/dnode/vnode/src/tsdb/tsdbRead.c | 117 ++++++++++++++----------- 1 file changed, 68 insertions(+), 49 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index eb15400d05..465220a0c6 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -3395,6 +3395,11 @@ static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { STableUidList* pUidList = &pStatus->uidList; while (1) { + if (pReader->flag == READER_STATUS_SHOULD_STOP) { + tsdbWarn("tsdb reader is stopped ASAP, %s", pReader->idStr); + return TSDB_CODE_SUCCESS; + } + STableBlockScanInfo** pBlockScanInfo = pStatus->pTableIter; initMemDataIterator(*pBlockScanInfo, pReader); @@ -3474,45 +3479,67 @@ static bool fileBlockPartiallyRead(SFileBlockDumpInfo* pDumpInfo, bool asc) { ((pDumpInfo->rowIndex > 0 && asc) || (pDumpInfo->rowIndex < (pDumpInfo->totalRows - 1) && (!asc))); } +typedef enum { + TSDB_READ_RETURN = 0x1, + TSDB_READ_CONTINUE = 0x2, +} ERetrieveType; + +static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { + int32_t code = TSDB_CODE_SUCCESS; + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + SDataBlockIter* pBlockIter = &pReader->status.blockIter; + + while(1) { + terrno = 0; + + code = doLoadLastBlockSequentially(pReader); + if (code != TSDB_CODE_SUCCESS || pReader->flag == READER_STATUS_SHOULD_STOP) { + terrno = code; + return TSDB_READ_RETURN; + } + + if (pResBlock->info.rows > 0) { + return TSDB_READ_RETURN; + } + + // all data blocks are checked in this last block file, now let's try the next file + ASSERT(pReader->status.pTableIter == NULL); + code = initForFirstBlockInFile(pReader, pBlockIter); + + // error happens or all the data files are completely checked + if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false) || + pReader->flag == READER_STATUS_SHOULD_STOP) { + terrno = code; + return TSDB_READ_RETURN; + } + + if 
(pBlockIter->numOfBlocks > 0) { // there are data blocks existed. + return TSDB_READ_CONTINUE; + } else { // all blocks in data file are checked, let's check the data in last files + resetTableListIndex(&pReader->status); + } + } +} + static int32_t buildBlockFromFiles(STsdbReader* pReader) { int32_t code = TSDB_CODE_SUCCESS; bool asc = ASCENDING_TRAVERSE(pReader->order); SDataBlockIter* pBlockIter = &pReader->status.blockIter; + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; if (pBlockIter->numOfBlocks == 0) { - _begin: - code = doLoadLastBlockSequentially(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - if (pReader->resBlockInfo.pResBlock->info.rows > 0) { - return TSDB_CODE_SUCCESS; - } - - // all data blocks are checked in this last block file, now let's try the next file - if (pReader->status.pTableIter == NULL) { - code = initForFirstBlockInFile(pReader, pBlockIter); - - // error happens or all the data files are completely checked - if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { - return code; - } - - // this file does not have data files, let's start check the last block file if exists - if (pBlockIter->numOfBlocks == 0) { - resetTableListIndex(&pReader->status); - goto _begin; - } + ERetrieveType type = doReadDataFromLastFiles(pReader); + if (type != TSDB_READ_RETURN) { + return terrno; } code = doBuildDataBlock(pReader); - if (code != TSDB_CODE_SUCCESS) { + if (code != TSDB_CODE_SUCCESS || pReader->flag == READER_STATUS_SHOULD_STOP) { return code; } - if (pReader->resBlockInfo.pResBlock->info.rows > 0) { + if (pResBlock->info.rows > 0) { return TSDB_CODE_SUCCESS; } } @@ -3530,30 +3557,22 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { if (hasNext) { // check for the next block in the block accessed order list initBlockDumpInfo(pReader, pBlockIter); } else { - if (pReader->status.pCurrentFileset->nSttF > 0) { - // data blocks in current file are exhausted, let's try the next file 
now - SBlockData* pBlockData = &pReader->status.fileBlockData; - if (pBlockData->uid != 0) { - tBlockDataClear(pBlockData); - } + // all data blocks in files are checked, let's check the data in last files. + ASSERT(pReader->status.pCurrentFileset->nSttF > 0); - tBlockDataReset(pBlockData); - resetDataBlockIterator(pBlockIter, pReader->order); - resetTableListIndex(&pReader->status); - goto _begin; - } else { - code = initForFirstBlockInFile(pReader, pBlockIter); + // data blocks in current file are exhausted, let's try the next file now + SBlockData* pBlockData = &pReader->status.fileBlockData; + if (pBlockData->uid != 0) { + tBlockDataClear(pBlockData); + } - // error happens or all the data files are completely checked - if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { - return code; - } + tBlockDataReset(pBlockData); + resetDataBlockIterator(pBlockIter, pReader->order); + resetTableListIndex(&pReader->status); - // this file does not have blocks, let's start check the last block file - if (pBlockIter->numOfBlocks == 0) { - resetTableListIndex(&pReader->status); - goto _begin; - } + ERetrieveType type = doReadDataFromLastFiles(pReader); + if (type != TSDB_READ_RETURN) { + return terrno; } } } @@ -3561,11 +3580,11 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { code = doBuildDataBlock(pReader); } - if (code != TSDB_CODE_SUCCESS) { + if (code != TSDB_CODE_SUCCESS || pReader->flag == READER_STATUS_SHOULD_STOP) { return code; } - if (pReader->resBlockInfo.pResBlock->info.rows > 0) { + if (pResBlock->info.rows > 0) { return TSDB_CODE_SUCCESS; } } From 9cc4721e2acec9f6215174a16823ec4af0fcd316 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 30 Apr 2023 16:37:09 +0800 Subject: [PATCH 024/110] refactor: do some internal refactor. 
--- source/dnode/vnode/src/tsdb/tsdbRead.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 465220a0c6..1ef86f5b30 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -3529,8 +3529,9 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; if (pBlockIter->numOfBlocks == 0) { + // let's try to extract data from stt files. ERetrieveType type = doReadDataFromLastFiles(pReader); - if (type != TSDB_READ_RETURN) { + if (type == TSDB_READ_RETURN) { return terrno; } @@ -3571,7 +3572,7 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { resetTableListIndex(&pReader->status); ERetrieveType type = doReadDataFromLastFiles(pReader); - if (type != TSDB_READ_RETURN) { + if (type == TSDB_READ_RETURN) { return terrno; } } From 563fca5c54f1c0fb5fca22f993d3563efb3b612e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 1 May 2023 14:47:01 +0800 Subject: [PATCH 025/110] fix(tmq): remove unnecessary error log. 
--- source/dnode/vnode/src/tq/tqOffset.c | 103 +++++++++++++++------------ source/dnode/vnode/src/tq/tqPush.c | 15 ---- 2 files changed, 58 insertions(+), 60 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqOffset.c b/source/dnode/vnode/src/tq/tqOffset.c index e8051a1406..34e93cec2d 100644 --- a/source/dnode/vnode/src/tq/tqOffset.c +++ b/source/dnode/vnode/src/tq/tqOffset.c @@ -31,57 +31,67 @@ char* tqOffsetBuildFName(const char* path, int32_t fVer) { int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname) { TdFilePtr pFile = taosOpenFile(fname, TD_FILE_READ); - if (pFile != NULL) { - STqOffsetHead head = {0}; - int64_t code; + if (pFile == NULL) { + return TSDB_CODE_SUCCESS; + } - while (1) { - if ((code = taosReadFile(pFile, &head, sizeof(STqOffsetHead))) != sizeof(STqOffsetHead)) { - if (code == 0) { - break; - } else { - return -1; - } - } - int32_t size = htonl(head.size); - void* memBuf = taosMemoryCalloc(1, size); - if (memBuf == NULL) { + int32_t vgId = TD_VID(pStore->pTq->pVnode); + int64_t code = 0; + + STqOffsetHead head = {0}; + + while (1) { + if ((code = taosReadFile(pFile, &head, sizeof(STqOffsetHead))) != sizeof(STqOffsetHead)) { + if (code == 0) { + break; + } else { return -1; } - if ((code = taosReadFile(pFile, memBuf, size)) != size) { - taosMemoryFree(memBuf); - return -1; - } - STqOffset offset; - SDecoder decoder; - tDecoderInit(&decoder, memBuf, size); - if (tDecodeSTqOffset(&decoder, &offset) < 0) { - taosMemoryFree(memBuf); - tDecoderClear(&decoder); - return -1; - } - - tDecoderClear(&decoder); - if (taosHashPut(pStore->pHash, offset.subKey, strlen(offset.subKey), &offset, sizeof(STqOffset)) < 0) { - return -1; - } - - if (offset.val.type == TMQ_OFFSET__LOG) { - STqHandle* pHandle = taosHashGet(pStore->pTq->pHandle, offset.subKey, strlen(offset.subKey)); - if (pHandle) { - if (walRefVer(pHandle->pRef, offset.val.version) < 0) { - tqError("vgId: %d, tq handle %s ref ver %" PRId64 "error", 
pStore->pTq->pVnode->config.vgId, - pHandle->subKey, offset.val.version); - } - } - } - - taosMemoryFree(memBuf); } - taosCloseFile(&pFile); + int32_t size = htonl(head.size); + void* pMemBuf = taosMemoryCalloc(1, size); + if (pMemBuf == NULL) { + tqError("vgId:%d failed to restore offset from file, since out of memory, malloc size:%d", vgId, size); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + if ((code = taosReadFile(pFile, pMemBuf, size)) != size) { + taosMemoryFree(pMemBuf); + return -1; + } + + STqOffset offset; + SDecoder decoder; + tDecoderInit(&decoder, pMemBuf, size); + if (tDecodeSTqOffset(&decoder, &offset) < 0) { + taosMemoryFree(pMemBuf); + tDecoderClear(&decoder); + return code; + } + + tDecoderClear(&decoder); + if (taosHashPut(pStore->pHash, offset.subKey, strlen(offset.subKey), &offset, sizeof(STqOffset)) < 0) { + return -1; + } + + // todo remove this + if (offset.val.type == TMQ_OFFSET__LOG) { + STqHandle* pHandle = taosHashGet(pStore->pTq->pHandle, offset.subKey, strlen(offset.subKey)); + if (pHandle) { + if (walRefVer(pHandle->pRef, offset.val.version) < 0) { +// tqError("vgId: %d, tq handle %s ref ver %" PRId64 "error", pStore->pTq->pVnode->config.vgId, pHandle->subKey, +// offset.val.version); + } + } + } + + taosMemoryFree(pMemBuf); } - return 0; + + taosCloseFile(&pFile); + return TSDB_CODE_SUCCESS; } STqOffsetStore* tqOffsetOpen(STQ* pTq) { @@ -89,6 +99,7 @@ STqOffsetStore* tqOffsetOpen(STQ* pTq) { if (pStore == NULL) { return NULL; } + pStore->pTq = pTq; pStore->needCommit = 0; pTq->pOffsetStore = pStore; @@ -98,12 +109,14 @@ STqOffsetStore* tqOffsetOpen(STQ* pTq) { taosMemoryFree(pStore); return NULL; } + char* fname = tqOffsetBuildFName(pStore->pTq->path, 0); if (tqOffsetRestoreFromFile(pStore, fname) < 0) { taosMemoryFree(fname); taosMemoryFree(pStore); return NULL; } + taosMemoryFree(fname); return pStore; } diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 7a1a6b7454..ce222fcda7 
100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -335,21 +335,6 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v } if (msgType == TDMT_VND_SUBMIT) { -#if 0 - void* data = taosMemoryMalloc(len); - if (data == NULL) { - // todo: for all stream in this vnode, keep this offset in the offset files, and wait for a moment, and then retry - terrno = TSDB_CODE_OUT_OF_MEMORY; - tqError("vgId:%d, failed to copy submit data for stream processing, since out of memory", vgId); - return -1; - } - - memcpy(data, pReq, len); - SPackedData submit = {.msgStr = data, .msgLen = len, .ver = ver}; - - tqDebug("vgId:%d tq copy submit msg:%p len:%d ver:%" PRId64 " from %p for stream", vgId, data, len, ver, pReq); - tqProcessSubmitReq(pTq, submit); -#endif SPackedData submit = {0}; tqProcessSubmitReq(pTq, submit); } From 6c86847b12940450397a1194b7310fe1182e3421 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 1 May 2023 14:58:59 +0800 Subject: [PATCH 026/110] refactor: do some internal refactor. 
--- source/libs/stream/src/streamRecover.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 0d1440fbde..0d214661c4 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -68,11 +68,12 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; pTask->checkReqId = req.reqId; - qDebug("task %d at node %d check downstream task %d at node %d", pTask->id.taskId, pTask->nodeId, req.downstreamTaskId, + qDebug("s-task:%s at node %d check downstream task %d at node %d", pTask->id.idStr, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); streamDispatchOneCheckReq(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; + int32_t vgSz = taosArrayGetSize(vgInfo); pTask->recoverTryingDownstream = vgSz; pTask->checkReqIds = taosArrayInit(vgSz, sizeof(int64_t)); @@ -83,14 +84,15 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { taosArrayPush(pTask->checkReqIds, &req.reqId); req.downstreamNodeId = pVgInfo->vgId; req.downstreamTaskId = pVgInfo->taskId; - qDebug("task %d at node %d check downstream task %d at node %d (shuffle)", pTask->id.taskId, pTask->nodeId, + qDebug("s-task:%s at node %d check downstream task %d at node %d (shuffle)", pTask->id.idStr, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); streamDispatchOneCheckReq(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { - qDebug("task %d at node %d direct launch recover since no downstream", pTask->id.taskId, pTask->nodeId); + qDebug("s-task:%s at node %d direct launch recover since no downstream", pTask->id.idStr, pTask->nodeId); streamTaskLaunchRecover(pTask, version); } + return 0; } From 
3c61932eccc7ea3a968790a24f01ff2decbb0ce2 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 1 May 2023 19:28:54 +0800 Subject: [PATCH 027/110] refactor: do some internal refactor. --- include/libs/stream/tstream.h | 6 +++--- source/dnode/vnode/src/tq/tq.c | 28 ++++++++++++------------- source/dnode/vnode/src/tq/tqRestore.c | 13 ++++++------ source/libs/stream/inc/streamInc.h | 2 +- source/libs/stream/src/streamDispatch.c | 4 ++-- source/libs/stream/src/streamExec.c | 2 +- source/libs/stream/src/streamMeta.c | 2 +- source/libs/stream/src/streamRecover.c | 27 +++++++++++++++++------- 8 files changed, 47 insertions(+), 37 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 78fd9bed5d..48c15e9117 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -50,7 +50,6 @@ enum { TASK_STATUS__RECOVER_PREPARE, TASK_STATUS__RECOVER1, TASK_STATUS__RECOVER2, - TASK_STATUS__RESTORE, // only available for source task to replay WAL from the checkpoint }; enum { @@ -346,7 +345,7 @@ typedef struct SStreamMeta { FTaskExpand* expandFunc; int32_t vgId; SRWLatch lock; - int32_t walScan; + int32_t walScanCounter; } SStreamMeta; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -545,8 +544,9 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz); // recover and fill history int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version); int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version); -int32_t streamProcessTaskCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* pReq); +int32_t streamTaskCheckStatus(SStreamTask* pTask); int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version); + // common int32_t streamSetParamForRecover(SStreamTask* pTask); int32_t streamRestoreParam(SStreamTask* pTask); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 36c35ab415..fed7a8cf74 100644 
--- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -111,8 +111,13 @@ STQ* tqOpen(const char* path, SVnode* pVnode) { pTq->pCheckInfo = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK); taosHashSetFreeFp(pTq->pCheckInfo, (FDelete)tDeleteSTqCheckInfo); - tqInitialize(pTq); - return pTq; + int32_t code = tqInitialize(pTq); + if (code != TSDB_CODE_SUCCESS) { + tqClose(pTq); + return NULL; + } else { + return pTq; + } } int32_t tqInitialize(STQ* pTq) { @@ -601,11 +606,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->chkInfo.currentVer = ver; // expand executor - if (pTask->fillHistory) { - pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; - } else { - pTask->status.taskStatus = TASK_STATUS__RESTORE; - } + pTask->status.taskStatus = (pTask->fillHistory)? TASK_STATUS__WAIT_DOWNSTREAM:TASK_STATUS__NORMAL; if (pTask->taskLevel == TASK_LEVEL__SOURCE) { pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1); @@ -664,6 +665,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { } streamSetupTrigger(pTask); + tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel); @@ -693,8 +695,9 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { }; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); + if (pTask) { - rsp.status = (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL) ? 
1 : 0; + rsp.status = streamTaskCheckStatus(pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask); tqDebug("tq recv task check req(reqId:0x%" PRIx64 @@ -1147,9 +1150,6 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask != NULL) { if (pTask->status.taskStatus == TASK_STATUS__NORMAL) { - tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr); - streamProcessRunReq(pTask); - } else if (pTask->status.taskStatus == TASK_STATUS__RESTORE) { tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.version); streamProcessRunReq(pTask); @@ -1313,10 +1313,10 @@ int32_t tqStartStreamTasks(STQ* pTq) { return 0; } - pMeta->walScan += 1; + pMeta->walScanCounter += 1; - if (pMeta->walScan > 1) { - tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScan); + if (pMeta->walScanCounter > 1) { + tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter); taosWUnLockLatch(&pTq->pStreamMeta->lock); return 0; } diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index 22903b95d9..6c8fce5b14 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -18,15 +18,14 @@ static int32_t createStreamRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle); // this function should be executed by stream threads. -// there is a case that the WAL increases more fast than the restore procedure, and this restore procedure -// will not stop eventually. +// extract submit block from WAL, and add them into the input queue for the sources tasks. 
int32_t tqStreamTasksScanWal(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; int64_t st = taosGetTimestampMs(); while (1) { - int32_t scan = pMeta->walScan; + int32_t scan = pMeta->walScanCounter; tqDebug("vgId:%d continue check if data in wal are available, scan:%d", vgId, scan); // check all restore tasks @@ -37,12 +36,12 @@ int32_t tqStreamTasksScanWal(STQ* pTq) { if (shouldIdle) { taosWLockLatch(&pMeta->lock); - pMeta->walScan -= 1; - times = pMeta->walScan; + pMeta->walScanCounter -= 1; + times = pMeta->walScanCounter; - ASSERT(pMeta->walScan >= 0); + ASSERT(pMeta->walScanCounter >= 0); - if (pMeta->walScan <= 0) { + if (pMeta->walScanCounter <= 0) { taosWUnLockLatch(&pMeta->lock); break; } diff --git a/source/libs/stream/inc/streamInc.h b/source/libs/stream/inc/streamInc.h index 876b80697a..b17afeec98 100644 --- a/source/libs/stream/inc/streamInc.h +++ b/source/libs/stream/inc/streamInc.h @@ -39,7 +39,7 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); -int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); +int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId, SEpSet* pEpSet); diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 549374ed94..d12eca7ce3 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -208,7 +208,7 @@ static int32_t streamAddBlockToDispatchMsg(const SSDataBlock* pBlock, SStreamDis return 0; } -int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet) { +int32_t 
streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet) { void* buf = NULL; int32_t code = -1; SRpcMsg msg = {0}; @@ -240,7 +240,7 @@ int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* msg.pCont = buf; msg.msgType = TDMT_STREAM_TASK_CHECK; - qDebug("dispatch from s-task:%s to downstream s-task:%" PRIx64 ":%d node %d: check msg", pTask->id.idStr, + qDebug("s-task:%s dispatch check msg to downstream s-task:%" PRIx64 ":%d node %d: check msg", pTask->id.idStr, pReq->streamId, pReq->downstreamTaskId, nodeId); tmsgSendReq(pEpSet, &msg); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index e711700ef2..f33e126068 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -28,7 +28,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* while (pTask->taskLevel == TASK_LEVEL__SOURCE) { int8_t status = atomic_load_8(&pTask->status.taskStatus); - if (status != TASK_STATUS__NORMAL && status != TASK_STATUS__RESTORE) { + if (status != TASK_STATUS__NORMAL) { qError("stream task wait for the end of fill history, s-task:%s, status:%d", pTask->id.idStr, atomic_load_8(&pTask->status.taskStatus)); taosMsleep(2); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 065e9d280f..822ae2a485 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -287,6 +287,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { tdbTbcClose(pCur); return -1; } + tDecoderInit(&decoder, (uint8_t*)pVal, vLen); tDecodeStreamTask(&decoder, pTask); tDecoderClear(&decoder); @@ -305,7 +306,6 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } - /*pTask->status.taskStatus = TASK_STATUS__NORMAL;*/ if (pTask->fillHistory) { pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; streamTaskCheckDownstream(pTask, ver); diff 
--git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 0d214661c4..67f3a95827 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -54,6 +54,8 @@ int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { // checkstatus int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { + qDebug("s-taks:%s in fill history stage, ver:%"PRId64, pTask->id.idStr, version); + SStreamTaskCheckReq req = { .streamId = pTask->id.streamId, .upstreamTaskId = pTask->id.taskId, @@ -63,6 +65,7 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { // serialize if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { + req.reqId = tGenIdPI64(); req.downstreamNodeId = pTask->fixedEpDispatcher.nodeId; req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; @@ -70,7 +73,7 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { qDebug("s-task:%s at node %d check downstream task %d at node %d", pTask->id.idStr, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); - streamDispatchOneCheckReq(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); + streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; @@ -86,7 +89,7 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { req.downstreamTaskId = pVgInfo->taskId; qDebug("s-task:%s at node %d check downstream task %d at node %d (shuffle)", pTask->id.idStr, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); - streamDispatchOneCheckReq(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); + streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { qDebug("s-task:%s at node %d direct launch recover since no downstream", pTask->id.idStr, pTask->nodeId); @@ 
-111,14 +114,14 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp req.downstreamTaskId, req.downstreamNodeId); if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { - streamDispatchOneCheckReq(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet); + streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; int32_t vgSz = taosArrayGetSize(vgInfo); for (int32_t i = 0; i < vgSz; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); if (pVgInfo->taskId == req.downstreamTaskId) { - streamDispatchOneCheckReq(pTask, &req, pRsp->downstreamNodeId, &pVgInfo->epSet); + streamDispatchCheckMsg(pTask, &req, pRsp->downstreamNodeId, &pVgInfo->epSet); } } } @@ -126,8 +129,8 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp return 0; } -int32_t streamProcessTaskCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* pReq) { - return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL; +int32_t streamTaskCheckStatus(SStreamTask* pTask) { + return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL? 
1:0; } int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version) { @@ -137,7 +140,9 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* if (pRsp->status == 1) { if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { bool found = false; - for (int32_t i = 0; i < taosArrayGetSize(pTask->checkReqIds); i++) { + + int32_t numOfReqs = taosArrayGetSize(pTask->checkReqIds); + for (int32_t i = 0; i < numOfReqs; i++) { int64_t reqId = *(int64_t*)taosArrayGet(pTask->checkReqIds, i); if (reqId == pRsp->reqId) { found = true; @@ -151,9 +156,12 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* int32_t left = atomic_sub_fetch_32(&pTask->recoverTryingDownstream, 1); ASSERT(left >= 0); + if (left == 0) { taosArrayDestroy(pTask->checkReqIds); pTask->checkReqIds = NULL; + + qDebug("s-task:%s all downstream tasks:%d are ready, now enter into recover stage", pTask->id.idStr, numOfReqs); streamTaskLaunchRecover(pTask, version); } } else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { @@ -165,7 +173,10 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* } else { ASSERT(0); } - } else { // not ready, it should wait for at least 100ms and then retry + } else { // not ready, wait for 100ms and retry + qDebug("s-task:%s downstream taskId:%"PRId64" (vgId:%d) not ready, wait for 100ms and retry", pTask->id.idStr, + pRsp->downstreamTaskId, pRsp->downstreamNodeId); + taosMsleep(100); streamRecheckOneDownstream(pTask, pRsp); } From a1263b8b04ef72588a4adc605eb23ad1afbd9144 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 1 May 2023 19:40:26 +0800 Subject: [PATCH 028/110] fix(query):fix syntax error. 
--- source/libs/stream/src/streamRecover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 67f3a95827..0324580885 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -174,7 +174,7 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* ASSERT(0); } } else { // not ready, wait for 100ms and retry - qDebug("s-task:%s downstream taskId:%"PRId64" (vgId:%d) not ready, wait for 100ms and retry", pTask->id.idStr, + qDebug("s-task:%s downstream taskId:%d (vgId:%d) not ready, wait for 100ms and retry", pTask->id.idStr, pRsp->downstreamTaskId, pRsp->downstreamNodeId); taosMsleep(100); streamRecheckOneDownstream(pTask, pRsp); From 3b814f24784b237f4f161124fbeb37e9677088a8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 1 May 2023 23:21:29 +0800 Subject: [PATCH 029/110] fix(stream): do some internal refactor. --- source/dnode/vnode/src/tq/tqRestore.c | 6 +++--- source/libs/stream/src/stream.c | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index 6c8fce5b14..58cb7b9e63 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -20,13 +20,13 @@ static int32_t createStreamRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle); // this function should be executed by stream threads. // extract submit block from WAL, and add them into the input queue for the sources tasks. 
int32_t tqStreamTasksScanWal(STQ* pTq) { - int32_t vgId = TD_VID(pTq->pVnode); + int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; - int64_t st = taosGetTimestampMs(); + int64_t st = taosGetTimestampMs(); while (1) { int32_t scan = pMeta->walScanCounter; - tqDebug("vgId:%d continue check if data in wal are available, scan:%d", vgId, scan); + tqDebug("vgId:%d continue check if data in wal are available, walScanCounter:%d", vgId, scan); // check all restore tasks bool shouldIdle = true; diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 046dab380e..9ed297bd6b 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -212,9 +212,10 @@ int32_t streamTaskOutput(SStreamTask* pTask, SStreamDataBlock* pBlock) { } int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { - qDebug("vgId:%d s-task:%s receive dispatch req from taskId:%d", pReq->upstreamNodeId, pTask->id.idStr, - pReq->upstreamTaskId); + qDebug("s-task:%s receive dispatch req from taskId:%d(vgId:%d)", pTask->id.idStr, pReq->upstreamTaskId, + pReq->upstreamNodeId); + // todo add the input queue buffer limitation streamTaskEnqueueBlocks(pTask, pReq, pRsp); tDeleteStreamDispatchReq(pReq); @@ -222,10 +223,6 @@ int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, S if (streamTryExec(pTask) < 0) { return -1; } - - /*if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {*/ - /*streamDispatch(pTask);*/ - /*}*/ } else { streamSchedExec(pTask); } From 79342cf1f46419e95f2252a14fca150a9551c4e4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 2 May 2023 22:51:13 +0800 Subject: [PATCH 030/110] refactor(tqm): do some internal refactor. 
--- source/dnode/mnode/impl/inc/mndConsumer.h | 3 +- source/dnode/mnode/impl/inc/mndDef.h | 2 +- source/dnode/mnode/impl/src/mndConsumer.c | 51 ++++++++++------------- source/dnode/mnode/impl/src/mndDef.c | 2 +- 4 files changed, 24 insertions(+), 34 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndConsumer.h b/source/dnode/mnode/impl/inc/mndConsumer.h index aa38b94fd7..96401511d2 100644 --- a/source/dnode/mnode/impl/inc/mndConsumer.h +++ b/source/dnode/mnode/impl/inc/mndConsumer.h @@ -23,13 +23,12 @@ extern "C" { #endif enum { - MQ_CONSUMER_STATUS__MODIFY = 1, + MQ_CONSUMER_STATUS_REBALANCE = 1, // MQ_CONSUMER_STATUS__MODIFY_IN_REB, // this value is not used anymore MQ_CONSUMER_STATUS__READY, MQ_CONSUMER_STATUS__LOST, // MQ_CONSUMER_STATUS__LOST_IN_REB, // this value is not used anymore MQ_CONSUMER_STATUS__LOST_REBD, - MQ_CONSUMER_STATUS__REMOVED, }; int32_t mndInitConsumer(SMnode *pMnode); diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index fcd314d2ae..2579ff5231 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -142,7 +142,7 @@ typedef enum { CONSUMER_UPDATE__REMOVE, CONSUMER_UPDATE__LOST, CONSUMER_UPDATE__RECOVER, - CONSUMER_UPDATE__MODIFY, // subscribe req need change consume topic + CONSUMER_UPDATE__REBALANCE, // subscribe req need change consume topic } ECsmUpdateType; typedef struct { diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 65a2fa72a2..16ed158fed 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -192,6 +192,7 @@ FAIL: return -1; } +// todo check the clear process static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; SMqConsumerClearMsg *pClearMsg = pMsg->pCont; @@ -302,11 +303,10 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { pLostMsg->consumerId = pConsumer->consumerId; SRpcMsg rpcMsg = { - 
.msgType = TDMT_MND_TMQ_CONSUMER_LOST, - .pCont = pLostMsg, - .contLen = sizeof(SMqConsumerLostMsg), - }; + .msgType = TDMT_MND_TMQ_CONSUMER_LOST, .pCont = pLostMsg, .contLen = sizeof(SMqConsumerLostMsg)}; + mDebug("consumer:0x%"PRIx64" hb not received beyond threshold %d, set to lost", pConsumer->consumerId, + MND_CONSUMER_LOST_HB_CNT); tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); } } else if (status == MQ_CONSUMER_STATUS__LOST_REBD) { @@ -316,11 +316,10 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { pClearMsg->consumerId = pConsumer->consumerId; SRpcMsg rpcMsg = { - .msgType = TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, - .pCont = pClearMsg, - .contLen = sizeof(SMqConsumerClearMsg), - }; + .msgType = TDMT_MND_TMQ_LOST_CONSUMER_CLEAR, .pCont = pClearMsg, .contLen = sizeof(SMqConsumerClearMsg)}; + mDebug("consumer:0x%" PRIx64 " lost beyond threshold %d, clear it", pConsumer->consumerId, + MND_CONSUMER_LOST_CLEAR_THRESHOLD); tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); } } else if (status == MQ_CONSUMER_STATUS__LOST) { @@ -334,7 +333,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId); } taosRUnLockLatch(&pConsumer->lock); - } else { + } else { // MQ_CONSUMER_STATUS_REBALANCE taosRLockLatch(&pConsumer->lock); int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics); @@ -660,7 +659,7 @@ int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) { tstrncpy(pConsumerNew->clientId, subscribe.clientId, tListLen(pConsumerNew->clientId)); // set the update type - pConsumerNew->updateType = CONSUMER_UPDATE__MODIFY; + pConsumerNew->updateType = CONSUMER_UPDATE__REBALANCE; taosArrayDestroy(pConsumerNew->assignedTopics); pConsumerNew->assignedTopics = taosArrayDup(pTopicList, topicNameDup); @@ -691,7 +690,7 @@ int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) { } // set the update type - pConsumerNew->updateType = CONSUMER_UPDATE__MODIFY; + pConsumerNew->updateType = CONSUMER_UPDATE__REBALANCE; 
taosArrayDestroy(pConsumerNew->assignedTopics); pConsumerNew->assignedTopics = taosArrayDup(pTopicList, topicNameDup); @@ -870,9 +869,10 @@ static void updateConsumerStatus(SMqConsumerObj *pConsumer) { int32_t status = pConsumer->status; if (taosArrayGetSize(pConsumer->rebNewTopics) == 0 && taosArrayGetSize(pConsumer->rebRemovedTopics) == 0) { - if (status == MQ_CONSUMER_STATUS__MODIFY) { + if (status == MQ_CONSUMER_STATUS_REBALANCE) { pConsumer->status = MQ_CONSUMER_STATUS__READY; } else if (status == MQ_CONSUMER_STATUS__LOST) { + ASSERT(taosArrayGetSize(pConsumer->currentTopics) == 0 && taosArrayGetSize(pConsumer->assignedTopics) == 0); pConsumer->status = MQ_CONSUMER_STATUS__LOST_REBD; } } @@ -913,21 +913,13 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, taosWLockLatch(&pOldConsumer->lock); - if (pNewConsumer->updateType == CONSUMER_UPDATE__MODIFY) { - SArray *tmp = pOldConsumer->rebNewTopics; - pOldConsumer->rebNewTopics = pNewConsumer->rebNewTopics; - pNewConsumer->rebNewTopics = tmp; - - tmp = pOldConsumer->rebRemovedTopics; - pOldConsumer->rebRemovedTopics = pNewConsumer->rebRemovedTopics; - pNewConsumer->rebRemovedTopics = tmp; - - tmp = pOldConsumer->assignedTopics; - pOldConsumer->assignedTopics = pNewConsumer->assignedTopics; - pNewConsumer->assignedTopics = tmp; + if (pNewConsumer->updateType == CONSUMER_UPDATE__REBALANCE) { + TSWAP(pOldConsumer->rebNewTopics, pNewConsumer->rebNewTopics); + TSWAP(pOldConsumer->rebRemovedTopics, pNewConsumer->rebRemovedTopics); + TSWAP(pOldConsumer->assignedTopics, pNewConsumer->assignedTopics); pOldConsumer->subscribeTime = pNewConsumer->upTime; - pOldConsumer->status = MQ_CONSUMER_STATUS__MODIFY; + pOldConsumer->status = MQ_CONSUMER_STATUS_REBALANCE; } else if (pNewConsumer->updateType == CONSUMER_UPDATE__LOST) { int32_t sz = taosArrayGetSize(pOldConsumer->currentTopics); for (int32_t i = 0; i < sz; i++) { @@ -937,10 +929,10 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, 
SMqConsumerObj *pOldConsumer, pOldConsumer->rebalanceTime = pNewConsumer->upTime; - int32_t status = pOldConsumer->status; + int32_t prevStatus = pOldConsumer->status; pOldConsumer->status = MQ_CONSUMER_STATUS__LOST; mDebug("consumer:0x%" PRIx64 " state %s -> %s, reb-time:%" PRId64 ", reb-removed-topics:%d", - pOldConsumer->consumerId, mndConsumerStatusName(status), mndConsumerStatusName(pOldConsumer->status), + pOldConsumer->consumerId, mndConsumerStatusName(prevStatus), mndConsumerStatusName(pOldConsumer->status), pOldConsumer->rebalanceTime, (int)taosArrayGetSize(pOldConsumer->rebRemovedTopics)); } else if (pNewConsumer->updateType == CONSUMER_UPDATE__RECOVER) { int32_t sz = taosArrayGetSize(pOldConsumer->assignedTopics); @@ -950,8 +942,7 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, } pOldConsumer->rebalanceTime = pNewConsumer->upTime; - - pOldConsumer->status = MQ_CONSUMER_STATUS__MODIFY; + pOldConsumer->status = MQ_CONSUMER_STATUS_REBALANCE; } else if (pNewConsumer->updateType == CONSUMER_UPDATE__TOUCH) { atomic_add_fetch_32(&pOldConsumer->epoch, 1); @@ -1160,7 +1151,7 @@ static const char *mndConsumerStatusName(int status) { case MQ_CONSUMER_STATUS__LOST: case MQ_CONSUMER_STATUS__LOST_REBD: return "lost"; - case MQ_CONSUMER_STATUS__MODIFY: + case MQ_CONSUMER_STATUS_REBALANCE: return "rebalancing"; default: return "unknown"; diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index c69f08eb6b..6dab018236 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -225,7 +225,7 @@ SMqConsumerObj *tNewSMqConsumerObj(int64_t consumerId, char cgroup[TSDB_CGROUP_L memcpy(pConsumer->cgroup, cgroup, TSDB_CGROUP_LEN); pConsumer->epoch = 0; - pConsumer->status = MQ_CONSUMER_STATUS__MODIFY; + pConsumer->status = MQ_CONSUMER_STATUS_REBALANCE; pConsumer->hbStatus = 0; taosInitRWLatch(&pConsumer->lock); From 5b25920fec087cdda4245d8a2d6994fa756d6f2b Mon Sep 17 
00:00:00 2001 From: Haojun Liao Date: Tue, 2 May 2023 23:19:32 +0800 Subject: [PATCH 031/110] refactor: do some internal refactor. --- source/dnode/mnode/impl/src/mndConsumer.c | 65 +++++++++++++--------- source/dnode/mnode/impl/src/mndSubscribe.c | 31 ++++++----- 2 files changed, 57 insertions(+), 39 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 16ed158fed..ffb343ef22 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -672,7 +672,6 @@ int32_t mndProcessSubscribeReq(SRpcMsg *pMsg) { if (mndTransPrepare(pMnode, pTrans) != 0) goto _over; } else { - /*taosRLockLatch(&pExistedConsumer->lock);*/ int32_t status = atomic_load_32(&pExistedConsumer->status); mInfo("receive subscribe request from existed consumer:0x%" PRIx64 @@ -881,7 +880,7 @@ static void updateConsumerStatus(SMqConsumerObj *pConsumer) { // remove from new topic static void removeFromNewTopicList(SMqConsumerObj *pConsumer, const char *pTopic) { int32_t size = taosArrayGetSize(pConsumer->rebNewTopics); - for (int32_t i = 0; i < taosArrayGetSize(pConsumer->rebNewTopics); i++) { + for (int32_t i = 0; i < size; i++) { char *p = taosArrayGetP(pConsumer->rebNewTopics, i); if (strcmp(pTopic, p) == 0) { taosArrayRemove(pConsumer->rebNewTopics, i); @@ -902,11 +901,44 @@ static void removeFromRemoveTopicList(SMqConsumerObj *pConsumer, const char *pTo if (strcmp(pTopic, p) == 0) { taosArrayRemove(pConsumer->rebRemovedTopics, i); taosMemoryFree(p); + + mDebug("consumer:0x%" PRIx64 " remove topic:%s in the removed topic list, remain removedTopics:%d", + pConsumer->consumerId, pTopic, (int)taosArrayGetSize(pConsumer->rebRemovedTopics)); break; } } } +static void removeFromCurrentTopicList(SMqConsumerObj *pConsumer, const char *pTopic) { + int32_t sz = taosArrayGetSize(pConsumer->currentTopics); + for (int32_t i = 0; i < sz; i++) { + char *topic = taosArrayGetP(pConsumer->currentTopics, i); + if 
(strcmp(pTopic, topic) == 0) { + taosArrayRemove(pConsumer->currentTopics, i); + taosMemoryFree(topic); + + mDebug("consumer:0x%" PRIx64 " remove topic:%s in the current topic list, remain currentTopics:%d", + pConsumer->consumerId, pTopic, (int)taosArrayGetSize(pConsumer->currentTopics)); + break; + } + } +} + +static bool existInCurrentTopicList(const SMqConsumerObj* pConsumer, const char* pTopic) { + bool existing = false; + int32_t size = taosArrayGetSize(pConsumer->currentTopics); + for (int32_t i = 0; i < size; i++) { + char *topic = taosArrayGetP(pConsumer->currentTopics, i); + + if (strcmp(topic, pTopic) == 0) { + existing = true; + break; + } + } + + return existing; +} + static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, SMqConsumerObj *pNewConsumer) { mDebug("consumer:0x%" PRIx64 " perform update action, update type:%d, subscribe-time:%" PRId64 ", uptime:%" PRId64, pOldConsumer->consumerId, pNewConsumer->updateType, pOldConsumer->subscribeTime, pOldConsumer->upTime); @@ -951,24 +983,16 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, } else if (pNewConsumer->updateType == CONSUMER_UPDATE__ADD) { char *pNewTopic = taosStrdup(taosArrayGetP(pNewConsumer->rebNewTopics, 0)); - // not exist in current topic - bool existing = false; - int32_t numOfExistedTopics = taosArrayGetSize(pOldConsumer->currentTopics); - for (int32_t i = 0; i < numOfExistedTopics; i++) { - char *topic = taosArrayGetP(pOldConsumer->currentTopics, i); - if (strcmp(topic, pNewTopic) == 0) { - existing = true; - } - } - + // check if exist in current topic removeFromNewTopicList(pOldConsumer, pNewTopic); // add to current topic - if (!existing) { + bool existing = existInCurrentTopicList(pOldConsumer, pNewTopic); + if (existing) { + taosMemoryFree(pNewTopic); + } else { // added into current topic list taosArrayPush(pOldConsumer->currentTopics, &pNewTopic); taosArraySort(pOldConsumer->currentTopics, taosArrayCompareString); - } 
else { - taosMemoryFree(pNewTopic); } // set status @@ -993,16 +1017,7 @@ static int32_t mndConsumerActionUpdate(SSdb *pSdb, SMqConsumerObj *pOldConsumer, removeFromRemoveTopicList(pOldConsumer, removedTopic); // remove from current topic - int32_t i = 0; - int32_t sz = taosArrayGetSize(pOldConsumer->currentTopics); - for (i = 0; i < sz; i++) { - char *topic = taosArrayGetP(pOldConsumer->currentTopics, i); - if (strcmp(removedTopic, topic) == 0) { - taosArrayRemove(pOldConsumer->currentTopics, i); - taosMemoryFree(topic); - break; - } - } + removeFromCurrentTopicList(pOldConsumer, removedTopic); // set status int32_t status = pOldConsumer->status; diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 015c497de1..b3cf6c9701 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -213,13 +213,9 @@ static void doRemoveExistedConsumers(SMqRebOutputObj *pOutput, SHashObj *pHash, int32_t consumerVgNum = taosArrayGetSize(pConsumerEp->vgs); for (int32_t j = 0; j < consumerVgNum; j++) { - SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j); - SMqRebOutputVg outputVg = { - .oldConsumerId = consumerId, - .newConsumerId = -1, - .pVgEp = pVgEp, - }; + SMqVgEp *pVgEp = taosArrayGetP(pConsumerEp->vgs, j); + SMqRebOutputVg outputVg = {.oldConsumerId = consumerId, .newConsumerId = -1, .pVgEp = pVgEp}; taosHashPut(pHash, &pVgEp->vgId, sizeof(int32_t), &outputVg, sizeof(SMqRebOutputVg)); mInfo("sub:%s mq re-balance remove vgId:%d from consumer:%" PRIx64, pSubKey, pVgEp->vgId, consumerId); } @@ -584,16 +580,11 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { // here we only handle one topic rebalance requirement to ensure the atomic execution of this transaction. 
while (1) { -// if (rebalanceOnce) { -// break; -// } - pIter = taosHashIterate(pReq->rebSubHash, pIter); if (pIter == NULL) { break; } - // todo handle the malloc failure SMqRebInputObj rebInput = {0}; SMqRebOutputObj rebOutput = {0}; rebOutput.newConsumers = taosArrayInit(0, sizeof(int64_t)); @@ -601,6 +592,20 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { rebOutput.modifyConsumers = taosArrayInit(0, sizeof(int64_t)); rebOutput.rebVgs = taosArrayInit(0, sizeof(SMqRebOutputVg)); + if (rebOutput.newConsumers == NULL || rebOutput.removedConsumers == NULL || rebOutput.modifyConsumers == NULL || + rebOutput.rebVgs == NULL) { + taosArrayDestroy(rebOutput.newConsumers); + taosArrayDestroy(rebOutput.removedConsumers); + taosArrayDestroy(rebOutput.modifyConsumers); + taosArrayDestroy(rebOutput.rebVgs); + + terrno = TSDB_CODE_OUT_OF_MEMORY; + mInfo("mq re-balance failed, due to out of memory"); + taosHashCleanup(pReq->rebSubHash); + mndRebEnd(); + return -1; + } + SMqRebInfo *pRebInfo = (SMqRebInfo *)pIter; SMqSubscribeObj *pSub = mndAcquireSubscribeByKey(pMnode, pRebInfo->key); @@ -640,6 +645,7 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { rebInput.oldConsumerNum = taosHashGetSize(pSub->consumerHash); rebOutput.pSub = tCloneSubscribeObj(pSub); taosRUnLockLatch(&pSub->lock); + mInfo("sub topic:%s has %d consumers sub till now", pRebInfo->key, rebInput.oldConsumerNum); mndReleaseSubscribe(pMnode, pSub); } @@ -661,9 +667,6 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { taosArrayDestroy(rebOutput.rebVgs); tDeleteSubscribeObj(rebOutput.pSub); taosMemoryFree(rebOutput.pSub); - -// taosSsleep(100); -// rebalanceOnce = true; } // reset flag From a53c181b98f140e2d9e2d4ece604b31694896289 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 4 May 2023 10:04:44 +0800 Subject: [PATCH 032/110] fix(tmq): remove invalid assert --- source/dnode/mnode/impl/src/mndConsumer.c | 20 ++++++++++++--- source/dnode/mnode/impl/src/mndSubscribe.c | 29 
++++++++++++++-------- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index ffb343ef22..0fc9aae59a 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -196,12 +196,14 @@ FAIL: static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; SMqConsumerClearMsg *pClearMsg = pMsg->pCont; - SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, pClearMsg->consumerId); + + SMqConsumerObj *pConsumer = mndAcquireConsumer(pMnode, pClearMsg->consumerId); if (pConsumer == NULL) { + mError("consumer:0x%"PRIx64" failed to be found to clear it", pClearMsg->consumerId); return 0; } - mInfo("receive consumer clear msg, consumer id %" PRId64 ", status %s", pClearMsg->consumerId, + mInfo("consumer:0x%" PRIx64 " needs to be cleared, status %s", pClearMsg->consumerId, mndConsumerStatusName(pConsumer->status)); if (pConsumer->status != MQ_CONSUMER_STATUS__LOST_REBD) { @@ -216,6 +218,8 @@ static int32_t mndProcessConsumerClearMsg(SRpcMsg *pMsg) { STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_NOTHING, pMsg, "clear-csm"); if (pTrans == NULL) goto FAIL; + + // this is the drop action, not the update action if (mndSetConsumerDropLogs(pMnode, pTrans, pConsumerNew) != 0) goto FAIL; if (mndTransPrepare(pMnode, pTrans) != 0) goto FAIL; @@ -300,6 +304,11 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { if (status == MQ_CONSUMER_STATUS__READY) { if (hbStatus > MND_CONSUMER_LOST_HB_CNT) { SMqConsumerLostMsg *pLostMsg = rpcMallocCont(sizeof(SMqConsumerLostMsg)); + if (pLostMsg == NULL) { + mError("consumer:0x%"PRIx64" failed to transfer consumer status to lost due to out of memory. 
alloc size:%d", + pConsumer->consumerId, sizeof(SMqConsumerLostMsg)); + continue; + } pLostMsg->consumerId = pConsumer->consumerId; SRpcMsg rpcMsg = { @@ -313,6 +322,11 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { // if the client is lost longer than one day, clear it. Otherwise, do nothing about the lost consumers. if (hbStatus > MND_CONSUMER_LOST_CLEAR_THRESHOLD) { SMqConsumerClearMsg *pClearMsg = rpcMallocCont(sizeof(SMqConsumerClearMsg)); + if (pClearMsg == NULL) { + mError("consumer:0x%"PRIx64" failed to clear consumer due to out of memory. alloc size:%d", + pConsumer->consumerId, sizeof(SMqConsumerClearMsg)); + continue; + } pClearMsg->consumerId = pConsumer->consumerId; SRpcMsg rpcMsg = { @@ -871,7 +885,7 @@ static void updateConsumerStatus(SMqConsumerObj *pConsumer) { if (status == MQ_CONSUMER_STATUS_REBALANCE) { pConsumer->status = MQ_CONSUMER_STATUS__READY; } else if (status == MQ_CONSUMER_STATUS__LOST) { - ASSERT(taosArrayGetSize(pConsumer->currentTopics) == 0 && taosArrayGetSize(pConsumer->assignedTopics) == 0); + ASSERT(taosArrayGetSize(pConsumer->currentTopics) == 0); pConsumer->status = MQ_CONSUMER_STATUS__LOST_REBD; } } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index b3cf6c9701..573c60549e 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -480,14 +480,16 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu for (int32_t i = 0; i < vgNum; i++) { SMqRebOutputVg *pRebVg = taosArrayGet(rebVgs, i); if (mndPersistSubChangeVgReq(pMnode, pTrans, pOutput->pSub, pRebVg) < 0) { - goto REB_FAIL; + mndTransDrop(pTrans); + return -1; } } // 2. redo log: subscribe and vg assignment // subscribe if (mndSetSubCommitLogs(pMnode, pTrans, pOutput->pSub) != 0) { - goto REB_FAIL; + mndTransDrop(pTrans); + return -1; } // 3. 
commit log: consumer to update status and epoch @@ -502,11 +504,15 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) { tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); - goto REB_FAIL; + + mndTransDrop(pTrans); + return -1; } + tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); } + // 3.2 set new consumer consumerNum = taosArrayGetSize(pOutput->newConsumers); for (int32_t i = 0; i < consumerNum; i++) { @@ -523,8 +529,11 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) { tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); - goto REB_FAIL; + + mndTransDrop(pTrans); + return -1; } + tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); } @@ -545,8 +554,11 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu if (mndSetConsumerCommitLogs(pMnode, pTrans, pConsumerNew) != 0) { tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); - goto REB_FAIL; + + mndTransDrop(pTrans); + return -1; } + tDeleteSMqConsumerObj(pConsumerNew); taosMemoryFree(pConsumerNew); } @@ -559,15 +571,12 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu // 6. execution if (mndTransPrepare(pMnode, pTrans) != 0) { mError("failed to prepare trans rebalance since %s", terrstr()); - goto REB_FAIL; + mndTransDrop(pTrans); + return -1; } mndTransDrop(pTrans); return 0; - -REB_FAIL: - mndTransDrop(pTrans); - return -1; } static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { From 0e1c261418b6eaffc61fc6c52584cb195773fe5b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 4 May 2023 10:10:56 +0800 Subject: [PATCH 033/110] fix(tmq): fix syntax error. 
--- source/dnode/mnode/impl/src/mndConsumer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 0fc9aae59a..33081cd8cc 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -324,7 +324,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { SMqConsumerClearMsg *pClearMsg = rpcMallocCont(sizeof(SMqConsumerClearMsg)); if (pClearMsg == NULL) { mError("consumer:0x%"PRIx64" failed to clear consumer due to out of memory. alloc size:%d", - pConsumer->consumerId, sizeof(SMqConsumerClearMsg)); + pConsumer->consumerId, (int32_t)sizeof(SMqConsumerClearMsg)); continue; } From 66b279abb71d3c843f7b8e2170ad6729b80d7519 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 4 May 2023 10:12:10 +0800 Subject: [PATCH 034/110] fix(query):fix syntax error. --- source/dnode/mnode/impl/src/mndConsumer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 33081cd8cc..cd7c8beaa7 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -306,7 +306,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { SMqConsumerLostMsg *pLostMsg = rpcMallocCont(sizeof(SMqConsumerLostMsg)); if (pLostMsg == NULL) { mError("consumer:0x%"PRIx64" failed to transfer consumer status to lost due to out of memory. 
alloc size:%d", - pConsumer->consumerId, sizeof(SMqConsumerLostMsg)); + pConsumer->consumerId, (int32_t)sizeof(SMqConsumerLostMsg)); continue; } From b1f3a010df982b2656929cab2394f83e1db2bfe6 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 4 May 2023 13:58:45 +0800 Subject: [PATCH 035/110] fix:change field bytes if length is bigger than 1024 --- source/client/src/clientSml.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 57458ff8f7..d6642dfe8d 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -558,10 +558,15 @@ static int32_t smlGenerateSchemaAction(SSchema *colField, SHashObj *colHash, SSm return 0; } +#define BOUNDARY 1024 static int32_t smlFindNearestPowerOf2(int32_t length, uint8_t type) { int32_t result = 1; - while (result <= length) { - result *= 2; + if (length >= BOUNDARY){ + result = length; + }else{ + while (result <= length) { + result *= 2; + } } if (type == TSDB_DATA_TYPE_BINARY && result > TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE) { result = TSDB_MAX_BINARY_LEN - VARSTR_HEADER_SIZE; @@ -657,7 +662,7 @@ static int32_t smlBuildFieldsList(SSmlHandle *info, SSchema *schemaField, SHashO len += field->bytes; } if(len > maxLen){ - return TSDB_CODE_TSC_INVALID_VALUE; + return isTag ? TSDB_CODE_PAR_INVALID_TAGS_LENGTH : TSDB_CODE_PAR_INVALID_ROW_LENGTH; } return TSDB_CODE_SUCCESS; From 09a04052ecf8920d2f4c22e67960e806ec53a9e3 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 4 May 2023 16:15:14 +0800 Subject: [PATCH 036/110] refactor: do some internal refactor. 
--- include/common/tcommon.h | 8 - include/common/tmsg.h | 10 +- include/libs/executor/executor.h | 2 - include/libs/wal/wal.h | 2 - source/client/src/clientMain.c | 4 +- source/client/src/clientStmt.c | 4 +- source/client/src/clientTmq.c | 4 +- source/client/test/clientTests.cpp | 10 +- source/common/src/tdatablock.c | 4 +- source/common/src/tdataformat.c | 4 +- source/common/src/tmsg.c | 14 +- source/dnode/vnode/inc/vnode.h | 5 +- source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/meta/metaQuery.c | 1 - source/dnode/vnode/src/meta/metaSnapshot.c | 4 +- source/dnode/vnode/src/meta/metaTable.c | 4 +- source/dnode/vnode/src/sma/smaRollup.c | 4 +- source/dnode/vnode/src/sma/smaTimeRange.c | 8 +- source/dnode/vnode/src/tq/tqPush.c | 2 +- source/dnode/vnode/src/tq/tqRead.c | 391 ++++++++++++--------- source/dnode/vnode/src/tq/tqScan.c | 4 +- source/dnode/vnode/src/tq/tqSink.c | 14 +- source/dnode/vnode/src/tq/tqUtil.c | 2 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 12 +- source/libs/executor/src/dataInserter.c | 12 +- source/libs/executor/src/executor.c | 16 +- source/libs/executor/src/querytask.c | 6 +- source/libs/executor/src/scanoperator.c | 18 +- source/libs/parser/src/parInsertUtil.c | 8 +- source/libs/stream/src/streamTask.c | 2 +- 30 files changed, 305 insertions(+), 276 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 8b4d5f16df..2bc67e439f 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -212,14 +212,6 @@ enum { FETCH_TYPE__NONE, }; -typedef struct { - int8_t fetchType; - union { - SSDataBlock data; - void* meta; - }; -} SFetchRet; - typedef struct SVarColAttr { int32_t* offset; // start position for each entry in the list uint32_t length; // used buffer size that contain the valid data diff --git a/include/common/tmsg.h b/include/common/tmsg.h index dc997221e8..1ea9714bf9 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -415,7 +415,7 @@ static FORCE_INLINE 
SSchemaWrapper* tCloneSSchemaWrapper(const SSchemaWrapper* p return pSW; } -static FORCE_INLINE void tDeleteSSchemaWrapper(SSchemaWrapper* pSchemaWrapper) { +static FORCE_INLINE void tDeleteSchemaWrapper(SSchemaWrapper* pSchemaWrapper) { if (pSchemaWrapper) { taosMemoryFree(pSchemaWrapper->pSchema); taosMemoryFree(pSchemaWrapper); @@ -3421,10 +3421,10 @@ typedef struct { char data[]; // SSubmitReq2 } SSubmitReq2Msg; -int32_t tEncodeSSubmitReq2(SEncoder* pCoder, const SSubmitReq2* pReq); -int32_t tDecodeSSubmitReq2(SDecoder* pCoder, SSubmitReq2* pReq); -void tDestroySSubmitTbData(SSubmitTbData* pTbData, int32_t flag); -void tDestroySSubmitReq(SSubmitReq2* pReq, int32_t flag); +int32_t tEncodeSubmitReq(SEncoder* pCoder, const SSubmitReq2* pReq); +int32_t tDecodeSubmitReq(SDecoder* pCoder, SSubmitReq2* pReq); +void tDestroySubmitTbData(SSubmitTbData* pTbData, int32_t flag); +void tDestroySubmitReq(SSubmitReq2* pReq, int32_t flag); typedef struct { int32_t affectedRows; diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 61eca6cc4f..1fb00e743f 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -192,8 +192,6 @@ SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo); int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subType); -int32_t qStreamSetScanMemData(qTaskInfo_t tinfo, SPackedData submit); - void qStreamSetOpen(qTaskInfo_t tinfo); void qStreamExtractOffset(qTaskInfo_t tinfo, STqOffsetVal* pOffset); diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index b51289de5e..d3e2bbb1be 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -147,8 +147,6 @@ typedef struct SWalReader { int64_t curFileFirstVer; int64_t curVersion; int64_t capacity; -// int8_t curInvalid; -// int8_t curStopped; TdThreadMutex mutex; SWalFilterCond cond; // TODO remove it diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 2ebc8e7379..fd70598efb 
100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -191,7 +191,7 @@ void taos_free_result(TAOS_RES *res) { taosArrayDestroyP(pRsp->rsp.blockData, taosMemoryFree); taosArrayDestroy(pRsp->rsp.blockDataLen); taosArrayDestroyP(pRsp->rsp.blockTbName, taosMemoryFree); - taosArrayDestroyP(pRsp->rsp.blockSchema, (FDelete)tDeleteSSchemaWrapper); + taosArrayDestroyP(pRsp->rsp.blockSchema, (FDelete)tDeleteSchemaWrapper); // taosx taosArrayDestroy(pRsp->rsp.createTableLen); taosArrayDestroyP(pRsp->rsp.createTableReq, taosMemoryFree); @@ -204,7 +204,7 @@ void taos_free_result(TAOS_RES *res) { taosArrayDestroyP(pRsp->rsp.blockData, taosMemoryFree); taosArrayDestroy(pRsp->rsp.blockDataLen); taosArrayDestroyP(pRsp->rsp.blockTbName, taosMemoryFree); - taosArrayDestroyP(pRsp->rsp.blockSchema, (FDelete)tDeleteSSchemaWrapper); + taosArrayDestroyP(pRsp->rsp.blockSchema, (FDelete)tDeleteSchemaWrapper); pRsp->resInfo.pRspMsg = NULL; doFreeReqResultInfo(&pRsp->resInfo); taosMemoryFree(pRsp); diff --git a/source/client/src/clientStmt.c b/source/client/src/clientStmt.c index 6e529f1a0b..975b304bf4 100644 --- a/source/client/src/clientStmt.c +++ b/source/client/src/clientStmt.c @@ -325,7 +325,7 @@ int32_t stmtCleanExecInfo(STscStmt* pStmt, bool keepTable, bool deepClean) { taosHashCleanup(pStmt->exec.pBlockHash); pStmt->exec.pBlockHash = NULL; - tDestroySSubmitTbData(pStmt->exec.pCurrTbData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitTbData(pStmt->exec.pCurrTbData, TSDB_MSG_FLG_ENCODE); taosMemoryFreeClear(pStmt->exec.pCurrTbData); STMT_ERR_RET(stmtCleanBindInfo(pStmt)); @@ -895,7 +895,7 @@ int stmtExec(TAOS_STMT* stmt) { if (STMT_TYPE_QUERY == pStmt->sql.type) { launchQueryImpl(pStmt->exec.pRequest, pStmt->sql.pQuery, true, NULL); } else { - tDestroySSubmitTbData(pStmt->exec.pCurrTbData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitTbData(pStmt->exec.pCurrTbData, TSDB_MSG_FLG_ENCODE); taosMemoryFreeClear(pStmt->exec.pCurrTbData); 
STMT_ERR_RET(qCloneCurrentTbData(pStmt->exec.pCurrBlock, &pStmt->exec.pCurrTbData)); diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index b5ae9116ef..b488af9ba1 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -864,7 +864,7 @@ static void* tmqFreeRspWrapper(SMqRspWrapper* rspWrapper) { taosArrayDestroyP(pRsp->dataRsp.blockData, taosMemoryFree); taosArrayDestroy(pRsp->dataRsp.blockDataLen); taosArrayDestroyP(pRsp->dataRsp.blockTbName, taosMemoryFree); - taosArrayDestroyP(pRsp->dataRsp.blockSchema, (FDelete)tDeleteSSchemaWrapper); + taosArrayDestroyP(pRsp->dataRsp.blockSchema, (FDelete)tDeleteSchemaWrapper); } else if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__POLL_META_RSP) { SMqPollRspWrapper* pRsp = (SMqPollRspWrapper*)rspWrapper; taosMemoryFreeClear(pRsp->pEpset); @@ -877,7 +877,7 @@ static void* tmqFreeRspWrapper(SMqRspWrapper* rspWrapper) { taosArrayDestroyP(pRsp->taosxRsp.blockData, taosMemoryFree); taosArrayDestroy(pRsp->taosxRsp.blockDataLen); taosArrayDestroyP(pRsp->taosxRsp.blockTbName, taosMemoryFree); - taosArrayDestroyP(pRsp->taosxRsp.blockSchema, (FDelete)tDeleteSSchemaWrapper); + taosArrayDestroyP(pRsp->taosxRsp.blockSchema, (FDelete)tDeleteSchemaWrapper); // taosx taosArrayDestroy(pRsp->taosxRsp.createTableLen); taosArrayDestroyP(pRsp->taosxRsp.createTableReq, taosMemoryFree); diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index b9062fc8ff..56f68e5972 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -1053,9 +1053,9 @@ TEST(clientCase, sub_db_test) { } TEST(clientCase, sub_tb_test) { - taos_options(TSDB_OPTION_CONFIGDIR, "/home/tests/dir/cfg/"); + taos_options(TSDB_OPTION_CONFIGDIR, "~/first/cfg"); - TAOS* pConn = taos_connect("vm116", "root", "taosdata", NULL, 0); + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); ASSERT_NE(pConn, nullptr); tmq_conf_t* conf = tmq_conf_new(); @@ -1091,7 
+1091,7 @@ TEST(clientCase, sub_tb_test) { int32_t precision = 0; int32_t totalRows = 0; int32_t msgCnt = 0; - int32_t timeout = 25000; + int32_t timeout = 2500000; int32_t count = 0; @@ -1117,10 +1117,10 @@ TEST(clientCase, sub_tb_test) { fields = taos_fetch_fields(pRes); numOfFields = taos_field_count(pRes); totalRows += 1; - if (totalRows % 100000 == 0) { +// if (totalRows % 100000 == 0) { taos_print_row(buf, row, fields, numOfFields); printf("row content: %s\n", buf); - } +// } } taos_free_result(pRes); diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 3558feaa66..fc7cbc19c0 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -2374,7 +2374,7 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq2** ppReq, const SSDataBlock* pDat } SRow* pRow = NULL; if ((terrno = tRowBuild(pVals, pTSchema, &pRow)) < 0) { - tDestroySSubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); goto _end; } ASSERT(pRow); @@ -2388,7 +2388,7 @@ _end: if (terrno != 0) { *ppReq = NULL; if (pReq) { - tDestroySSubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFreeClear(pReq); } diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index f379084cf5..b18bd882ae 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -1509,7 +1509,9 @@ void tTagSetCid(const STag *pTag, int16_t iTag, int16_t cid) { // STSchema ======================================== STSchema *tBuildTSchema(SSchema *aSchema, int32_t numOfCols, int32_t version) { STSchema *pTSchema = taosMemoryCalloc(1, sizeof(STSchema) + sizeof(STColumn) * numOfCols); - if (pTSchema == NULL) return NULL; + if (pTSchema == NULL) { + return NULL; + } pTSchema->numOfCols = numOfCols; pTSchema->version = version; diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 8b14b7fbe7..7102e556cc 100644 --- a/source/common/src/tmsg.c +++ 
b/source/common/src/tmsg.c @@ -7058,7 +7058,7 @@ void tDeleteSMqDataRsp(SMqDataRsp *pRsp) { pRsp->blockDataLen = taosArrayDestroy(pRsp->blockDataLen); taosArrayDestroyP(pRsp->blockData, (FDelete)taosMemoryFree); pRsp->blockData = NULL; - taosArrayDestroyP(pRsp->blockSchema, (FDelete)tDeleteSSchemaWrapper); + taosArrayDestroyP(pRsp->blockSchema, (FDelete)tDeleteSchemaWrapper); pRsp->blockSchema = NULL; taosArrayDestroyP(pRsp->blockTbName, (FDelete)taosMemoryFree); pRsp->blockTbName = NULL; @@ -7159,7 +7159,7 @@ void tDeleteSTaosxRsp(STaosxRsp *pRsp) { pRsp->blockDataLen = NULL; taosArrayDestroyP(pRsp->blockData, (FDelete)taosMemoryFree); pRsp->blockData = NULL; - taosArrayDestroyP(pRsp->blockSchema, (FDelete)tDeleteSSchemaWrapper); + taosArrayDestroyP(pRsp->blockSchema, (FDelete)tDeleteSchemaWrapper); pRsp->blockSchema = NULL; taosArrayDestroyP(pRsp->blockTbName, (FDelete)taosMemoryFree); pRsp->blockTbName = NULL; @@ -7332,7 +7332,7 @@ _exit: return 0; } -int32_t tEncodeSSubmitReq2(SEncoder *pCoder, const SSubmitReq2 *pReq) { +int32_t tEncodeSubmitReq(SEncoder *pCoder, const SSubmitReq2 *pReq) { if (tStartEncode(pCoder) < 0) return -1; if (tEncodeU64v(pCoder, taosArrayGetSize(pReq->aSubmitTbData)) < 0) return -1; @@ -7344,7 +7344,7 @@ int32_t tEncodeSSubmitReq2(SEncoder *pCoder, const SSubmitReq2 *pReq) { return 0; } -int32_t tDecodeSSubmitReq2(SDecoder *pCoder, SSubmitReq2 *pReq) { +int32_t tDecodeSubmitReq(SDecoder *pCoder, SSubmitReq2 *pReq) { int32_t code = 0; memset(pReq, 0, sizeof(*pReq)); @@ -7387,7 +7387,7 @@ _exit: return code; } -void tDestroySSubmitTbData(SSubmitTbData *pTbData, int32_t flag) { +void tDestroySubmitTbData(SSubmitTbData *pTbData, int32_t flag) { if (NULL == pTbData) { return; } @@ -7433,14 +7433,14 @@ void tDestroySSubmitTbData(SSubmitTbData *pTbData, int32_t flag) { } } -void tDestroySSubmitReq(SSubmitReq2 *pReq, int32_t flag) { +void tDestroySubmitReq(SSubmitReq2 *pReq, int32_t flag) { if (pReq->aSubmitTbData == NULL) return; int32_t 
nSubmitTbData = TARRAY_SIZE(pReq->aSubmitTbData); SSubmitTbData *aSubmitTbData = (SSubmitTbData *)TARRAY_DATA(pReq->aSubmitTbData); for (int32_t i = 0; i < nSubmitTbData; i++) { - tDestroySSubmitTbData(&aSubmitTbData[i], flag); + tDestroySubmitTbData(&aSubmitTbData[i], flag); } taosArrayDestroy(pReq->aSubmitTbData); pReq->aSubmitTbData = NULL; diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 002dcda488..817e6fdae4 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -231,7 +231,7 @@ typedef struct SSnapContext { } SSnapContext; typedef struct STqReader { - SPackedData msg2; + SPackedData msg; SSubmitReq2 submit; int32_t nextBlk; int64_t lastBlkUid; @@ -242,7 +242,7 @@ typedef struct STqReader { int32_t cachedSchemaVer; int64_t cachedSchemaSuid; SSchemaWrapper *pSchemaWrapper; - STSchema *pSchema; + SSDataBlock *pResBlock; } STqReader; STqReader *tqReaderOpen(SVnode *pVnode); @@ -255,6 +255,7 @@ int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList); int32_t tqSeekVer(STqReader *pReader, int64_t ver, const char *id); int32_t tqNextBlock(STqReader *pReader, SSDataBlock* pBlock); +int32_t tqNextBlockInWal(STqReader* pReader); int32_t extractSubmitMsgFromWal(SWalReader *pReader, SPackedData *pPackedData); int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 7668d45108..eb2787595b 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -193,7 +193,7 @@ STQ* tqOpen(const char* path, SVnode* pVnode); void tqNotifyClose(STQ*); void tqClose(STQ*); int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); -int tqRegisterPushEntry(STQ* pTq, void* handle, SRpcMsg* pMsg); +int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg); int tqUnregisterPushHandle(STQ* pTq, void* pHandle); int 
tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. diff --git a/source/dnode/vnode/src/meta/metaQuery.c b/source/dnode/vnode/src/meta/metaQuery.c index 2359a165b7..d464f64de3 100644 --- a/source/dnode/vnode/src/meta/metaQuery.c +++ b/source/dnode/vnode/src/meta/metaQuery.c @@ -639,7 +639,6 @@ tb_uid_t metaStbCursorNext(SMStbCursor *pStbCur) { STSchema *metaGetTbTSchema(SMeta *pMeta, tb_uid_t uid, int32_t sver, int lock) { STSchema *pTSchema = NULL; SSchemaWrapper *pSW = NULL; - SSchema *pSchema = NULL; pSW = metaGetTableSchema(pMeta, uid, sver, lock); if (!pSW) return NULL; diff --git a/source/dnode/vnode/src/meta/metaSnapshot.c b/source/dnode/vnode/src/meta/metaSnapshot.c index 56e802d4fb..707dd66e30 100644 --- a/source/dnode/vnode/src/meta/metaSnapshot.c +++ b/source/dnode/vnode/src/meta/metaSnapshot.c @@ -217,8 +217,8 @@ typedef struct STableInfoForChildTable { static void destroySTableInfoForChildTable(void* data) { STableInfoForChildTable* pData = (STableInfoForChildTable*)data; taosMemoryFree(pData->tableName); - tDeleteSSchemaWrapper(pData->schemaRow); - tDeleteSSchemaWrapper(pData->tagRow); + tDeleteSchemaWrapper(pData->schemaRow); + tDeleteSchemaWrapper(pData->tagRow); } static void MoveToSnapShotVersion(SSnapContext* ctx) { diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index 96eec89127..83f2ece571 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -673,8 +673,8 @@ int metaDropIndexFromSTable(SMeta *pMeta, int64_t version, SDropIndexReq *pReq) metaUpdateUidIdx(pMeta, &nStbEntry); metaULock(pMeta); - tDeleteSSchemaWrapper(tag); - tDeleteSSchemaWrapper(row); + tDeleteSchemaWrapper(tag); + tDeleteSchemaWrapper(row); if (oStbEntry.pBuf) taosMemoryFree(oStbEntry.pBuf); tDecoderClear(&dc); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 69b3f9c3e0..20e04f122b 100644 --- 
a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -684,7 +684,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma } if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { - tDestroySSubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); smaError("vgId:%d, process submit req for rsma suid:%" PRIu64 ", uid:%" PRIu64 " level %" PRIi8 " failed since %s", @@ -696,7 +696,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version); if (pReq) { - tDestroySSubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); } } diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index 2a26f65bf9..6a4bddc991 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -299,7 +299,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * } SRow *pRow = NULL; if ((terrno = tRowBuild(pVals, (STSchema *)pTSchema, &pRow)) < 0) { - tDestroySSubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); goto _end; } taosArrayPush(tbData.aRowP, &pRow); @@ -309,7 +309,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * } // encode - tEncodeSize(tEncodeSSubmitReq2, pReq, len, terrno); + tEncodeSize(tEncodeSubmitReq, pReq, len, terrno); if (TSDB_CODE_SUCCESS == terrno) { SEncoder encoder; len += sizeof(SSubmitReq2Msg); @@ -321,7 +321,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * ((SSubmitReq2Msg *)pBuf)->header.contLen = htonl(len); ((SSubmitReq2Msg *)pBuf)->version = htobe64(1); tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - 
sizeof(SSubmitReq2Msg)); - if (tEncodeSSubmitReq2(&encoder, pReq) < 0) { + if (tEncodeSubmitReq(&encoder, pReq) < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; /*vError("failed to encode submit req since %s", terrstr());*/ } @@ -332,7 +332,7 @@ _end: taosArrayDestroy(tagArray); taosArrayDestroy(pVals); if (pReq) { - tDestroySSubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); } diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 0575b7299d..a914517645 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -287,7 +287,7 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v } -int32_t tqRegisterPushEntry(STQ* pTq, void* handle, SRpcMsg* pMsg) { +int32_t tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg) { int32_t vgId = TD_VID(pTq->pVnode); STqHandle* pHandle = (STqHandle*) handle; if(pHandle->msg == NULL){ diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index ead00dcc35..0c9c7b4793 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -265,9 +265,9 @@ STqReader* tqReaderOpen(SVnode* pVnode) { pReader->pColIdList = NULL; pReader->cachedSchemaVer = 0; pReader->cachedSchemaSuid = 0; - pReader->pSchema = NULL; pReader->pSchemaWrapper = NULL; pReader->tbIdHash = NULL; + pReader->pResBlock = createDataBlock(); return pReader; } @@ -276,19 +276,19 @@ void tqCloseReader(STqReader* pReader) { if (pReader->pWalReader) { walCloseReader(pReader->pWalReader); } - // free cached schema - if (pReader->pSchema) { - taosMemoryFree(pReader->pSchema); - } + if (pReader->pSchemaWrapper) { - tDeleteSSchemaWrapper(pReader->pSchemaWrapper); + tDeleteSchemaWrapper(pReader->pSchemaWrapper); } + if (pReader->pColIdList) { taosArrayDestroy(pReader->pColIdList); } + // free hash + blockDataDestroy(pReader->pResBlock); taosHashCleanup(pReader->tbIdHash); - 
tDestroySSubmitReq(&pReader->submit, TSDB_MSG_FLG_DECODE); + tDestroySubmitReq(&pReader->submit, TSDB_MSG_FLG_DECODE); taosMemoryFree(pReader); } @@ -322,9 +322,71 @@ int32_t extractSubmitMsgFromWal(SWalReader* pReader, SPackedData* pPackedData) { return 0; } +// todo ignore the error in wal? +int32_t tqNextBlockInWal(STqReader* pReader) { + SWalReader* pWalReader = pReader->pWalReader; + + while(1) { + SArray* pBlockList = pReader->submit.aSubmitTbData; + if (pBlockList == NULL || pReader->nextBlk >= taosArrayGetSize(pBlockList)) { + + // try next message in wal file + if (walNextValidMsg(pWalReader) < 0) { + return FETCH_TYPE__NONE; + } + + void* pBody = POINTER_SHIFT(pWalReader->pHead->head.body, sizeof(SSubmitReq2Msg)); + int32_t bodyLen = pWalReader->pHead->head.bodyLen - sizeof(SSubmitReq2Msg); + int64_t ver = pWalReader->pHead->head.version; + + SDecoder decoder = {0}; + tDecoderInit(&decoder, pBody, bodyLen); + if (tDecodeSubmitReq(&decoder, &pReader->submit) < 0) { + tDecoderClear(&decoder); + tqError("decode wal file error, msgLen:%d, ver:%"PRId64, bodyLen, ver); + return FETCH_TYPE__NONE; + } + + tDecoderClear(&decoder); + pReader->nextBlk = 0; + } + + size_t numOfBlocks = taosArrayGetSize(pReader->submit.aSubmitTbData); + while (pReader->nextBlk < numOfBlocks) { + tqDebug("tq reader next data block %p, %d %" PRId64 " %d", pReader->msg.msgStr, pReader->msg.msgLen, + pReader->msg.ver, pReader->nextBlk); + + SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk); + + if (pReader->tbIdHash == NULL) { + int32_t code = tqRetrieveDataBlock(pReader->pResBlock, pReader, NULL); + if (code == TSDB_CODE_SUCCESS && pReader->pResBlock->info.rows > 0) { + return FETCH_TYPE__DATA; + } + } + + void* ret = taosHashGet(pReader->tbIdHash, &pSubmitTbData->uid, sizeof(int64_t)); + if (ret != NULL) { + tqDebug("tq reader return submit block, uid:%"PRId64", ver:%"PRId64, pSubmitTbData->uid, pReader->msg.ver); + + int32_t code = 
tqRetrieveDataBlock(pReader->pResBlock, pReader, NULL); + if (code == TSDB_CODE_SUCCESS && pReader->pResBlock->info.rows > 0) { + return FETCH_TYPE__DATA; + } + } else { + pReader->nextBlk += 1; + tqDebug("tq reader discard submit block, uid:%"PRId64", continue", pSubmitTbData->uid); + } + } + + tDestroySubmitReq(&pReader->submit, TSDB_MSG_FLG_DECODE); + pReader->msg.msgStr = NULL; + } +} + int32_t tqNextBlock(STqReader* pReader, SSDataBlock* pBlock) { while (1) { - if (pReader->msg2.msgStr == NULL) { + if (pReader->msg.msgStr == NULL) { if (walNextValidMsg(pReader->pWalReader) < 0) { return FETCH_TYPE__NONE; } @@ -337,8 +399,7 @@ int32_t tqNextBlock(STqReader* pReader, SSDataBlock* pBlock) { } while (tqNextBlockImpl(pReader)) { - memset(pBlock, 0, sizeof(SSDataBlock)); - int32_t code = tqRetrieveDataBlock(pBlock, pReader, NULL); + int32_t code = tqRetrieveDataBlock(pReader->pResBlock, pReader, NULL); if (code != TSDB_CODE_SUCCESS || pBlock->info.rows == 0) { continue; } @@ -349,31 +410,33 @@ int32_t tqNextBlock(STqReader* pReader, SSDataBlock* pBlock) { } int32_t tqReaderSetSubmitMsg(STqReader* pReader, void* msgStr, int32_t msgLen, int64_t ver) { - pReader->msg2.msgStr = msgStr; - pReader->msg2.msgLen = msgLen; - pReader->msg2.ver = ver; + pReader->msg.msgStr = msgStr; + pReader->msg.msgLen = msgLen; + pReader->msg.ver = ver; tqDebug("tq reader set msg %p %d", msgStr, msgLen); SDecoder decoder; - tDecoderInit(&decoder, pReader->msg2.msgStr, pReader->msg2.msgLen); - if (tDecodeSSubmitReq2(&decoder, &pReader->submit) < 0) { + + tDecoderInit(&decoder, pReader->msg.msgStr, pReader->msg.msgLen); + if (tDecodeSubmitReq(&decoder, &pReader->submit) < 0) { tDecoderClear(&decoder); tqError("DecodeSSubmitReq2 error, msgLen:%d, ver:%"PRId64, msgLen, ver); return -1; } + tDecoderClear(&decoder); return 0; } bool tqNextBlockImpl(STqReader* pReader) { - if (pReader->msg2.msgStr == NULL) { + if (pReader->msg.msgStr == NULL) { return false; } int32_t blockSz = 
taosArrayGetSize(pReader->submit.aSubmitTbData); while (pReader->nextBlk < blockSz) { - tqDebug("tq reader next data block %p, %d %" PRId64 " %d", pReader->msg2.msgStr, pReader->msg2.msgLen, - pReader->msg2.ver, pReader->nextBlk); + tqDebug("tq reader next data block %p, %d %" PRId64 " %d", pReader->msg.msgStr, pReader->msg.msgLen, + pReader->msg.ver, pReader->nextBlk); SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk); if (pReader->tbIdHash == NULL) { @@ -382,7 +445,7 @@ bool tqNextBlockImpl(STqReader* pReader) { void* ret = taosHashGet(pReader->tbIdHash, &pSubmitTbData->uid, sizeof(int64_t)); if (ret != NULL) { - tqDebug("tq reader block found, ver:%"PRId64", uid:%"PRId64, pReader->msg2.ver, pSubmitTbData->uid); + tqDebug("tq reader block found, ver:%"PRId64", uid:%"PRId64, pReader->msg.ver, pSubmitTbData->uid); return true; } else { tqDebug("tq reader discard submit block, uid:%"PRId64", continue", pSubmitTbData->uid); @@ -391,15 +454,15 @@ bool tqNextBlockImpl(STqReader* pReader) { pReader->nextBlk++; } - tDestroySSubmitReq(&pReader->submit, TSDB_MSG_FLG_DECODE); + tDestroySubmitReq(&pReader->submit, TSDB_MSG_FLG_DECODE); pReader->nextBlk = 0; - pReader->msg2.msgStr = NULL; + pReader->msg.msgStr = NULL; return false; } bool tqNextDataBlockFilterOut(STqReader* pReader, SHashObj* filterOutUids) { - if (pReader->msg2.msgStr == NULL) return false; + if (pReader->msg.msgStr == NULL) return false; int32_t blockSz = taosArrayGetSize(pReader->submit.aSubmitTbData); while (pReader->nextBlk < blockSz) { @@ -413,9 +476,9 @@ bool tqNextDataBlockFilterOut(STqReader* pReader, SHashObj* filterOutUids) { pReader->nextBlk++; } - tDestroySSubmitReq(&pReader->submit, TSDB_MSG_FLG_DECODE); + tDestroySubmitReq(&pReader->submit, TSDB_MSG_FLG_DECODE); pReader->nextBlk = 0; - pReader->msg2.msgStr = NULL; + pReader->msg.msgStr = NULL; return false; } @@ -450,10 +513,9 @@ int32_t tqMaskBlock(SSchemaWrapper* pDst, SSDataBlock* pBlock, const 
SSchemaWrap } int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbData** pSubmitTbDataRet) { - tqDebug("tq reader retrieve data block %p, index:%d", pReader->msg2.msgStr, pReader->nextBlk); - SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk); - pReader->nextBlk++; + tqDebug("tq reader retrieve data block %p, index:%d", pReader->msg.msgStr, pReader->nextBlk); + SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk++); if (pSubmitTbDataRet) { *pSubmitTbDataRet = pSubmitTbData; } @@ -464,21 +526,11 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa pReader->lastBlkUid = uid; pBlock->info.id.uid = uid; - pBlock->info.version = pReader->msg2.ver; + pBlock->info.version = pReader->msg.ver; if (pReader->cachedSchemaSuid == 0 || pReader->cachedSchemaVer != sversion || pReader->cachedSchemaSuid != suid) { - taosMemoryFree(pReader->pSchema); - pReader->pSchema = metaGetTbTSchema(pReader->pVnodeMeta, uid, sversion, 1); - if (pReader->pSchema == NULL) { - tqWarn("vgId:%d, cannot found tsschema for table: uid:%" PRId64 " (suid:%" PRId64 - "), version %d, possibly dropped table", - pReader->pWalReader->pWal->cfg.vgId, uid, suid, sversion); - pReader->cachedSchemaSuid = 0; - terrno = TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND; - return -1; - } + tDeleteSchemaWrapper(pReader->pSchemaWrapper); - tDeleteSSchemaWrapper(pReader->pSchemaWrapper); pReader->pSchemaWrapper = metaGetTableSchema(pReader->pVnodeMeta, uid, sversion, 1); if (pReader->pSchemaWrapper == NULL) { tqWarn("vgId:%d, cannot found schema wrapper for table: suid:%" PRId64 ", version %d, possibly dropped table", @@ -488,93 +540,140 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa return -1; } - STSchema* pTschema = pReader->pSchema; + pReader->cachedSchemaSuid = suid; + pReader->cachedSchemaVer = sversion; + SSchemaWrapper* pSchemaWrapper = 
pReader->pSchemaWrapper; - int32_t colNumNeed = taosArrayGetSize(pReader->pColIdList); - - if (colNumNeed == 0) { - int32_t colMeta = 0; - while (colMeta < pSchemaWrapper->nCols) { - SSchema* pColSchema = &pSchemaWrapper->pSchema[colMeta]; + int32_t numOfCols = taosArrayGetSize(pReader->pColIdList); + if (numOfCols == 0) { // all columns are required + for (int32_t i = 0; i < pSchemaWrapper->nCols; ++i) { + SSchema* pColSchema = &pSchemaWrapper->pSchema[i]; SColumnInfoData colInfo = createColumnInfoData(pColSchema->type, pColSchema->bytes, pColSchema->colId); - int32_t code = blockDataAppendColInfo(pBlock, &colInfo); + + int32_t code = blockDataAppendColInfo(pBlock, &colInfo); if (code != TSDB_CODE_SUCCESS) { - goto FAIL; + blockDataFreeRes(pBlock); + return -1; } - colMeta++; } } else { - if (colNumNeed > pSchemaWrapper->nCols) { - colNumNeed = pSchemaWrapper->nCols; + if (numOfCols > pSchemaWrapper->nCols) { + numOfCols = pSchemaWrapper->nCols; } - int32_t colMeta = 0; - int32_t colNeed = 0; - while (colMeta < pSchemaWrapper->nCols && colNeed < colNumNeed) { - SSchema* pColSchema = &pSchemaWrapper->pSchema[colMeta]; + int32_t i = 0; + int32_t j = 0; + while (i < pSchemaWrapper->nCols && j < numOfCols) { + SSchema* pColSchema = &pSchemaWrapper->pSchema[i]; col_id_t colIdSchema = pColSchema->colId; - col_id_t colIdNeed = *(col_id_t*)taosArrayGet(pReader->pColIdList, colNeed); + + col_id_t colIdNeed = *(col_id_t*)taosArrayGet(pReader->pColIdList, j); if (colIdSchema < colIdNeed) { - colMeta++; + i++; } else if (colIdSchema > colIdNeed) { - colNeed++; + j++; } else { SColumnInfoData colInfo = createColumnInfoData(pColSchema->type, pColSchema->bytes, pColSchema->colId); int32_t code = blockDataAppendColInfo(pBlock, &colInfo); if (code != TSDB_CODE_SUCCESS) { goto FAIL; } - colMeta++; - colNeed++; + i++; + j++; } } } + } - int32_t numOfRows = 0; + int32_t numOfRows = 0; + if (pSubmitTbData->flags & SUBMIT_REQ_COLUMN_DATA_FORMAT) { + SColData* pCol = 
taosArrayGet(pSubmitTbData->aCol, 0); + numOfRows = pCol->nVal; + } else { + numOfRows = taosArrayGetSize(pSubmitTbData->aRowP); + } - if (pSubmitTbData->flags & SUBMIT_REQ_COLUMN_DATA_FORMAT) { - SArray* pCols = pSubmitTbData->aCol; - SColData* pCol = taosArrayGet(pCols, 0); - numOfRows = pCol->nVal; - } else { - SArray* pRows = pSubmitTbData->aRowP; - numOfRows = taosArrayGetSize(pRows); + if (blockDataEnsureCapacity(pBlock, numOfRows) < 0) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto FAIL; + } + + pBlock->info.rows = numOfRows; + + int32_t colActual = blockDataGetNumOfCols(pBlock); + + // convert and scan one block + if (pSubmitTbData->flags & SUBMIT_REQ_COLUMN_DATA_FORMAT) { + SArray* pCols = pSubmitTbData->aCol; + int32_t numOfCols = taosArrayGetSize(pCols); + int32_t targetIdx = 0; + int32_t sourceIdx = 0; + while (targetIdx < colActual) { + if (sourceIdx >= numOfCols) { + tqError("tqRetrieveDataBlock sourceIdx:%d >= numOfCols:%d", sourceIdx, numOfCols); + goto FAIL; + } + + SColData* pCol = taosArrayGet(pCols, sourceIdx); + SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, targetIdx); + SColVal colVal; + + if (pCol->nVal != numOfRows) { + tqError("tqRetrieveDataBlock pCol->nVal:%d != numOfRows:%d", pCol->nVal, numOfRows); + goto FAIL; + } + + if (pCol->cid < pColData->info.colId) { + sourceIdx++; + } else if (pCol->cid == pColData->info.colId) { + for (int32_t i = 0; i < pCol->nVal; i++) { + tColDataGetValue(pCol, i, &colVal); + if (IS_STR_DATA_TYPE(colVal.type)) { + if (colVal.value.pData != NULL) { + char val[65535 + 2] = {0}; + memcpy(varDataVal(val), colVal.value.pData, colVal.value.nData); + varDataSetLen(val, colVal.value.nData); + if (colDataAppend(pColData, i, val, !COL_VAL_IS_VALUE(&colVal)) < 0) { + goto FAIL; + } + } else { + colDataSetNULL(pColData, i); + } + } else { + if (colDataAppend(pColData, i, (void*)&colVal.value.val, !COL_VAL_IS_VALUE(&colVal)) < 0) { + goto FAIL; + } + } + } + sourceIdx++; + targetIdx++; + } else { + for 
(int32_t i = 0; i < pCol->nVal; i++) { + colDataSetNULL(pColData, i); + } + + targetIdx++; + } } + } else { + SArray* pRows = pSubmitTbData->aRowP; + SSchemaWrapper* pWrapper = pReader->pSchemaWrapper; + STSchema* pTSchema = tBuildTSchema(pWrapper->pSchema, pWrapper->nCols, pWrapper->version); - if (blockDataEnsureCapacity(pBlock, numOfRows) < 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto FAIL; - } - pBlock->info.rows = numOfRows; - - int32_t colActual = blockDataGetNumOfCols(pBlock); - - // convert and scan one block - if (pSubmitTbData->flags & SUBMIT_REQ_COLUMN_DATA_FORMAT) { - SArray* pCols = pSubmitTbData->aCol; - int32_t numOfCols = taosArrayGetSize(pCols); - int32_t targetIdx = 0; + for (int32_t i = 0; i < numOfRows; i++) { + SRow* pRow = taosArrayGetP(pRows, i); int32_t sourceIdx = 0; - while (targetIdx < colActual) { - if(sourceIdx >= numOfCols){ - tqError("tqRetrieveDataBlock sourceIdx:%d >= numOfCols:%d", sourceIdx, numOfCols); - goto FAIL; - } - SColData* pCol = taosArrayGet(pCols, sourceIdx); - SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, targetIdx); - SColVal colVal; - if(pCol->nVal != numOfRows){ - tqError("tqRetrieveDataBlock pCol->nVal:%d != numOfRows:%d", pCol->nVal, numOfRows); - goto FAIL; - } - - if (pCol->cid < pColData->info.colId) { - sourceIdx++; - } else if (pCol->cid == pColData->info.colId) { - for (int32_t i = 0; i < pCol->nVal; i++) { - tColDataGetValue(pCol, i, &colVal); + for (int32_t j = 0; j < colActual; j++) { + SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, j); + while (1) { + SColVal colVal; + tRowGet(pRow, pTSchema, sourceIdx, &colVal); + if (colVal.cid < pColData->info.colId) { + sourceIdx++; + continue; + } else if (colVal.cid == pColData->info.colId) { if (IS_STR_DATA_TYPE(colVal.type)) { if (colVal.value.pData != NULL) { char val[65535 + 2] = {0}; @@ -591,59 +690,18 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa goto FAIL; } } - } - sourceIdx++; - 
targetIdx++; - } else { - for (int32_t i = 0; i < pCol->nVal; i++) { + + sourceIdx++; + break; + } else { colDataSetNULL(pColData, i); - } - targetIdx++; - } - } - } else { - SArray* pRows = pSubmitTbData->aRowP; - - for (int32_t i = 0; i < numOfRows; i++) { - SRow* pRow = taosArrayGetP(pRows, i); - int32_t sourceIdx = 0; - - for (int32_t j = 0; j < colActual; j++) { - SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, j); - while (1) { - SColVal colVal; - tRowGet(pRow, pTschema, sourceIdx, &colVal); - if (colVal.cid < pColData->info.colId) { - sourceIdx++; - continue; - } else if (colVal.cid == pColData->info.colId) { - if (IS_STR_DATA_TYPE(colVal.type)) { - if (colVal.value.pData != NULL) { - char val[65535 + 2] = {0}; - memcpy(varDataVal(val), colVal.value.pData, colVal.value.nData); - varDataSetLen(val, colVal.value.nData); - if (colDataAppend(pColData, i, val, !COL_VAL_IS_VALUE(&colVal)) < 0) { - goto FAIL; - } - } else { - colDataSetNULL(pColData, i); - } - } else { - if (colDataAppend(pColData, i, (void*)&colVal.value.val, !COL_VAL_IS_VALUE(&colVal)) < 0) { - goto FAIL; - } - } - - sourceIdx++; - break; - } else { - colDataSetNULL(pColData, i); - break; - } + break; } } } } + + taosMemoryFreeClear(pTSchema); } return 0; @@ -654,7 +712,7 @@ FAIL: } int32_t tqRetrieveTaosxBlock(STqReader* pReader, SArray* blocks, SArray* schemas, SSubmitTbData** pSubmitTbDataRet) { - tqDebug("tq reader retrieve data block %p, %d", pReader->msg2.msgStr, pReader->nextBlk); + tqDebug("tq reader retrieve data block %p, %d", pReader->msg.msgStr, pReader->nextBlk); SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk); pReader->nextBlk++; @@ -665,18 +723,7 @@ int32_t tqRetrieveTaosxBlock(STqReader* pReader, SArray* blocks, SArray* schemas int64_t uid = pSubmitTbData->uid; pReader->lastBlkUid = uid; - taosMemoryFree(pReader->pSchema); - pReader->pSchema = metaGetTbTSchema(pReader->pVnodeMeta, uid, sversion, 1); - if (pReader->pSchema 
== NULL) { - tqWarn("vgId:%d, cannot found tsschema for table: uid:%" PRId64 " (suid:%" PRId64 - "), version %d, possibly dropped table", - pReader->pWalReader->pWal->cfg.vgId, uid, suid, sversion); - pReader->cachedSchemaSuid = 0; - terrno = TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND; - return -1; - } - - tDeleteSSchemaWrapper(pReader->pSchemaWrapper); + tDeleteSchemaWrapper(pReader->pSchemaWrapper); pReader->pSchemaWrapper = metaGetTableSchema(pReader->pVnodeMeta, uid, sversion, 1); if (pReader->pSchemaWrapper == NULL) { tqWarn("vgId:%d, cannot found schema wrapper for table: suid:%" PRId64 ", version %d, possibly dropped table", @@ -686,7 +733,6 @@ int32_t tqRetrieveTaosxBlock(STqReader* pReader, SArray* blocks, SArray* schemas return -1; } - STSchema* pTschema = pReader->pSchema; SSchemaWrapper* pSchemaWrapper = pReader->pSchemaWrapper; int32_t numOfRows = 0; @@ -743,18 +789,18 @@ int32_t tqRetrieveTaosxBlock(STqReader* pReader, SArray* blocks, SArray* schemas if (tqMaskBlock(pSW, &block, pSchemaWrapper, assigned) < 0) { blockDataFreeRes(&block); - tDeleteSSchemaWrapper(pSW); + tDeleteSchemaWrapper(pSW); goto FAIL; } tqDebug("vgId:%d, build new block, col %d", pReader->pWalReader->pWal->cfg.vgId, (int32_t)taosArrayGetSize(block.pDataBlock)); block.info.id.uid = uid; - block.info.version = pReader->msg2.ver; + block.info.version = pReader->msg.ver; if (blockDataEnsureCapacity(&block, numOfRows - curRow) < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; blockDataFreeRes(&block); - tDeleteSSchemaWrapper(pSW); + tDeleteSchemaWrapper(pSW); goto FAIL; } taosArrayPush(blocks, &block); @@ -803,14 +849,17 @@ int32_t tqRetrieveTaosxBlock(STqReader* pReader, SArray* blocks, SArray* schemas curRow++; } } else { + SSchemaWrapper* pWrapper = pReader->pSchemaWrapper; + STSchema* pTSchema = tBuildTSchema(pWrapper->pSchema, pWrapper->nCols, pWrapper->version); SArray* pRows = pSubmitTbData->aRowP; + for (int32_t i = 0; i < numOfRows; i++) { SRow* pRow = taosArrayGetP(pRows, i); bool buildNew = 
false; - for (int32_t j = 0; j < pTschema->numOfCols; j++) { + for (int32_t j = 0; j < pTSchema->numOfCols; j++) { SColVal colVal; - tRowGet(pRow, pTschema, j, &colVal); + tRowGet(pRow, pTSchema, j, &colVal); if (curRow == 0) { assigned[j] = !COL_VAL_IS_NONE(&colVal); buildNew = true; @@ -839,18 +888,18 @@ int32_t tqRetrieveTaosxBlock(STqReader* pReader, SArray* blocks, SArray* schemas if (tqMaskBlock(pSW, &block, pSchemaWrapper, assigned) < 0) { blockDataFreeRes(&block); - tDeleteSSchemaWrapper(pSW); + tDeleteSchemaWrapper(pSW); goto FAIL; } tqDebug("vgId:%d, build new block, col %d", pReader->pWalReader->pWal->cfg.vgId, (int32_t)taosArrayGetSize(block.pDataBlock)); block.info.id.uid = uid; - block.info.version = pReader->msg2.ver; + block.info.version = pReader->msg.ver; if (blockDataEnsureCapacity(&block, numOfRows - curRow) < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; blockDataFreeRes(&block); - tDeleteSSchemaWrapper(pSW); + tDeleteSchemaWrapper(pSW); goto FAIL; } taosArrayPush(blocks, &block); @@ -868,7 +917,7 @@ int32_t tqRetrieveTaosxBlock(STqReader* pReader, SArray* blocks, SArray* schemas while (targetIdx < colActual) { SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, targetIdx); SColVal colVal; - tRowGet(pRow, pTschema, sourceIdx, &colVal); + tRowGet(pRow, pTSchema, sourceIdx, &colVal); if (colVal.cid < pColData->info.colId) { sourceIdx++; @@ -895,6 +944,8 @@ int32_t tqRetrieveTaosxBlock(STqReader* pReader, SArray* blocks, SArray* schemas } curRow++; } + + taosMemoryFreeClear(pTSchema); } SSDataBlock* pLastBlock = taosArrayGetLast(blocks); diff --git a/source/dnode/vnode/src/tq/tqScan.c b/source/dnode/vnode/src/tq/tqScan.c index 8e243a8bd1..3d9cea54ba 100644 --- a/source/dnode/vnode/src/tq/tqScan.c +++ b/source/dnode/vnode/src/tq/tqScan.c @@ -215,7 +215,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR int64_t uid = pExec->pTqReader->lastBlkUid; if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 
0) { taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes); - taosArrayDestroyP(pSchemas, (FDelete)tDeleteSSchemaWrapper); + taosArrayDestroyP(pSchemas, (FDelete)tDeleteSchemaWrapper); pBlocks = taosArrayInit(0, sizeof(SSDataBlock)); pSchemas = taosArrayInit(0, sizeof(void*)); continue; @@ -274,7 +274,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR int64_t uid = pExec->pTqReader->lastBlkUid; if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 0) { taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes); - taosArrayDestroyP(pSchemas, (FDelete)tDeleteSSchemaWrapper); + taosArrayDestroyP(pSchemas, (FDelete)tDeleteSchemaWrapper); pBlocks = taosArrayInit(0, sizeof(SSDataBlock)); pSchemas = taosArrayInit(0, sizeof(void*)); continue; diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index c2e6946b04..33d1e08c9c 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -672,7 +672,7 @@ void tqSinkToTablePipeline2(SStreamTask* pTask, void* vnode, int64_t ver, void* } SRow* pRow = NULL; if ((terrno = tRowBuild(pVals, (STSchema*)pTSchema, &pRow)) < 0) { - tDestroySSubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); goto _end; } ASSERT(pRow); @@ -681,7 +681,7 @@ void tqSinkToTablePipeline2(SStreamTask* pTask, void* vnode, int64_t ver, void* SSubmitReq2 submitReq = {0}; if (!(submitReq.aSubmitTbData = taosArrayInit(1, sizeof(SSubmitTbData)))) { - tDestroySSubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); goto _end; } @@ -690,28 +690,28 @@ void tqSinkToTablePipeline2(SStreamTask* pTask, void* vnode, int64_t ver, void* // encode int32_t len; int32_t code; - tEncodeSize(tEncodeSSubmitReq2, &submitReq, len, code); + tEncodeSize(tEncodeSubmitReq, &submitReq, len, code); SEncoder encoder; len += sizeof(SSubmitReq2Msg); pBuf = rpcMallocCont(len); if (NULL == pBuf) { - 
tDestroySSubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); goto _end; } ((SSubmitReq2Msg*)pBuf)->header.vgId = TD_VID(pVnode); ((SSubmitReq2Msg*)pBuf)->header.contLen = htonl(len); ((SSubmitReq2Msg*)pBuf)->version = htobe64(1); tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg)); - if (tEncodeSSubmitReq2(&encoder, &submitReq) < 0) { + if (tEncodeSubmitReq(&encoder, &submitReq) < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; tqError("failed to encode submit req since %s", terrstr()); tEncoderClear(&encoder); rpcFreeCont(pBuf); - tDestroySSubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); continue; } tEncoderClear(&encoder); - tDestroySSubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); SRpcMsg msg = { .msgType = TDMT_VND_SUBMIT, diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index d186c63871..133c51a8dc 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -180,7 +180,7 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { // lock taosWLockLatch(&pTq->lock); - code = tqRegisterPushEntry(pTq, pHandle, pMsg); + code = tqRegisterPushHandle(pTq, pHandle, pMsg); taosWUnLockLatch(&pTq->lock); tDeleteSMqDataRsp(&dataRsp); return code; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 251efeab3d..f8161427db 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -1007,7 +1007,7 @@ static int32_t vnodeResetTableCxt(SMeta *pMeta, SSubmitReqConvertCxt *pCxt) { } tdSTSRowIterInit(&pCxt->rowIter, pCxt->pTbSchema); - tDestroySSubmitTbData(pCxt->pTbData, TSDB_MSG_FLG_ENCODE); + 
tDestroySubmitTbData(pCxt->pTbData, TSDB_MSG_FLG_ENCODE); if (NULL == pCxt->pTbData) { pCxt->pTbData = taosMemoryCalloc(1, sizeof(SSubmitTbData)); if (NULL == pCxt->pTbData) { @@ -1039,7 +1039,7 @@ static int32_t vnodeResetTableCxt(SMeta *pMeta, SSubmitReqConvertCxt *pCxt) { static void vnodeDestroySubmitReqConvertCxt(SSubmitReqConvertCxt *pCxt) { taosMemoryFreeClear(pCxt->pTbSchema); - tDestroySSubmitTbData(pCxt->pTbData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitTbData(pCxt->pTbData, TSDB_MSG_FLG_ENCODE); taosMemoryFreeClear(pCxt->pTbData); taosArrayDestroy(pCxt->pColValues); } @@ -1149,7 +1149,7 @@ static int32_t vnodeRebuildSubmitReqMsg(SSubmitReq2 *pSubmitReq, void **ppMsg) { int32_t code = TSDB_CODE_SUCCESS; char *pMsg = NULL; uint32_t msglen = 0; - tEncodeSize(tEncodeSSubmitReq2, pSubmitReq, msglen, code); + tEncodeSize(tEncodeSubmitReq, pSubmitReq, msglen, code); if (TSDB_CODE_SUCCESS == code) { pMsg = taosMemoryMalloc(msglen); if (NULL == pMsg) { @@ -1159,7 +1159,7 @@ static int32_t vnodeRebuildSubmitReqMsg(SSubmitReq2 *pSubmitReq, void **ppMsg) { if (TSDB_CODE_SUCCESS == code) { SEncoder encoder; tEncoderInit(&encoder, pMsg, msglen); - code = tEncodeSSubmitReq2(&encoder, pSubmitReq); + code = tEncodeSubmitReq(&encoder, pSubmitReq); tEncoderClear(&encoder); } if (TSDB_CODE_SUCCESS == code) { @@ -1199,7 +1199,7 @@ static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t version, void *pReq len -= sizeof(SSubmitReq2Msg); SDecoder dc = {0}; tDecoderInit(&dc, pReq, len); - if (tDecodeSSubmitReq2(&dc, pSubmitReq) < 0) { + if (tDecodeSubmitReq(&dc, pSubmitReq) < 0) { code = TSDB_CODE_INVALID_MSG; goto _exit; } @@ -1388,7 +1388,7 @@ _exit: // clear taosArrayDestroy(newTbUids); - tDestroySSubmitReq(pSubmitReq, 0 == pMsg->version ? TSDB_MSG_FLG_CMPT : TSDB_MSG_FLG_DECODE); + tDestroySubmitReq(pSubmitReq, 0 == pMsg->version ? 
TSDB_MSG_FLG_CMPT : TSDB_MSG_FLG_DECODE); tDestroySSubmitRsp2(pSubmitRsp, TSDB_MSG_FLG_ENCODE); if (code) terrno = code; diff --git a/source/libs/executor/src/dataInserter.c b/source/libs/executor/src/dataInserter.c index 33eccf4759..d31ac0bc51 100644 --- a/source/libs/executor/src/dataInserter.c +++ b/source/libs/executor/src/dataInserter.c @@ -126,7 +126,7 @@ static int32_t submitReqToMsg(int32_t vgId, SSubmitReq2* pReq, void** pData, int int32_t code = TSDB_CODE_SUCCESS; int32_t len = 0; void* pBuf = NULL; - tEncodeSize(tEncodeSSubmitReq2, pReq, len, code); + tEncodeSize(tEncodeSubmitReq, pReq, len, code); if (TSDB_CODE_SUCCESS == code) { SEncoder encoder; len += sizeof(SSubmitReq2Msg); @@ -138,7 +138,7 @@ static int32_t submitReqToMsg(int32_t vgId, SSubmitReq2* pReq, void** pData, int ((SSubmitReq2Msg*)pBuf)->header.contLen = htonl(len); ((SSubmitReq2Msg*)pBuf)->version = htobe64(1); tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg)); - code = tEncodeSSubmitReq2(&encoder, pReq); + code = tEncodeSubmitReq(&encoder, pReq); tEncoderClear(&encoder); } @@ -281,7 +281,7 @@ int32_t buildSubmitReqFromBlock(SDataInserterHandle* pInserter, SSubmitReq2** pp SRow* pRow = NULL; if ((terrno = tRowBuild(pVals, pTSchema, &pRow)) < 0) { - tDestroySSubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitTbData(&tbData, TSDB_MSG_FLG_ENCODE); goto _end; } taosArrayPush(tbData.aRowP, &pRow); @@ -301,7 +301,7 @@ _end: if (terrno != 0) { *ppReq = NULL; if (pReq) { - tDestroySSubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); } return terrno; @@ -326,7 +326,7 @@ int32_t dataBlocksToSubmitReq(SDataInserterHandle* pInserter, void** pMsg, int32 code = buildSubmitReqFromBlock(pInserter, &pReq, pDataBlock, pTSchema, uid, vgId, suid); if (code) { if (pReq) { - tDestroySSubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); } @@ 
-335,7 +335,7 @@ int32_t dataBlocksToSubmitReq(SDataInserterHandle* pInserter, void** pMsg, int32 } code = submitReqToMsg(vgId, pReq, pMsg, msgLen); - tDestroySSubmitReq(pReq, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pReq); return code; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 2d991a14f5..5fc079b7c1 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -1052,19 +1052,6 @@ int32_t initQueryTableDataCondForTmq(SQueryTableDataCond* pCond, SSnapContext* s return TSDB_CODE_SUCCESS; } -int32_t qStreamSetScanMemData(qTaskInfo_t tinfo, SPackedData submit) { - SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; - if ((pTaskInfo->execModel != OPTR_EXEC_MODEL_QUEUE) || (pTaskInfo->streamInfo.submit.msgStr != NULL)) { - qError("qStreamSetScanMemData err:%d,%p", pTaskInfo->execModel, pTaskInfo->streamInfo.submit.msgStr); - terrno = TSDB_CODE_PAR_INTERNAL_ERROR; - return -1; - } - qDebug("set the submit block for future scan"); - - pTaskInfo->streamInfo.submit = submit; - return 0; -} - void qStreamSetOpen(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; SOperatorInfo* pOperator = pTaskInfo->pRoot; @@ -1086,6 +1073,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT if (pOperator == NULL) { return -1; } + SStreamScanInfo* pInfo = pOperator->info; STableScanInfo* pScanInfo = pInfo->pTableScanOp->info; STableScanBase* pScanBaseInfo = &pScanInfo->base; @@ -1221,7 +1209,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT cleanupQueryTableDataCond(&pTaskInfo->streamInfo.tableCond); strcpy(pTaskInfo->streamInfo.tbName, mtInfo.tbName); - tDeleteSSchemaWrapper(pTaskInfo->streamInfo.schema); + tDeleteSchemaWrapper(pTaskInfo->streamInfo.schema); pTaskInfo->streamInfo.schema = mtInfo.schema; qDebug("tmqsnap qStreamPrepareScan snapshot data uid:%" PRId64 " ts %" PRId64 " 
%s", mtInfo.uid, pOffset->ts, id); diff --git a/source/libs/executor/src/querytask.c b/source/libs/executor/src/querytask.c index a4d8327b6a..7716b5976b 100644 --- a/source/libs/executor/src/querytask.c +++ b/source/libs/executor/src/querytask.c @@ -109,8 +109,8 @@ int32_t createExecTaskInfo(SSubplan* pPlan, SExecTaskInfo** pTaskInfo, SReadHand void cleanupQueriedTableScanInfo(SSchemaInfo* pSchemaInfo) { taosMemoryFreeClear(pSchemaInfo->dbname); taosMemoryFreeClear(pSchemaInfo->tablename); - tDeleteSSchemaWrapper(pSchemaInfo->sw); - tDeleteSSchemaWrapper(pSchemaInfo->qsw); + tDeleteSchemaWrapper(pSchemaInfo->sw); + tDeleteSchemaWrapper(pSchemaInfo->qsw); } int32_t initQueriedTableSchemaInfo(SReadHandle* pHandle, SScanPhysiNode* pScanNode, const char* dbName, SExecTaskInfo* pTaskInfo) { @@ -197,7 +197,7 @@ SSchemaWrapper* extractQueriedColumnSchema(SScanPhysiNode* pScanNode) { return pqSw; } -static void cleanupStreamInfo(SStreamTaskInfo* pStreamInfo) { tDeleteSSchemaWrapper(pStreamInfo->schema); } +static void cleanupStreamInfo(SStreamTaskInfo* pStreamInfo) { tDeleteSchemaWrapper(pStreamInfo->schema); } static void freeBlock(void* pParam) { SSDataBlock* pBlock = *(SSDataBlock**)pParam; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 130cca9cbb..0f4e18105c 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1623,7 +1623,7 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock pInfo->pRes->info.dataLoad = 1; blockDataUpdateTsWindow(pInfo->pRes, pInfo->primaryTsIndex); - blockDataFreeRes((SSDataBlock*)pBlock); +// blockDataFreeRes((SSDataBlock*)pBlock); calBlockTbName(pInfo, pInfo->pRes); return 0; @@ -1637,7 +1637,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { qDebug("start to exec queue scan, %s", id); if (pTaskInfo->streamInfo.submit.msgStr != NULL) { - if (pInfo->tqReader->msg2.msgStr == NULL) { + if 
(pInfo->tqReader->msg.msgStr == NULL) { SPackedData submit = pTaskInfo->streamInfo.submit; if (tqReaderSetSubmitMsg(pInfo->tqReader, submit.msgStr, submit.msgLen, submit.ver) < 0) { qError("submit msg messed up when initing stream submit block %p", submit.msgStr); @@ -1663,7 +1663,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { } } - pInfo->tqReader->msg2 = (SPackedData){0}; + pInfo->tqReader->msg = (SPackedData){0}; pTaskInfo->streamInfo.submit = (SPackedData){0}; return NULL; } @@ -1689,17 +1689,17 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { if (pTaskInfo->streamInfo.currentOffset.type == TMQ_OFFSET__LOG) { while (1) { - SSDataBlock block = {0}; - int32_t type = tqNextBlock(pInfo->tqReader, &block); + int32_t type = tqNextBlockInWal(pInfo->tqReader); + SSDataBlock* pRes = pInfo->tqReader->pResBlock; // curVersion move to next, so currentOffset = curVersion - 1 tqOffsetResetToLog(&pTaskInfo->streamInfo.currentOffset, pInfo->tqReader->pWalReader->curVersion - 1); if (type == FETCH_TYPE__DATA) { - qDebug("doQueueScan get data from log %" PRId64 " rows, version:%" PRId64, block.info.rows, + qDebug("doQueueScan get data from log %" PRId64 " rows, version:%" PRId64, pRes->info.rows, pTaskInfo->streamInfo.currentOffset.version); blockDataCleanup(pInfo->pRes); - setBlockIntoRes(pInfo, &block, true); + setBlockIntoRes(pInfo, pRes, true); if (pInfo->pRes->info.rows > 0) { qDebug("doQueueScan get data from log %" PRId64 " rows, return, version:%" PRId64, pInfo->pRes->info.rows, pTaskInfo->streamInfo.currentOffset.version); @@ -2055,7 +2055,7 @@ FETCH_NEXT_BLOCK: NEXT_SUBMIT_BLK: while (1) { - if (pInfo->tqReader->msg2.msgStr == NULL) { + if (pInfo->tqReader->msg.msgStr == NULL) { if (pInfo->validBlockIndex >= totBlockNum) { updateInfoDestoryColseWinSBF(pInfo->pUpdateInfo); doClearBufferedBlocks(pInfo); @@ -2191,7 +2191,7 @@ static SSDataBlock* doRawScan(SOperatorInfo* pOperator) { qDebug("tmqsnap change get data uid:%" PRId64 "", 
mtInfo.uid); } qStreamPrepareScan(pTaskInfo, &offset, pInfo->sContext->subType); - tDeleteSSchemaWrapper(mtInfo.schema); + tDeleteSchemaWrapper(mtInfo.schema); return NULL; } else if (pTaskInfo->streamInfo.currentOffset.type == TMQ_OFFSET__SNAPSHOT_META) { SSnapContext* sContext = pInfo->sContext; diff --git a/source/libs/parser/src/parInsertUtil.c b/source/libs/parser/src/parInsertUtil.c index a3b067b94d..f921094752 100644 --- a/source/libs/parser/src/parInsertUtil.c +++ b/source/libs/parser/src/parInsertUtil.c @@ -313,7 +313,7 @@ void insDestroyTableDataCxt(STableDataCxt* pTableCxt) { insDestroyBoundColInfo(&pTableCxt->boundColsInfo); taosArrayDestroyEx(pTableCxt->pValues, destroyColVal); if (pTableCxt->pData) { - tDestroySSubmitTbData(pTableCxt->pData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitTbData(pTableCxt->pData, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pTableCxt->pData); } taosMemoryFree(pTableCxt); @@ -324,7 +324,7 @@ void insDestroyVgroupDataCxt(SVgroupDataCxt* pVgCxt) { return; } - tDestroySSubmitReq(pVgCxt->pData, TSDB_MSG_FLG_ENCODE); + tDestroySubmitReq(pVgCxt->pData, TSDB_MSG_FLG_ENCODE); taosMemoryFree(pVgCxt->pData); taosMemoryFree(pVgCxt); } @@ -499,7 +499,7 @@ static int32_t buildSubmitReq(int32_t vgId, SSubmitReq2* pReq, void** pData, uin int32_t code = TSDB_CODE_SUCCESS; uint32_t len = 0; void* pBuf = NULL; - tEncodeSize(tEncodeSSubmitReq2, pReq, len, code); + tEncodeSize(tEncodeSubmitReq, pReq, len, code); if (TSDB_CODE_SUCCESS == code) { SEncoder encoder; len += sizeof(SSubmitReq2Msg); @@ -511,7 +511,7 @@ static int32_t buildSubmitReq(int32_t vgId, SSubmitReq2* pReq, void** pData, uin ((SSubmitReq2Msg*)pBuf)->header.contLen = htonl(len); ((SSubmitReq2Msg*)pBuf)->version = htobe64(1); tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SSubmitReq2Msg)), len - sizeof(SSubmitReq2Msg)); - code = tEncodeSSubmitReq2(&encoder, pReq); + code = tEncodeSubmitReq(&encoder, pReq); tEncoderClear(&encoder); } diff --git a/source/libs/stream/src/streamTask.c 
b/source/libs/stream/src/streamTask.c index 67c60008fd..f301d9d517 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -193,7 +193,7 @@ void tFreeStreamTask(SStreamTask* pTask) { taosArrayDestroyP(pTask->childEpInfo, taosMemoryFree); if (pTask->outputType == TASK_OUTPUT__TABLE) { - tDeleteSSchemaWrapper(pTask->tbSink.pSchemaWrapper); + tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper); taosMemoryFree(pTask->tbSink.pTSchema); } From 1a8a834a2cbe3ce8a140a8848bb9d1e08c79dbf4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 4 May 2023 17:03:53 +0800 Subject: [PATCH 037/110] fix(tmq): fix memory leak. --- source/dnode/vnode/src/tq/tqRead.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 0c9c7b4793..3ee706cd39 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -341,6 +341,8 @@ int32_t tqNextBlockInWal(STqReader* pReader) { SDecoder decoder = {0}; tDecoderInit(&decoder, pBody, bodyLen); + taosArrayDestroy(pReader->submit.aSubmitTbData); + if (tDecodeSubmitReq(&decoder, &pReader->submit) < 0) { tDecoderClear(&decoder); tqError("decode wal file error, msgLen:%d, ver:%"PRId64, bodyLen, ver); From af0ca38a897fa5d63e71f24ab1427007b73c0436 Mon Sep 17 00:00:00 2001 From: dmchen Date: Thu, 4 May 2023 17:12:02 +0800 Subject: [PATCH 038/110] int16 overflow --- include/util/taoserror.h | 1 + source/dnode/mnode/impl/src/mndStb.c | 20 ++++++++++++++++++++ source/util/src/terror.c | 1 + 3 files changed, 22 insertions(+) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index ab89466a19..3847757d9d 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -261,6 +261,7 @@ int32_t* taosGetErrno(); // #define TSDB_CODE_MND_INVALID_STABLE_NAME TAOS_DEF_ERROR_CODE(0, 0x036D) // 2.x #define TSDB_CODE_MND_INVALID_STB_OPTION TAOS_DEF_ERROR_CODE(0, 0x036E) #define TSDB_CODE_MND_INVALID_ROW_BYTES 
TAOS_DEF_ERROR_CODE(0, 0x036F) +#define TSDB_CODE_MND_BIG_FIELD_VALUE TAOS_DEF_ERROR_CODE(0, 0x0370) // mnode-func diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 63bcef2a5b..939080be47 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -797,6 +797,11 @@ int32_t mndBuildStbFromReq(SMnode *pMnode, SStbObj *pDst, SMCreateStbReq *pCreat return -1; } + if(pDst->nextColId > 0 && pDst->nextColId < 0x7fff - pDst->numOfColumns - pDst->numOfTags){ + terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; + return -1; + } + for (int32_t i = 0; i < pDst->numOfColumns; ++i) { SField *pField = taosArrayGet(pCreate->pColumns, i); SSchema *pSchema = &pDst->pColumns[i]; @@ -927,6 +932,11 @@ static int32_t mndBuildStbFromAlter(SStbObj *pStb, SStbObj *pDst, SMCreateStbReq return -1; } + if(pDst->nextColId > 0 && pDst->nextColId < 0x7fff - pDst->numOfColumns - pDst->numOfTags){ + terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; + return -1; + } + for (int32_t i = 0; i < pDst->numOfColumns; ++i) { SField *pField = taosArrayGet(createReq->pColumns, i); SSchema *pSchema = &pDst->pColumns[i]; @@ -1154,6 +1164,11 @@ static int32_t mndAddSuperTableTag(const SStbObj *pOld, SStbObj *pNew, SArray *p return -1; } + if(pNew->nextColId > 0 && pNew->nextColId < 0x7fff - ntags){ + terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; + return -1; + } + for (int32_t i = 0; i < ntags; i++) { SField *pField = taosArrayGet(pFields, i); if (mndFindSuperTableColumnIndex(pOld, pField->name) >= 0) { @@ -1461,6 +1476,11 @@ static int32_t mndAddSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, SArray return -1; } + if(pNew->nextColId > 0 && pNew->nextColId < 0x7fff - ncols){ + terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; + return -1; + } + for (int32_t i = 0; i < ncols; i++) { SField *pField = taosArrayGet(pFields, i); if (mndFindSuperTableColumnIndex(pOld, pField->name) >= 0) { diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 
002d605793..f3aff1a200 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -203,6 +203,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_COLUMN_ALREADY_EXIST, "Column already exists TAOS_DEFINE_ERROR(TSDB_CODE_MND_COLUMN_NOT_EXIST, "Column does not exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_STB_OPTION, "Invalid stable options") TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_ROW_BYTES, "Invalid row bytes") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_BIG_FIELD_VALUE, "out of range and overflow") // mnode-func TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_FUNC_NAME, "Invalid func name") From bb33f054b4c1a034118b8cfc3b474b243757e22e Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 4 May 2023 17:21:18 +0800 Subject: [PATCH 039/110] fix:change field bytes if length is bigger than 1024 --- source/client/src/clientSml.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index d6642dfe8d..01a7a2eac2 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -1562,7 +1562,8 @@ static int smlProcess(SSmlHandle *info, char *lines[], char *rawLine, char *rawL do { code = smlModifyDBSchemas(info); if (code == 0 || code == TSDB_CODE_SML_INVALID_DATA || code == TSDB_CODE_PAR_TOO_MANY_COLUMNS - || code == TSDB_CODE_PAR_INVALID_TAGS_NUM) break; + || code == TSDB_CODE_PAR_INVALID_TAGS_NUM || code == TSDB_CODE_PAR_INVALID_TAGS_LENGTH + || code == TSDB_CODE_PAR_INVALID_ROW_LENGTH) break; taosMsleep(100); uInfo("SML:0x%" PRIx64 " smlModifyDBSchemas retry code:%s, times:%d", info->id, tstrerror(code), retryNum); } while (retryNum++ < taosHashGetSize(info->superTables) * MAX_RETRY_TIMES); From 766b752c18e83d1ef91e7b44b433023ed5fb7f0c Mon Sep 17 00:00:00 2001 From: dmchen Date: Thu, 4 May 2023 18:35:37 +0800 Subject: [PATCH 040/110] if statement for overflow --- source/dnode/mnode/impl/src/mndStb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 939080be47..3407d4a1dc 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -797,7 +797,7 @@ int32_t mndBuildStbFromReq(SMnode *pMnode, SStbObj *pDst, SMCreateStbReq *pCreat return -1; } - if(pDst->nextColId > 0 && pDst->nextColId < 0x7fff - pDst->numOfColumns - pDst->numOfTags){ + if(pDst->nextColId < 0 || pDst->nextColId >= 0x7fff - pDst->numOfColumns - pDst->numOfTags){ terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; return -1; } @@ -932,7 +932,7 @@ static int32_t mndBuildStbFromAlter(SStbObj *pStb, SStbObj *pDst, SMCreateStbReq return -1; } - if(pDst->nextColId > 0 && pDst->nextColId < 0x7fff - pDst->numOfColumns - pDst->numOfTags){ + if(pDst->nextColId < 0 && pDst->nextColId >= 0x7fff - pDst->numOfColumns - pDst->numOfTags){ terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; return -1; } @@ -1164,7 +1164,7 @@ static int32_t mndAddSuperTableTag(const SStbObj *pOld, SStbObj *pNew, SArray *p return -1; } - if(pNew->nextColId > 0 && pNew->nextColId < 0x7fff - ntags){ + if(pNew->nextColId < 0 && pNew->nextColId >= 0x7fff - ntags){ terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; return -1; } @@ -1476,7 +1476,7 @@ static int32_t mndAddSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, SArray return -1; } - if(pNew->nextColId > 0 && pNew->nextColId < 0x7fff - ncols){ + if(pNew->nextColId < 0 && pNew->nextColId >= 0x7fff - ncols){ terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; return -1; } From 25f451ba819beb71f7a8f01c0b52e60502c3bc73 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 4 May 2023 23:34:35 +0800 Subject: [PATCH 041/110] fix(tmq): fix result data block info. 
--- source/dnode/mnode/impl/src/mndSubscribe.c | 4 ++-- source/dnode/vnode/inc/vnode.h | 1 + source/dnode/vnode/src/tq/tqRead.c | 15 ++++++++++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 573c60549e..f4d6e27dea 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -197,7 +197,7 @@ static SMqRebInfo *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) { return pRebSub; } -static void doRemoveExistedConsumers(SMqRebOutputObj *pOutput, SHashObj *pHash, const SMqRebInputObj *pInput) { +static void doRemoveLostConsumers(SMqRebOutputObj *pOutput, SHashObj *pHash, const SMqRebInputObj *pInput) { int32_t numOfRemoved = taosArrayGetSize(pInput->pRebInfo->removedConsumers); const char *pSubKey = pOutput->pSub->key; @@ -339,7 +339,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR SHashObj *pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK); // 2. check and get actual removed consumers, put their vg into pHash - doRemoveExistedConsumers(pOutput, pHash, pInput); + doRemoveLostConsumers(pOutput, pHash, pInput); // 3. 
if previously no consumer, there are vgs not assigned, put these vg into pHash addUnassignedVgroups(pOutput, pHash); diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 817e6fdae4..c7424cd233 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -241,6 +241,7 @@ typedef struct STqReader { SArray *pColIdList; // SArray int32_t cachedSchemaVer; int64_t cachedSchemaSuid; + int64_t cachedSchemaUid; SSchemaWrapper *pSchemaWrapper; SSDataBlock *pResBlock; } STqReader; diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 3ee706cd39..8622216b28 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -530,22 +530,27 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa pBlock->info.id.uid = uid; pBlock->info.version = pReader->msg.ver; - if (pReader->cachedSchemaSuid == 0 || pReader->cachedSchemaVer != sversion || pReader->cachedSchemaSuid != suid) { + if ((suid != 0 && pReader->cachedSchemaSuid != suid) || (suid == 0 && pReader->cachedSchemaUid != uid) || (pReader->cachedSchemaVer != sversion)) { tDeleteSchemaWrapper(pReader->pSchemaWrapper); pReader->pSchemaWrapper = metaGetTableSchema(pReader->pVnodeMeta, uid, sversion, 1); if (pReader->pSchemaWrapper == NULL) { - tqWarn("vgId:%d, cannot found schema wrapper for table: suid:%" PRId64 ", version %d, possibly dropped table", - pReader->pWalReader->pWal->cfg.vgId, uid, pReader->cachedSchemaVer); + tqWarn("vgId:%d, cannot found schema wrapper for table: suid:%" PRId64 ", uid:%" PRId64 "version %d, possibly dropped table", + pReader->pWalReader->pWal->cfg.vgId, suid, uid, pReader->cachedSchemaVer); pReader->cachedSchemaSuid = 0; terrno = TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND; return -1; } + pReader->cachedSchemaUid = uid; pReader->cachedSchemaSuid = suid; pReader->cachedSchemaVer = sversion; SSchemaWrapper* pSchemaWrapper = pReader->pSchemaWrapper; + if 
(blockDataGetNumOfCols(pBlock) > 0) { + blockDataDestroy(pReader->pResBlock); + pReader->pResBlock = createDataBlock(); + } int32_t numOfCols = taosArrayGetSize(pReader->pColIdList); if (numOfCols == 0) { // all columns are required @@ -671,8 +676,12 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa SColumnInfoData* pColData = taosArrayGet(pBlock->pDataBlock, j); while (1) { SColVal colVal; + tqDebug("start to extract column id:%d, index:%d", pColData->info.colId, sourceIdx); + tRowGet(pRow, pTSchema, sourceIdx, &colVal); if (colVal.cid < pColData->info.colId) { + tqDebug("colIndex:%d column id:%d in row, ignore, the required colId:%d, total cols in schema:%d", + sourceIdx, colVal.cid, pColData->info.colId, pTSchema->numOfCols); sourceIdx++; continue; } else if (colVal.cid == pColData->info.colId) { From 32b4642dac9ddb3a94b6b99feb72ebd1783965d5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 5 May 2023 00:57:43 +0800 Subject: [PATCH 042/110] fix(tmq): fix invalid free. 
--- source/dnode/vnode/src/tq/tqRead.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 8622216b28..7ead634f8b 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -341,7 +341,15 @@ int32_t tqNextBlockInWal(STqReader* pReader) { SDecoder decoder = {0}; tDecoderInit(&decoder, pBody, bodyLen); - taosArrayDestroy(pReader->submit.aSubmitTbData); + + { + int32_t nSubmitTbData = taosArrayGetSize(pReader->submit.aSubmitTbData); + for (int32_t i = 0; i < nSubmitTbData; i++) { + SSubmitTbData* pData = taosArrayGet(pReader->submit.aSubmitTbData, i); + pData->aRowP = taosArrayDestroy(pData->aRowP); + } + pReader->submit.aSubmitTbData = taosArrayDestroy(pReader->submit.aSubmitTbData); + } if (tDecodeSubmitReq(&decoder, &pReader->submit) < 0) { tDecoderClear(&decoder); From 42b23e6471e7c7b4cf77cb3a58b22d76d5da57f0 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 5 May 2023 10:40:55 +0800 Subject: [PATCH 043/110] fix(query): stop tsdb reader asap. 
--- source/dnode/vnode/src/tsdb/tsdbRead.c | 54 +++++++++++--------------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 1ef86f5b30..8f7ebc6c5c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -21,10 +21,9 @@ #define getCurrentKeyInLastBlock(_r) ((_r)->currentKey) typedef enum { - READER_STATUS_SUSPEND = 0x1, - READER_STATUS_SHOULD_STOP = 0x2, - READER_STATUS_NORMAL = 0x3, -} EReaderExecStatus; + READER_STATUS_SUSPEND = 0x1, + READER_STATUS_NORMAL = 0x2, +} EReaderStatus; typedef enum { EXTERNAL_ROWS_PREV = 0x1, @@ -184,6 +183,7 @@ typedef struct STsdbReaderAttr { STimeWindow window; bool freeBlock; SVersionRange verRange; + int16_t order; } STsdbReaderAttr; typedef struct SResultBlockInfo { @@ -196,7 +196,8 @@ struct STsdbReader { STsdb* pTsdb; SVersionRange verRange; TdThreadMutex readerMutex; - EReaderExecStatus flag; + EReaderStatus flag; + int32_t code; uint64_t suid; int16_t order; EReadMode readMode; @@ -2995,9 +2996,9 @@ static int32_t moveToNextFile(STsdbReader* pReader, SBlockNumber* pBlockNum, SAr while (1) { // only check here, since the iterate data in memory is very fast. 
- if (pReader->flag == READER_STATUS_SHOULD_STOP) { - tsdbWarn("tsdb reader is stopped ASAP, %s", pReader->idStr); - return TSDB_CODE_SUCCESS; + if (pReader->code != TSDB_CODE_SUCCESS) { + tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); + return pReader->code; } bool hasNext = false; @@ -3093,9 +3094,9 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; while (1) { - if (pReader->flag == READER_STATUS_SHOULD_STOP) { - tsdbWarn("tsdb reader is stopped ASAP, %s", pReader->idStr); - return TSDB_CODE_SUCCESS; + if (pReader->code == TSDB_CODE_SUCCESS) { + tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); + return pReader->code; } // load the last data block of current table @@ -3246,7 +3247,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { } } - return code; + return (pReader->code != TSDB_CODE_SUCCESS)? pReader->code:code; } static int32_t doSumFileBlockRows(STsdbReader* pReader, SDataFReader* pFileReader) { @@ -3395,9 +3396,9 @@ static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { STableUidList* pUidList = &pStatus->uidList; while (1) { - if (pReader->flag == READER_STATUS_SHOULD_STOP) { - tsdbWarn("tsdb reader is stopped ASAP, %s", pReader->idStr); - return TSDB_CODE_SUCCESS; + if (pReader->code == TSDB_CODE_SUCCESS) { + tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); + return pReader->code; } STableBlockScanInfo** pBlockScanInfo = pStatus->pTableIter; @@ -3493,7 +3494,7 @@ static ERetrieveType doReadDataFromLastFiles(STsdbReader* pReader) { terrno = 0; code = doLoadLastBlockSequentially(pReader); - if (code != TSDB_CODE_SUCCESS || pReader->flag == READER_STATUS_SHOULD_STOP) { + if (code != TSDB_CODE_SUCCESS) { terrno = code; return TSDB_READ_RETURN; } @@ -3507,8 +3508,7 @@ static ERetrieveType 
doReadDataFromLastFiles(STsdbReader* pReader) { code = initForFirstBlockInFile(pReader, pBlockIter); // error happens or all the data files are completely checked - if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false) || - pReader->flag == READER_STATUS_SHOULD_STOP) { + if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { terrno = code; return TSDB_READ_RETURN; } @@ -3536,13 +3536,9 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { } code = doBuildDataBlock(pReader); - if (code != TSDB_CODE_SUCCESS || pReader->flag == READER_STATUS_SHOULD_STOP) { + if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { return code; } - - if (pResBlock->info.rows > 0) { - return TSDB_CODE_SUCCESS; - } } while (1) { @@ -3581,13 +3577,9 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { code = doBuildDataBlock(pReader); } - if (code != TSDB_CODE_SUCCESS || pReader->flag == READER_STATUS_SHOULD_STOP) { + if (code != TSDB_CODE_SUCCESS || pResBlock->info.rows > 0) { return code; } - - if (pResBlock->info.rows > 0) { - return TSDB_CODE_SUCCESS; - } } } @@ -4849,8 +4841,8 @@ int32_t tsdbNextDataBlock(STsdbReader* pReader, bool* hasNext) { *hasNext = false; - if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT) { - return code; + if (isEmptyQueryTimeWindow(&pReader->window) || pReader->step == EXTERNAL_ROWS_NEXT || pReader->code != TSDB_CODE_SUCCESS) { + return (pReader->code != TSDB_CODE_SUCCESS)? 
pReader->code:code; } SReaderStatus* pStatus = &pReader->status; @@ -5456,4 +5448,4 @@ void tsdbReaderSetId(STsdbReader* pReader, const char* idstr) { pReader->idStr = taosStrdup(idstr); } -void tsdbReaderSetCloseFlag(STsdbReader* pReader) { pReader->flag = READER_STATUS_SHOULD_STOP; } +void tsdbReaderSetCloseFlag(STsdbReader* pReader) { pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED; } From 1d8c517844c5b3c2d1386b5412b05d41215c24d4 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 5 May 2023 10:46:54 +0800 Subject: [PATCH 044/110] fix:[TS-3303]use stable name + child table name as key to save uid to avoid multi items for one table in submit block --- source/client/inc/clientSml.h | 2 ++ source/client/src/clientSml.c | 19 ++++++++++++- source/client/src/clientSmlJson.c | 2 +- source/client/src/clientSmlLine.c | 2 +- source/client/src/clientSmlTelnet.c | 2 +- tests/system-test/2-query/sml.py | 1 + utils/test/c/sml_test.c | 41 +++++++++++++++++++++++++++++ 7 files changed, 65 insertions(+), 4 deletions(-) diff --git a/source/client/inc/clientSml.h b/source/client/inc/clientSml.h index 3982c0d9aa..b20fc6f57a 100644 --- a/source/client/inc/clientSml.h +++ b/source/client/inc/clientSml.h @@ -169,6 +169,7 @@ typedef struct { int32_t uid; // used for automatic create child table SHashObj *childTables; + SHashObj *tableUids; SHashObj *superTables; SHashObj *pVgHash; @@ -242,6 +243,7 @@ int8_t smlGetTsTypeByLen(int32_t len); SSmlTableInfo* smlBuildTableInfo(int numRows, const char* measure, int32_t measureLen); SSmlSTableMeta* smlBuildSTableMeta(bool isDataFormat); int32_t smlSetCTableName(SSmlTableInfo *oneTable); +void getTableUid(SSmlHandle *info, SSmlLineInfo *currElement, SSmlTableInfo *tinfo); STableMeta* smlGetMeta(SSmlHandle *info, const void* measure, int32_t measureLen); int32_t is_same_child_table_telnet(const void *a, const void *b); int64_t smlParseOpenTsdbTime(SSmlHandle *info, const char *data, int32_t len); diff --git a/source/client/src/clientSml.c 
b/source/client/src/clientSml.c index dd3f50f440..c5f7d9a1e7 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -195,6 +195,20 @@ int32_t smlSetCTableName(SSmlTableInfo *oneTable) { return TSDB_CODE_SUCCESS; } +void getTableUid(SSmlHandle *info, SSmlLineInfo *currElement, SSmlTableInfo *tinfo){ + char key[TSDB_TABLE_NAME_LEN * 2 + 1] = {0}; + size_t nLen = strlen(tinfo->childTableName); + memcpy(key, currElement->measure, currElement->measureLen); + memcpy(key + currElement->measureLen + 1, tinfo->childTableName, nLen); + void *uid = taosHashGet(info->tableUids, key, currElement->measureLen + 1 + nLen); // use \0 as separator for stable name and child table name + if (uid == NULL) { + tinfo->uid = info->uid++; + taosHashPut(info->tableUids, key, currElement->measureLen + 1 + nLen, &tinfo->uid, sizeof(uint64_t)); + }else{ + tinfo->uid = *(uint64_t*)uid; + } +} + SSmlSTableMeta *smlBuildSTableMeta(bool isDataFormat) { SSmlSTableMeta *meta = (SSmlSTableMeta *)taosMemoryCalloc(sizeof(SSmlSTableMeta), 1); if (!meta) { @@ -1142,6 +1156,7 @@ void smlDestroyInfo(SSmlHandle *info) { taosHashCleanup(info->pVgHash); taosHashCleanup(info->childTables); taosHashCleanup(info->superTables); + taosHashCleanup(info->tableUids); for (int i = 0; i < taosArrayGetSize(info->tagJsonArray); i++) { cJSON *tags = (cJSON *)taosArrayGetP(info->tagJsonArray, i); @@ -1192,6 +1207,7 @@ SSmlHandle *smlBuildSmlInfo(TAOS *taos) { info->pVgHash = taosHashInit(16, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK); info->childTables = taosHashInit(16, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); + info->tableUids = taosHashInit(16, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); info->superTables = taosHashInit(16, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); info->id = smlGenId(); @@ -1202,7 +1218,7 @@ SSmlHandle *smlBuildSmlInfo(TAOS *taos) { info->valueJsonArray = 
taosArrayInit(8, POINTER_BYTES); info->preLineTagKV = taosArrayInit(8, sizeof(SSmlKv)); - if (NULL == info->pVgHash || NULL == info->childTables || NULL == info->superTables) { + if (NULL == info->pVgHash || NULL == info->childTables || NULL == info->superTables || NULL == info->tableUids) { uError("create SSmlHandle failed"); goto cleanup; } @@ -1428,6 +1444,7 @@ int32_t smlClearForRerun(SSmlHandle *info) { taosHashClear(info->childTables); taosHashClear(info->superTables); + taosHashClear(info->tableUids); if (!info->dataFormat) { if (unlikely(info->lines != NULL)) { diff --git a/source/client/src/clientSmlJson.c b/source/client/src/clientSmlJson.c index b0ae316031..7ccf930964 100644 --- a/source/client/src/clientSmlJson.c +++ b/source/client/src/clientSmlJson.c @@ -778,7 +778,7 @@ static int32_t smlParseTagsFromJSON(SSmlHandle *info, cJSON *tags, SSmlLineInfo tinfo->tags = taosArrayDup(preLineKV, NULL); smlSetCTableName(tinfo); - tinfo->uid = info->uid++; + getTableUid(info, elements, tinfo); if (info->dataFormat) { info->currSTableMeta->uid = tinfo->uid; tinfo->tableDataCtx = smlInitTableDataCtx(info->pQuery, info->currSTableMeta); diff --git a/source/client/src/clientSmlLine.c b/source/client/src/clientSmlLine.c index 1732473c11..2f7e8a0f97 100644 --- a/source/client/src/clientSmlLine.c +++ b/source/client/src/clientSmlLine.c @@ -312,7 +312,7 @@ static int32_t smlParseTagKv(SSmlHandle *info, char **sql, char *sqlEnd, SSmlLin } smlSetCTableName(tinfo); - tinfo->uid = info->uid++; + getTableUid(info, currElement, tinfo); if (info->dataFormat) { info->currSTableMeta->uid = tinfo->uid; tinfo->tableDataCtx = smlInitTableDataCtx(info->pQuery, info->currSTableMeta); diff --git a/source/client/src/clientSmlTelnet.c b/source/client/src/clientSmlTelnet.c index 036442573d..c5dd20ba7b 100644 --- a/source/client/src/clientSmlTelnet.c +++ b/source/client/src/clientSmlTelnet.c @@ -206,7 +206,7 @@ static int32_t smlParseTelnetTags(SSmlHandle *info, char *data, char *sqlEnd, 
SS tinfo->tags = taosArrayDup(preLineKV, NULL); smlSetCTableName(tinfo); - tinfo->uid = info->uid++; + getTableUid(info, elements, tinfo); if (info->dataFormat) { info->currSTableMeta->uid = tinfo->uid; tinfo->tableDataCtx = smlInitTableDataCtx(info->pQuery, info->currSTableMeta); diff --git a/tests/system-test/2-query/sml.py b/tests/system-test/2-query/sml.py index f96ed8a3ff..519957f6f9 100644 --- a/tests/system-test/2-query/sml.py +++ b/tests/system-test/2-query/sml.py @@ -34,6 +34,7 @@ class TDTestCase: if ret != 0: tdLog.info("sml_test ret != 0") + tdSql.query(f"select * from ts3303.stb2") # tdSql.execute('use sml_db') tdSql.query(f"select * from {dbname}.t_b7d815c9222ca64cdf2614c61de8f211") tdSql.checkRows(1) diff --git a/utils/test/c/sml_test.c b/utils/test/c/sml_test.c index f1f4bbc1fd..ffcd5d1b2e 100644 --- a/utils/test/c/sml_test.c +++ b/utils/test/c/sml_test.c @@ -1159,6 +1159,44 @@ int sml_td23881_Test() { return code; } +int sml_ts3303_Test() { + TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0); + + TAOS_RES *pRes = taos_query(taos, "drop database if exists ts3303"); + taos_free_result(pRes); + + pRes = taos_query(taos, "create database if not exists ts3303"); + taos_free_result(pRes); + + const char *sql[] = { + "stb2,t1=1,dataModelName=t0 f1=283i32 1632299372000", + "stb2,t1=1,dataModelName=t0 f1=106i32 1632299378000", + "stb2,t1=4,dataModelName=t0 f1=144i32 1629716944000", + "stb2,t1=4,dataModelName=t0 f1=125i32 1629717012000", + "stb2,t1=4,dataModelName=t0 f1=144i32 1629717012000", + "stb2,t1=4,dataModelName=t0 f1=107i32 1629717013000", + "stb2,t1=6,dataModelName=t0 f1=154i32 1629717140000", + "stb2,t1=6,dataModelName=t0 f1=93i32 1629717140000", + "stb2,t1=6,dataModelName=t0 f1=134i32 1629717140000", + "stb2,t1=4,dataModelName=t0 f1=73i32 1629717140000", + "stb2,t1=4,dataModelName=t0 f1=83i32 1629717140000", + "stb2,t1=4,dataModelName=t0 f1=72i32 1629717140000", + }; + + pRes = taos_query(taos, "use ts3303"); + 
taos_free_result(pRes); + + pRes = taos_schemaless_insert_ttl(taos, (char **)sql, sizeof(sql) / sizeof(sql[0]), TSDB_SML_LINE_PROTOCOL, + TSDB_SML_TIMESTAMP_MILLI_SECONDS, 20); + + int code = taos_errno(pRes); + printf("%s result1:%s\n", __FUNCTION__, taos_errstr(pRes)); + taos_free_result(pRes); + taos_close(taos); + + return code; +} + int sml_ttl_Test() { TAOS *taos = taos_connect("localhost", "root", "taosdata", NULL, 0); @@ -1336,6 +1374,9 @@ int main(int argc, char *argv[]) { ASSERT(!ret); ret = sml_ts2385_Test(); // this test case need config sml table name using ./sml_test config_file ASSERT(!ret); + ret = sml_ts3303_Test(); // this test case need config sml table name using ./sml_test config_file + ASSERT(!ret); + // for(int i = 0; i < sizeof(str)/sizeof(str[0]); i++){ // printf("str:%s \t %d\n", str[i], smlCalTypeSum(str[i], strlen(str[i]))); // } From 5006ecc484e239dfbc8cf1550c366d9ed1e4c30b Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 5 May 2023 11:45:14 +0800 Subject: [PATCH 045/110] fix: stable name not responsed while stable removed issue --- source/dnode/mnode/impl/src/mndStb.c | 3 +++ source/os/src/osLocale.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 63bcef2a5b..da3c3b98a8 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -2524,6 +2524,9 @@ int32_t mndValidateStbInfo(SMnode *pMnode, SSTableVersion *pStbVersions, int32_t if (mndBuildStbSchema(pMnode, pStbVersion->dbFName, pStbVersion->stbName, &metaRsp, &smaVer) != 0) { metaRsp.numOfColumns = -1; metaRsp.suid = pStbVersion->suid; + tstrncpy(metaRsp.dbFName, pStbVersion->dbFName, sizeof(metaRsp.dbFName)); + tstrncpy(metaRsp.tbName, pStbVersion->stbName, sizeof(metaRsp.tbName)); + tstrncpy(metaRsp.stbName, pStbVersion->stbName, sizeof(metaRsp.stbName)); taosArrayPush(hbRsp.pMetaRsp, &metaRsp); continue; } diff --git a/source/os/src/osLocale.c 
b/source/os/src/osLocale.c index 129faaacc8..136b8cf022 100644 --- a/source/os/src/osLocale.c +++ b/source/os/src/osLocale.c @@ -171,7 +171,7 @@ void taosGetSystemLocale(char *outLocale, char *outCharset) { strcpy(outLocale, "en_US.UTF-8"); } else { tstrncpy(outLocale, locale, TD_LOCALE_LEN); - printf("locale not configured, set to system default:%s\n", outLocale); + //printf("locale not configured, set to system default:%s\n", outLocale); } // if user does not specify the charset, extract it from locale From 38cbe0b768b383d2df9f1ce2b1c4b4f6c758626b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 5 May 2023 12:52:10 +0800 Subject: [PATCH 046/110] fix(query): fix the invalid copmarison. --- source/dnode/vnode/src/tsdb/tsdbRead.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 8f7ebc6c5c..d0a0ea7947 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -3094,7 +3094,7 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; while (1) { - if (pReader->code == TSDB_CODE_SUCCESS) { + if (pReader->code != TSDB_CODE_SUCCESS) { tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); return pReader->code; } @@ -3396,7 +3396,7 @@ static int32_t buildBlockFromBufferSequentially(STsdbReader* pReader) { STableUidList* pUidList = &pStatus->uidList; while (1) { - if (pReader->code == TSDB_CODE_SUCCESS) { + if (pReader->code != TSDB_CODE_SUCCESS) { tsdbWarn("tsdb reader is stopped ASAP, code:%s, %s", strerror(pReader->code), pReader->idStr); return pReader->code; } From 3fb2d7656d2d39941aaa25b29759a7dd9bf81f0e Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 5 May 2023 13:39:37 +0800 Subject: [PATCH 047/110] fix:[TS-3303]use stable name + child table name as key to save uid to avoid multi items for one table in 
submit block --- tests/system-test/2-query/sml.py | 2 ++ utils/test/c/sml_test.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/tests/system-test/2-query/sml.py b/tests/system-test/2-query/sml.py index 519957f6f9..2f97118fbf 100644 --- a/tests/system-test/2-query/sml.py +++ b/tests/system-test/2-query/sml.py @@ -35,6 +35,8 @@ class TDTestCase: tdLog.info("sml_test ret != 0") tdSql.query(f"select * from ts3303.stb2") + tdSql.query(f"select * from ts3303.meters") + # tdSql.execute('use sml_db') tdSql.query(f"select * from {dbname}.t_b7d815c9222ca64cdf2614c61de8f211") tdSql.checkRows(1) diff --git a/utils/test/c/sml_test.c b/utils/test/c/sml_test.c index ffcd5d1b2e..f1dc8ebe79 100644 --- a/utils/test/c/sml_test.c +++ b/utils/test/c/sml_test.c @@ -1183,6 +1183,11 @@ int sml_ts3303_Test() { "stb2,t1=4,dataModelName=t0 f1=72i32 1629717140000", }; + const char *sql1[] = { + "meters,location=California.LosAngeles,groupid=2 current=11.8,voltage=221,phase=\"2022-02-0210:22:22\" 1626006833339000000", + "meters,groupid=2,location=California.LosAngeles current=11.8,voltage=221,phase=\"2022-02-0210:22:22\" 1626006833339000000", + }; + pRes = taos_query(taos, "use ts3303"); taos_free_result(pRes); @@ -1190,8 +1195,16 @@ int sml_ts3303_Test() { TSDB_SML_TIMESTAMP_MILLI_SECONDS, 20); int code = taos_errno(pRes); + printf("%s result0:%s\n", __FUNCTION__, taos_errstr(pRes)); + taos_free_result(pRes); + ASSERT(code == 0); + + pRes = taos_schemaless_insert_ttl(taos, (char **)sql1, sizeof(sql1) / sizeof(sql1[0]), TSDB_SML_LINE_PROTOCOL, + TSDB_SML_TIMESTAMP_NANO_SECONDS, 20); + printf("%s result1:%s\n", __FUNCTION__, taos_errstr(pRes)); taos_free_result(pRes); + taos_close(taos); return code; From 9b5c205498bc85480a1405ce91f9d8879613ff70 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 5 May 2023 13:41:25 +0800 Subject: [PATCH 048/110] fix(stream): set correct ssdatablock for tqRetrieveDataBlock --- source/libs/executor/src/scanoperator.c | 16 ++++++---------- 1 
file changed, 6 insertions(+), 10 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 0f4e18105c..5122e0e439 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1649,14 +1649,12 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; while (tqNextBlockImpl(pInfo->tqReader)) { - SSDataBlock block = {0}; - - int32_t code = tqRetrieveDataBlock(&block, pInfo->tqReader, NULL); - if (code != TSDB_CODE_SUCCESS || block.info.rows == 0) { + int32_t code = tqRetrieveDataBlock(pInfo->tqReader->pResBlock, pInfo->tqReader, NULL); + if (code != TSDB_CODE_SUCCESS || pInfo->tqReader->pResBlock->info.rows == 0) { continue; } - setBlockIntoRes(pInfo, &block, true); + setBlockIntoRes(pInfo, pInfo->tqReader->pResBlock, true); if (pBlockInfo->rows > 0) { return pInfo->pRes; @@ -2075,14 +2073,12 @@ FETCH_NEXT_BLOCK: blockDataCleanup(pInfo->pRes); while (tqNextBlockImpl(pInfo->tqReader)) { - SSDataBlock block = {0}; - - int32_t code = tqRetrieveDataBlock(&block, pInfo->tqReader, NULL); - if (code != TSDB_CODE_SUCCESS || block.info.rows == 0) { + int32_t code = tqRetrieveDataBlock(pInfo->tqReader->pResBlock, pInfo->tqReader, NULL); + if (code != TSDB_CODE_SUCCESS || pInfo->tqReader->pResBlock->info.rows == 0) { continue; } - setBlockIntoRes(pInfo, &block, false); + setBlockIntoRes(pInfo, pInfo->tqReader->pResBlock, false); if (updateInfoIgnore(pInfo->pUpdateInfo, &pInfo->pRes->info.window, pInfo->pRes->info.id.groupId, pInfo->pRes->info.version)) { From 3976504b4758b8df0152d0916c255e4dd22cc0a6 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 5 May 2023 14:01:15 +0800 Subject: [PATCH 049/110] fix: subtable grant is not allowed --- source/libs/parser/src/parTranslater.c | 32 ++++++++++++++++++-------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c 
b/source/libs/parser/src/parTranslater.c index fcdc6418a0..7f77458d53 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -6666,22 +6666,36 @@ static int32_t createRealTableForGrantTable(SGrantStmt* pStmt, SRealTableNode** } static int32_t translateGrantTagCond(STranslateContext* pCxt, SGrantStmt* pStmt, SAlterUserReq* pReq) { - if (NULL == pStmt->pTagCond) { - return TSDB_CODE_SUCCESS; - } - if ('\0' == pStmt->tabName[0] || '*' == pStmt->tabName[0]) { - return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_SYNTAX_ERROR, - "The With clause can only be used for table level privilege"); - } - - pCxt->pCurrStmt = (SNode*)pStmt; SRealTableNode* pTable = NULL; int32_t code = createRealTableForGrantTable(pStmt, &pTable); if (TSDB_CODE_SUCCESS == code) { SName name; code = getTableMetaImpl(pCxt, toName(pCxt->pParseCxt->acctId, pTable->table.dbName, pTable->table.tableName, &name), &(pTable->pMeta)); + if (code) { + nodesDestroyNode((SNode*)pTable); + return code; + } + + if (TSDB_SUPER_TABLE != pTable->pMeta->tableType && TSDB_NORMAL_TABLE != pTable->pMeta->tableType) { + nodesDestroyNode((SNode*)pTable); + return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_SYNTAX_ERROR, + "Only supertable and normal table can be granted"); + } } + + if (TSDB_CODE_SUCCESS == code && NULL == pStmt->pTagCond) { + nodesDestroyNode((SNode*)pTable); + return TSDB_CODE_SUCCESS; + } + if ('\0' == pStmt->tabName[0] || '*' == pStmt->tabName[0]) { + nodesDestroyNode((SNode*)pTable); + return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_SYNTAX_ERROR, + "The With clause can only be used for table level privilege"); + } + + pCxt->pCurrStmt = (SNode*)pStmt; + if (TSDB_CODE_SUCCESS == code) { code = addNamespace(pCxt, pTable); } From e889249c54b2dcf40e3fa4c11a90ef9f3d130079 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 5 May 2023 14:33:09 +0800 Subject: [PATCH 050/110] fix: grant no table specifed issue --- 
source/libs/parser/src/parTranslater.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 7f77458d53..b598fffbc6 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -6667,6 +6667,15 @@ static int32_t createRealTableForGrantTable(SGrantStmt* pStmt, SRealTableNode** static int32_t translateGrantTagCond(STranslateContext* pCxt, SGrantStmt* pStmt, SAlterUserReq* pReq) { SRealTableNode* pTable = NULL; + if ('\0' == pStmt->tabName[0] || '*' == pStmt->tabName[0]) { + if (pStmt->pTagCond) { + return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_SYNTAX_ERROR, + "The With clause can only be used for table level privilege"); + } else { + return TSDB_CODE_SUCCESS; + } + } + int32_t code = createRealTableForGrantTable(pStmt, &pTable); if (TSDB_CODE_SUCCESS == code) { SName name; @@ -6688,11 +6697,6 @@ static int32_t translateGrantTagCond(STranslateContext* pCxt, SGrantStmt* pStmt, nodesDestroyNode((SNode*)pTable); return TSDB_CODE_SUCCESS; } - if ('\0' == pStmt->tabName[0] || '*' == pStmt->tabName[0]) { - nodesDestroyNode((SNode*)pTable); - return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_SYNTAX_ERROR, - "The With clause can only be used for table level privilege"); - } pCxt->pCurrStmt = (SNode*)pStmt; From 0a469484fa7159e70f754b197429554fe5006344 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 5 May 2023 15:01:27 +0800 Subject: [PATCH 051/110] fix:[TS-3277]table not exist if drop stable in another client when insert data using schemaless interface --- include/util/taoserror.h | 2 +- source/client/src/clientSml.c | 15 +++++++-------- source/util/src/terror.c | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index ab89466a19..082d0dde47 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -146,7 +146,7 @@ 
int32_t* taosGetErrno(); #define TSDB_CODE_TSC_CONN_KILLED TAOS_DEF_ERROR_CODE(0, 0x0215) #define TSDB_CODE_TSC_SQL_SYNTAX_ERROR TAOS_DEF_ERROR_CODE(0, 0x0216) #define TSDB_CODE_TSC_DB_NOT_SELECTED TAOS_DEF_ERROR_CODE(0, 0x0217) -#define TSDB_CODE_TSC_INVALID_TABLE_NAME TAOS_DEF_ERROR_CODE(0, 0x0218) +//#define TSDB_CODE_TSC_INVALID_TABLE_NAME TAOS_DEF_ERROR_CODE(0, 0x0218) #define TSDB_CODE_TSC_EXCEED_SQL_LIMIT TAOS_DEF_ERROR_CODE(0, 0x0219) #define TSDB_CODE_TSC_FILE_EMPTY TAOS_DEF_ERROR_CODE(0, 0x021A) #define TSDB_CODE_TSC_LINE_SYNTAX_ERROR TAOS_DEF_ERROR_CODE(0, 0x021B) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index dd3f50f440..2d86f1989d 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -1320,17 +1320,15 @@ static int32_t smlInsertData(SSmlHandle *info) { if (info->pRequest->dbList == NULL) { info->pRequest->dbList = taosArrayInit(1, TSDB_DB_FNAME_LEN); } - void *data = taosArrayReserve(info->pRequest->dbList, 1); - memcpy(data, info->pRequest->pDb, - TSDB_DB_FNAME_LEN > strlen(info->pRequest->pDb) ? 
strlen(info->pRequest->pDb) : TSDB_DB_FNAME_LEN); + char *data = (char*)taosArrayReserve(info->pRequest->dbList, 1); + SName pName = {TSDB_TABLE_NAME_T, info->taos->acctId, {0}, {0}}; + tstrncpy(pName.dbname, info->pRequest->pDb, sizeof(pName.dbname)); + tNameGetFullDbName(&pName, data); SSmlTableInfo **oneTable = (SSmlTableInfo **)taosHashIterate(info->childTables, NULL); while (oneTable) { SSmlTableInfo *tableData = *oneTable; - - SName pName = {TSDB_TABLE_NAME_T, info->taos->acctId, {0}, {0}}; - tstrncpy(pName.dbname, info->pRequest->pDb, sizeof(pName.dbname)); - memcpy(pName.tname, tableData->childTableName, strlen(tableData->childTableName)); + tstrncpy(pName.tname, tableData->sTableName, tableData->sTableNameLen + 1); if (info->pRequest->tableList == NULL) { info->pRequest->tableList = taosArrayInit(1, sizeof(SName)); @@ -1647,7 +1645,8 @@ TAOS_RES *taos_schemaless_insert_inner(TAOS *taos, char *lines[], char *rawLine, info->cost.endTime = taosGetTimestampUs(); info->cost.code = code; if (code == TSDB_CODE_TDB_INVALID_TABLE_SCHEMA_VER || code == TSDB_CODE_SDB_OBJ_CREATING || - code == TSDB_CODE_PAR_VALUE_TOO_LONG || code == TSDB_CODE_MND_TRANS_CONFLICT) { + code == TSDB_CODE_PAR_VALUE_TOO_LONG || code == TSDB_CODE_MND_TRANS_CONFLICT || + code == TSDB_CODE_PAR_TABLE_NOT_EXIST) { if (cnt++ >= 10) { uInfo("SML:%" PRIx64 " retry:%d/10 end code:%d, msg:%s", info->id, cnt, code, tstrerror(code)); break; diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 002d605793..09cb1a20cb 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -122,7 +122,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TSC_NO_WRITE_AUTH, "No write permission") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_CONN_KILLED, "Connection killed") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_SQL_SYNTAX_ERROR, "Syntax error in SQL") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_DB_NOT_SELECTED, "Database not specified or available") -TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_TABLE_NAME, "Table does not exist") 
+//TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_TABLE_NAME, "Table does not exist") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_EXCEED_SQL_LIMIT, "SQL statement too long") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_FILE_EMPTY, "File is empty") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_LINE_SYNTAX_ERROR, "Syntax error in Line") From 8b12d4d3dad000c91fef10a218db6231a72f3ee3 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 5 May 2023 15:02:03 +0800 Subject: [PATCH 052/110] fix(stream): secure the delete task operation. TD-1198 --- source/dnode/vnode/src/tq/tq.c | 2 ++ source/dnode/vnode/src/tq/tqRead.c | 1 - source/dnode/vnode/src/tq/tqRestore.c | 1 + source/libs/stream/src/streamExec.c | 1 + source/libs/stream/src/streamMeta.c | 4 +++- 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 12b81b6c3f..6b46a6a12f 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1326,7 +1326,9 @@ int32_t tqStartStreamTasks(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; + taosWLockLatch(&pMeta->lock); + int32_t numOfTasks = taosHashGetSize(pTq->pStreamMeta->pTasks); if (numOfTasks == 0) { tqInfo("vgId:%d no stream tasks exists", vgId); diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index ead00dcc35..0deac6e82c 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -1039,6 +1039,5 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { } taosWUnLockLatch(&pTq->pStreamMeta->lock); - return 0; } diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index 58cb7b9e63..c3c7f7ba7b 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -36,6 +36,7 @@ int32_t tqStreamTasksScanWal(STQ* pTq) { if (shouldIdle) { taosWLockLatch(&pMeta->lock); + pMeta->walScanCounter -= 1; times = pMeta->walScanCounter; diff 
--git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index f33e126068..aa7aaf93d4 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -313,6 +313,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { pTask->chkInfo = (SCheckpointInfo) {.version = dataVer, .id = ckId, .currentVer = pTask->chkInfo.currentVer}; taosWLockLatch(&pTask->pMeta->lock); + streamMetaSaveTask(pTask->pMeta, pTask); if (streamMetaCommit(pTask->pMeta) < 0) { taosWUnLockLatch(&pTask->pMeta->lock); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 822ae2a485..0d797f0bcb 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -216,12 +216,14 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); if (ppTask) { SStreamTask* pTask = *ppTask; + + taosWLockLatch(&pMeta->lock); + taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t)); tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn); atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__STOP); - taosWLockLatch(&pMeta->lock); streamMetaReleaseTask(pMeta, pTask); taosWUnLockLatch(&pMeta->lock); } From 4576e90fe608e7f9061d4f2389a69829ce576382 Mon Sep 17 00:00:00 2001 From: haoranchen Date: Fri, 5 May 2023 15:57:06 +0800 Subject: [PATCH 053/110] Update run_case.sh --- tests/parallel_test/run_case.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/parallel_test/run_case.sh b/tests/parallel_test/run_case.sh index ffc23ba6d2..56e898c5b2 100755 --- a/tests/parallel_test/run_case.sh +++ b/tests/parallel_test/run_case.sh @@ -79,7 +79,7 @@ md5sum /home/TDinternal/debug/build/lib/libtaos.so #define taospy 2.7.6 pip3 list|grep taospy pip3 uninstall taospy -y -pip3 install taospy==2.7.6 +pip3 install --default-timeout=60 taospy==2.7.6 $TIMEOUT_CMD $cmd RET=$? 
From 3550f9b8c010606ad110957d8fa4539945b08d32 Mon Sep 17 00:00:00 2001 From: haoranchen Date: Fri, 5 May 2023 15:57:49 +0800 Subject: [PATCH 054/110] Update run_case.sh --- tests/parallel_test/run_case.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/parallel_test/run_case.sh b/tests/parallel_test/run_case.sh index 56e898c5b2..2d736e1414 100755 --- a/tests/parallel_test/run_case.sh +++ b/tests/parallel_test/run_case.sh @@ -79,7 +79,7 @@ md5sum /home/TDinternal/debug/build/lib/libtaos.so #define taospy 2.7.6 pip3 list|grep taospy pip3 uninstall taospy -y -pip3 install --default-timeout=60 taospy==2.7.6 +pip3 install --default-timeout=120 taospy==2.7.6 $TIMEOUT_CMD $cmd RET=$? From 8286c145b68ff352d350162741858e9a1123fa4b Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Fri, 5 May 2023 16:15:15 +0800 Subject: [PATCH 055/110] fix(tdb/decoder): free decoder's value if not needed by user --- source/libs/tdb/src/db/tdbBtree.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/libs/tdb/src/db/tdbBtree.c b/source/libs/tdb/src/db/tdbBtree.c index 6df2b40000..c49b5726b6 100644 --- a/source/libs/tdb/src/db/tdbBtree.c +++ b/source/libs/tdb/src/db/tdbBtree.c @@ -1814,6 +1814,11 @@ int tdbBtreeNext(SBTC *pBtc, void **ppKey, int *kLen, void **ppVal, int *vLen) { *ppVal = pVal; *vLen = cd.vLen; + } else { + if (TDB_CELLDECODER_FREE_VAL(&cd)) { + tdbTrace("tdb/btree-next2 decoder: %p pVal free: %p", &cd, cd.pVal); + tdbFree(cd.pVal); + } } ret = tdbBtcMoveToNext(pBtc); From fdee112ae4c970baa6ce83b0b834b1b6a13cb69b Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 5 May 2023 17:31:08 +0800 Subject: [PATCH 056/110] fix:[TS-3277]table not exist if drop stable in another client when insert data using schemaless interface --- source/client/src/clientSml.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 2d86f1989d..2fb20aca1b 100644 --- a/source/client/src/clientSml.c +++ 
b/source/client/src/clientSml.c @@ -1335,6 +1335,8 @@ static int32_t smlInsertData(SSmlHandle *info) { } taosArrayPush(info->pRequest->tableList, &pName); + tstrncpy(pName.tname, tableData->childTableName, strlen(tableData->childTableName) + 1); + SRequestConnInfo conn = {0}; conn.pTrans = info->taos->pAppInfo->pTransporter; conn.requestId = info->pRequest->requestId; From ee0e0b4ad4ad28fa9b8da5d0b466616f189ce13d Mon Sep 17 00:00:00 2001 From: dm chen Date: Fri, 5 May 2023 17:46:24 +0800 Subject: [PATCH 057/110] Update taoserror.h --- include/util/taoserror.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 3847757d9d..2b116f0c82 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -261,7 +261,7 @@ int32_t* taosGetErrno(); // #define TSDB_CODE_MND_INVALID_STABLE_NAME TAOS_DEF_ERROR_CODE(0, 0x036D) // 2.x #define TSDB_CODE_MND_INVALID_STB_OPTION TAOS_DEF_ERROR_CODE(0, 0x036E) #define TSDB_CODE_MND_INVALID_ROW_BYTES TAOS_DEF_ERROR_CODE(0, 0x036F) -#define TSDB_CODE_MND_BIG_FIELD_VALUE TAOS_DEF_ERROR_CODE(0, 0x0370) +#define TSDB_CODE_MND_FIELD_VALUE_OVERFLOW TAOS_DEF_ERROR_CODE(0, 0x0370) // mnode-func From 59f103a4772d15999c20eb9c97bcbf8ed42c72d8 Mon Sep 17 00:00:00 2001 From: dm chen Date: Fri, 5 May 2023 17:47:32 +0800 Subject: [PATCH 058/110] Update mndStb.c --- source/dnode/mnode/impl/src/mndStb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 3407d4a1dc..6b3b69e003 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -798,7 +798,7 @@ int32_t mndBuildStbFromReq(SMnode *pMnode, SStbObj *pDst, SMCreateStbReq *pCreat } if(pDst->nextColId < 0 || pDst->nextColId >= 0x7fff - pDst->numOfColumns - pDst->numOfTags){ - terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; + terrno = TSDB_CODE_MND_FIELD_VALUE_OVERFLOW; return -1; } From 
4242f2c4ccfa09af7079f672083c0b37c9c5a637 Mon Sep 17 00:00:00 2001 From: dm chen Date: Fri, 5 May 2023 17:49:27 +0800 Subject: [PATCH 059/110] Update mndStb.c --- source/dnode/mnode/impl/src/mndStb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 6b3b69e003..21e2bbaf92 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -933,7 +933,7 @@ static int32_t mndBuildStbFromAlter(SStbObj *pStb, SStbObj *pDst, SMCreateStbReq } if(pDst->nextColId < 0 && pDst->nextColId >= 0x7fff - pDst->numOfColumns - pDst->numOfTags){ - terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; + terrno = TSDB_CODE_MND_FIELD_VALUE_OVERFLOW; return -1; } @@ -1165,7 +1165,7 @@ static int32_t mndAddSuperTableTag(const SStbObj *pOld, SStbObj *pNew, SArray *p } if(pNew->nextColId < 0 && pNew->nextColId >= 0x7fff - ntags){ - terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; + terrno = TSDB_CODE_MND_FIELD_VALUE_OVERFLOW; return -1; } @@ -1477,7 +1477,7 @@ static int32_t mndAddSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, SArray } if(pNew->nextColId < 0 && pNew->nextColId >= 0x7fff - ncols){ - terrno = TSDB_CODE_MND_BIG_FIELD_VALUE; + terrno = TSDB_CODE_MND_FIELD_VALUE_OVERFLOW; return -1; } From 87696d8ab313a91fa8a7ffe515ed9f15b8b2d64c Mon Sep 17 00:00:00 2001 From: dm chen Date: Fri, 5 May 2023 17:50:06 +0800 Subject: [PATCH 060/110] Update terror.c --- source/util/src/terror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/util/src/terror.c b/source/util/src/terror.c index f3aff1a200..a875f1d209 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -203,7 +203,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_COLUMN_ALREADY_EXIST, "Column already exists TAOS_DEFINE_ERROR(TSDB_CODE_MND_COLUMN_NOT_EXIST, "Column does not exist") TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_STB_OPTION, "Invalid stable options") 
TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_ROW_BYTES, "Invalid row bytes") -TAOS_DEFINE_ERROR(TSDB_CODE_MND_BIG_FIELD_VALUE, "out of range and overflow") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_FIELD_VALUE_OVERFLOW, "out of range and overflow") // mnode-func TAOS_DEFINE_ERROR(TSDB_CODE_MND_INVALID_FUNC_NAME, "Invalid func name") From 08ce790cda0926560485963e1048f85356002ba1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 5 May 2023 18:20:27 +0800 Subject: [PATCH 061/110] fix(query): fix memory leak. --- source/dnode/vnode/src/tq/tqRead.c | 7 +++++++ source/libs/executor/src/scanoperator.c | 2 -- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 7ead634f8b..3aaf5cd60d 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -13,6 +13,7 @@ * along with this program. If not, see . */ +#include "tmsg.h" #include "tq.h" bool isValValidForTable(STqHandle* pHandle, SWalCont* pHead) { @@ -346,6 +347,10 @@ int32_t tqNextBlockInWal(STqReader* pReader) { int32_t nSubmitTbData = taosArrayGetSize(pReader->submit.aSubmitTbData); for (int32_t i = 0; i < nSubmitTbData; i++) { SSubmitTbData* pData = taosArrayGet(pReader->submit.aSubmitTbData, i); + if (pData->pCreateTbReq != NULL) { + taosArrayDestroy(pData->pCreateTbReq->ctb.tagName); + taosMemoryFreeClear(pData->pCreateTbReq); + } pData->aRowP = taosArrayDestroy(pData->aRowP); } pReader->submit.aSubmitTbData = taosArrayDestroy(pReader->submit.aSubmitTbData); @@ -530,6 +535,8 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa *pSubmitTbDataRet = pSubmitTbData; } + blockDataCleanup(pBlock); + int32_t sversion = pSubmitTbData->sver; int64_t suid = pSubmitTbData->suid; int64_t uid = pSubmitTbData->uid; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 5122e0e439..9b1b5235cc 100644 --- a/source/libs/executor/src/scanoperator.c +++ 
b/source/libs/executor/src/scanoperator.c @@ -1699,8 +1699,6 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { blockDataCleanup(pInfo->pRes); setBlockIntoRes(pInfo, pRes, true); if (pInfo->pRes->info.rows > 0) { - qDebug("doQueueScan get data from log %" PRId64 " rows, return, version:%" PRId64, pInfo->pRes->info.rows, - pTaskInfo->streamInfo.currentOffset.version); return pInfo->pRes; } } else if (type == FETCH_TYPE__NONE) { From 8958aabe4bc078c0b7d635e7b52ff99e44991abf Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 5 May 2023 19:33:30 +0800 Subject: [PATCH 062/110] fix(stream): opt task scan performance. --- include/libs/stream/tstream.h | 1 + source/dnode/snode/src/snode.c | 4 ++++ source/dnode/vnode/src/tq/tq.c | 9 ++++--- source/dnode/vnode/src/tq/tqPush.c | 5 ++-- source/dnode/vnode/src/tq/tqRestore.c | 34 ++++++++------------------- source/libs/stream/src/streamMeta.c | 26 +++++++++++++++++++- 6 files changed, 49 insertions(+), 30 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 48c15e9117..5fd9a8b12b 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -340,6 +340,7 @@ typedef struct SStreamMeta { TTB* pTaskDb; TTB* pCheckpointDb; SHashObj* pTasks; + SArray* pTaskList; // SArray void* ahandle; TXN* txn; FTaskExpand* expandFunc; diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index cefc4fa63e..7352bbc0fe 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -153,11 +153,15 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { ASSERT(pTask->taskLevel == TASK_LEVEL__AGG); // 2.save task + taosWLockLatch(&pSnode->pMeta->lock); code = streamMetaAddDeployedTask(pSnode->pMeta, -1, pTask); if (code < 0) { + taosWUnLockLatch(&pSnode->pMeta->lock); return -1; } + taosWUnLockLatch(&pSnode->pMeta->lock); + // 3.go through recover steps to fill history if (pTask->fillHistory) { 
streamSetParamForRecover(pTask); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 6b46a6a12f..792ff8677e 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -781,13 +781,17 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms tDecoderClear(&decoder); // 2.save task, use the newest commit version as the initial start version of stream task. + taosWLockLatch(&pTq->pStreamMeta->lock); code = streamMetaAddDeployedTask(pTq->pStreamMeta, sversion, pTask); if (code < 0) { tqError("vgId:%d failed to add s-task:%s, total:%d", TD_VID(pTq->pVnode), pTask->id.idStr, streamMetaGetNumOfTasks(pTq->pStreamMeta)); + taosWUnLockLatch(&pTq->pStreamMeta->lock); return -1; } + taosWUnLockLatch(&pTq->pStreamMeta->lock); + // 3.go through recover steps to fill history if (pTask->fillHistory) { streamTaskCheckDownstream(pTask, sversion); @@ -1323,13 +1327,12 @@ FAIL: int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } int32_t tqStartStreamTasks(STQ* pTq) { - int32_t vgId = TD_VID(pTq->pVnode); - + int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; taosWLockLatch(&pMeta->lock); - int32_t numOfTasks = taosHashGetSize(pTq->pStreamMeta->pTasks); + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); if (numOfTasks == 0) { tqInfo("vgId:%d no stream tasks exists", vgId); taosWUnLockLatch(&pTq->pStreamMeta->lock); diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 0575b7299d..6a51f74908 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -262,14 +262,15 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v // taosWUnLockLatch(&pTq->lock); } - tqDebug("handle submit, restore:%d, size:%d", pTq->pVnode->restored, (int)taosHashGetSize(pTq->pStreamMeta->pTasks)); + int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta); 
+ tqDebug("handle submit, restore:%d, size:%d", pTq->pVnode->restored, numOfTasks); // push data for stream processing: // 1. the vnode has already been restored. // 2. the vnode should be the leader. // 3. the stream is not suspended yet. if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored) { - if (taosHashGetSize(pTq->pStreamMeta->pTasks) == 0) { + if (numOfTasks == 0) { return 0; } diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index c3c7f7ba7b..1e45f578f6 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -57,42 +57,28 @@ int32_t tqStreamTasksScanWal(STQ* pTq) { return 0; } -static SArray* extractTaskIdList(SStreamMeta* pStreamMeta, int32_t numOfTasks) { - SArray* pTaskIdList = taosArrayInit(numOfTasks, sizeof(int32_t)); - void* pIter = NULL; - - taosWLockLatch(&pStreamMeta->lock); - while(1) { - pIter = taosHashIterate(pStreamMeta->pTasks, pIter); - if (pIter == NULL) { - break; - } - - SStreamTask* pTask = *(SStreamTask**)pIter; - taosArrayPush(pTaskIdList, &pTask->id.taskId); - } - - taosWUnLockLatch(&pStreamMeta->lock); - return pTaskIdList; -} - int32_t createStreamRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { *pScanIdle = true; bool noNewDataInWal = true; int32_t vgId = pStreamMeta->vgId; - int32_t numOfTasks = taosHashGetSize(pStreamMeta->pTasks); + int32_t numOfTasks = taosArrayGetSize(pStreamMeta->pTaskList); if (numOfTasks == 0) { return TSDB_CODE_SUCCESS; } + SArray* pTaskList = NULL; + taosWLockLatch(&pStreamMeta->lock); + pTaskList = taosArrayDup(pStreamMeta->pTaskList, NULL); + taosWUnLockLatch(&pStreamMeta->lock); + tqDebug("vgId:%d start to check wal to extract new submit block for %d tasks", vgId, numOfTasks); - SArray* pTaskIdList = extractTaskIdList(pStreamMeta, numOfTasks); // update the new task number - numOfTasks = taosArrayGetSize(pTaskIdList); + numOfTasks = taosArrayGetSize(pTaskList); + for (int32_t i = 0; i < 
numOfTasks; ++i) { - int32_t* pTaskId = taosArrayGet(pTaskIdList, i); + int32_t* pTaskId = taosArrayGet(pTaskList, i); SStreamTask* pTask = streamMetaAcquireTask(pStreamMeta, *pTaskId); if (pTask == NULL) { continue; @@ -166,7 +152,7 @@ int32_t createStreamRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { *pScanIdle = true; } - taosArrayDestroy(pTaskIdList); + taosArrayDestroy(pTaskList); return 0; } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 0d797f0bcb..aefe7885f9 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -57,6 +57,13 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } + // task list + pMeta->pTaskList = taosArrayInit(4, sizeof(int32_t)); + if (pMeta->pTaskList == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + if (streamMetaBegin(pMeta) < 0) { goto _err; } @@ -70,6 +77,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF _err: taosMemoryFree(pMeta->path); if (pMeta->pTasks) taosHashCleanup(pMeta->pTasks); + if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList); if (pMeta->pTaskDb) tdbTbClose(pMeta->pTaskDb); if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb); if (pMeta->db) tdbClose(pMeta->db); @@ -100,6 +108,7 @@ void streamMetaClose(SStreamMeta* pMeta) { } taosHashCleanup(pMeta->pTasks); + pMeta->pTaskList = taosArrayDestroy(pMeta->pTaskList); taosMemoryFree(pMeta->path); taosMemoryFree(pMeta); } @@ -180,11 +189,15 @@ int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* } taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, POINTER_BYTES); + taosArrayPush(pMeta->pTaskList, &pTask->id.taskId); return 0; } int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta) { - return (int32_t) taosHashGetSize(pMeta->pTasks); + size_t size = taosHashGetSize(pMeta->pTasks); + ASSERT(taosArrayGetSize(pMeta->pTaskList) == 
taosHashGetSize(pMeta->pTasks)); + + return (int32_t) size; } SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId) { @@ -224,6 +237,15 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__STOP); + int32_t num = taosArrayGetSize(pMeta->pTaskList); + for(int32_t i = 0; i < num; ++i) { + int32_t* pTaskId = taosArrayGet(pMeta->pTaskList, i); + if (*pTaskId == taskId) { + taosArrayRemove(pMeta->pTaskList, i); + break; + } + } + streamMetaReleaseTask(pMeta, pTask); taosWUnLockLatch(&pMeta->lock); } @@ -308,6 +330,8 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } + taosArrayPush(pMeta->pTaskList, &pTask->id.taskId); + if (pTask->fillHistory) { pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; streamTaskCheckDownstream(pTask, ver); From 1e9bca79a420717d553931c8e9dbb9505d2e009a Mon Sep 17 00:00:00 2001 From: Ping Xiao Date: Fri, 5 May 2023 19:38:26 +0800 Subject: [PATCH 063/110] update crash gen push message --- tests/pytest/auto_crash_gen.py | 26 +++++++++---------- tests/pytest/auto_crash_gen_valgrind.py | 20 +++++++------- .../pytest/auto_crash_gen_valgrind_cluster.py | 24 ++++++++--------- 3 files changed, 32 insertions(+), 38 deletions(-) diff --git a/tests/pytest/auto_crash_gen.py b/tests/pytest/auto_crash_gen.py index 56629ede13..5af2f055cd 100755 --- a/tests/pytest/auto_crash_gen.py +++ b/tests/pytest/auto_crash_gen.py @@ -1,3 +1,4 @@ +import datetime import os import socket import requests @@ -238,17 +239,7 @@ def start_taosd(): start_cmd = 'cd %s && python3 test.py >>/dev/null '%(start_path) os.system(start_cmd) -def get_cmds(args_list): - # build_path = get_path() - # if repo == "community": - # crash_gen_path = build_path[:-5]+"community/tests/pytest/" - # elif repo == "TDengine": - # crash_gen_path = build_path[:-5]+"/tests/pytest/" - # else: - # pass - - # crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind -p -t 10 -s 1000 -g 
0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path) - +def get_cmds(args_list): crash_gen_cmd = get_auto_mix_cmds(args_list,valgrind=valgrind_mode) return crash_gen_cmd @@ -295,7 +286,7 @@ def check_status(): elif "Crash_Gen is now exiting with status code: 0" in run_code: return 0 else: - return 2 + return 2 def main(): @@ -310,7 +301,7 @@ def main(): build_path = get_path() - os.system("pip install git+https://github.com/taosdata/taos-connector-python.git") + if repo =="community": crash_gen_path = build_path[:-5]+"community/tests/pytest/" elif repo =="TDengine": @@ -334,7 +325,9 @@ def main(): if not os.path.exists(run_dir): os.mkdir(run_dir) print(crash_cmds) + starttime = datetime.datetime.now() run_crash_gen(crash_cmds) + endtime = datetime.datetime.now() status = check_status() print("exit status : ", status) @@ -349,7 +342,12 @@ def main(): print('======== crash_gen run sucess and exit as expected ========') try: - text = f"crash_gen instance exit status of docker [ {hostname} ] is : {msg_dict[status]}\n " + f" and git commit : {git_commit}" + text = f'''exit status: {msg_dict[status]} + git commit : {git_commit} + hostname: {hostname} + start time: {starttime} + end time: {endtime} + cmd: {crash_cmds}''' send_msg(get_msg(text)) except Exception as e: print("exception:", e) diff --git a/tests/pytest/auto_crash_gen_valgrind.py b/tests/pytest/auto_crash_gen_valgrind.py index 22fc5a480f..49e2c43f84 100755 --- a/tests/pytest/auto_crash_gen_valgrind.py +++ b/tests/pytest/auto_crash_gen_valgrind.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 +import datetime import os import socket import requests @@ -241,15 +242,6 @@ def start_taosd(): os.system(start_cmd +">>/dev/null") def get_cmds(args_list): - # build_path = get_path() - # if repo == "community": - # crash_gen_path = build_path[:-5]+"community/tests/pytest/" - # elif repo == "TDengine": - # crash_gen_path = build_path[:-5]+"/tests/pytest/" - # else: - # pass - - # crash_gen_cmd = 'cd %s && 
./crash_gen.sh --valgrind -p -t 10 -s 1000 -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path) crash_gen_cmd = get_auto_mix_cmds(args_list,valgrind=valgrind_mode) return crash_gen_cmd @@ -343,7 +335,6 @@ def main(): args = limits(args) build_path = get_path() - os.system("pip install git+https://github.com/taosdata/taos-connector-python.git >>/dev/null") if repo =="community": crash_gen_path = build_path[:-5]+"community/tests/pytest/" elif repo =="TDengine": @@ -368,7 +359,9 @@ def main(): if not os.path.exists(run_dir): os.mkdir(run_dir) print(crash_cmds) + starttime = datetime.datetime.now() run_crash_gen(crash_cmds) + endtime = datetime.datetime.now() status = check_status() # back_path = os.path.join(core_path,"valgrind_report") @@ -384,7 +377,12 @@ def main(): print('======== crash_gen run sucess and exit as expected ========') try: - text = f"crash_gen instance exit status of docker [ {hostname} ] is : {msg_dict[status]}\n " + f" and git commit : {git_commit}" + text = f'''exit status: {msg_dict[status]} + git commit : {git_commit} + hostname: {hostname} + start time: {starttime} + end time: {endtime} + cmd: {crash_cmds}''' send_msg(get_msg(text)) except Exception as e: print("exception:", e) diff --git a/tests/pytest/auto_crash_gen_valgrind_cluster.py b/tests/pytest/auto_crash_gen_valgrind_cluster.py index 547de9af47..5189ff4262 100755 --- a/tests/pytest/auto_crash_gen_valgrind_cluster.py +++ b/tests/pytest/auto_crash_gen_valgrind_cluster.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 +import datetime import os import socket import requests @@ -241,16 +242,7 @@ def start_taosd(): os.system(start_cmd +">>/dev/null") def get_cmds(args_list): - # build_path = get_path() - # if repo == "community": - # crash_gen_path = build_path[:-5]+"community/tests/pytest/" - # elif repo == "TDengine": - # crash_gen_path = build_path[:-5]+"/tests/pytest/" - # else: - # pass - - # crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind -p -t 10 -s 1000 -g 
0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path) - + crash_gen_cmd = get_auto_mix_cmds(args_list,valgrind=valgrind_mode) return crash_gen_cmd @@ -342,8 +334,7 @@ def main(): args = random_args(args_list) args = limits(args) - build_path = get_path() - os.system("pip install git+https://github.com/taosdata/taos-connector-python.git >>/dev/null") + build_path = get_path() if repo =="community": crash_gen_path = build_path[:-5]+"community/tests/pytest/" elif repo =="TDengine": @@ -368,7 +359,9 @@ def main(): if not os.path.exists(run_dir): os.mkdir(run_dir) print(crash_cmds) + starttime = datetime.datetime.now() run_crash_gen(crash_cmds) + endtime = datetime.datetime.now() status = check_status() # back_path = os.path.join(core_path,"valgrind_report") @@ -384,7 +377,12 @@ def main(): print('======== crash_gen run sucess and exit as expected ========') try: - text = f"crash_gen instance exit status of docker [ {hostname} ] is : {msg_dict[status]}\n " + f" and git commit : {git_commit}" + text = f'''exit status: {msg_dict[status]} + git commit : {git_commit} + hostname: {hostname} + start time: {starttime} + end time: {endtime} + cmd: {crash_cmds}''' send_msg(get_msg(text)) except Exception as e: print("exception:", e) From 5119594584cc903bb00a60a73f910ef94af3f93d Mon Sep 17 00:00:00 2001 From: Ping Xiao Date: Fri, 5 May 2023 20:11:48 +0800 Subject: [PATCH 064/110] update push message --- tests/pytest/auto_crash_gen.py | 11 ++++++----- tests/pytest/auto_crash_gen_valgrind.py | 11 ++++++----- tests/pytest/auto_crash_gen_valgrind_cluster.py | 11 ++++++----- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/tests/pytest/auto_crash_gen.py b/tests/pytest/auto_crash_gen.py index 5af2f055cd..b1f23b9c4a 100755 --- a/tests/pytest/auto_crash_gen.py +++ b/tests/pytest/auto_crash_gen.py @@ -342,12 +342,13 @@ def main(): print('======== crash_gen run sucess and exit as expected ========') try: + cmd = crash_cmds.split('&')[2] text = f'''exit 
status: {msg_dict[status]} - git commit : {git_commit} - hostname: {hostname} - start time: {starttime} - end time: {endtime} - cmd: {crash_cmds}''' + git commit : {git_commit} + hostname: {hostname} + start time: {starttime} + end time: {endtime} + cmd: {cmd}''' send_msg(get_msg(text)) except Exception as e: print("exception:", e) diff --git a/tests/pytest/auto_crash_gen_valgrind.py b/tests/pytest/auto_crash_gen_valgrind.py index 49e2c43f84..9d83c2b401 100755 --- a/tests/pytest/auto_crash_gen_valgrind.py +++ b/tests/pytest/auto_crash_gen_valgrind.py @@ -377,12 +377,13 @@ def main(): print('======== crash_gen run sucess and exit as expected ========') try: + cmd = crash_cmds.split('&')[2] text = f'''exit status: {msg_dict[status]} - git commit : {git_commit} - hostname: {hostname} - start time: {starttime} - end time: {endtime} - cmd: {crash_cmds}''' + git commit : {git_commit} + hostname: {hostname} + start time: {starttime} + end time: {endtime} + cmd: {cmd}''' send_msg(get_msg(text)) except Exception as e: print("exception:", e) diff --git a/tests/pytest/auto_crash_gen_valgrind_cluster.py b/tests/pytest/auto_crash_gen_valgrind_cluster.py index 5189ff4262..8e387317a8 100755 --- a/tests/pytest/auto_crash_gen_valgrind_cluster.py +++ b/tests/pytest/auto_crash_gen_valgrind_cluster.py @@ -377,12 +377,13 @@ def main(): print('======== crash_gen run sucess and exit as expected ========') try: + cmd = crash_cmds.split('&')[2] text = f'''exit status: {msg_dict[status]} - git commit : {git_commit} - hostname: {hostname} - start time: {starttime} - end time: {endtime} - cmd: {crash_cmds}''' + git commit : {git_commit} + hostname: {hostname} + start time: {starttime} + end time: {endtime} + cmd: {cmd}''' send_msg(get_msg(text)) except Exception as e: print("exception:", e) From 50ba55561b3bcb879c5a3bf9b01b2ac60defd839 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 5 May 2023 22:11:59 +0800 Subject: [PATCH 065/110] fix(stream): set correct group id and table id. 
--- source/dnode/vnode/src/tq/tqRead.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 3aaf5cd60d..082e31ea91 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -565,6 +565,10 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa if (blockDataGetNumOfCols(pBlock) > 0) { blockDataDestroy(pReader->pResBlock); pReader->pResBlock = createDataBlock(); + pBlock = pReader->pResBlock; + + pBlock->info.id.uid = uid; + pBlock->info.version = pReader->msg.ver; } int32_t numOfCols = taosArrayGetSize(pReader->pColIdList); From ff6523fb02f72697d3b71a3342c3f9c16422eb2b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sat, 6 May 2023 09:14:03 +0800 Subject: [PATCH 066/110] enh: declare mndSplitVgroup in mndVgroup.h --- source/dnode/mnode/impl/inc/mndVgroup.h | 2 ++ source/dnode/mnode/impl/src/mndVgroup.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/inc/mndVgroup.h b/source/dnode/mnode/impl/inc/mndVgroup.h index 0229735952..94c4eae83f 100644 --- a/source/dnode/mnode/impl/inc/mndVgroup.h +++ b/source/dnode/mnode/impl/inc/mndVgroup.h @@ -50,6 +50,8 @@ void *mndBuildCreateVnodeReq(SMnode *, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *p void *mndBuildDropVnodeReq(SMnode *, SDnodeObj *pDnode, SDbObj *pDb, SVgObj *pVgroup, int32_t *pContLen); bool mndVgroupInDb(SVgObj *pVgroup, int64_t dbUid); +int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup); + #ifdef __cplusplus } #endif diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index ed1fddb63f..0003d07fd6 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -2006,7 +2006,7 @@ static int32_t mndAddAdjustVnodeHashRangeAction(SMnode *pMnode, STrans *pTrans, return 0; } -static int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg 
*pReq, SDbObj *pDb, SVgObj *pVgroup) { +int32_t mndSplitVgroup(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SVgObj *pVgroup) { int32_t code = -1; STrans *pTrans = NULL; SSdbRaw *pRaw = NULL; From 29fcd1b83aec652621cb27d2cb4af008ad187d44 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 6 May 2023 10:08:05 +0800 Subject: [PATCH 067/110] refactor: do some internal refactor. --- source/dnode/vnode/inc/vnode.h | 9 +- source/dnode/vnode/src/inc/vnodeInt.h | 1 - source/dnode/vnode/src/tq/tq.c | 85 ++------ source/dnode/vnode/src/tq/tqPush.c | 258 +----------------------- source/dnode/vnode/src/tq/tqRead.c | 53 ++--- source/dnode/vnode/src/tq/tqScan.c | 7 +- source/dnode/vnode/src/tq/tqUtil.c | 9 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 1 - source/libs/executor/inc/querytask.h | 2 +- source/libs/executor/src/executor.c | 1 + source/libs/executor/src/scanoperator.c | 9 +- source/libs/stream/src/streamExec.c | 3 +- source/libs/wal/src/walRead.c | 1 + 13 files changed, 58 insertions(+), 381 deletions(-) diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index c7424cd233..88460cd3ca 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -255,14 +255,13 @@ int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList); int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList); int32_t tqSeekVer(STqReader *pReader, int64_t ver, const char *id); -int32_t tqNextBlock(STqReader *pReader, SSDataBlock* pBlock); int32_t tqNextBlockInWal(STqReader* pReader); -int32_t extractSubmitMsgFromWal(SWalReader *pReader, SPackedData *pPackedData); - -int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); bool tqNextBlockImpl(STqReader *pReader); + +int32_t extractSubmitMsgFromWal(SWalReader *pReader, SPackedData *pPackedData); +int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); bool tqNextDataBlockFilterOut(STqReader 
*pReader, SHashObj *filterOutUids); -int32_t tqRetrieveDataBlock(SSDataBlock *pBlock, STqReader *pReader, SSubmitTbData **pSubmitTbDataRet); +int32_t tqRetrieveDataBlock(STqReader *pReader, SSubmitTbData **pSubmitTbDataRet); int32_t tqRetrieveTaosxBlock(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet); int32_t vnodeEnqueueStreamMsg(SVnode *pVnode, SRpcMsg *pMsg); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index eb2787595b..1aea479511 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -212,7 +212,6 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t version, char* msg, int32_t msg int32_t tqProcessTaskDropReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen); int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t version, char* msg, int32_t msgLen); -int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit); int32_t tqProcessSubmitReqForSubscribe(STQ* pTq); int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver); int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 12b81b6c3f..4997db684f 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1069,12 +1069,15 @@ int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); taosWLockLatch(&pTq->lock); - if(taosHashGetSize(pTq->pPushMgr) > 0){ - void *pIter = taosHashIterate(pTq->pPushMgr, NULL); - while(pIter){ + + if (taosHashGetSize(pTq->pPushMgr) > 0) { + void* pIter = taosHashIterate(pTq->pPushMgr, NULL); + + while (pIter) { STqHandle* pHandle = *(STqHandle**)pIter; - tqDebug("vgId:%d start set submit for pHandle:%p, consume id:0x%"PRIx64, vgId, pHandle, pHandle->consumerId); - if(ASSERT(pHandle->msg != NULL)){ + tqDebug("vgId:%d start set submit for pHandle:%p, 
consumer:0x%" PRIx64, vgId, pHandle, pHandle->consumerId); + + if (ASSERT(pHandle->msg != NULL)) { tqError("pHandle->msg should not be null"); break; }else{ @@ -1083,77 +1086,15 @@ int32_t tqProcessSubmitReqForSubscribe(STQ* pTq) { taosMemoryFree(pHandle->msg); pHandle->msg = NULL; } + pIter = taosHashIterate(pTq->pPushMgr, pIter); } + taosHashClear(pTq->pPushMgr); } + // unlock taosWUnLockLatch(&pTq->lock); - - return 0; -} - -int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { -#if 0 - void* pIter = NULL; - SStreamDataSubmit2* pSubmit = streamDataSubmitNew(submit, STREAM_INPUT__DATA_SUBMIT); - if (pSubmit == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tqError("failed to create data submit for stream since out of memory"); - saveOffsetForAllTasks(pTq, submit.ver); - return -1; - } - - SArray* pInputQueueFullTasks = taosArrayInit(4, POINTER_BYTES); - - while (1) { - pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); - if (pIter == NULL) { - break; - } - - SStreamTask* pTask = *(SStreamTask**)pIter; - if (pTask->taskLevel != TASK_LEVEL__SOURCE) { - continue; - } - - if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { - tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, - pTask->status.taskStatus); - continue; - } - - // check if offset value exists - char key[128] = {0}; - createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); - - if (tInputQueueIsFull(pTask)) { - STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); - - int64_t ver = submit.ver; - if (pOffset == NULL) { - doSaveTaskOffset(pTq->pOffsetStore, key, submit.ver); - } else { - ver = pOffset->val.version; - } - - tqDebug("s-task:%s input queue is full, discard submit block, ver:%" PRId64, pTask->id.idStr, ver); - taosArrayPush(pInputQueueFullTasks, &pTask); - continue; - } - - // check if offset value exists - STqOffset* pOffset = 
tqOffsetRead(pTq->pOffsetStore, key); - ASSERT(pOffset == NULL); - - addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); - } - - streamDataSubmitDestroy(pSubmit); - taosFreeQitem(pSubmit); -#endif - - tqStartStreamTasks(pTq); return 0; } @@ -1323,9 +1264,9 @@ FAIL: int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } int32_t tqStartStreamTasks(STQ* pTq) { - int32_t vgId = TD_VID(pTq->pVnode); - + int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; + taosWLockLatch(&pMeta->lock); int32_t numOfTasks = taosHashGetSize(pTq->pStreamMeta->pTasks); if (numOfTasks == 0) { diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index a914517645..c8195f72a9 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -16,250 +16,10 @@ #include "tq.h" #include "vnd.h" -#if 0 -void tqTmrRspFunc(void* param, void* tmrId) { - STqHandle* pHandle = (STqHandle*)param; - atomic_store_8(&pHandle->pushHandle.tmrStopped, 1); -} - -static int32_t tqLoopExecFromQueue(STQ* pTq, STqHandle* pHandle, SStreamDataSubmit** ppSubmit, SMqDataRsp* pRsp) { - SStreamDataSubmit* pSubmit = *ppSubmit; - while (pSubmit != NULL) { - if (tqLogScanExec(pTq, &pHandle->execHandle, pSubmit->data, pRsp, 0) < 0) { - } - // update processed - atomic_store_64(&pHandle->pushHandle.processedVer, pSubmit->ver); - streamQueueProcessSuccess(&pHandle->pushHandle.inputQ); - streamDataSubmitDestroy(pSubmit); - if (pRsp->blockNum > 0) { - *ppSubmit = pSubmit; - return 0; - } else { - pSubmit = streamQueueNextItem(&pHandle->pushHandle.inputQ); - } - } - *ppSubmit = pSubmit; - return -1; -} - -int32_t tqExecFromInputQ(STQ* pTq, STqHandle* pHandle) { - SMqDataRsp rsp = {0}; - // 1. 
guard and set status executing - int8_t execStatus = atomic_val_compare_exchange_8(&pHandle->pushHandle.execStatus, TASK_EXEC_STATUS__IDLE, - TASK_EXEC_STATUS__EXECUTING); - if (execStatus == TASK_EXEC_STATUS__IDLE) { - SStreamDataSubmit* pSubmit = NULL; - // 2. check processedVer - // 2.1. if not missed, get msg from queue - // 2.2. if missed, scan wal - pSubmit = streamQueueNextItem(&pHandle->pushHandle.inputQ); - while (pHandle->pushHandle.processedVer <= pSubmit->ver) { - // read from wal - } - while (pHandle->pushHandle.processedVer > pSubmit->ver + 1) { - streamQueueProcessSuccess(&pHandle->pushHandle.inputQ); - streamDataSubmitDestroy(pSubmit); - pSubmit = streamQueueNextItem(&pHandle->pushHandle.inputQ); - if (pSubmit == NULL) break; - } - // 3. exec, after each success, update processed ver - // first run - if (tqLoopExecFromQueue(pTq, pHandle, &pSubmit, &rsp) == 0) { - goto SEND_RSP; - } - // set exec status closing - atomic_store_8(&pHandle->pushHandle.execStatus, TASK_EXEC_STATUS__CLOSING); - // second run - if (tqLoopExecFromQueue(pTq, pHandle, &pSubmit, &rsp) == 0) { - goto SEND_RSP; - } - // set exec status idle - atomic_store_8(&pHandle->pushHandle.execStatus, TASK_EXEC_STATUS__IDLE); - } -SEND_RSP: - // 4. 
if get result - // 4.1 set exec input status blocked and exec status idle - atomic_store_8(&pHandle->pushHandle.execStatus, TASK_EXEC_STATUS__IDLE); - // 4.2 rpc send - rsp.rspOffset = pHandle->pushHandle.processedVer; - /*if (tqSendPollRsp(pTq, pMsg, pReq, &rsp) < 0) {*/ - /*return -1;*/ - /*}*/ - // 4.3 clear rpc info - memset(&pHandle->pushHandle.rpcInfo, 0, sizeof(SRpcHandleInfo)); - return 0; -} - -int32_t tqOpenPushHandle(STQ* pTq, STqHandle* pHandle) { - memset(&pHandle->pushHandle, 0, sizeof(STqPushHandle)); - pHandle->pushHandle.inputQ.queue = taosOpenQueue(); - pHandle->pushHandle.inputQ.qall = taosAllocateQall(); - if (pHandle->pushHandle.inputQ.queue == NULL || pHandle->pushHandle.inputQ.qall == NULL) { - if (pHandle->pushHandle.inputQ.queue) { - taosCloseQueue(pHandle->pushHandle.inputQ.queue); - } - if (pHandle->pushHandle.inputQ.qall) { - taosFreeQall(pHandle->pushHandle.inputQ.qall); - } - return -1; - } - return 0; -} - -int32_t tqPreparePush(STQ* pTq, STqHandle* pHandle, int64_t reqId, const SRpcHandleInfo* pInfo, int64_t processedVer, - int64_t timeout) { - memcpy(&pHandle->pushHandle.rpcInfo, pInfo, sizeof(SRpcHandleInfo)); - atomic_store_64(&pHandle->pushHandle.reqId, reqId); - atomic_store_64(&pHandle->pushHandle.processedVer, processedVer); - atomic_store_8(&pHandle->pushHandle.inputStatus, TASK_INPUT_STATUS__NORMAL); - atomic_store_8(&pHandle->pushHandle.tmrStopped, 0); - taosTmrReset(tqTmrRspFunc, (int32_t)timeout, pHandle, tqMgmt.timer, &pHandle->pushHandle.timerId); - return 0; -} - -int32_t tqEnqueue(STqHandle* pHandle, SStreamDataSubmit* pSubmit) { - int8_t inputStatus = atomic_load_8(&pHandle->pushHandle.inputStatus); - if (inputStatus == TASK_INPUT_STATUS__NORMAL) { - SStreamDataSubmit* pSubmitClone = streamSubmitBlockClone(pSubmit); - if (pSubmitClone == NULL) { - return -1; - } - taosWriteQitem(pHandle->pushHandle.inputQ.queue, pSubmitClone); - return 0; - } - return -1; -} - -int32_t tqSendExecReq(STQ* pTq, STqHandle* pHandle) { - 
// - return 0; -} - -int32_t tqPushMsgNew(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver, SRpcHandleInfo handleInfo) { - if (msgType != TDMT_VND_SUBMIT) return 0; - void* pIter = NULL; - STqHandle* pHandle = NULL; - SSubmitReq* pReq = (SSubmitReq*)msg; - int32_t workerId = 4; - int64_t fetchOffset = ver; - - while (1) { - pIter = taosHashIterate(pTq->pushMgr, pIter); - if (pIter == NULL) break; - pHandle = *(STqHandle**)pIter; - - taosWLockLatch(&pHandle->pushHandle.lock); - - SMqDataRsp rsp = {0}; - rsp.reqOffset = pHandle->pushHandle.reqOffset; - rsp.blockData = taosArrayInit(0, sizeof(void*)); - rsp.blockDataLen = taosArrayInit(0, sizeof(int32_t)); - - if (msgType == TDMT_VND_SUBMIT) { - tqLogScanExec(pTq, &pHandle->execHandle, pReq, &rsp, workerId); - } else { - tqError("tq push unexpected msg type %d", msgType); - } - - if (rsp.blockNum == 0) { - taosWUnLockLatch(&pHandle->pushHandle.lock); - continue; - } - - rsp.rspOffset = fetchOffset; - - int32_t tlen = sizeof(SMqRspHead) + tEncodeSMqDataBlkRsp(NULL, &rsp); - void* buf = rpcMallocCont(tlen); - if (buf == NULL) { - // todo free - return -1; - } - - ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__POLL_RSP; - ((SMqRspHead*)buf)->epoch = pHandle->pushHandle.epoch; - ((SMqRspHead*)buf)->consumerId = pHandle->pushHandle.consumerId; - - void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); - tEncodeSMqDataBlkRsp(&abuf, &rsp); - - SRpcMsg resp = { - .info = pHandle->pushHandle.rpcInfo, - .pCont = buf, - .contLen = tlen, - .code = 0, - }; - tmsgSendRsp(&resp); - - memset(&pHandle->pushHandle.rpcInfo, 0, sizeof(SRpcHandleInfo)); - taosWUnLockLatch(&pHandle->pushHandle.lock); - - tqDebug("vgId:%d offset %" PRId64 " from consumer:%" PRId64 ", (epoch %d) send rsp, block num: %d, req:%" PRId64 ", rsp:%" PRId64, - TD_VID(pTq->pVnode), fetchOffset, pHandle->pushHandle.consumerId, pHandle->pushHandle.epoch, rsp.blockNum, - rsp.reqOffset, rsp.rspOffset); - - // TODO destroy - taosArrayDestroy(rsp.blockData); 
- taosArrayDestroy(rsp.blockDataLen); - } - - return 0; -} -#endif - int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) { -// void* pReq = POINTER_SHIFT(msg, sizeof(SSubmitReq2Msg)); -// int32_t len = msgLen - sizeof(SSubmitReq2Msg); -// int32_t vgId = TD_VID(pTq->pVnode); if (msgType == TDMT_VND_SUBMIT) { tqProcessSubmitReqForSubscribe(pTq); - // lock push mgr to avoid potential msg lost -// taosWLockLatch(&pTq->lock); -// -// int32_t numOfRegisteredPush = taosHashGetSize(pTq->pPushMgr); -// if (numOfRegisteredPush > 0) { -// tqDebug("vgId:%d tq push msg version:%" PRId64 " type:%s, head:%p, body:%p len:%d, numOfPushed consumers:%d", -// vgId, ver, TMSG_INFO(msgType), msg, pReq, len, numOfRegisteredPush); -// -// void* data = taosMemoryMalloc(len); -// if (data == NULL) { -// terrno = TSDB_CODE_OUT_OF_MEMORY; -// tqError("failed to copy data for stream since out of memory, vgId:%d", vgId); -// taosWUnLockLatch(&pTq->lock); -// return -1; -// } -// -// memcpy(data, pReq, len); -// -// SArray* cachedKey = taosArrayInit(0, sizeof(SItem)); -// void* pIter = NULL; -// -// while (1) { -// pIter = taosHashIterate(pTq->pPushMgr, pIter); -// if (pIter == NULL) { -// break; -// } -// -// STqPushEntry* pPushEntry = *(STqPushEntry**)pIter; -// -// STqHandle* pHandle = taosHashGet(pTq->pHandle, pPushEntry->subKey, strlen(pPushEntry->subKey)); -// if (pHandle == NULL) { -// tqDebug("vgId:%d, failed to find handle %s in pushing data to consumer, ignore", pTq->pVnode->config.vgId, -// pPushEntry->subKey); -// continue; -// } -// -// STqExecHandle* pExec = &pHandle->execHandle; -// doPushDataForEntry(pIter, pExec, pTq, ver, vgId, data, len, cachedKey); -// } -// -// doRemovePushedEntry(cachedKey, pTq); -// taosArrayDestroyEx(cachedKey, freeItem); -// taosMemoryFree(data); -// } -// -// // unlock -// taosWUnLockLatch(&pTq->lock); } tqDebug("handle submit, restore:%d, size:%d", pTq->pVnode->restored, (int)taosHashGetSize(pTq->pStreamMeta->pTasks)); 
@@ -274,8 +34,7 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v } if (msgType == TDMT_VND_SUBMIT) { - SPackedData submit = {0}; - tqProcessSubmitReq(pTq, submit); + tqStartStreamTasks(pTq); } if (msgType == TDMT_VND_DELETE) { @@ -286,16 +45,16 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v return 0; } - int32_t tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg) { int32_t vgId = TD_VID(pTq->pVnode); - STqHandle* pHandle = (STqHandle*) handle; - if(pHandle->msg == NULL){ + STqHandle* pHandle = (STqHandle*)handle; + + if (pHandle->msg == NULL) { pHandle->msg = taosMemoryCalloc(1, sizeof(SRpcMsg)); memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); pHandle->msg->pCont = rpcMallocCont(pMsg->contLen); - }else{ - void *tmp = pHandle->msg->pCont; + } else { + void* tmp = pHandle->msg->pCont; memcpy(pHandle->msg, pMsg, sizeof(SRpcMsg)); pHandle->msg->pCont = tmp; } @@ -303,7 +62,8 @@ int32_t tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg) { memcpy(pHandle->msg->pCont, pMsg->pCont, pMsg->contLen); pHandle->msg->contLen = pMsg->contLen; int32_t ret = taosHashPut(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey), &pHandle, POINTER_BYTES); - tqDebug("vgId:%d data is over, ret:%d, consumerId:0x%" PRIx64", register to pHandle:%p, pCont:%p, len:%d", vgId, ret, pHandle->consumerId, pHandle, pHandle->msg->pCont, pHandle->msg->contLen); + tqDebug("vgId:%d data is over, ret:%d, consumerId:0x%" PRIx64 ", register to pHandle:%p, pCont:%p, len:%d", vgId, ret, + pHandle->consumerId, pHandle, pHandle->msg->pCont, pHandle->msg->contLen); return 0; } @@ -313,6 +73,7 @@ int32_t tqUnregisterPushHandle(STQ* pTq, void *handle) { int32_t ret = taosHashRemove(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey)); tqError("vgId:%d remove pHandle:%p,ret:%d consumer Id:0x%" PRIx64, vgId, pHandle, ret, pHandle->consumerId); + if(pHandle->msg != NULL) { tqPushDataRsp(pTq, pHandle); @@ -320,5 +81,6 @@ 
int32_t tqUnregisterPushHandle(STQ* pTq, void *handle) { taosMemoryFree(pHandle->msg); pHandle->msg = NULL; } + return 0; } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 082e31ea91..7ed77edd5b 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -332,6 +332,7 @@ int32_t tqNextBlockInWal(STqReader* pReader) { if (pBlockList == NULL || pReader->nextBlk >= taosArrayGetSize(pBlockList)) { // try next message in wal file + // todo always retry to avoid read failure caused by wal file deletion if (walNextValidMsg(pWalReader) < 0) { return FETCH_TYPE__NONE; } @@ -374,7 +375,7 @@ int32_t tqNextBlockInWal(STqReader* pReader) { SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk); if (pReader->tbIdHash == NULL) { - int32_t code = tqRetrieveDataBlock(pReader->pResBlock, pReader, NULL); + int32_t code = tqRetrieveDataBlock(pReader, NULL); if (code == TSDB_CODE_SUCCESS && pReader->pResBlock->info.rows > 0) { return FETCH_TYPE__DATA; } @@ -384,7 +385,7 @@ int32_t tqNextBlockInWal(STqReader* pReader) { if (ret != NULL) { tqDebug("tq reader return submit block, uid:%"PRId64", ver:%"PRId64, pSubmitTbData->uid, pReader->msg.ver); - int32_t code = tqRetrieveDataBlock(pReader->pResBlock, pReader, NULL); + int32_t code = tqRetrieveDataBlock(pReader, NULL); if (code == TSDB_CODE_SUCCESS && pReader->pResBlock->info.rows > 0) { return FETCH_TYPE__DATA; } @@ -399,31 +400,6 @@ int32_t tqNextBlockInWal(STqReader* pReader) { } } -int32_t tqNextBlock(STqReader* pReader, SSDataBlock* pBlock) { - while (1) { - if (pReader->msg.msgStr == NULL) { - if (walNextValidMsg(pReader->pWalReader) < 0) { - return FETCH_TYPE__NONE; - } - - void* pBody = POINTER_SHIFT(pReader->pWalReader->pHead->head.body, sizeof(SSubmitReq2Msg)); - int32_t bodyLen = pReader->pWalReader->pHead->head.bodyLen - sizeof(SSubmitReq2Msg); - int64_t ver = pReader->pWalReader->pHead->head.version; - - 
tqReaderSetSubmitMsg(pReader, pBody, bodyLen, ver); - } - - while (tqNextBlockImpl(pReader)) { - int32_t code = tqRetrieveDataBlock(pReader->pResBlock, pReader, NULL); - if (code != TSDB_CODE_SUCCESS || pBlock->info.rows == 0) { - continue; - } - - return FETCH_TYPE__DATA; - } - } -} - int32_t tqReaderSetSubmitMsg(STqReader* pReader, void* msgStr, int32_t msgLen, int64_t ver) { pReader->msg.msgStr = msgStr; pReader->msg.msgLen = msgLen; @@ -527,7 +503,7 @@ int32_t tqMaskBlock(SSchemaWrapper* pDst, SSDataBlock* pBlock, const SSchemaWrap return 0; } -int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbData** pSubmitTbDataRet) { +int32_t tqRetrieveDataBlock(STqReader* pReader, SSubmitTbData** pSubmitTbDataRet) { tqDebug("tq reader retrieve data block %p, index:%d", pReader->msg.msgStr, pReader->nextBlk); SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk++); @@ -535,6 +511,7 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa *pSubmitTbDataRet = pSubmitTbData; } + SSDataBlock* pBlock = pReader->pResBlock; blockDataCleanup(pBlock); int32_t sversion = pSubmitTbData->sver; @@ -603,7 +580,7 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa SColumnInfoData colInfo = createColumnInfoData(pColSchema->type, pColSchema->bytes, pColSchema->colId); int32_t code = blockDataAppendColInfo(pBlock, &colInfo); if (code != TSDB_CODE_SUCCESS) { - goto FAIL; + return -1; } i++; j++; @@ -622,7 +599,7 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa if (blockDataEnsureCapacity(pBlock, numOfRows) < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; - goto FAIL; + return -1; } pBlock->info.rows = numOfRows; @@ -638,7 +615,7 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa while (targetIdx < colActual) { if (sourceIdx >= numOfCols) { tqError("tqRetrieveDataBlock sourceIdx:%d >= numOfCols:%d", sourceIdx, 
numOfCols); - goto FAIL; + return -1; } SColData* pCol = taosArrayGet(pCols, sourceIdx); @@ -647,7 +624,7 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa if (pCol->nVal != numOfRows) { tqError("tqRetrieveDataBlock pCol->nVal:%d != numOfRows:%d", pCol->nVal, numOfRows); - goto FAIL; + return -1; } if (pCol->cid < pColData->info.colId) { @@ -661,14 +638,14 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa memcpy(varDataVal(val), colVal.value.pData, colVal.value.nData); varDataSetLen(val, colVal.value.nData); if (colDataAppend(pColData, i, val, !COL_VAL_IS_VALUE(&colVal)) < 0) { - goto FAIL; + return -1; } } else { colDataSetNULL(pColData, i); } } else { if (colDataAppend(pColData, i, (void*)&colVal.value.val, !COL_VAL_IS_VALUE(&colVal)) < 0) { - goto FAIL; + return -1; } } } @@ -710,14 +687,14 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa memcpy(varDataVal(val), colVal.value.pData, colVal.value.nData); varDataSetLen(val, colVal.value.nData); if (colDataAppend(pColData, i, val, !COL_VAL_IS_VALUE(&colVal)) < 0) { - goto FAIL; + return -1; } } else { colDataSetNULL(pColData, i); } } else { if (colDataAppend(pColData, i, (void*)&colVal.value.val, !COL_VAL_IS_VALUE(&colVal)) < 0) { - goto FAIL; + return -1; } } @@ -735,10 +712,6 @@ int32_t tqRetrieveDataBlock(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbDa } return 0; - -FAIL: - blockDataFreeRes(pBlock); - return -1; } int32_t tqRetrieveTaosxBlock(STqReader* pReader, SArray* blocks, SArray* schemas, SSubmitTbData** pSubmitTbDataRet) { diff --git a/source/dnode/vnode/src/tq/tqScan.c b/source/dnode/vnode/src/tq/tqScan.c index 3d9cea54ba..800bcc8b71 100644 --- a/source/dnode/vnode/src/tq/tqScan.c +++ b/source/dnode/vnode/src/tq/tqScan.c @@ -66,9 +66,10 @@ static int32_t tqAddTbNameToRsp(const STQ* pTq, int64_t uid, STaosxRsp* pRsp, in int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, 
STqOffsetVal* pOffset) { const int32_t MAX_ROWS_TO_RETURN = 4096; - int32_t vgId = TD_VID(pTq->pVnode); - int32_t code = 0; - int32_t totalRows = 0; + + int32_t vgId = TD_VID(pTq->pVnode); + int32_t code = 0; + int32_t totalRows = 0; const STqExecHandle* pExec = &pHandle->execHandle; qTaskInfo_t task = pExec->task; diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 133c51a8dc..d83345ad59 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -175,7 +175,7 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, goto end; } -// till now, all data has been transferred to consumer, new data needs to push client once arrived. + // till now, all data has been transferred to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { // lock @@ -361,11 +361,10 @@ int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequ // this is a normal subscribe requirement if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { return extractDataAndRspForNormalSubscribe(pTq, pHandle, pRequest, pMsg, &offset); + } else { // todo handle the case where re-balance occurs. + // for taosx + return extractDataAndRspForDbStbSubscribe(pTq, pHandle, pRequest, pMsg, &offset); } - - // todo handle the case where re-balance occurs. 
- // for taosx - return extractDataAndRspForDbStbSubscribe(pTq, pHandle, pRequest, pMsg, &offset); } int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index f8161427db..c608403456 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -448,7 +448,6 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp walApplyVer(pVnode->pWal, version); if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { -// /*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/ vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } diff --git a/source/libs/executor/inc/querytask.h b/source/libs/executor/inc/querytask.h index 8852265da0..37c93fef5c 100644 --- a/source/libs/executor/inc/querytask.h +++ b/source/libs/executor/inc/querytask.h @@ -59,7 +59,7 @@ typedef struct { STqOffsetVal currentOffset; // for tmq SMqMetaRsp metaRsp; // for tmq fetching meta int64_t snapshotVer; - SPackedData submit; // todo remove it +// SPackedData submit; // todo remove it SSchemaWrapper* schema; char tbName[TSDB_TABLE_NAME_LEN]; // this is the current scan table: todo refactor int8_t recoverStep; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 5fc079b7c1..2d5830e4a9 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -1080,6 +1080,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT STableListInfo* pTableListInfo = pScanBaseInfo->pTableListInfo; if (pOffset->type == TMQ_OFFSET__LOG) { + // todo refactor: move away tsdbReaderClose(pScanBaseInfo->dataReader); pScanBaseInfo->dataReader = NULL; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 
9b1b5235cc..7cb3c00c1a 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1636,6 +1636,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { qDebug("start to exec queue scan, %s", id); +#if 0 if (pTaskInfo->streamInfo.submit.msgStr != NULL) { if (pInfo->tqReader->msg.msgStr == NULL) { SPackedData submit = pTaskInfo->streamInfo.submit; @@ -1649,7 +1650,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; while (tqNextBlockImpl(pInfo->tqReader)) { - int32_t code = tqRetrieveDataBlock(pInfo->tqReader->pResBlock, pInfo->tqReader, NULL); + int32_t code = tqRetrieveDataBlock(pInfo->tqReader, NULL); if (code != TSDB_CODE_SUCCESS || pInfo->tqReader->pResBlock->info.rows == 0) { continue; } @@ -1665,6 +1666,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { pTaskInfo->streamInfo.submit = (SPackedData){0}; return NULL; } +#endif if (pTaskInfo->streamInfo.currentOffset.type == TMQ_OFFSET__SNAPSHOT_DATA) { SSDataBlock* pResult = doTableScan(pInfo->pTableScanOp); @@ -1682,10 +1684,12 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { if (tqSeekVer(pInfo->tqReader, pTaskInfo->streamInfo.snapshotVer + 1, pTaskInfo->id.str) < 0) { return NULL; } + tqOffsetResetToLog(&pTaskInfo->streamInfo.currentOffset, pTaskInfo->streamInfo.snapshotVer); } if (pTaskInfo->streamInfo.currentOffset.type == TMQ_OFFSET__LOG) { + while (1) { int32_t type = tqNextBlockInWal(pInfo->tqReader); SSDataBlock* pRes = pInfo->tqReader->pResBlock; @@ -2071,7 +2075,7 @@ FETCH_NEXT_BLOCK: blockDataCleanup(pInfo->pRes); while (tqNextBlockImpl(pInfo->tqReader)) { - int32_t code = tqRetrieveDataBlock(pInfo->tqReader->pResBlock, pInfo->tqReader, NULL); + int32_t code = tqRetrieveDataBlock(pInfo->tqReader, NULL); if (code != TSDB_CODE_SUCCESS || pInfo->tqReader->pResBlock->info.rows == 0) { continue; } @@ -2109,7 +2113,6 @@ FETCH_NEXT_BLOCK: // record the scan action. 
pInfo->numOfExec++; pOperator->resultInfo.totalRows += pBlockInfo->rows; - // printDataBlock(pInfo->pRes, "stream scan"); qDebug("scan rows: %" PRId64, pBlockInfo->rows); if (pBlockInfo->rows > 0) { diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index f33e126068..f79d84c371 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -253,7 +253,6 @@ int32_t streamExecForAll(SStreamTask* pTask) { while (1) { SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); if (qItem == NULL) { -// qDebug("s-task:%s extract data from input queue, queue is empty, abort", pTask->id.idStr); break; } @@ -298,7 +297,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { } SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); - qDebug("s-task:%s exec begin, numOfBlocks:%d", pTask->id.idStr, batchSize); + qDebug("s-task:%s start to execute, numOfBlocks:%d", pTask->id.idStr, batchSize); streamTaskExecImpl(pTask, pInput, pRes); diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 6154e30938..4cc43a19a0 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -237,6 +237,7 @@ static int32_t walFetchHeadNew(SWalReader *pRead, int64_t fetchVer) { } seeked = true; } + while (1) { contLen = taosReadFile(pRead->pLogFile, pRead->pHead, sizeof(SWalCkHead)); if (contLen == sizeof(SWalCkHead)) { From 3afc5bdfcf0811468ad387d655314ddde5d37934 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 6 May 2023 10:09:51 +0800 Subject: [PATCH 068/110] refactor: add some logs. 
--- source/libs/stream/src/streamExec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index f79d84c371..e85a552d13 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -250,6 +250,8 @@ int32_t streamExecForAll(SStreamTask* pTask) { void* pInput = NULL; // merge multiple input data if possible in the input queue. + qDebug("s-task:%s start to extract data block from inputQ", pTask->id.idStr); + while (1) { SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); if (qItem == NULL) { From be7aacc43f2dd5042e295981fca023e680183c52 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Sat, 6 May 2023 10:24:42 +0800 Subject: [PATCH 069/110] fix: print float with 20 width limit --- tools/shell/src/shellEngine.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/shell/src/shellEngine.c b/tools/shell/src/shellEngine.c index 5ac32eaad9..0f91bdeeda 100644 --- a/tools/shell/src/shellEngine.c +++ b/tools/shell/src/shellEngine.c @@ -554,7 +554,12 @@ void shellPrintField(const char *val, TAOS_FIELD *field, int32_t width, int32_t if (tsEnableScience) { printf("%*e", width, GET_FLOAT_VAL(val)); } else { - printf("%*.5f", width, GET_FLOAT_VAL(val)); + n = snprintf(buf, TSDB_MAX_BYTES_PER_ROW, "%*.5f", width, GET_FLOAT_VAL(val)); + if (n > TMAX(20, width)) { + printf("%*e", width, GET_FLOAT_VAL(val)); + } else { + printf("%s", buf); + } } break; case TSDB_DATA_TYPE_DOUBLE: From 55eddbfb5eadf004edb519510f7271144baaa4df Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 6 May 2023 10:54:05 +0800 Subject: [PATCH 070/110] fix:[TD-23972] push subscribe msg to vnode even though consumer not change --- source/dnode/mnode/impl/src/mndSubscribe.c | 42 ++++++++------ source/dnode/vnode/src/tq/tq.c | 64 ++++++++++------------ 2 files changed, 53 insertions(+), 53 deletions(-) diff --git 
a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 573c60549e..75bc595a2e 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -269,6 +269,18 @@ static void addUnassignedVgroups(SMqRebOutputObj *pOutput, SHashObj *pHash) { } } +static void putNoTransferToOutput(SMqRebOutputObj *pOutput, SMqConsumerEp *pConsumerEp){ + for(int i = 0; i < taosArrayGetSize(pConsumerEp->vgs); i++){ + SMqVgEp *pVgEp = (SMqVgEp *)taosArrayGetP(pConsumerEp->vgs, i); + SMqRebOutputVg outputVg = { + .oldConsumerId = pConsumerEp->consumerId, + .newConsumerId = pConsumerEp->consumerId, + .pVgEp = pVgEp, + }; + taosArrayPush(pOutput->rebVgs, &outputVg); + } +} + static void transferVgroupsForConsumers(SMqRebOutputObj *pOutput, SHashObj *pHash, int32_t minVgCnt, int32_t imbConsumerNum) { const char *pSubKey = pOutput->pSub->key; @@ -290,24 +302,19 @@ static void transferVgroupsForConsumers(SMqRebOutputObj *pOutput, SHashObj *pHas taosArrayPush(pOutput->modifyConsumers, &pConsumerEp->consumerId); if (consumerVgNum > minVgCnt) { if (imbCnt < imbConsumerNum) { - if (consumerVgNum == minVgCnt + 1) { - imbCnt++; - continue; - } else { - // pop until equal minVg + 1 - while (taosArrayGetSize(pConsumerEp->vgs) > minVgCnt + 1) { - SMqVgEp *pVgEp = *(SMqVgEp **)taosArrayPop(pConsumerEp->vgs); - SMqRebOutputVg outputVg = { - .oldConsumerId = pConsumerEp->consumerId, - .newConsumerId = -1, - .pVgEp = pVgEp, - }; - taosHashPut(pHash, &pVgEp->vgId, sizeof(int32_t), &outputVg, sizeof(SMqRebOutputVg)); - mInfo("sub:%s mq rebalance remove vgId:%d from consumer:0x%" PRIx64 ",(first scan)", pSubKey, pVgEp->vgId, - pConsumerEp->consumerId); - } - imbCnt++; + // pop until equal minVg + 1 + while (taosArrayGetSize(pConsumerEp->vgs) > minVgCnt + 1) { + SMqVgEp *pVgEp = *(SMqVgEp **)taosArrayPop(pConsumerEp->vgs); + SMqRebOutputVg outputVg = { + .oldConsumerId = pConsumerEp->consumerId, + .newConsumerId = -1, + .pVgEp = 
pVgEp, + }; + taosHashPut(pHash, &pVgEp->vgId, sizeof(int32_t), &outputVg, sizeof(SMqRebOutputVg)); + mInfo("sub:%s mq rebalance remove vgId:%d from consumer:0x%" PRIx64 ",(first scan)", pSubKey, pVgEp->vgId, + pConsumerEp->consumerId); } + imbCnt++; } else { // all the remain consumers should only have the number of vgroups, which is equalled to the value of minVg while (taosArrayGetSize(pConsumerEp->vgs) > minVgCnt) { @@ -323,6 +330,7 @@ static void transferVgroupsForConsumers(SMqRebOutputObj *pOutput, SHashObj *pHas } } } + putNoTransferToOutput(pOutput, pConsumerEp); } } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 792ff8677e..7004fe0be3 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -445,6 +445,7 @@ int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t } int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { + int ret = 0; SMqRebVgReq req = {0}; tDecodeSMqRebVgReq(msg, &req); @@ -463,8 +464,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg if (req.newConsumerId == -1) { tqError("vgId:%d, tq invalid re-balance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId); - taosMemoryFree(req.qmsg); - return 0; + goto end; } STqHandle tqHandle = {0}; @@ -481,8 +481,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg // TODO version should be assigned and refed during preprocess SWalRef* pRef = walRefCommittedVer(pVnode->pWal); if (pRef == NULL) { - taosMemoryFree(req.qmsg); - return -1; + ret = -1; + goto end; } int64_t ver = pRef->refVer; @@ -534,49 +534,41 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg taosHashPut(pTq->pHandle, req.subKey, strlen(req.subKey), pHandle, sizeof(STqHandle)); tqDebug("try to persist handle %s consumer:0x%" PRIx64 " , old consumer:0x%" PRIx64, req.subKey, pHandle->consumerId, oldConsumerId); 
- if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { - taosMemoryFree(req.qmsg); - return -1; - } + ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); + goto end; } else { if (pHandle->consumerId == req.newConsumerId) { // do nothing tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); - atomic_store_32(&pHandle->epoch, -1); - atomic_add_fetch_32(&pHandle->epoch, 1); - taosMemoryFree(req.qmsg); - return tqMetaSaveHandle(pTq, req.subKey, pHandle); } else { tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); - - // kill executing task - qTaskInfo_t pTaskInfo = pHandle->execHandle.task; - if (pTaskInfo != NULL) { - qKillTask(pTaskInfo, TSDB_CODE_SUCCESS); - } - - taosWLockLatch(&pTq->lock); - atomic_store_32(&pHandle->epoch, 0); - - // remove if it has been register in the push manager, and return one empty block to consumer - tqUnregisterPushHandle(pTq, pHandle); - atomic_store_64(&pHandle->consumerId, req.newConsumerId); - - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - qStreamCloseTsdbReader(pTaskInfo); - } - - taosWUnLockLatch(&pTq->lock); - if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { - taosMemoryFree(req.qmsg); - return -1; - } } + // kill executing task + qTaskInfo_t pTaskInfo = pHandle->execHandle.task; + if (pTaskInfo != NULL) { + qKillTask(pTaskInfo, TSDB_CODE_SUCCESS); + } + + taosWLockLatch(&pTq->lock); + atomic_add_fetch_32(&pHandle->epoch, 1); + + // remove if it has been register in the push manager, and return one empty block to consumer + tqUnregisterPushHandle(pTq, pHandle); + + + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + qStreamCloseTsdbReader(pTaskInfo); + } + + taosWUnLockLatch(&pTq->lock); + ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); + goto end; } +end: taosMemoryFree(req.qmsg); - return 0; + return ret; } int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { From 
446097434e5a288929fe8264ac9a56c6294fc06c Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 6 May 2023 11:06:41 +0800 Subject: [PATCH 071/110] fix:[TD-23972] push subscribe msg to vnode even though consumer not change --- source/dnode/vnode/src/tq/tqUtil.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index d186c63871..57fd271416 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -165,12 +165,19 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, SRpcMsg* pMsg, STqOffsetVal* pOffset) { uint64_t consumerId = pRequest->consumerId; int32_t vgId = TD_VID(pTq->pVnode); + int code = 0; SMqDataRsp dataRsp = {0}; tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); + qTaskInfo_t task = pHandle->execHandle.task; + if(qTaskIsExecuting(task)){ + code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); + tDeleteSMqDataRsp(&dataRsp); + return code; + } qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId); - int code = tqScanData(pTq, pHandle, &dataRsp, pOffset); + code = tqScanData(pTq, pHandle, &dataRsp, pOffset); if(code != 0) { goto end; } From d32097d56a9eeb211befff2d66d34248fa10143a Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 6 May 2023 11:26:54 +0800 Subject: [PATCH 072/110] fix:disable subscribeStb.py temporary --- tests/system-test/win-test-file | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system-test/win-test-file b/tests/system-test/win-test-file index 7e68c40fd8..214e01f1a8 100644 --- a/tests/system-test/win-test-file +++ b/tests/system-test/win-test-file @@ -279,7 +279,7 @@ python3 ./test.py -f 7-tmq/subscribeDb1.py python3 ./test.py -f 7-tmq/subscribeDb2.py python3 ./test.py -f 7-tmq/subscribeDb3.py python3 ./test.py -f 7-tmq/subscribeDb4.py -python3 ./test.py -f 7-tmq/subscribeStb.py +#python3 ./test.py -f 
7-tmq/subscribeStb.py python3 ./test.py -f 7-tmq/subscribeStb0.py python3 ./test.py -f 7-tmq/subscribeStb1.py python3 ./test.py -f 7-tmq/subscribeStb2.py From 6b0580eedfd5a4d0cb9393b092bb40191afd0938 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sat, 6 May 2023 13:30:49 +0800 Subject: [PATCH 073/110] fix: clear null bitmap bit when set value --- source/common/src/tdatablock.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 3558feaa66..b439f0437c 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -120,6 +120,7 @@ int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const pColumnInfoData->varmeta.length += dataLen; } else { memcpy(pColumnInfoData->pData + pColumnInfoData->info.bytes * rowIndex, pData, pColumnInfoData->info.bytes); + colDataClearNull_f(pColumnInfoData->nullbitmap, rowIndex); } return 0; @@ -1857,7 +1858,6 @@ static char* formatTimestamp(char* buf, int64_t val, int precision) { return buf; } -#if 0 void blockDebugShowDataBlock(SSDataBlock* pBlock, const char* flag) { SArray* dataBlocks = taosArrayInit(1, sizeof(SSDataBlock*)); taosArrayPush(dataBlocks, &pBlock); @@ -1950,11 +1950,10 @@ void blockDebugShowDataBlocks(const SArray* dataBlocks, const char* flag) { } } -#endif // for debug char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) { - int32_t size = 2048; + int32_t size = 2048*1024; *pDataBuf = taosMemoryCalloc(size, 1); char* dumpBuf = *pDataBuf; char pBuf[128] = {0}; @@ -1970,7 +1969,7 @@ char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) if (len >= size - 1) return dumpBuf; for (int32_t j = 0; j < rows; j++) { - len += snprintf(dumpBuf + len, size - len, "%s |", flag); + len += snprintf(dumpBuf + len, size - len, "%s %d|", flag, j); if (len >= size - 1) return dumpBuf; for (int32_t k = 0; k < colNum; k++) { From 
25170e1a75f0bfed8327ab0eb5f9c51323ce491a Mon Sep 17 00:00:00 2001 From: slzhou Date: Sat, 6 May 2023 13:32:58 +0800 Subject: [PATCH 074/110] fix: restore the removal of show datablock to console --- source/common/src/tdatablock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index b439f0437c..9c56dd0439 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1858,6 +1858,7 @@ static char* formatTimestamp(char* buf, int64_t val, int precision) { return buf; } +#if 0 void blockDebugShowDataBlock(SSDataBlock* pBlock, const char* flag) { SArray* dataBlocks = taosArrayInit(1, sizeof(SSDataBlock*)); taosArrayPush(dataBlocks, &pBlock); @@ -1949,7 +1950,7 @@ void blockDebugShowDataBlocks(const SArray* dataBlocks, const char* flag) { } } } - +#endif // for debug char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) { From 760ccfd991f7b041cd90a873e0c4d2b3979c9ea8 Mon Sep 17 00:00:00 2001 From: Ping Xiao Date: Sat, 6 May 2023 10:12:25 +0800 Subject: [PATCH 075/110] update push message --- tests/pytest/auto_crash_gen.py | 19 +++++++++++++++-- tests/pytest/auto_crash_gen_valgrind.py | 21 ++++++++++++++++--- .../pytest/auto_crash_gen_valgrind_cluster.py | 19 +++++++++++++++-- 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/tests/pytest/auto_crash_gen.py b/tests/pytest/auto_crash_gen.py index b1f23b9c4a..fd8b2b77a2 100755 --- a/tests/pytest/auto_crash_gen.py +++ b/tests/pytest/auto_crash_gen.py @@ -343,12 +343,27 @@ def main(): try: cmd = crash_cmds.split('&')[2] - text = f'''exit status: {msg_dict[status]} - git commit : {git_commit} + if status == 0: + log_dir = "none" + else: + log_dir= "/root/pxiao/crash_gen_logs" + + if status == 3: + core_dir = "/root/pxiao/crash_gen_logs" + else: + core_dir = "none" + + text = f'''exit status: {msg_dict[status]} + test scope: crash_gen + owner: pxiao hostname: {hostname} start time: 
{starttime} end time: {endtime} + git commit : {git_commit} + log dir: {log_dir} + core dir: {core_dir} cmd: {cmd}''' + send_msg(get_msg(text)) except Exception as e: print("exception:", e) diff --git a/tests/pytest/auto_crash_gen_valgrind.py b/tests/pytest/auto_crash_gen_valgrind.py index 9d83c2b401..f2d87be7a7 100755 --- a/tests/pytest/auto_crash_gen_valgrind.py +++ b/tests/pytest/auto_crash_gen_valgrind.py @@ -378,13 +378,28 @@ def main(): try: cmd = crash_cmds.split('&')[2] - text = f'''exit status: {msg_dict[status]} - git commit : {git_commit} + if status == 0: + log_dir = "none" + else: + log_dir= "/root/pxiao/crash_gen_logs" + + if status == 3: + core_dir = "/root/pxiao/crash_gen_logs" + else: + core_dir = "none" + + text = f'''exit status: {msg_dict[status]} + test scope: crash_gen + owner: pxiao hostname: {hostname} start time: {starttime} end time: {endtime} + git commit : {git_commit} + log dir: {log_dir} + core dir: {core_dir} cmd: {cmd}''' - send_msg(get_msg(text)) + + send_msg(get_msg(text)) except Exception as e: print("exception:", e) exit(status) diff --git a/tests/pytest/auto_crash_gen_valgrind_cluster.py b/tests/pytest/auto_crash_gen_valgrind_cluster.py index 8e387317a8..ee2b92258d 100755 --- a/tests/pytest/auto_crash_gen_valgrind_cluster.py +++ b/tests/pytest/auto_crash_gen_valgrind_cluster.py @@ -378,12 +378,27 @@ def main(): try: cmd = crash_cmds.split('&')[2] - text = f'''exit status: {msg_dict[status]} - git commit : {git_commit} + if status == 0: + log_dir = "none" + else: + log_dir= "/root/pxiao/crash_gen_logs" + + if status == 3: + core_dir = "/root/pxiao/crash_gen_logs" + else: + core_dir = "none" + + text = f'''exit status: {msg_dict[status]} + test scope: crash_gen + owner: pxiao hostname: {hostname} start time: {starttime} end time: {endtime} + git commit : {git_commit} + log dir: {log_dir} + core dir: {core_dir} cmd: {cmd}''' + send_msg(get_msg(text)) except Exception as e: print("exception:", e) From 
33d4e0cc677bb9f300a720f2d8f5ed364b47e80b Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Sat, 6 May 2023 15:03:17 +0800 Subject: [PATCH 076/110] fix: fix taosd not exit when press ctrl+c --- source/libs/executor/src/executorInt.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index f525f6728c..62ab2d9df2 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -82,7 +82,7 @@ static void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SC static int32_t doSetInputDataBlock(SExprSupp* pExprSup, SSDataBlock* pBlock, int32_t order, int32_t scanFlag, bool createDummyCol); static int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf, - SGroupResInfo* pGroupResInfo); + SGroupResInfo* pGroupResInfo, int32_t threshold); SResultRow* getNewResultRow(SDiskbasedBuf* pResultBuf, int32_t* currentPageId, int32_t interBufSize) { SFilePage* pData = NULL; @@ -776,7 +776,7 @@ int32_t finalizeResultRows(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPos } int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf, - SGroupResInfo* pGroupResInfo) { + SGroupResInfo* pGroupResInfo, int32_t threshold) { SExprInfo* pExprInfo = pSup->pExprInfo; int32_t numOfExprs = pSup->numOfExprs; int32_t* rowEntryOffset = pSup->rowEntryInfoOffset; @@ -825,6 +825,9 @@ int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprS releaseBufPage(pBuf, page); pBlock->info.rows += pRow->numOfRows; + if (pBlock->info.rows >= threshold) { + break; + } } qDebug("%s result generated, rows:%" PRId64 ", groupId:%" PRIu64, GET_TASKID(pTaskInfo), pBlock->info.rows, @@ -850,7 +853,7 @@ void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGr // clear the existed group id pBlock->info.id.groupId = 0; 
ASSERT(!pbInfo->mergeResultBlock); - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo); + doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold); void* tbname = NULL; if (streamStateGetParName(pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < 0) { @@ -877,10 +880,10 @@ void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SG // clear the existed group id pBlock->info.id.groupId = 0; if (!pbInfo->mergeResultBlock) { - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo); + doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold); } else { while (hasRemainResults(pGroupResInfo)) { - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo); + doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold); if (pBlock->info.rows >= pOperator->resultInfo.threshold) { break; } From fc799bcd19c33939d172172de03e7abce403d2a6 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Sat, 6 May 2023 15:06:53 +0800 Subject: [PATCH 077/110] test: fix tsim/db/error1.sim random failed --- tests/script/tsim/db/error1.sim | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/script/tsim/db/error1.sim b/tests/script/tsim/db/error1.sim index 32dbe826cc..d275dca387 100644 --- a/tests/script/tsim/db/error1.sim +++ b/tests/script/tsim/db/error1.sim @@ -56,18 +56,18 @@ endi if $data23 != 0 then return -1 -endi +end -print ========== stop dnode2 -system sh/exec.sh -n dnode2 -s stop -x SIGKILL +#print ========== stop dnode2 +#system sh/exec.sh -n dnode2 -s stop -x SIGKILL -sleep 1000 -print =============== drop database -sql_error drop database d1 +#sleep 1000 +#print =============== drop database +sql drop database d1 -print ========== start dnode2 -system 
sh/exec.sh -n dnode2 -s start -sleep 1000 +#print ========== start dnode2 +#system sh/exec.sh -n dnode2 -s start +#sleep 1000 print =============== re-create database $x = 0 From b925342ad06a1d0433b19afb5eacd741957001d1 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Sat, 6 May 2023 15:08:33 +0800 Subject: [PATCH 078/110] test: fix tsim/db/error1.sim random failed --- tests/script/tsim/db/error1.sim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/script/tsim/db/error1.sim b/tests/script/tsim/db/error1.sim index d275dca387..64b17125aa 100644 --- a/tests/script/tsim/db/error1.sim +++ b/tests/script/tsim/db/error1.sim @@ -56,7 +56,7 @@ endi if $data23 != 0 then return -1 -end +endi #print ========== stop dnode2 #system sh/exec.sh -n dnode2 -s stop -x SIGKILL From c8ac3a581ec53a26f6edb35bddd1fd6cc7f91b54 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sat, 6 May 2023 15:50:38 +0800 Subject: [PATCH 079/110] fix: forbid some stream computing --- source/libs/parser/src/parTranslater.c | 17 ++++++++++++++++- source/libs/parser/test/parInitialCTest.cpp | 4 ++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index b598fffbc6..3b8dd80f55 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -6118,17 +6118,32 @@ static bool isEventWindowQuery(SSelectStmt* pSelect) { return NULL != pSelect->pWindow && QUERY_NODE_EVENT_WINDOW == nodeType(pSelect->pWindow); } +static bool hasJsonTypeProjection(SSelectStmt* pSelect) { + SNode* pProj = NULL; + FOREACH(pProj, pSelect->pProjectionList) { + if (TSDB_DATA_TYPE_JSON == ((SExprNode*)pProj)->resType.type) { + return true; + } + } + return false; +} + static int32_t checkStreamQuery(STranslateContext* pCxt, SCreateStreamStmt* pStmt) { SSelectStmt* pSelect = (SSelectStmt*)pStmt->pQuery; if (TSDB_DATA_TYPE_TIMESTAMP != 
((SExprNode*)nodesListGetNode(pSelect->pProjectionList, 0))->resType.type || !pSelect->isTimeLineResult || crossTableWithoutAggOper(pSelect) || NULL != pSelect->pOrderByList || - crossTableWithUdaf(pSelect) || isEventWindowQuery(pSelect)) { + crossTableWithUdaf(pSelect) || isEventWindowQuery(pSelect) || hasJsonTypeProjection(pSelect)) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, "Unsupported stream query"); } if (NULL != pSelect->pSubtable && TSDB_DATA_TYPE_VARCHAR != ((SExprNode*)pSelect->pSubtable)->resType.type) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, "SUBTABLE expression must be of VARCHAR type"); } + if (NULL != pSelect->pSubtable && NULL == pSelect->pPartitionByList && nodesExprHasColumn(pSelect->pSubtable)) { + return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, + "SUBTABLE expression must not has column when no partition by clause"); + } + if (NULL == pSelect->pWindow && STREAM_TRIGGER_AT_ONCE != pStmt->pOptions->triggerType) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, "The trigger mode of non window query can only be AT_ONCE"); diff --git a/source/libs/parser/test/parInitialCTest.cpp b/source/libs/parser/test/parInitialCTest.cpp index b7ca944ebb..d79ad04c74 100644 --- a/source/libs/parser/test/parInitialCTest.cpp +++ b/source/libs/parser/test/parInitialCTest.cpp @@ -920,6 +920,10 @@ TEST_F(ParserInitialCTest, createStreamSemanticCheck) { run("CREATE STREAM s1 INTO st1 AS SELECT PERCENTILE(c1, 30) FROM t1 INTERVAL(10S)", TSDB_CODE_PAR_STREAM_NOT_ALLOWED_FUNC); + run("CREATE STREAM s2 INTO st1 AS SELECT ts, to_json("{c1:1}") FROM st1 PARTITION BY TBNAME", + TSDB_CODE_PAR_INVALID_STREAM_QUERY); + run("CREATE STREAM s3 INTO st3 TAGS(tname VARCHAR(10), id INT) SUBTABLE(CONCAT('new-', tname)) " + "AS SELECT _WSTART wstart, COUNT(*) cnt FROM st1 INTERVAL(10S)", TSDB_CODE_PAR_INVALID_STREAM_QUERY); } /* From 
83c857a017eaec512c2afaa402c34887cf793159 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sat, 6 May 2023 16:17:34 +0800 Subject: [PATCH 080/110] fix: fix compilation error --- source/libs/parser/test/parInitialCTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/parser/test/parInitialCTest.cpp b/source/libs/parser/test/parInitialCTest.cpp index d79ad04c74..902a2eaf85 100644 --- a/source/libs/parser/test/parInitialCTest.cpp +++ b/source/libs/parser/test/parInitialCTest.cpp @@ -920,7 +920,7 @@ TEST_F(ParserInitialCTest, createStreamSemanticCheck) { run("CREATE STREAM s1 INTO st1 AS SELECT PERCENTILE(c1, 30) FROM t1 INTERVAL(10S)", TSDB_CODE_PAR_STREAM_NOT_ALLOWED_FUNC); - run("CREATE STREAM s2 INTO st1 AS SELECT ts, to_json("{c1:1}") FROM st1 PARTITION BY TBNAME", + run("CREATE STREAM s2 INTO st1 AS SELECT ts, to_json('{c1:1}') FROM st1 PARTITION BY TBNAME", TSDB_CODE_PAR_INVALID_STREAM_QUERY); run("CREATE STREAM s3 INTO st3 TAGS(tname VARCHAR(10), id INT) SUBTABLE(CONCAT('new-', tname)) " "AS SELECT _WSTART wstart, COUNT(*) cnt FROM st1 INTERVAL(10S)", TSDB_CODE_PAR_INVALID_STREAM_QUERY); From ee32620808441e640ac22b5d65769a8323553754 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 6 May 2023 16:22:30 +0800 Subject: [PATCH 081/110] fix:[TS-3347]set ver to first version if version stored is smaller than first version in wal when subscribe db --- include/libs/executor/executor.h | 2 ++ source/dnode/vnode/src/tq/tqUtil.c | 1 + source/libs/executor/src/executor.c | 15 +++++++++------ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 1fb00e743f..b7e6c42e3b 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -190,6 +190,8 @@ STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo); +void verifyOffset(void *pWalReader, STqOffsetVal* 
pOffset); + int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subType); void qStreamSetOpen(qTaskInfo_t tinfo); diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 133c51a8dc..dba363122a 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -246,6 +246,7 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, if (offset->type == TMQ_OFFSET__LOG) { + verifyOffset(pHandle->pWalReader, offset); int64_t fetchVer = offset->version + 1; pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); if (pCkHead == NULL) { diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 5fc079b7c1..1c87619e84 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -1058,6 +1058,14 @@ void qStreamSetOpen(qTaskInfo_t tinfo) { pOperator->status = OP_NOT_OPENED; } +void verifyOffset(void *pWalReader, STqOffsetVal* pOffset){ + // if offset version is small than first version , let's seek to first version + int64_t firstVer = walGetFirstVer(((SWalReader*)pWalReader)->pWal); + if (pOffset->version + 1 < firstVer){ + pOffset->version = firstVer - 1; + } +} + int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subType) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; SOperatorInfo* pOperator = pTaskInfo->pRoot; @@ -1083,12 +1091,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT tsdbReaderClose(pScanBaseInfo->dataReader); pScanBaseInfo->dataReader = NULL; - // let's seek to the next version in wal file - int64_t firstVer = walGetFirstVer(pInfo->tqReader->pWalReader->pWal); - if (pOffset->version + 1 < firstVer){ - pOffset->version = firstVer - 1; - } - + verifyOffset(pInfo->tqReader->pWalReader, pOffset); if (tqSeekVer(pInfo->tqReader, pOffset->version + 1, id) < 0) { qError("tqSeekVer failed ver:%" PRId64 ", %s", pOffset->version + 1, 
id); return -1; From 276e5daa8cd909db51aec77630a0c72f621422db Mon Sep 17 00:00:00 2001 From: slzhou Date: Sat, 6 May 2023 17:13:14 +0800 Subject: [PATCH 082/110] fix: pseudo column function are treated as variable --- source/libs/parser/src/parTranslater.c | 20 +++++++++++++++++++- source/libs/parser/test/parInitialCTest.cpp | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 3b8dd80f55..492b2a5349 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -6128,6 +6128,24 @@ static bool hasJsonTypeProjection(SSelectStmt* pSelect) { return false; } +static EDealRes hasVariable(SNode* pNode, void* pContext) { + if (QUERY_NODE_COLUMN == nodeType(pNode)) { + *(bool*)pContext = true; + return DEAL_RES_END; + } + if (QUERY_NODE_FUNCTION == nodeType(pNode) && fmIsPseudoColumnFunc(((SFunctionNode*)pNode)->funcId)) { + *(bool*)pContext = true; + return DEAL_RES_END; + } + return DEAL_RES_CONTINUE; +} + +static int32_t subtableExprHasVariable(SNode* pNode) { + bool hasCol = false; + nodesWalkExprPostOrder(pNode, hasVariable, &hasCol); + return hasCol; +} + static int32_t checkStreamQuery(STranslateContext* pCxt, SCreateStreamStmt* pStmt) { SSelectStmt* pSelect = (SSelectStmt*)pStmt->pQuery; if (TSDB_DATA_TYPE_TIMESTAMP != ((SExprNode*)nodesListGetNode(pSelect->pProjectionList, 0))->resType.type || @@ -6139,7 +6157,7 @@ static int32_t checkStreamQuery(STranslateContext* pCxt, SCreateStreamStmt* pStm return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, "SUBTABLE expression must be of VARCHAR type"); } - if (NULL != pSelect->pSubtable && NULL == pSelect->pPartitionByList && nodesExprHasColumn(pSelect->pSubtable)) { + if (NULL != pSelect->pSubtable && 0 == LIST_LENGTH(pSelect->pPartitionByList) && subtableExprHasVariable(pSelect->pSubtable)) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, 
TSDB_CODE_PAR_INVALID_STREAM_QUERY, "SUBTABLE expression must not has column when no partition by clause"); } diff --git a/source/libs/parser/test/parInitialCTest.cpp b/source/libs/parser/test/parInitialCTest.cpp index 902a2eaf85..6a08193a39 100644 --- a/source/libs/parser/test/parInitialCTest.cpp +++ b/source/libs/parser/test/parInitialCTest.cpp @@ -922,7 +922,7 @@ TEST_F(ParserInitialCTest, createStreamSemanticCheck) { TSDB_CODE_PAR_STREAM_NOT_ALLOWED_FUNC); run("CREATE STREAM s2 INTO st1 AS SELECT ts, to_json('{c1:1}') FROM st1 PARTITION BY TBNAME", TSDB_CODE_PAR_INVALID_STREAM_QUERY); - run("CREATE STREAM s3 INTO st3 TAGS(tname VARCHAR(10), id INT) SUBTABLE(CONCAT('new-', tname)) " + run("CREATE STREAM s3 INTO st3 TAGS(tname VARCHAR(10), id INT) SUBTABLE(CONCAT('new-', tbname)) " "AS SELECT _WSTART wstart, COUNT(*) cnt FROM st1 INTERVAL(10S)", TSDB_CODE_PAR_INVALID_STREAM_QUERY); } From 04e9648d8ebab33db928740e9ecd6f8f0a455621 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sat, 6 May 2023 17:16:27 +0800 Subject: [PATCH 083/110] enhance: change variable name --- source/libs/parser/src/parTranslater.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 492b2a5349..874ad1c695 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -6141,9 +6141,9 @@ static EDealRes hasVariable(SNode* pNode, void* pContext) { } static int32_t subtableExprHasVariable(SNode* pNode) { - bool hasCol = false; - nodesWalkExprPostOrder(pNode, hasVariable, &hasCol); - return hasCol; + bool hasVar = false; + nodesWalkExprPostOrder(pNode, hasVariable, &hasVar); + return hasVar; } static int32_t checkStreamQuery(STranslateContext* pCxt, SCreateStreamStmt* pStmt) { From 279fe0803f720e2d3ff09ae9a697ff01faebf4e9 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 6 May 2023 18:35:24 +0800 Subject: [PATCH 084/110] fix:[TD-23972] push subscribe msg to 
vnode even though consumer not change --- source/dnode/mnode/impl/src/mndSubscribe.c | 8 ++++---- source/dnode/vnode/src/tq/tq.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 75bc595a2e..b6ab7df68c 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -133,10 +133,10 @@ static int32_t mndBuildSubChangeReq(void **pBuf, int32_t *pLen, const SMqSubscri static int32_t mndPersistSubChangeVgReq(SMnode *pMnode, STrans *pTrans, const SMqSubscribeObj *pSub, const SMqRebOutputVg *pRebVg) { - if (pRebVg->oldConsumerId == pRebVg->newConsumerId) { - terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; - return -1; - } +// if (pRebVg->oldConsumerId == pRebVg->newConsumerId) { +// terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; +// return -1; +// } void *buf; int32_t tlen; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 7004fe0be3..1661bb4c21 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -539,10 +539,13 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } else { if (pHandle->consumerId == req.newConsumerId) { // do nothing tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); + atomic_add_fetch_32(&pHandle->epoch, 1); + } else { tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); + atomic_store_32(&pHandle->epoch, 0); } // kill executing task qTaskInfo_t pTaskInfo = pHandle->execHandle.task; @@ -551,8 +554,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } taosWLockLatch(&pTq->lock); - atomic_add_fetch_32(&pHandle->epoch, 1); - // remove if it has been register in the push manager, and return one empty block to 
consumer tqUnregisterPushHandle(pTq, pHandle); From 261f2736f7573203ddfd9444e249ccbaef70a660 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 6 May 2023 19:17:36 +0800 Subject: [PATCH 085/110] fix:[TD-24010] subscribeStb.py failed in windows CI --- source/libs/wal/src/walRead.c | 2 +- tests/system-test/win-test-file | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 4cc43a19a0..844ad89b6c 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -75,7 +75,7 @@ int32_t walNextValidMsg(SWalReader *pReader) { int64_t committedVer = walGetCommittedVer(pReader->pWal); int64_t appliedVer = walGetAppliedVer(pReader->pWal); int64_t endVer = pReader->cond.scanUncommited ? lastVer : committedVer; - endVer = TMIN(appliedVer, endVer); +// endVer = TMIN(appliedVer, endVer); wDebug("vgId:%d, wal start to fetch, index:%" PRId64 ", last index:%" PRId64 " commit index:%" PRId64 ", applied index:%" PRId64 ", end index:%" PRId64, diff --git a/tests/system-test/win-test-file b/tests/system-test/win-test-file index 214e01f1a8..7e68c40fd8 100644 --- a/tests/system-test/win-test-file +++ b/tests/system-test/win-test-file @@ -279,7 +279,7 @@ python3 ./test.py -f 7-tmq/subscribeDb1.py python3 ./test.py -f 7-tmq/subscribeDb2.py python3 ./test.py -f 7-tmq/subscribeDb3.py python3 ./test.py -f 7-tmq/subscribeDb4.py -#python3 ./test.py -f 7-tmq/subscribeStb.py +python3 ./test.py -f 7-tmq/subscribeStb.py python3 ./test.py -f 7-tmq/subscribeStb0.py python3 ./test.py -f 7-tmq/subscribeStb1.py python3 ./test.py -f 7-tmq/subscribeStb2.py From a425f085040d1aab329d543c3336d939b67f897f Mon Sep 17 00:00:00 2001 From: dmchen Date: Sat, 6 May 2023 19:53:57 +0800 Subject: [PATCH 086/110] use wrong usedb map --- source/dnode/mnode/impl/src/mndUser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c 
index 523753d7c6..2a0d753722 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -236,7 +236,7 @@ SSdbRaw *mndUserActionEncode(SUserObj *pUser) { SDB_SET_BINARY(pRaw, dataPos, key, keyLen, _OVER); SDB_SET_INT32(pRaw, dataPos, *useDb, _OVER) - useDb = taosHashIterate(pUser->writeTbs, useDb); + useDb = taosHashIterate(pUser->useDbs, useDb); } SDB_SET_RESERVE(pRaw, dataPos, USER_RESERVE_SIZE, _OVER) From cbdddeffec83c494bb340277f694aa5abf3883d3 Mon Sep 17 00:00:00 2001 From: Adam Ji Date: Mon, 8 May 2023 10:41:22 +0800 Subject: [PATCH 087/110] docs: add wal args --- docs/examples/rust/nativeexample/examples/subscribe_demo.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples/rust/nativeexample/examples/subscribe_demo.rs b/docs/examples/rust/nativeexample/examples/subscribe_demo.rs index 7551ad46b1..d54bb60e93 100644 --- a/docs/examples/rust/nativeexample/examples/subscribe_demo.rs +++ b/docs/examples/rust/nativeexample/examples/subscribe_demo.rs @@ -45,7 +45,7 @@ async fn main() -> anyhow::Result<()> { taos.exec_many([ format!("DROP TOPIC IF EXISTS tmq_meters"), format!("DROP DATABASE IF EXISTS `{db}`"), - format!("CREATE DATABASE `{db}`"), + format!("CREATE DATABASE `{db}` WAL_RETENTION_PERIOD 3600"), format!("USE `{db}`"), // create super table format!("CREATE TABLE `meters` (`ts` TIMESTAMP, `current` FLOAT, `voltage` INT, `phase` FLOAT) TAGS (`groupid` INT, `location` BINARY(24))"), From 4eed65432c9970a04a563f2b286eb0c8dd73995c Mon Sep 17 00:00:00 2001 From: dmchen Date: Mon, 8 May 2023 13:26:11 +0800 Subject: [PATCH 088/110] colid overflow --- source/dnode/mnode/impl/src/mndStb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index a73c08e69a..8b708c3e0f 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -932,7 +932,7 @@ static int32_t 
mndBuildStbFromAlter(SStbObj *pStb, SStbObj *pDst, SMCreateStbReq return -1; } - if(pDst->nextColId < 0 && pDst->nextColId >= 0x7fff - pDst->numOfColumns - pDst->numOfTags){ + if(pDst->nextColId < 0 || pDst->nextColId >= 0x7fff - pDst->numOfColumns - pDst->numOfTags){ terrno = TSDB_CODE_MND_FIELD_VALUE_OVERFLOW; return -1; } @@ -1163,8 +1163,8 @@ static int32_t mndAddSuperTableTag(const SStbObj *pOld, SStbObj *pNew, SArray *p if (mndAllocStbSchemas(pOld, pNew) != 0) { return -1; } - - if(pNew->nextColId < 0 && pNew->nextColId >= 0x7fff - ntags){ + + if(pNew->nextColId < 0 || pNew->nextColId >= 0x7fff - ntags){ terrno = TSDB_CODE_MND_FIELD_VALUE_OVERFLOW; return -1; } @@ -1476,7 +1476,7 @@ static int32_t mndAddSuperTableColumn(const SStbObj *pOld, SStbObj *pNew, SArray return -1; } - if(pNew->nextColId < 0 && pNew->nextColId >= 0x7fff - ncols){ + if(pNew->nextColId < 0 || pNew->nextColId >= 0x7fff - ncols){ terrno = TSDB_CODE_MND_FIELD_VALUE_OVERFLOW; return -1; } From 2f45a3dfdbcb41fcab26becd1a6384e9d1a29007 Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 8 May 2023 14:42:31 +0800 Subject: [PATCH 089/110] fix: change variable name --- source/libs/parser/src/parTranslater.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 874ad1c695..e753c08f9d 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -6128,7 +6128,7 @@ static bool hasJsonTypeProjection(SSelectStmt* pSelect) { return false; } -static EDealRes hasVariable(SNode* pNode, void* pContext) { +static EDealRes hasColumnOrPseudoColumn(SNode* pNode, void* pContext) { if (QUERY_NODE_COLUMN == nodeType(pNode)) { *(bool*)pContext = true; return DEAL_RES_END; @@ -6140,10 +6140,10 @@ static EDealRes hasVariable(SNode* pNode, void* pContext) { return DEAL_RES_CONTINUE; } -static int32_t subtableExprHasVariable(SNode* pNode) { - bool hasVar = false; - 
nodesWalkExprPostOrder(pNode, hasVariable, &hasVar); - return hasVar; +static int32_t subtableExprHasColumnOrPseudoColumn(SNode* pNode) { + bool hasColumn = false; + nodesWalkExprPostOrder(pNode, hasColumnOrPseudoColumn, &hasColumn); + return hasColumn; } static int32_t checkStreamQuery(STranslateContext* pCxt, SCreateStreamStmt* pStmt) { @@ -6157,7 +6157,7 @@ static int32_t checkStreamQuery(STranslateContext* pCxt, SCreateStreamStmt* pStm return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, "SUBTABLE expression must be of VARCHAR type"); } - if (NULL != pSelect->pSubtable && 0 == LIST_LENGTH(pSelect->pPartitionByList) && subtableExprHasVariable(pSelect->pSubtable)) { + if (NULL != pSelect->pSubtable && 0 == LIST_LENGTH(pSelect->pPartitionByList) && subtableExprHasColumnOrPseudoColumn(pSelect->pSubtable)) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_STREAM_QUERY, "SUBTABLE expression must not has column when no partition by clause"); } From f9142c0ddb8eba709cec30a2a4bf0fd9f954d8a3 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Mon, 8 May 2023 16:49:56 +0800 Subject: [PATCH 090/110] fix:[TD24010] lost data if apply ver is small than commit ver --- include/libs/wal/wal.h | 2 +- source/client/src/clientSml.c | 4 +++- source/libs/wal/src/walRead.c | 15 ++++++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index d3e2bbb1be..46dc179295 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -132,7 +132,7 @@ typedef struct { } SWalRef; typedef struct { - int8_t scanUncommited; +// int8_t scanUncommited; int8_t scanNotApplied; int8_t scanMeta; int8_t enableRef; diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index 19d08ad66d..ce06e0eac4 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -1580,7 +1580,9 @@ static int smlProcess(SSmlHandle *info, char *lines[], char *rawLine, char 
*rawL code = smlModifyDBSchemas(info); if (code == 0 || code == TSDB_CODE_SML_INVALID_DATA || code == TSDB_CODE_PAR_TOO_MANY_COLUMNS || code == TSDB_CODE_PAR_INVALID_TAGS_NUM || code == TSDB_CODE_PAR_INVALID_TAGS_LENGTH - || code == TSDB_CODE_PAR_INVALID_ROW_LENGTH) break; + || code == TSDB_CODE_PAR_INVALID_ROW_LENGTH || code == TSDB_CODE_MND_FIELD_VALUE_OVERFLOW) { + break; + } taosMsleep(100); uInfo("SML:0x%" PRIx64 " smlModifyDBSchemas retry code:%s, times:%d", info->id, tstrerror(code), retryNum); } while (retryNum++ < taosHashGetSize(info->superTables) * MAX_RETRY_TIMES); diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 844ad89b6c..b29e36efdc 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -37,7 +37,7 @@ SWalReader *walOpenReader(SWal *pWal, SWalFilterCond *cond) { if (cond) { pReader->cond = *cond; } else { - pReader->cond.scanUncommited = 0; +// pReader->cond.scanUncommited = 0; pReader->cond.scanNotApplied = 0; pReader->cond.scanMeta = 0; pReader->cond.enableRef = 0; @@ -74,13 +74,18 @@ int32_t walNextValidMsg(SWalReader *pReader) { int64_t lastVer = walGetLastVer(pReader->pWal); int64_t committedVer = walGetCommittedVer(pReader->pWal); int64_t appliedVer = walGetAppliedVer(pReader->pWal); - int64_t endVer = pReader->cond.scanUncommited ? lastVer : committedVer; + while(appliedVer < committedVer){ // wait apply ver equal to commit ver, otherwise may lost data when consume data [TD-24010] + wDebug("vgId:%d, wal apply ver:%"PRId64" smaller than commit ver:%"PRId64", so sleep 1ms", pReader->pWal->cfg.vgId, appliedVer, committedVer); + taosMsleep(1); + appliedVer = walGetAppliedVer(pReader->pWal); + } +// int64_t endVer = pReader->cond.scanUncommited ? 
lastVer : committedVer; // endVer = TMIN(appliedVer, endVer); wDebug("vgId:%d, wal start to fetch, index:%" PRId64 ", last index:%" PRId64 " commit index:%" PRId64 - ", applied index:%" PRId64 ", end index:%" PRId64, - pReader->pWal->cfg.vgId, fetchVer, lastVer, committedVer, appliedVer, endVer); - while (fetchVer <= endVer) { + ", applied index:%" PRId64, + pReader->pWal->cfg.vgId, fetchVer, lastVer, committedVer, appliedVer); + while (fetchVer <= committedVer) { if (walFetchHeadNew(pReader, fetchVer) < 0) { return -1; } From bd5372c3782b3b6b04295f54a295151ed5b7282d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 8 May 2023 17:02:51 +0800 Subject: [PATCH 091/110] fix: protect against nullptr in syncRespCleanRsp --- source/libs/sync/src/syncRespMgr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/libs/sync/src/syncRespMgr.c b/source/libs/sync/src/syncRespMgr.c index f9f14c2e00..3506d477d3 100644 --- a/source/libs/sync/src/syncRespMgr.c +++ b/source/libs/sync/src/syncRespMgr.c @@ -171,6 +171,8 @@ static void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) { } void syncRespCleanRsp(SSyncRespMgr *pObj) { + if (pObj == NULL) return; + SSyncNode *pNode = pObj->data; sTrace("vgId:%d, clean all resp", pNode->vgId); From c3e0de0bfce800c13fecdd5c761ab639e23d0d02 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Mon, 8 May 2023 09:21:20 +0000 Subject: [PATCH 092/110] fix invalid read --- source/libs/transport/src/transCli.c | 29 ++++++++++++++-------------- source/libs/transport/src/transSvr.c | 2 +- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index c23d6d0a1f..ea35f1cfe5 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -587,12 +587,12 @@ void* destroyConnPool(SCliThrd* pThrd) { static SCliConn* getConnFromPool(SCliThrd* pThrd, char* key, bool* exceed) { void* pool = pThrd->pool; - SConnList* plist = 
taosHashGet((SHashObj*)pool, key, strlen(key)); + SConnList* plist = taosHashGet((SHashObj*)pool, key, strlen(key) + 1); STrans* pTranInst = pThrd->pTransInst; if (plist == NULL) { SConnList list = {0}; - taosHashPut((SHashObj*)pool, key, strlen(key), (void*)&list, sizeof(list)); - plist = taosHashGet(pool, key, strlen(key)); + taosHashPut((SHashObj*)pool, key, strlen(key) + 1, (void*)&list, sizeof(list)); + plist = taosHashGet(pool, key, strlen(key) + 1); SMsgList* nList = taosMemoryCalloc(1, sizeof(SMsgList)); QUEUE_INIT(&nList->msgQ); @@ -627,11 +627,11 @@ static SCliConn* getConnFromPool(SCliThrd* pThrd, char* key, bool* exceed) { static SCliConn* getConnFromPool2(SCliThrd* pThrd, char* key, SCliMsg** pMsg) { void* pool = pThrd->pool; STrans* pTransInst = pThrd->pTransInst; - SConnList* plist = taosHashGet((SHashObj*)pool, key, strlen(key)); + SConnList* plist = taosHashGet((SHashObj*)pool, key, strlen(key) + 1); if (plist == NULL) { SConnList list = {0}; - taosHashPut((SHashObj*)pool, key, strlen(key), (void*)&list, sizeof(list)); - plist = taosHashGet(pool, key, strlen(key)); + taosHashPut((SHashObj*)pool, key, strlen(key) + 1, (void*)&list, sizeof(list)); + plist = taosHashGet(pool, key, strlen(key) + 1); SMsgList* nList = taosMemoryCalloc(1, sizeof(SMsgList)); QUEUE_INIT(&nList->msgQ); @@ -717,7 +717,7 @@ static void addConnToPool(void* pool, SCliConn* conn) { cliDestroyConnMsgs(conn, false); if (conn->list == NULL) { - conn->list = taosHashGet((SHashObj*)pool, conn->ip, strlen(conn->ip)); + conn->list = taosHashGet((SHashObj*)pool, conn->ip, strlen(conn->ip) + 1); } SConnList* pList = conn->list; @@ -822,7 +822,8 @@ static void cliRecvCb(uv_stream_t* handle, ssize_t nread, const uv_buf_t* buf) { return; } if (nread < 0) { - tWarn("%s conn %p read error:%s, ref:%d", CONN_GET_INST_LABEL(conn), conn, uv_err_name(nread), T_REF_VAL_GET(conn)); + tDebug("%s conn %p read error:%s, ref:%d", CONN_GET_INST_LABEL(conn), conn, uv_err_name(nread), + 
T_REF_VAL_GET(conn)); conn->broken = true; cliHandleExcept(conn); } @@ -875,8 +876,8 @@ static void cliDestroyConn(SCliConn* conn, bool clear) { connList->list->numOfConn--; connList->size--; } else { - SConnList* connList = taosHashGet((SHashObj*)pThrd->pool, conn->ip, strlen(conn->ip)); - connList->list->numOfConn--; + SConnList* connList = taosHashGet((SHashObj*)pThrd->pool, conn->ip, strlen(conn->ip) + 1); + if (connList != NULL) connList->list->numOfConn--; } conn->list = NULL; pThrd->newConnCount--; @@ -1269,7 +1270,7 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { if (pMsg != NULL && REQUEST_NO_RESP(&pMsg->msg) && (pTransInst->failFastFp != NULL && pTransInst->failFastFp(pMsg->msg.msgType))) { - SFailFastItem* item = taosHashGet(pThrd->failFastCache, pConn->ip, strlen(pConn->ip)); + SFailFastItem* item = taosHashGet(pThrd->failFastCache, pConn->ip, strlen(pConn->ip) + 1); int64_t cTimestamp = taosGetTimestampMs(); if (item != NULL) { int32_t elapse = cTimestamp - item->timestamp; @@ -1281,7 +1282,7 @@ static void cliHandleFastFail(SCliConn* pConn, int status) { } } else { SFailFastItem item = {.count = 1, .timestamp = cTimestamp}; - taosHashPut(pThrd->failFastCache, pConn->ip, strlen(pConn->ip), &item, sizeof(SFailFastItem)); + taosHashPut(pThrd->failFastCache, pConn->ip, strlen(pConn->ip) + 1, &item, sizeof(SFailFastItem)); } } } else { @@ -1459,7 +1460,7 @@ FORCE_INLINE int32_t cliBuildExceptResp(SCliMsg* pMsg, STransMsg* pResp) { } static FORCE_INLINE uint32_t cliGetIpFromFqdnCache(SHashObj* cache, char* fqdn) { uint32_t addr = 0; - uint32_t* v = taosHashGet(cache, fqdn, strlen(fqdn)); + uint32_t* v = taosHashGet(cache, fqdn, strlen(fqdn) + 1); if (v == NULL) { addr = taosGetIpv4FromFqdn(fqdn); if (addr == 0xffffffff) { @@ -1468,7 +1469,7 @@ static FORCE_INLINE uint32_t cliGetIpFromFqdnCache(SHashObj* cache, char* fqdn) return addr; } - taosHashPut(cache, fqdn, strlen(fqdn), &addr, sizeof(addr)); + taosHashPut(cache, fqdn, strlen(fqdn) + 
1, &addr, sizeof(addr)); } else { addr = *v; } diff --git a/source/libs/transport/src/transSvr.c b/source/libs/transport/src/transSvr.c index 28fb474972..269c7ecf9b 100644 --- a/source/libs/transport/src/transSvr.c +++ b/source/libs/transport/src/transSvr.c @@ -314,7 +314,7 @@ void uvOnRecvCb(uv_stream_t* cli, ssize_t nread, const uv_buf_t* buf) { return; } - tWarn("%s conn %p read error:%s", transLabel(pTransInst), conn, uv_err_name(nread)); + tDebug("%s conn %p read error:%s", transLabel(pTransInst), conn, uv_err_name(nread)); if (nread < 0) { conn->broken = true; if (conn->status == ConnAcquire) { From e22c62ffbacf687fe0fab8e0104626b806183d8b Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Mon, 8 May 2023 18:13:37 +0800 Subject: [PATCH 093/110] fix: data compare of signed and unsigned integers --- source/libs/scalar/test/CMakeLists.txt | 2 +- .../libs/scalar/test/filter/filterTests.cpp | 123 ++++++++++++++++++ source/util/src/tcompare.c | 70 ++++++---- 3 files changed, 166 insertions(+), 29 deletions(-) diff --git a/source/libs/scalar/test/CMakeLists.txt b/source/libs/scalar/test/CMakeLists.txt index 32f5e098c5..caaf86264c 100644 --- a/source/libs/scalar/test/CMakeLists.txt +++ b/source/libs/scalar/test/CMakeLists.txt @@ -1,4 +1,4 @@ enable_testing() -#add_subdirectory(filter) +add_subdirectory(filter) add_subdirectory(scalar) diff --git a/source/libs/scalar/test/filter/filterTests.cpp b/source/libs/scalar/test/filter/filterTests.cpp index b59e89fe0d..51ee9b6570 100644 --- a/source/libs/scalar/test/filter/filterTests.cpp +++ b/source/libs/scalar/test/filter/filterTests.cpp @@ -33,6 +33,7 @@ #include "os.h" #include "filter.h" +#include "filterInt.h" #include "nodes.h" #include "scalar.h" #include "stub.h" @@ -344,6 +345,7 @@ TEST(timerangeTest, greater_and_lower_not_strict) { nodesDestroyNode(logicNode1); } +#if 0 TEST(columnTest, smallint_column_greater_double_value) { SNode *pLeft = NULL, *pRight = NULL, *opNode = NULL; int16_t leftv[5] 
= {1, 2, 3, 4, 5}; @@ -1337,6 +1339,127 @@ TEST(scalarModelogicTest, diff_columns_or_and_or) { nodesDestroyNode(logicNode1); blockDataDestroy(src); } +#endif + +template +int32_t compareSignedWithUnsigned(SignedT l, UnsignedT r) { + if (l < 0) return -1; + auto l_uint64 = static_cast(l); + auto r_uint64 = static_cast(r); + if (l_uint64 < r_uint64) return -1; + if (l_uint64 > r_uint64) return 1; + return 0; +} + +template +int32_t compareUnsignedWithSigned(UnsignedT l, SignedT r) { + if (r < 0) return 1; + auto l_uint64 = static_cast(l); + auto r_uint64 = static_cast(r); + if (l_uint64 < r_uint64) return -1; + if (l_uint64 > r_uint64) return 1; + return 0; +} + +template +void doCompareWithValueRange_SignedWithUnsigned(__compar_fn_t fp) { + int32_t signedMin = -10, signedMax = 10; + int32_t unsignedMin = 0, unsignedMax = 10; + for (SignedT l = signedMin; l <= signedMax; ++l) { + for (UnsignedT r = unsignedMin; r <= unsignedMax; ++r) { + ASSERT_EQ(fp(&l, &r), compareSignedWithUnsigned(l, r)); + } + } +} + +template +void doCompareWithValueRange_UnsignedWithSigned(__compar_fn_t fp) { + int32_t signedMin = -10, signedMax = 10; + int32_t unsignedMin = 0, unsignedMax = 10; + for (UnsignedT l = unsignedMin; l <= unsignedMax; ++l) { + for (SignedT r = signedMin; r <= signedMax; ++r) { + ASSERT_EQ(fp(&l, &r), compareUnsignedWithSigned(l, r)); + } + } +} + +template +void doCompareWithValueRange_OnlyLeftType(__compar_fn_t fp, int32_t rType) { + switch (rType) { + case TSDB_DATA_TYPE_UTINYINT: + doCompareWithValueRange_SignedWithUnsigned(fp); + break; + case TSDB_DATA_TYPE_USMALLINT: + doCompareWithValueRange_SignedWithUnsigned(fp); + break; + case TSDB_DATA_TYPE_UINT: + doCompareWithValueRange_SignedWithUnsigned(fp); + break; + case TSDB_DATA_TYPE_UBIGINT: + doCompareWithValueRange_SignedWithUnsigned(fp); + break; + case TSDB_DATA_TYPE_TINYINT: + doCompareWithValueRange_UnsignedWithSigned(fp); + break; + case TSDB_DATA_TYPE_SMALLINT: + 
doCompareWithValueRange_UnsignedWithSigned(fp); + break; + case TSDB_DATA_TYPE_INT: + doCompareWithValueRange_UnsignedWithSigned(fp); + break; + case TSDB_DATA_TYPE_BIGINT: + doCompareWithValueRange_UnsignedWithSigned(fp); + break; + default: + FAIL(); + } +} + +void doCompare(const std::vector &lTypes, const std::vector &rTypes, int32_t oper) { + for (int i = 0; i < lTypes.size(); ++i) { + for (int j = 0; j < rTypes.size(); ++j) { + auto fp = filterGetCompFuncEx(lTypes[i], rTypes[j], oper); + switch (lTypes[i]) { + case TSDB_DATA_TYPE_TINYINT: + doCompareWithValueRange_OnlyLeftType(fp, rTypes[j]); + break; + case TSDB_DATA_TYPE_SMALLINT: + doCompareWithValueRange_OnlyLeftType(fp, rTypes[j]); + break; + case TSDB_DATA_TYPE_INT: + doCompareWithValueRange_OnlyLeftType(fp, rTypes[j]); + break; + case TSDB_DATA_TYPE_BIGINT: + doCompareWithValueRange_OnlyLeftType(fp, rTypes[j]); + break; + case TSDB_DATA_TYPE_UTINYINT: + doCompareWithValueRange_OnlyLeftType(fp, rTypes[j]); + break; + case TSDB_DATA_TYPE_USMALLINT: + doCompareWithValueRange_OnlyLeftType(fp, rTypes[j]); + break; + case TSDB_DATA_TYPE_UINT: + doCompareWithValueRange_OnlyLeftType(fp, rTypes[j]); + break; + case TSDB_DATA_TYPE_UBIGINT: + doCompareWithValueRange_OnlyLeftType(fp, rTypes[j]); + break; + default: + FAIL(); + } + } + } +} + +TEST(dataCompareTest, signed_and_unsigned_int) { + std::vector lType = {TSDB_DATA_TYPE_TINYINT, TSDB_DATA_TYPE_SMALLINT, TSDB_DATA_TYPE_INT, + TSDB_DATA_TYPE_BIGINT}; + std::vector rType = {TSDB_DATA_TYPE_UTINYINT, TSDB_DATA_TYPE_USMALLINT, TSDB_DATA_TYPE_UINT, + TSDB_DATA_TYPE_UBIGINT}; + + doCompare(lType, rType, OP_TYPE_GREATER_THAN); + doCompare(rType, lType, OP_TYPE_GREATER_THAN); +} int main(int argc, char **argv) { taosSeedRand(taosGetTimestampSec()); diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index f8f78ae6a5..dc57ed97b2 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -308,17 +308,19 @@ int32_t 
compareInt8Uint16(const void *pLeft, const void *pRight) { int32_t compareInt8Uint32(const void *pLeft, const void *pRight) { int8_t left = GET_INT8_VAL(pLeft); + if (left < 0) return -1; uint32_t right = GET_UINT32_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if ((uint32_t)left > right) return 1; + if ((uint32_t)left < right) return -1; return 0; } int32_t compareInt8Uint64(const void *pLeft, const void *pRight) { int8_t left = GET_INT8_VAL(pLeft); + if (left < 0) return -1; uint64_t right = GET_UINT64_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if ((uint64_t)left > right) return 1; + if ((uint64_t)left < right) return -1; return 0; } @@ -380,17 +382,19 @@ int32_t compareInt16Uint16(const void *pLeft, const void *pRight) { int32_t compareInt16Uint32(const void *pLeft, const void *pRight) { int16_t left = GET_INT16_VAL(pLeft); + if (left < 0) return -1; uint32_t right = GET_UINT32_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if ((uint32_t)left > right) return 1; + if ((uint32_t)left < right) return -1; return 0; } int32_t compareInt16Uint64(const void *pLeft, const void *pRight) { int16_t left = GET_INT16_VAL(pLeft); + if (left < 0) return -1; uint64_t right = GET_UINT64_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if ((uint64_t)left > right) return 1; + if ((uint64_t)left < right) return -1; return 0; } @@ -452,17 +456,19 @@ int32_t compareInt32Uint16(const void *pLeft, const void *pRight) { int32_t compareInt32Uint32(const void *pLeft, const void *pRight) { int32_t left = GET_INT32_VAL(pLeft); + if (left < 0) return -1; uint32_t right = GET_UINT32_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if ((uint32_t)left > right) return 1; + if ((uint32_t)left < right) return -1; return 0; } int32_t compareInt32Uint64(const void *pLeft, const void *pRight) { int32_t left = GET_INT32_VAL(pLeft); + if (left < 0) return -1; 
uint64_t right = GET_UINT64_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if ((uint64_t)left > right) return 1; + if ((uint64_t)left < right) return -1; return 0; } @@ -532,9 +538,10 @@ int32_t compareInt64Uint32(const void *pLeft, const void *pRight) { int32_t compareInt64Uint64(const void *pLeft, const void *pRight) { int64_t left = GET_INT64_VAL(pLeft); + if (left < 0) return -1; uint64_t right = GET_UINT64_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if ((uint64_t)left > right) return 1; + if ((uint64_t)left < right) return -1; return 0; } @@ -857,24 +864,27 @@ int32_t compareUint16Uint64(const void *pLeft, const void *pRight) { int32_t compareUint32Int8(const void *pLeft, const void *pRight) { uint32_t left = GET_UINT32_VAL(pLeft); int8_t right = GET_INT8_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if (right < 0) return 1; + if (left > (uint32_t)right) return 1; + if (left < (uint32_t)right) return -1; return 0; } int32_t compareUint32Int16(const void *pLeft, const void *pRight) { uint32_t left = GET_UINT32_VAL(pLeft); int16_t right = GET_INT16_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if (right < 0) return 1; + if (left > (uint32_t)right) return 1; + if (left < (uint32_t)right) return -1; return 0; } int32_t compareUint32Int32(const void *pLeft, const void *pRight) { uint32_t left = GET_UINT32_VAL(pLeft); int32_t right = GET_INT32_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if (right < 0) return 1; + if (left > (uint32_t)right) return 1; + if (left < (uint32_t)right) return -1; return 0; } @@ -929,32 +939,36 @@ int32_t compareUint32Uint64(const void *pLeft, const void *pRight) { int32_t compareUint64Int8(const void *pLeft, const void *pRight) { uint64_t left = GET_UINT64_VAL(pLeft); int8_t right = GET_INT8_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if (right < 0) return 
1; + if (left > (uint64_t)right) return 1; + if (left < (uint64_t)right) return -1; return 0; } int32_t compareUint64Int16(const void *pLeft, const void *pRight) { uint64_t left = GET_UINT64_VAL(pLeft); int16_t right = GET_INT16_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if (right < 0) return 1; + if (left > (uint64_t)right) return 1; + if (left < (uint64_t)right) return -1; return 0; } int32_t compareUint64Int32(const void *pLeft, const void *pRight) { uint64_t left = GET_UINT64_VAL(pLeft); int32_t right = GET_INT32_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if (right < 0) return 1; + if (left > (uint64_t)right) return 1; + if (left < (uint64_t)right) return -1; return 0; } int32_t compareUint64Int64(const void *pLeft, const void *pRight) { uint64_t left = GET_UINT64_VAL(pLeft); int64_t right = GET_INT64_VAL(pRight); - if (left > right) return 1; - if (left < right) return -1; + if (right < 0) return 1; + if (left > (uint64_t)right) return 1; + if (left < (uint64_t)right) return -1; return 0; } From 50feb647488b149d286e696e03c3db40996aaf69 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Tue, 9 May 2023 14:29:09 +0800 Subject: [PATCH 094/110] fix: add check nchar/varchar column length for normal table --- source/libs/parser/src/parTranslater.c | 5 +++++ tests/script/tsim/alter/table.sim | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index e753c08f9d..f4c86d4849 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -8265,6 +8265,11 @@ static int32_t buildAddColReq(STranslateContext* pCxt, SAlterTableStmt* pStmt, S return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_DUPLICATED_COLUMN); } + if ((TSDB_DATA_TYPE_VARCHAR == pStmt->dataType.type && calcTypeBytes(pStmt->dataType) > TSDB_MAX_BINARY_LEN) || + (TSDB_DATA_TYPE_NCHAR == pStmt->dataType.type && 
calcTypeBytes(pStmt->dataType) > TSDB_MAX_NCHAR_LEN)) { + return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN); + } + if (TSDB_MAX_COLUMNS == pTableMeta->tableInfo.numOfColumns) { return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_TOO_MANY_COLUMNS); } diff --git a/tests/script/tsim/alter/table.sim b/tests/script/tsim/alter/table.sim index dccfc7f5d6..a90d8f11b3 100644 --- a/tests/script/tsim/alter/table.sim +++ b/tests/script/tsim/alter/table.sim @@ -657,6 +657,15 @@ if $data20 != null then return -1 endi +print =============== error +sql create table tb2023(ts timestamp, f int); +sql_error alter table tb2023 add column v varchar(16375); +sql_error alter table tb2023 add column v varchar(16385); +sql_error alter table tb2023 add column v varchar(33100); +sql alter table tb2023 add column v varchar(16374); +sql alter table tb2023 drop column v +sql_error alter table tb2023 add column v nchar(4094); +sql alter table tb2023 add column v nchar(4093); print ======= over sql drop database d1 sql select * from information_schema.ins_databases From d05650475e424274fa06cad781b61af89d2acaf1 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Tue, 9 May 2023 14:32:35 +0800 Subject: [PATCH 095/110] enhance: add test case --- tests/script/tsim/alter/table.sim | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/script/tsim/alter/table.sim b/tests/script/tsim/alter/table.sim index a90d8f11b3..ded5d6f78a 100644 --- a/tests/script/tsim/alter/table.sim +++ b/tests/script/tsim/alter/table.sim @@ -663,9 +663,11 @@ sql_error alter table tb2023 add column v varchar(16375); sql_error alter table tb2023 add column v varchar(16385); sql_error alter table tb2023 add column v varchar(33100); sql alter table tb2023 add column v varchar(16374); +sql desc tb2023 sql alter table tb2023 drop column v sql_error alter table tb2023 add column v nchar(4094); sql alter table tb2023 add column v nchar(4093); +sql desc tb2023 print ======= over sql drop database 
d1 sql select * from information_schema.ins_databases From 479bb157e035ab4461d24ac198293b768eb848d8 Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Tue, 9 May 2023 15:19:21 +0800 Subject: [PATCH 096/110] fix: select _block_dist() from t; set the unit of Size to KB --- docs/en/12-taos-sql/24-show.md | 2 +- docs/zh/12-taos-sql/24-show.md | 2 +- source/libs/function/src/builtinsimpl.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/12-taos-sql/24-show.md b/docs/en/12-taos-sql/24-show.md index e2aff7a878..1f340cab30 100644 --- a/docs/en/12-taos-sql/24-show.md +++ b/docs/en/12-taos-sql/24-show.md @@ -189,7 +189,7 @@ show table distributed d0\G; Show Example

 *************************** 1.row ***************************
-_block_dist: Total_Blocks=[5] Total_Size=[93.65 Kb] Average_size=[18.73 Kb] Compression_Ratio=[23.98 %]
+_block_dist: Total_Blocks=[5] Total_Size=[93.65 KB] Average_size=[18.73 KB] Compression_Ratio=[23.98 %]
 
 Total_Blocks :  Table `d0` contains total 5 blocks
 
diff --git a/docs/zh/12-taos-sql/24-show.md b/docs/zh/12-taos-sql/24-show.md
index c85efa2376..ab29a1ee50 100644
--- a/docs/zh/12-taos-sql/24-show.md
+++ b/docs/zh/12-taos-sql/24-show.md
@@ -189,7 +189,7 @@ SHOW TABLE DISTRIBUTED table_name;
 
 *************************** 1.row ***************************
 
-_block_dist: Total_Blocks=[5] Total_Size=[93.65 Kb] Average_size=[18.73 Kb] Compression_Ratio=[23.98 %]
+_block_dist: Total_Blocks=[5] Total_Size=[93.65 KB] Average_size=[18.73 KB] Compression_Ratio=[23.98 %]
 
 Total_Blocks:  表 d0 占用的 block 个数为 5 个
 
diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c
index a8ecd9b0a2..4c019b3e71 100644
--- a/source/libs/function/src/builtinsimpl.c
+++ b/source/libs/function/src/builtinsimpl.c
@@ -5572,7 +5572,7 @@ int32_t blockDistFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
   }
 
   int32_t len = sprintf(st + VARSTR_HEADER_SIZE,
-                        "Total_Blocks=[%d] Total_Size=[%.2f Kb] Average_size=[%.2f Kb] Compression_Ratio=[%.2f %c]",
+                        "Total_Blocks=[%d] Total_Size=[%.2f KB] Average_size=[%.2f KB] Compression_Ratio=[%.2f %c]",
                         pData->numOfBlocks, pData->totalSize / 1024.0, averageSize / 1024.0, compRatio, '%');
 
   varDataSetLen(st, len);

From fb0183a642a148d5f155c93c273b2eaa8c4bb2dc Mon Sep 17 00:00:00 2001
From: wangjiaming0909 <604227650@qq.com>
Date: Tue, 9 May 2023 15:24:55 +0800
Subject: [PATCH 097/110] fix: set telemetry reporting service url from
 telemetry.taosdata.com to telemetry.tdengine.com

---
 source/common/src/tglobal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c
index aa35b298e6..b85dfa80b0 100644
--- a/source/common/src/tglobal.c
+++ b/source/common/src/tglobal.c
@@ -84,7 +84,7 @@ bool     tsMonitorComp = false;
 // telem
 bool     tsEnableTelem = true;
 int32_t  tsTelemInterval = 43200;
-char     tsTelemServer[TSDB_FQDN_LEN] = "telemetry.taosdata.com";
+char     tsTelemServer[TSDB_FQDN_LEN] = "telemetry.tdengine.com";
 uint16_t tsTelemPort = 80;
 char    *tsTelemUri = "/report";
 

From 76dd5a962a16d7beb367fd9f4ff4c680de3b7dc3 Mon Sep 17 00:00:00 2001
From: Ganlin Zhao 
Date: Tue, 9 May 2023 17:19:45 +0800
Subject: [PATCH 098/110] fix: fix crash caused by tsdbReaderResume error not
 handled

---
 source/dnode/vnode/src/tsdb/tsdbRead.c | 29 ++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c
index d0a0ea7947..2fc2b8cdd6 100644
--- a/source/dnode/vnode/src/tsdb/tsdbRead.c
+++ b/source/dnode/vnode/src/tsdb/tsdbRead.c
@@ -4851,7 +4851,11 @@ int32_t tsdbNextDataBlock(STsdbReader* pReader, bool* hasNext) {
   qTrace("tsdb/read: %p, take read mutex, code: %d", pReader, code);
 
   if (pReader->flag == READER_STATUS_SUSPEND) {
-    tsdbReaderResume(pReader);
+    code = tsdbReaderResume(pReader);
+    if (code != TSDB_CODE_SUCCESS) {
+      tsdbReleaseReader(pReader);
+      return code;
+    }
   }
 
   if (pReader->innerReader[0] != NULL && pReader->step == 0) {
@@ -5124,11 +5128,17 @@ SSDataBlock* tsdbRetrieveDataBlock(STsdbReader* pReader, SArray* pIdList) {
 }
 
 int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) {
+  int32_t code = TSDB_CODE_SUCCESS;
+
   qTrace("tsdb/reader-reset: %p, take read mutex", pReader);
   tsdbAcquireReader(pReader);
 
   if (pReader->flag == READER_STATUS_SUSPEND) {
-    tsdbReaderResume(pReader);
+    code = tsdbReaderResume(pReader);
+    if (code != TSDB_CODE_SUCCESS) {
+      tsdbReleaseReader(pReader);
+      return code;
+    }
   }
 
   if (isEmptyQueryTimeWindow(&pReader->window) || pReader->pReadSnap == NULL) {
@@ -5163,8 +5173,6 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) {
   int64_t ts = asc ? pReader->window.skey - 1 : pReader->window.ekey + 1;
   resetAllDataBlockScanInfo(pStatus->pTableMap, ts, step);
 
-  int32_t code = 0;
-
   // no data in files, let's try buffer in memory
   if (pStatus->fileIter.numOfFiles == 0) {
     pStatus->loadFromFile = false;
@@ -5209,7 +5217,11 @@ int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTa
   // find the start data block in file
   tsdbAcquireReader(pReader);
   if (pReader->flag == READER_STATUS_SUSPEND) {
-    tsdbReaderResume(pReader);
+    code = tsdbReaderResume(pReader);
+    if (code != TSDB_CODE_SUCCESS) {
+      tsdbReleaseReader(pReader);
+      return code;
+    }
   }
   SReaderStatus* pStatus = &pReader->status;
 
@@ -5277,12 +5289,17 @@ int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTa
 }
 
 int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) {
+  int32_t code = TSDB_CODE_SUCCESS;
   int64_t rows = 0;
 
   SReaderStatus* pStatus = &pReader->status;
   tsdbAcquireReader(pReader);
   if (pReader->flag == READER_STATUS_SUSPEND) {
-    tsdbReaderResume(pReader);
+    code = tsdbReaderResume(pReader);
+    if (code != TSDB_CODE_SUCCESS) {
+      tsdbReleaseReader(pReader);
+      return code;
+    }
   }
 
   int32_t iter = 0;

From 8224d49702370079d9488a57170fd4966f71364b Mon Sep 17 00:00:00 2001
From: liuyao <54liuyao@163.com>
Date: Tue, 9 May 2023 14:03:57 +0800
Subject: [PATCH 099/110] fix:after restarting taosd, stream does not work.

---
 source/dnode/vnode/src/tq/tqRestore.c | 9 +++++++++
 source/libs/stream/src/streamMeta.c   | 5 +++++
 2 files changed, 14 insertions(+)

diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c
index 1e45f578f6..0bb33b1215 100644
--- a/source/dnode/vnode/src/tq/tqRestore.c
+++ b/source/dnode/vnode/src/tq/tqRestore.c
@@ -109,6 +109,15 @@ int32_t createStreamRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
     // seek the stored version and extract data from WAL
     int32_t code = walReadSeekVer(pTask->exec.pWalReader, pTask->chkInfo.currentVer);
     if (code != TSDB_CODE_SUCCESS) {  // no data in wal, quit
+      SWal *pWal = pTask->exec.pWalReader->pWal;
+      if (pTask->chkInfo.currentVer < pWal->vers.firstVer ) {
+        pTask->chkInfo.currentVer = pWal->vers.firstVer;
+        code = walReadSeekVer(pTask->exec.pWalReader, pTask->chkInfo.currentVer);
+        if (code != TSDB_CODE_SUCCESS) {
+          streamMetaReleaseTask(pStreamMeta, pTask);
+          continue;
+        }
+      }
       streamMetaReleaseTask(pStreamMeta, pTask);
       continue;
     }
diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c
index aefe7885f9..af54904c43 100644
--- a/source/libs/stream/src/streamMeta.c
+++ b/source/libs/stream/src/streamMeta.c
@@ -188,6 +188,11 @@ int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask*
     return -1;
   }
 
+  if (streamMetaCommit(pMeta) < 0) {
+    tFreeStreamTask(pTask);
+    return -1;
+  }
+
   taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, POINTER_BYTES);
   taosArrayPush(pMeta->pTaskList, &pTask->id.taskId);
   return 0;

From 587f750c94d8d7784a0d964b54c29440d9e1e5ed Mon Sep 17 00:00:00 2001
From: wangmm0220 
Date: Tue, 9 May 2023 17:42:11 +0800
Subject: [PATCH 100/110] fix:[TD-24058]send poll result to client if no data 5
 times to avoid lost data

---
 source/dnode/vnode/src/inc/tq.h    |  1 +
 source/dnode/vnode/src/tq/tqUtil.c | 19 +++++++++++++------
 source/libs/wal/src/walRead.c      | 15 +++++++--------
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h
index 30b2fb74ca..e431ca4a01 100644
--- a/source/dnode/vnode/src/inc/tq.h
+++ b/source/dnode/vnode/src/inc/tq.h
@@ -101,6 +101,7 @@ typedef struct {
   STqPushHandle pushHandle;    // push
   STqExecHandle execHandle;    // exec
   SRpcMsg*      msg;
+  int32_t       noDataPollCnt;
 } STqHandle;
 
 typedef struct {
diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c
index a66d63a910..94803ef438 100644
--- a/source/dnode/vnode/src/tq/tqUtil.c
+++ b/source/dnode/vnode/src/tq/tqUtil.c
@@ -16,6 +16,7 @@
 #include "tq.h"
 
 #define IS_OFFSET_RESET_TYPE(_t)  ((_t) < 0)
+#define NO_POLL_CNT 5
 
 static int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp);
 
@@ -185,12 +186,18 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle,
   //   till now, all data has been transferred to consumer, new data needs to push client once arrived.
   if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG &&
       dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) {
-    // lock
-    taosWLockLatch(&pTq->lock);
-    code = tqRegisterPushHandle(pTq, pHandle, pMsg);
-    taosWUnLockLatch(&pTq->lock);
-    tDeleteSMqDataRsp(&dataRsp);
-    return code;
+    if(pHandle->noDataPollCnt >= NO_POLL_CNT){  // send poll result to client if no data 5 times to avoid lost data
+      pHandle->noDataPollCnt = 0;
+      // lock
+      taosWLockLatch(&pTq->lock);
+      code = tqRegisterPushHandle(pTq, pHandle, pMsg);
+      taosWUnLockLatch(&pTq->lock);
+      tDeleteSMqDataRsp(&dataRsp);
+      return code;
+    }
+    else{
+      pHandle->noDataPollCnt++;
+    }
   }
 
 
diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c
index b29e36efdc..37d97b35a6 100644
--- a/source/libs/wal/src/walRead.c
+++ b/source/libs/wal/src/walRead.c
@@ -74,18 +74,17 @@ int32_t walNextValidMsg(SWalReader *pReader) {
   int64_t lastVer = walGetLastVer(pReader->pWal);
   int64_t committedVer = walGetCommittedVer(pReader->pWal);
   int64_t appliedVer = walGetAppliedVer(pReader->pWal);
-  while(appliedVer < committedVer){   // wait apply ver equal to commit ver, otherwise may lost data when consume data [TD-24010]
-    wDebug("vgId:%d, wal apply ver:%"PRId64" smaller than commit ver:%"PRId64", so sleep 1ms", pReader->pWal->cfg.vgId, appliedVer, committedVer);
-    taosMsleep(1);
-    appliedVer = walGetAppliedVer(pReader->pWal);
+  if(appliedVer < committedVer){   // wait apply ver equal to commit ver, otherwise may lost data when consume data [TD-24010]
+    wDebug("vgId:%d, wal apply ver:%"PRId64" smaller than commit ver:%"PRId64, pReader->pWal->cfg.vgId, appliedVer, committedVer);
+//    taosMsleep(10);
   }
 //  int64_t endVer = pReader->cond.scanUncommited ? lastVer : committedVer;
-//  endVer = TMIN(appliedVer, endVer);
+  int64_t endVer = TMIN(appliedVer, committedVer);
 
   wDebug("vgId:%d, wal start to fetch, index:%" PRId64 ", last index:%" PRId64 " commit index:%" PRId64
-         ", applied index:%" PRId64,
-         pReader->pWal->cfg.vgId, fetchVer, lastVer, committedVer, appliedVer);
-  while (fetchVer <= committedVer) {
+         ", applied index:%" PRId64", end index:%" PRId64,
+         pReader->pWal->cfg.vgId, fetchVer, lastVer, committedVer, appliedVer, endVer);
+  while (fetchVer <= endVer) {
     if (walFetchHeadNew(pReader, fetchVer) < 0) {
       return -1;
     }

From 60c6d480f2210f963de9cd8409d9d74b4688a4e5 Mon Sep 17 00:00:00 2001
From: chenhaoran 
Date: Tue, 9 May 2023 20:13:25 +0800
Subject: [PATCH 101/110] test:add testcases of learner  in election

---
 tests/pytest/util/cluster.py                  |   6 +-
 .../6-cluster/clusterCommonCheck.py           |  42 +++-
 ...eInsertDataRebootModifyMetaAlterRep1to3.py | 206 ++++++++++++++++
 ...eInsertDataRebootModifyMetaAlterRep3to1.py | 206 ++++++++++++++++
 ...dnode3mnodeInsertDatarRebootAlterRep1-3.py | 222 ++++++++++++++++++
 ...node3mnodeInsertLessDataAlterRep3to1to3.py | 196 ++++++++++++++++
 .../6dnode3mnodeStopDnodeInsertDatatb.py      | 191 +++++++++++++++
 7 files changed, 1065 insertions(+), 4 deletions(-)
 create mode 100644 tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDataRebootModifyMetaAlterRep1to3.py
 create mode 100644 tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDataRebootModifyMetaAlterRep3to1.py
 create mode 100644 tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDatarRebootAlterRep1-3.py
 create mode 100644 tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py
 create mode 100644 tests/system-test/6-cluster/manually-test/6dnode3mnodeStopDnodeInsertDatatb.py

diff --git a/tests/pytest/util/cluster.py b/tests/pytest/util/cluster.py
index 2607cf63c2..a6e3530dc9 100644
--- a/tests/pytest/util/cluster.py
+++ b/tests/pytest/util/cluster.py
@@ -52,8 +52,9 @@ class ConfigureyCluster:
             dnode.addExtraCfg("secondEp", f"{hostname}:{startPort_sec}")
 
             # configure  dnoe of independent mnodes
-            if num <= self.mnodeNums and self.mnodeNums != 0 and independentMnode == True :
-                dnode.addExtraCfg("supportVnodes", 1024)
+            if num <= self.mnodeNums and self.mnodeNums != 0 and independentMnode == "True" :
+                tdLog.info("set mnode supportVnodes 0")
+                dnode.addExtraCfg("supportVnodes", 0)            
             # print(dnode)
             self.dnodes.append(dnode)
         return self.dnodes
@@ -71,6 +72,7 @@ class ConfigureyCluster:
         tdSql.init(conn.cursor())
         mnodeNums=int(mnodeNums)
         for i in range(2,mnodeNums+1):
+            tdLog.info("create mnode on dnode %d"%i)
             tdSql.execute(" create mnode on  dnode %d;"%i)
 
 
diff --git a/tests/system-test/6-cluster/clusterCommonCheck.py b/tests/system-test/6-cluster/clusterCommonCheck.py
index 149c6d8ded..f5926321da 100644
--- a/tests/system-test/6-cluster/clusterCommonCheck.py
+++ b/tests/system-test/6-cluster/clusterCommonCheck.py
@@ -207,7 +207,7 @@ class ClusterComCheck:
                 count+=1
         else:
             tdLog.debug(tdSql.queryResult)
-            tdLog.exit("stop mnodes  on dnode %d  failed in 10s ")
+            tdLog.exit(f"stop mnodes  on dnode {offlineDnodeNo}  failed in 10s ")
 
     def check3mnode2off(self,mnodeNums=3):
         count=0
@@ -226,7 +226,45 @@ class ClusterComCheck:
             count+=1
         else:
             tdLog.debug(tdSql.queryResult)
-            tdLog.exit("stop mnodes  on dnode %d  failed in 10s ")
+            tdLog.exit("stop mnodes  on dnode 2 or 3 failed in 10s")
+
+    def check_vgroups_status(self,vgroup_numbers=2,db_replica=3,count_number=10,db_name="db"):
+        """ check vgroups status in 10s after db vgroups status is changed """
+        vgroup_numbers = int(vgroup_numbers)
+        self.db_replica = int(db_replica)
+        tdLog.debug("start to check status of vgroups")
+        count=0
+        last_number=vgroup_numbers-1
+        while count < count_number:
+            time.sleep(1)
+            tdSql.query(f"show  {db_name}.vgroups;")
+            if  count == 0 :
+                if tdSql.checkRows(vgroup_numbers) :
+                    tdLog.success(f"{db_name} has {vgroup_numbers} vgroups" )
+                else:
+                    tdLog.exit(f"vgroup number of {db_name} is not correct")
+            if self.db_replica == 1 :
+                if  tdSql.queryResult[0][4] == 'leader' and tdSql.queryResult[1][4] == 'leader' and tdSql.queryResult[last_number][4] == 'leader':
+                    ready_time= (count + 1)
+                    tdLog.success(f"all vgroups of {db_name} are leaders in {count + 1} s")
+                    return True
+                count+=1
+            elif self.db_replica == 3 :
+                vgroup_status_first=[tdSql.queryResult[0][4],tdSql.queryResult[0][6],tdSql.queryResult[0][8]]
+                
+                vgroup_status_last=[tdSql.queryResult[last_number][4],tdSql.queryResult[last_number][6],tdSql.queryResult[last_number][8]]
+                if  vgroup_status_first.count('leader') == 1 and vgroup_status_first.count('follower') == 2:
+                    if vgroup_status_last.count('leader') == 1 and vgroup_status_last.count('follower') == 2:
+                        ready_time= (count + 1)
+                        tdLog.success(f"all vgroups of {db_name} are ready in {ready_time} s")
+                        return True
+                count+=1
+        else:
+            tdLog.debug(tdSql.queryResult)
+            tdLog.notice(f"all vgroups  leader of {db_name} is selected {count}s ")
+            caller = inspect.getframeinfo(inspect.stack()[1][0])
+            args = (caller.filename, caller.lineno)
+            tdLog.exit("%s(%d) failed " % args)
 
 
 
diff --git a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDataRebootModifyMetaAlterRep1to3.py b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDataRebootModifyMetaAlterRep1to3.py
new file mode 100644
index 0000000000..7d46b3143d
--- /dev/null
+++ b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDataRebootModifyMetaAlterRep1to3.py
@@ -0,0 +1,206 @@
+import taos
+import sys
+import time
+import os
+
+from util.log import *
+from util.sql import *
+from util.cases import *
+from util.dnodes import TDDnodes
+from util.dnodes import TDDnode
+from util.cluster import *
+sys.path.append("./6-cluster")
+from clusterCommonCreate import *
+from clusterCommonCheck import clusterComCheck
+
+import time
+import socket
+import subprocess
+from multiprocessing import Process
+import threading
+import time
+import inspect
+import ctypes
+
+class TDTestCase:
+
+    def init(self, conn, logSql, replicaVar=1):
+        tdLog.debug(f"start to excute {__file__}")
+        self.TDDnodes = None
+        tdSql.init(conn.cursor())
+        self.host = socket.gethostname()
+
+
+    def getBuildPath(self):
+        selfPath = os.path.dirname(os.path.realpath(__file__))
+
+        if ("community" in selfPath):
+            projPath = selfPath[:selfPath.find("community")]
+        else:
+            projPath = selfPath[:selfPath.find("tests")]
+
+        for root, dirs, files in os.walk(projPath):
+            if ("taosd" in files):
+                rootRealPath = os.path.dirname(os.path.realpath(root))
+                if ("packaging" not in rootRealPath):
+                    buildPath = root[:len(root) - len("/build/bin")]
+                    break
+        return buildPath
+
+    def _async_raise(self, tid, exctype):
+        """raises the exception, performs cleanup if needed"""
+        if not inspect.isclass(exctype):
+            exctype = type(exctype)
+        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))
+        if res == 0:
+            raise ValueError("invalid thread id")
+        elif res != 1:
+            # """if it returns a number greater than one, you're in trouble,
+            # and you should call it again with exc=NULL to revert the effect"""
+            ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)
+            raise SystemError("PyThreadState_SetAsyncExc failed")
+
+    def stopThread(self,thread):
+        """Raise SystemExit in `thread`, or in each thread of a list/tuple; threads that are not running are skipped."""
+        for t in (thread if isinstance(thread, (list, tuple)) else [thread]):
+            if t.is_alive(): self._async_raise(t.ident, SystemExit)
+    def fiveDnodeThreeMnode(self,dnodeNumbers,mnodeNums,restartNumbers,stopRole):  # scenario: fill db0_0 (replica 1), restart nodes selected by stopRole, switch to replica 3 after dnode index 3 restarts, verify row counts
+        tdLog.printNoPrefix("======== test case 1: ")
+        paraDict = {'dbName':     'db0_0',
+                    'dropFlag':   1,
+                    'event':      '',
+                    'vgroups':    4,
+                    'replica':    1,
+                    'stbName':    'stb',
+                    'stbNumbers': 2,
+                    'colPrefix':  'c',
+                    'tagPrefix':  't',
+                    'colSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'tagSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'ctbPrefix':  'ctb',
+                    'ctbNum':     200,
+                    'startTs':    1640966400000,  # 2022-01-01 00:00:00.000 (UTC+8)
+                    "rowsPerTbl": 1000,
+                    "batchNum": 5000
+                    }
+
+        dnodeNumbers=int(dnodeNumbers)
+        mnodeNums=int(mnodeNums)
+        vnodeNumbers = int(dnodeNumbers-mnodeNums)  # dnodes at index >= mnodeNums are the ones the "vnode" role restarts
+        allctbNumbers=(paraDict['stbNumbers']*paraDict["ctbNum"])  # not read again in this method
+        rowsPerStb=paraDict["ctbNum"]*paraDict["rowsPerTbl"]
+        rowsall=rowsPerStb*paraDict['stbNumbers']  # not read again in this method
+        dbNumbers = 1
+
+        tdLog.info("first check dnode and mnode")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        tdSql.checkData(0,1,'%s:6030'%self.host)
+        tdSql.checkData(4,1,'%s:6430'%self.host)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        
+        #check mnode status
+        tdLog.info("check mnode status")
+        clusterComCheck.checkMnodeStatus(mnodeNums)
+
+        # run a statement through tdSql.error (presumably asserts that it fails; confirm), then re-check the dnode list
+        tdLog.info("Confirm the status of the dnode again")
+        tdSql.error("create mnode on dnode 2")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        print(tdSql.queryResult)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+
+        # create the database; super tables and child tables are created further down
+        clusterComCreate.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], paraDict["vgroups"],paraDict['replica'])
+        tdLog.info("Take turns stopping Mnodes ")
+
+        tdDnodes=cluster.dnodes
+        stopcount =0
+        threads=[]
+
+        # create the super tables from the 'stb' name prefix (the loops below address them as stb_<i>)
+        stableName= paraDict['stbName']
+        newTdSql=tdCom.newTdSql()
+        clusterComCreate.create_stables(newTdSql, paraDict["dbName"],stableName,paraDict['stbNumbers'])
+        # create child tables for every super table stb_<i>, each call on a fresh tdCom.newTdSql() object
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s_%d'%(paraDict['stbName'],i)
+            newTdSql=tdCom.newTdSql()
+            clusterComCreate.create_ctable(newTdSql, paraDict["dbName"],stableName,stableName, paraDict['ctbNum'])
+        # insert data: one thread per super table; all threads are joined before any restart begins
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s_%d'%(paraDict['stbName'],i)
+            newTdSql=tdCom.newTdSql()
+            threads.append(threading.Thread(target=clusterComCreate.insert_data, args=(newTdSql, paraDict["dbName"],stableName,paraDict["ctbNum"],paraDict["rowsPerTbl"],paraDict["batchNum"],paraDict["startTs"])))
+        for tr in threads:
+            tr.start()
+        for tr in threads:
+            tr.join()
+
+        while stopcount < restartNumbers:
+            tdLog.info(" restart loop: %d"%stopcount )
+            if stopRole == "mnode":
+                for i in range(mnodeNums):
+                    tdDnodes[i].stoptaosd()
+                    # sleep(10)
+                    tdDnodes[i].starttaosd()
+                    # sleep(10)
+            elif stopRole == "vnode":
+                for i in range(vnodeNumbers):
+                    tdDnodes[i+mnodeNums].stoptaosd()
+                    # sleep(10)
+                    tdDnodes[i+mnodeNums].starttaosd()
+                    # sleep(10)
+            elif stopRole == "dnode":
+                for i in range(dnodeNumbers):
+                    if i == 0 :  # only before the first dnode is stopped: run alterStbMetaData on stb_0
+                        stableName= '%s_%d'%(paraDict['stbName'],0)
+                        newTdSql=tdCom.newTdSql()
+                        # newTdSql.execute('alter database db0_0 replica 3')
+                        clusterComCreate.alterStbMetaData(newTdSql, paraDict["dbName"],stableName,paraDict["ctbNum"],paraDict["rowsPerTbl"],paraDict["batchNum"])
+                    tdDnodes[i].stoptaosd()
+                    clusterComCheck.checkDbRows(dbNumbers)  # runs while dnode i is stopped
+                    # sleep(10)
+                    tdDnodes[i].starttaosd()
+                    if i == 3 :  # after the dnode at index 3 is started again, request replica 3
+                        TdSqlEx=tdCom.newTdSql()
+                        tdLog.info("alter database db0_0 replica 3")
+                        TdSqlEx.execute('alter database db0_0 replica 3')
+ 
+
+            # dnodeNumbers don't include database of schema
+            if clusterComCheck.checkDnodes(dnodeNumbers):
+                tdLog.info("123")
+            else:
+                print("456")
+
+                self.stopThread(threads)  # passes the whole list of insert threads (all already joined above)
+                tdLog.exit("one or more of dnodes failed to start ")
+                # self.check3mnode()
+            stopcount+=1
+
+
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        clusterComCheck.checkDbRows(dbNumbers)
+        # clusterComCheck.checkDb(dbNumbers,1,paraDict["dbName"])
+
+        # tdSql.execute("use %s" %(paraDict["dbName"]))
+        tdSql.query("show %s.stables"%(paraDict["dbName"]))
+        tdSql.checkRows(paraDict["stbNumbers"])
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s.%s_%d'%(paraDict["dbName"],paraDict['stbName'],i)
+            tdSql.query("select count(*) from %s"%stableName)
+            if i == 0 :
+                tdSql.checkData(0,0,rowsPerStb*2)  # stb_0 is expected to hold twice the rows; presumably alterStbMetaData writes a second batch (confirm)
+            else:
+                tdSql.checkData(0,0,rowsPerStb)   
+        clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=3,db_name=paraDict["dbName"],count_number=150)  # final state is checked against replica 3
+    def run(self):
+        # Single scenario: 6 dnodes, mnodeNums=3, one restart round that stops and starts every dnode.
+        self.fiveDnodeThreeMnode(dnodeNumbers=6,mnodeNums=3,restartNumbers=1,stopRole='dnode')
+
+    def stop(self):  # teardown: close the tdSql helper, then log success for this file
+        tdSql.close()
+        tdLog.success(f"{__file__} successfully executed")
+
+tdCases.addLinux(__file__, TDTestCase())  # presumably registers this case for Linux runs; confirm in util.cases
+tdCases.addWindows(__file__, TDTestCase())  # presumably registers this case for Windows runs; confirm in util.cases
diff --git a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDataRebootModifyMetaAlterRep3to1.py b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDataRebootModifyMetaAlterRep3to1.py
new file mode 100644
index 0000000000..5b5fb04969
--- /dev/null
+++ b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDataRebootModifyMetaAlterRep3to1.py
@@ -0,0 +1,206 @@
+import taos
+import sys
+import time
+import os
+
+from util.log import *
+from util.sql import *
+from util.cases import *
+from util.dnodes import TDDnodes
+from util.dnodes import TDDnode
+from util.cluster import *
+sys.path.append("./6-cluster")
+from clusterCommonCreate import *
+from clusterCommonCheck import clusterComCheck
+
+import time
+import socket
+import subprocess
+from multiprocessing import Process
+import threading
+import time
+import inspect
+import ctypes
+
+class TDTestCase:
+
+    def init(self, conn, logSql, replicaVar=1):
+        """Initialise the imported tdSql helper with a cursor from `conn` and record the local host name; `logSql` and `replicaVar` are not read here."""
+        tdLog.debug(f"start to excute {__file__}")
+        self.TDDnodes = None
+        tdSql.init(conn.cursor())
+        self.host = socket.gethostname()
+
+    def getBuildPath(self):
+        """Walk the project tree for a directory holding `taosd`; return its path minus len("/build/bin") characters, or "" if none is found."""
+        selfPath = os.path.dirname(os.path.realpath(__file__))
+        marker = "community" if "community" in selfPath else "tests"
+        projPath = selfPath[:selfPath.find(marker)]
+
+        buildPath = ""  # previously unbound when no taosd was found, so the final return raised UnboundLocalError
+        for root, dirs, files in os.walk(projPath):
+            if "taosd" not in files:
+                continue
+            rootRealPath = os.path.dirname(os.path.realpath(root))
+            if "packaging" not in rootRealPath:  # hits whose parent path contains "packaging" are skipped
+                buildPath = root[:len(root) - len("/build/bin")]
+                break
+        return buildPath
+
+    def _async_raise(self, tid, exctype):
+        """Raise `exctype` asynchronously in the thread with ident `tid` (ValueError: unknown ident; SystemError: call failed)."""
+        if not inspect.isclass(exctype):
+            exctype = type(exctype)
+        # The C parameter is `unsigned long`; a bare Python int is marshalled as C int, truncating 64-bit idents.
+        tid = ctypes.c_ulong(tid)
+        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))
+        if res == 0:
+            raise ValueError("invalid thread id")
+        elif res != 1:
+            ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)  # exc=NULL reverts the pending exception
+            raise SystemError("PyThreadState_SetAsyncExc failed")
+
+    def stopThread(self,thread):
+        """Raise SystemExit in `thread`, or in each thread of a list/tuple; threads that are not running are skipped."""
+        for t in (thread if isinstance(thread, (list, tuple)) else [thread]):
+            if t.is_alive(): self._async_raise(t.ident, SystemExit)
+    def fiveDnodeThreeMnode(self,dnodeNumbers,mnodeNums,restartNumbers,stopRole):  # scenario: fill db0_0 (replica 3), restart nodes selected by stopRole, switch to replica 1 after dnode index 3 restarts, verify row counts
+        tdLog.printNoPrefix("======== test case 1: ")
+        paraDict = {'dbName':     'db0_0',
+                    'dropFlag':   1,
+                    'event':      '',
+                    'vgroups':    4,
+                    'replica':    3,
+                    'stbName':    'stb',
+                    'stbNumbers': 2,
+                    'colPrefix':  'c',
+                    'tagPrefix':  't',
+                    'colSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'tagSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'ctbPrefix':  'ctb',
+                    'ctbNum':     200,
+                    'startTs':    1640966400000,  # 2022-01-01 00:00:00.000 (UTC+8)
+                    "rowsPerTbl": 1000,
+                    "batchNum": 5000
+                    }
+
+        dnodeNumbers=int(dnodeNumbers)
+        mnodeNums=int(mnodeNums)
+        vnodeNumbers = int(dnodeNumbers-mnodeNums)  # dnodes at index >= mnodeNums are the ones the "vnode" role restarts
+        allctbNumbers=(paraDict['stbNumbers']*paraDict["ctbNum"])  # not read again in this method
+        rowsPerStb=paraDict["ctbNum"]*paraDict["rowsPerTbl"]
+        rowsall=rowsPerStb*paraDict['stbNumbers']  # not read again in this method
+        dbNumbers = 1
+
+        tdLog.info("first check dnode and mnode")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        tdSql.checkData(0,1,'%s:6030'%self.host)
+        tdSql.checkData(4,1,'%s:6430'%self.host)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        
+        #check mnode status
+        tdLog.info("check mnode status")
+        clusterComCheck.checkMnodeStatus(mnodeNums)
+
+        # run a statement through tdSql.error (presumably asserts that it fails; confirm), then re-check the dnode list
+        tdLog.info("Confirm the status of the dnode again")
+        tdSql.error("create mnode on dnode 2")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        print(tdSql.queryResult)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+
+        # create the database; super tables and child tables are created further down
+        clusterComCreate.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], paraDict["vgroups"],paraDict['replica'])
+        tdLog.info("Take turns stopping Mnodes ")
+
+        tdDnodes=cluster.dnodes
+        stopcount =0
+        threads=[]
+
+        # create the super tables from the 'stb' name prefix (the loops below address them as stb_<i>)
+        stableName= paraDict['stbName']
+        newTdSql=tdCom.newTdSql()
+        clusterComCreate.create_stables(newTdSql, paraDict["dbName"],stableName,paraDict['stbNumbers'])
+        # create child tables for every super table stb_<i>, each call on a fresh tdCom.newTdSql() object
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s_%d'%(paraDict['stbName'],i)
+            newTdSql=tdCom.newTdSql()
+            clusterComCreate.create_ctable(newTdSql, paraDict["dbName"],stableName,stableName, paraDict['ctbNum'])
+        # insert data: one thread per super table; all threads are joined before any restart begins
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s_%d'%(paraDict['stbName'],i)
+            newTdSql=tdCom.newTdSql()
+            threads.append(threading.Thread(target=clusterComCreate.insert_data, args=(newTdSql, paraDict["dbName"],stableName,paraDict["ctbNum"],paraDict["rowsPerTbl"],paraDict["batchNum"],paraDict["startTs"])))
+        for tr in threads:
+            tr.start()
+        for tr in threads:
+            tr.join()
+
+        while stopcount < restartNumbers:
+            tdLog.info(" restart loop: %d"%stopcount )
+            if stopRole == "mnode":
+                for i in range(mnodeNums):
+                    tdDnodes[i].stoptaosd()
+                    # sleep(10)
+                    tdDnodes[i].starttaosd()
+                    # sleep(10)
+            elif stopRole == "vnode":
+                for i in range(vnodeNumbers):
+                    tdDnodes[i+mnodeNums].stoptaosd()
+                    # sleep(10)
+                    tdDnodes[i+mnodeNums].starttaosd()
+                    # sleep(10)
+            elif stopRole == "dnode":
+                for i in range(dnodeNumbers):
+                    tdDnodes[i].stoptaosd()
+                    clusterComCheck.checkDbRows(dbNumbers)  # runs while dnode i is stopped
+                    if i == 0 :  # only while the first dnode is stopped: run alterStbMetaData on stb_0
+                        stableName= '%s_%d'%(paraDict['stbName'],0)
+                        newTdSql=tdCom.newTdSql()
+                        # newTdSql.execute('alter database db0_0 replica 3')
+                        clusterComCreate.alterStbMetaData(newTdSql, paraDict["dbName"],stableName,paraDict["ctbNum"],paraDict["rowsPerTbl"],paraDict["batchNum"])
+                    # sleep(10)
+                    tdDnodes[i].starttaosd()
+                    if i == 3 :  # after the dnode at index 3 is started again, request replica 1
+                        TdSqlEx=tdCom.newTdSql()
+                        tdLog.info("alter database db0_0 replica 1")
+                        TdSqlEx.execute('alter database db0_0 replica 1')
+ 
+
+            # dnodeNumbers don't include database of schema
+            if clusterComCheck.checkDnodes(dnodeNumbers):
+                tdLog.info("123")
+            else:
+                print("456")
+
+                self.stopThread(threads)  # passes the whole list of insert threads (all already joined above)
+                tdLog.exit("one or more of dnodes failed to start ")
+                # self.check3mnode()
+            stopcount+=1
+
+
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        clusterComCheck.checkDbRows(dbNumbers)
+        # clusterComCheck.checkDb(dbNumbers,1,paraDict["dbName"])
+
+        # tdSql.execute("use %s" %(paraDict["dbName"]))
+        tdSql.query("show %s.stables"%(paraDict["dbName"]))
+        tdSql.checkRows(paraDict["stbNumbers"])
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s.%s_%d'%(paraDict["dbName"],paraDict['stbName'],i)
+            tdSql.query("select count(*) from %s"%stableName)
+            if i == 0 :
+                tdSql.checkData(0,0,rowsPerStb*2)  # stb_0 is expected to hold twice the rows; presumably alterStbMetaData writes a second batch (confirm)
+            else:
+                tdSql.checkData(0,0,rowsPerStb)   
+        clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=1,db_name=paraDict["dbName"],count_number=150)  # final state is checked against replica 1
+    def run(self):
+        # Single scenario: 6 dnodes, mnodeNums=3, one restart round that stops and starts every dnode.
+        self.fiveDnodeThreeMnode(dnodeNumbers=6,mnodeNums=3,restartNumbers=1,stopRole='dnode')
+
+    def stop(self):  # teardown: close the tdSql helper, then log success for this file
+        tdSql.close()
+        tdLog.success(f"{__file__} successfully executed")
+
+tdCases.addLinux(__file__, TDTestCase())  # presumably registers this case for Linux runs; confirm in util.cases
+tdCases.addWindows(__file__, TDTestCase())  # presumably registers this case for Windows runs; confirm in util.cases
diff --git a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDatarRebootAlterRep1-3.py b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDatarRebootAlterRep1-3.py
new file mode 100644
index 0000000000..aa3ed8e3fd
--- /dev/null
+++ b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertDatarRebootAlterRep1-3.py
@@ -0,0 +1,222 @@
+import taos
+import sys
+import time
+import os
+
+from util.log import *
+from util.sql import *
+from util.cases import *
+from util.dnodes import TDDnodes
+from util.dnodes import TDDnode
+from util.cluster import *
+sys.path.append("./6-cluster")
+from clusterCommonCreate import *
+from clusterCommonCheck import clusterComCheck
+
+import time
+import socket
+import subprocess
+from multiprocessing import Process
+import threading
+import time
+import inspect
+import ctypes
+
+class TDTestCase:
+
+    def init(self, conn, logSql, replicaVar=1):
+        """Initialise the imported tdSql helper with a cursor from `conn` and record the local host name; `logSql` and `replicaVar` are not read here."""
+        tdLog.debug(f"start to excute {__file__}")
+        self.TDDnodes = None
+        tdSql.init(conn.cursor())
+        self.host = socket.gethostname()
+
+    def getBuildPath(self):
+        """Walk the project tree for a directory holding `taosd`; return its path minus len("/build/bin") characters, or "" if none is found."""
+        selfPath = os.path.dirname(os.path.realpath(__file__))
+        marker = "community" if "community" in selfPath else "tests"
+        projPath = selfPath[:selfPath.find(marker)]
+
+        buildPath = ""  # previously unbound when no taosd was found, so the final return raised UnboundLocalError
+        for root, dirs, files in os.walk(projPath):
+            if "taosd" not in files:
+                continue
+            rootRealPath = os.path.dirname(os.path.realpath(root))
+            if "packaging" not in rootRealPath:  # hits whose parent path contains "packaging" are skipped
+                buildPath = root[:len(root) - len("/build/bin")]
+                break
+        return buildPath
+
+    def _async_raise(self, tid, exctype):
+        """Raise `exctype` asynchronously in the thread with ident `tid` (ValueError: unknown ident; SystemError: call failed)."""
+        if not inspect.isclass(exctype):
+            exctype = type(exctype)
+        # The C parameter is `unsigned long`; a bare Python int is marshalled as C int, truncating 64-bit idents.
+        tid = ctypes.c_ulong(tid)
+        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))
+        if res == 0:
+            raise ValueError("invalid thread id")
+        elif res != 1:
+            ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)  # exc=NULL reverts the pending exception
+            raise SystemError("PyThreadState_SetAsyncExc failed")
+
+    def stopThread(self,thread):
+        """Raise SystemExit in `thread`, or in each thread of a list/tuple; threads that are not running are skipped."""
+        for t in (thread if isinstance(thread, (list, tuple)) else [thread]):
+            if t.is_alive(): self._async_raise(t.ident, SystemExit)
+    def insertData(self,countstart,countstop):
+        """Recreate databases db<countstart> .. db<countstop-1>, each with super table stb1, plain table t1 and child tables ct1..ct4; no rows are written."""
+        for dbIdx in range(countstart,countstop):
+            dropSql = "drop database if exists db%d" %dbIdx
+            createSql = "create database if not exists db%d replica 1 duration 300" %dbIdx
+            tdLog.debug(dropSql)
+            tdSql.execute(dropSql)
+            print(createSql)
+            tdSql.execute(createSql)
+            tdSql.execute("use db%d" %dbIdx)
+            tdSql.execute(
+            '''create table stb1
+            (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp)
+            tags (t1 int)
+            '''
+            )
+            tdSql.execute(
+                '''
+                create table t1
+                (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp)
+                '''
+            )
+            # child tables ct1..ct4 use tag values 1..4
+            for tagVal in range(1, 5):
+                tdSql.execute(f'create table ct{tagVal} using stb1 tags ( {tagVal} )')
+
+
+    def fiveDnodeThreeMnode(self,dnodeNumbers,mnodeNums,restartNumbers,stopRole):  # scenario: start insert threads on db0_0 (replica 1), request replica 3 while they run, restart nodes selected by stopRole, then verify
+        tdLog.printNoPrefix("======== test case 1: ")
+        paraDict = {'dbName':     'db0_0',
+                    'dropFlag':   1,
+                    'event':      '',
+                    'vgroups':    4,
+                    'replica':    1,
+                    'stbName':    'stb',
+                    'stbNumbers': 2,
+                    'colPrefix':  'c',
+                    'tagPrefix':  't',
+                    'colSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'tagSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'ctbPrefix':  'ctb',
+                    'ctbNum':     1000,
+                    'startTs':    1640966400000,  # 2022-01-01 00:00:00.000 (UTC+8)
+                    "rowsPerTbl": 100,
+                    "batchNum": 5000
+                    }
+
+        dnodeNumbers = int(dnodeNumbers)
+        mnodeNums = int(mnodeNums)
+        vnodeNumbers = int(dnodeNumbers-mnodeNums)  # dnodes at index >= mnodeNums are the ones the "vnode" role restarts
+        allctbNumbers = (paraDict['stbNumbers']*paraDict["ctbNum"])  # not read again in this method
+        rowsPerStb = paraDict["ctbNum"]*paraDict["rowsPerTbl"]  # only referenced by the commented-out count check near the end
+        rowsall = rowsPerStb*paraDict['stbNumbers']  # not read again in this method
+        dbNumbers = 1
+        replica3 = 3
+        tdLog.info("first check dnode and mnode")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        tdSql.checkData(0,1,'%s:6030'%self.host)
+        tdSql.checkData(4,1,'%s:6430'%self.host)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        
+        #check mnode status
+        tdLog.info("check mnode status")
+        clusterComCheck.checkMnodeStatus(mnodeNums)
+
+        # run a statement through tdSql.error (presumably asserts that it fails; confirm), then re-check the dnode list
+        tdLog.info("Confirm the status of the dnode again")
+        tdSql.error("create mnode on dnode 2")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        print(tdSql.queryResult)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+
+        # create the database; super tables and child tables are created further down
+        clusterComCreate.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], paraDict["vgroups"],paraDict['replica'])
+        tdLog.info("Take turns stopping Mnodes ")
+
+        tdDnodes=cluster.dnodes
+        stopcount =0
+        threads=[]
+
+        # create the super tables from the 'stb' name prefix (the loops below address them as stb_<i>)
+        stableName= paraDict['stbName']
+        newTdSql=tdCom.newTdSql()
+        clusterComCreate.create_stables(newTdSql, paraDict["dbName"],stableName,paraDict['stbNumbers'])
+        # create child tables for every super table stb_<i>, each call on a fresh tdCom.newTdSql() object
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s_%d'%(paraDict['stbName'],i)
+            newTdSql=tdCom.newTdSql()
+            clusterComCreate.create_ctable(newTdSql, paraDict["dbName"],stableName,stableName, paraDict['ctbNum'])
+        # insert data: one thread per super table; the threads keep running during the alter and the restarts below
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s_%d'%(paraDict['stbName'],i)
+            newTdSql=tdCom.newTdSql()
+            threads.append(threading.Thread(target=clusterComCreate.insert_data, args=(newTdSql, paraDict["dbName"],stableName,paraDict["ctbNum"],paraDict["rowsPerTbl"],paraDict["batchNum"],paraDict["startTs"])))
+        for tr in threads:
+            tr.start()
+        TdSqlEx=tdCom.newTdSql()
+        tdLog.info("alter database db0_0 replica 3")
+        TdSqlEx.execute('alter database db0_0 replica 3')  # issued right after the insert threads start, before they are joined
+        while stopcount < restartNumbers:
+            tdLog.info(" restart loop: %d"%stopcount )
+            if stopRole == "mnode":
+                for i in range(mnodeNums):
+                    tdDnodes[i].stoptaosd()
+                    # sleep(10)
+                    tdDnodes[i].starttaosd()
+                    # sleep(10)
+            elif stopRole == "vnode":
+                for i in range(vnodeNumbers):
+                    tdDnodes[i+mnodeNums].stoptaosd()
+                    # sleep(10)
+                    tdDnodes[i+mnodeNums].starttaosd()
+                    # sleep(10)
+            elif stopRole == "dnode":
+                for i in range(dnodeNumbers):
+                    tdDnodes[i].stoptaosd()
+                    # tdLog.info('select  cast(c2 as nchar(10)) from db0_0.stb_1;')
+                    # TdSqlEx.execute('select  cast(c2 as nchar(10)) from db0_0.stb_1;')
+                    # tdLog.info('select  avg(c1)  from db0_0.stb_0 interval(10s);')
+                    # TdSqlEx.execute('select  avg(c1)  from db0_0.stb_0 interval(10s);')
+                    # sleep(10)
+                    tdDnodes[i].starttaosd()
+                    # sleep(10)
+            # dnodeNumbers don't include database of schema
+            if clusterComCheck.checkDnodes(dnodeNumbers):
+                tdLog.info("123")
+            else:
+                print("456")
+
+                self.stopThread(threads)  # passes the whole list of insert threads (they are joined only after this loop)
+                tdLog.exit("one or more of dnodes failed to start ")
+                # self.check3mnode()
+            stopcount+=1
+
+        for tr in threads:
+            tr.join()
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        clusterComCheck.checkDbRows(dbNumbers)
+        # clusterComCheck.checkDb(dbNumbers,1,paraDict["dbName"])
+
+        # tdSql.execute("use %s" %(paraDict["dbName"]))
+        tdSql.query("show %s.stables"%(paraDict["dbName"]))
+        tdSql.checkRows(paraDict["stbNumbers"])
+        # for i in range(paraDict['stbNumbers']):
+        #     stableName= '%s.%s_%d'%(paraDict["dbName"],paraDict['stbName'],i)
+        #     tdSql.query("select count(*) from %s"%stableName)
+        #     tdSql.checkData(0,0,rowsPerStb)
+        clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=replica3,db_name=paraDict["dbName"],count_number=240)  # final state is checked against replica 3
+    def run(self):
+        # Single scenario: 6 dnodes, mnodeNums=3, four restart rounds that each stop and start every dnode.
+        self.fiveDnodeThreeMnode(dnodeNumbers=6,mnodeNums=3,restartNumbers=4,stopRole='dnode')
+
+    def stop(self):  # teardown: close the tdSql helper, then log success for this file
+        tdSql.close()
+        tdLog.success(f"{__file__} successfully executed")
+
+tdCases.addLinux(__file__, TDTestCase())  # presumably registers this case for Linux runs; confirm in util.cases
+tdCases.addWindows(__file__, TDTestCase())  # presumably registers this case for Windows runs; confirm in util.cases
diff --git a/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py
new file mode 100644
index 0000000000..ed7b99a880
--- /dev/null
+++ b/tests/system-test/6-cluster/manually-test/6dnode3mnodeInsertLessDataAlterRep3to1to3.py
@@ -0,0 +1,196 @@
+import taos
+import sys
+import time
+import os
+
+from util.log import *
+from util.sql import *
+from util.cases import *
+from util.dnodes import TDDnodes
+from util.dnodes import TDDnode
+from util.cluster import *
+sys.path.append("./6-cluster")
+from clusterCommonCreate import *
+from clusterCommonCheck import clusterComCheck
+
+import time
+import socket
+import subprocess
+from multiprocessing import Process
+import threading
+import time
+import inspect
+import ctypes
+
+class TDTestCase:
+
+    def init(self, conn, logSql, replicaVar=1):
+        """Initialise the imported tdSql helper with a cursor from `conn` and record the local host name; `logSql` and `replicaVar` are not read here."""
+        tdLog.debug(f"start to excute {__file__}")
+        self.TDDnodes = None
+        tdSql.init(conn.cursor())
+        self.host = socket.gethostname()
+
+    def getBuildPath(self):
+        """Walk the project tree for a directory holding `taosd`; return its path minus len("/build/bin") characters, or "" if none is found."""
+        selfPath = os.path.dirname(os.path.realpath(__file__))
+        marker = "community" if "community" in selfPath else "tests"
+        projPath = selfPath[:selfPath.find(marker)]
+
+        buildPath = ""  # previously unbound when no taosd was found, so the final return raised UnboundLocalError
+        for root, dirs, files in os.walk(projPath):
+            if "taosd" not in files:
+                continue
+            rootRealPath = os.path.dirname(os.path.realpath(root))
+            if "packaging" not in rootRealPath:  # hits whose parent path contains "packaging" are skipped
+                buildPath = root[:len(root) - len("/build/bin")]
+                break
+        return buildPath
+
+    def _async_raise(self, tid, exctype):
+        """Raise `exctype` asynchronously in the thread with ident `tid` (ValueError: unknown ident; SystemError: call failed)."""
+        if not inspect.isclass(exctype):
+            exctype = type(exctype)
+        # The C parameter is `unsigned long`; a bare Python int is marshalled as C int, truncating 64-bit idents.
+        tid = ctypes.c_ulong(tid)
+        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))
+        if res == 0:
+            raise ValueError("invalid thread id")
+        elif res != 1:
+            ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)  # exc=NULL reverts the pending exception
+            raise SystemError("PyThreadState_SetAsyncExc failed")
+
+    def stopThread(self,thread):
+        """Raise SystemExit in `thread`, or in each thread of a list/tuple; threads that are not running are skipped."""
+        for t in (thread if isinstance(thread, (list, tuple)) else [thread]):
+            if t.is_alive(): self._async_raise(t.ident, SystemExit)
+    def insertData(self,countstart,countstop):
+        """Recreate databases db<countstart> .. db<countstop-1>, each with super table stb1, plain table t1 and child tables ct1..ct4; no rows are written."""
+        for dbIdx in range(countstart,countstop):
+            dropSql = "drop database if exists db%d" %dbIdx
+            createSql = "create database if not exists db%d replica 1 duration 300" %dbIdx
+            tdLog.debug(dropSql)
+            tdSql.execute(dropSql)
+            print(createSql)
+            tdSql.execute(createSql)
+            tdSql.execute("use db%d" %dbIdx)
+            tdSql.execute(
+            '''create table stb1
+            (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp)
+            tags (t1 int)
+            '''
+            )
+            tdSql.execute(
+                '''
+                create table t1
+                (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp)
+                '''
+            )
+            # child tables ct1..ct4 use tag values 1..4
+            for tagVal in range(1, 5):
+                tdSql.execute(f'create table ct{tagVal} using stb1 tags ( {tagVal} )')
+
+
+    def fiveDnodeThreeMnode(self,dnodeNumbers,mnodeNums,restartNumbers,stopRole):  # scenario: start inserts on db0_0 (replica 3), drop to replica 1 while they run, verify counts, then raise back to replica 3; restartNumbers and stopRole are not read here
+        tdLog.printNoPrefix("======== test case 1: ")
+        paraDict = {'dbName':     'db0_0',
+                    'dropFlag':   1,
+                    'event':      '',
+                    'vgroups':    4,
+                    'replica':    3,
+                    'stbName':    'stb',
+                    'stbNumbers': 2,
+                    'colPrefix':  'c',
+                    'tagPrefix':  't',
+                    'colSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'tagSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'ctbPrefix':  'ctb',
+                    'ctbNum':     1,
+                    'startTs':    1640966400000,  # 2022-01-01 00:00:00.000 (UTC+8)
+                    "rowsPerTbl": 1,
+                    "batchNum": 5000
+                    }
+
+        dnodeNumbers=int(dnodeNumbers)
+        mnodeNums=int(mnodeNums)
+        vnodeNumbers = int(dnodeNumbers-mnodeNums)  # not read again in this method
+        replica1 = 1
+        replica3 = 3
+        allctbNumbers=(paraDict['stbNumbers']*paraDict["ctbNum"])  # not read again in this method
+        rowsPerStb=paraDict["ctbNum"]*paraDict["rowsPerTbl"]
+        rowsall=rowsPerStb*paraDict['stbNumbers']  # not read again in this method
+        dbNumbers = 1
+
+        tdLog.info("first check dnode and mnode")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        tdSql.checkData(0,1,'%s:6030'%self.host)
+        tdSql.checkData(4,1,'%s:6430'%self.host)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        
+        #check mnode status
+        tdLog.info("check mnode status")
+        clusterComCheck.checkMnodeStatus(mnodeNums)
+
+        # run a statement through tdSql.error (presumably asserts that it fails; confirm), then re-check the dnode list
+        tdLog.info("Confirm the status of the dnode again")
+        tdSql.error("create mnode on dnode 2")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        print(tdSql.queryResult)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+
+        # create the database; super tables and child tables are created further down
+        clusterComCreate.create_database(tdSql, paraDict["dbName"],paraDict["dropFlag"], paraDict["vgroups"],paraDict['replica'])
+        tdLog.info("Take turns stopping Mnodes ")
+
+        tdDnodes=cluster.dnodes  # not read again in this method
+        stopcount =0  # not read again in this method
+        threads=[]
+
+        # create the super tables from the 'stb' name prefix (the loops below address them as stb_<i>)
+        stableName= paraDict['stbName']
+        newTdSql=tdCom.newTdSql()
+        clusterComCreate.create_stables(newTdSql, paraDict["dbName"],stableName,paraDict['stbNumbers'])
+        # create child tables for every super table stb_<i>, each call on a fresh tdCom.newTdSql() object
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s_%d'%(paraDict['stbName'],i)
+            newTdSql=tdCom.newTdSql()
+            clusterComCreate.create_ctable(newTdSql, paraDict["dbName"],stableName,stableName, paraDict['ctbNum'])
+        # insert data: one thread per super table; the replica change below is issued while they run
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s_%d'%(paraDict['stbName'],i)
+            newTdSql=tdCom.newTdSql()
+            threads.append(threading.Thread(target=clusterComCreate.insert_data, args=(newTdSql, paraDict["dbName"],stableName,paraDict["ctbNum"],paraDict["rowsPerTbl"],paraDict["batchNum"],paraDict["startTs"])))
+        for tr in threads:
+            tr.start()
+        TdSqlEx=tdCom.newTdSql()
+        tdLog.info(f"alter database db0_0 replica {replica1}")
+        TdSqlEx.execute(f'alter database db0_0 replica {replica1}')
+        for tr in threads:
+            tr.join()
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        clusterComCheck.checkDbRows(dbNumbers)
+        # clusterComCheck.checkDb(dbNumbers,1,paraDict["dbName"])
+
+        # tdSql.execute("use %s" %(paraDict["dbName"]))
+        tdSql.query("show %s.stables"%(paraDict["dbName"]))
+        tdSql.checkRows(paraDict["stbNumbers"])
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s.%s_%d'%(paraDict["dbName"],paraDict['stbName'],i)
+            tdSql.query("select count(*) from %s"%stableName)
+            tdSql.checkData(0,0,rowsPerStb)
+        
+        clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=replica1,db_name=paraDict["dbName"],count_number=20)  # state after the first alter is checked against replica 1
+        sleep(5)
+        tdLog.info(f"show transactions;alter database db0_0 replica {replica3};")
+        TdSqlEx.execute(f'show transactions;')
+        TdSqlEx.execute(f'alter database db0_0 replica {replica3};')
+        clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=replica3,db_name=paraDict["dbName"],count_number=120)  # final state is checked against replica 3
+
+    def run(self):
+        # print(self.master_dnode.cfgDict)
+        self.fiveDnodeThreeMnode(dnodeNumbers=6,mnodeNums=3,restartNumbers=4,stopRole='dnode')
+
+    def stop(self):
+        tdSql.close()
+        tdLog.success(f"{__file__} successfully executed")
+
+tdCases.addLinux(__file__, TDTestCase())
+tdCases.addWindows(__file__, TDTestCase())
diff --git a/tests/system-test/6-cluster/manually-test/6dnode3mnodeStopDnodeInsertDatatb.py b/tests/system-test/6-cluster/manually-test/6dnode3mnodeStopDnodeInsertDatatb.py
new file mode 100644
index 0000000000..e02af29a05
--- /dev/null
+++ b/tests/system-test/6-cluster/manually-test/6dnode3mnodeStopDnodeInsertDatatb.py
@@ -0,0 +1,191 @@
+import taos
+import sys
+import time
+import os
+
+from util.log import *
+from util.sql import *
+from util.cases import *
+from util.dnodes import TDDnodes
+from util.dnodes import TDDnode
+from util.cluster import *
+sys.path.append("./6-cluster")
+from clusterCommonCreate import *
+from clusterCommonCheck import clusterComCheck
+
+import time
+import socket
+import subprocess
+from multiprocessing import Process
+import threading
+import time
+import inspect
+import ctypes
+
+class TDTestCase:
+
+    def init(self, conn, logSql, replicaVar=1):
+        tdLog.debug(f"start to excute {__file__}")
+        self.TDDnodes = None
+        tdSql.init(conn.cursor())
+        self.host = socket.gethostname()
+
+
+    def getBuildPath(self):
+        selfPath = os.path.dirname(os.path.realpath(__file__))
+
+        if ("community" in selfPath):
+            projPath = selfPath[:selfPath.find("community")]
+        else:
+            projPath = selfPath[:selfPath.find("tests")]
+
+        for root, dirs, files in os.walk(projPath):
+            if ("taosd" in files):
+                rootRealPath = os.path.dirname(os.path.realpath(root))
+                if ("packaging" not in rootRealPath):
+                    buildPath = root[:len(root) - len("/build/bin")]
+                    break
+        return buildPath
+
+    def _async_raise(self, tid, exctype):
+        """raises the exception, performs cleanup if needed"""
+        if not inspect.isclass(exctype):
+            exctype = type(exctype)
+        res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))
+        if res == 0:
+            raise ValueError("invalid thread id")
+        elif res != 1:
+            # """if it returns a number greater than one, you're in trouble,
+            # and you should call it again with exc=NULL to revert the effect"""
+            ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)
+            raise SystemError("PyThreadState_SetAsyncExc failed")
+
+    def stopThread(self,thread):
+        self._async_raise(thread.ident, SystemExit)
+
+
+    def insertData(self,dbname,tableCount,rowsPerCount):
+        # tableCount : create table number
+        # rowsPerCount :  rows per table
+        # first add data : db\stable\childtable\general table
+        os.system(f"taosBenchmark  -d {dbname} -n {tableCount} -t {rowsPerCount}  -z 1 -k 10000 -y ")
+
+
+    def fiveDnodeThreeMnode(self,dnodeNumbers,mnodeNums,restartNumbers,stopRole):
+        tdLog.printNoPrefix("======== test case 1: ")
+        paraDict = {'dbName':     'db0_0',
+                    'dropFlag':   1,
+                    'event':      '',
+                    'vgroups':    4,
+                    'replica':    1,
+                    'stbName':    'stb',
+                    'stbNumbers': 2,
+                    'colPrefix':  'c',
+                    'tagPrefix':  't',
+                    'colSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'tagSchema':   [{'type': 'INT', 'count':1}, {'type': 'binary', 'len':20, 'count':1}],
+                    'ctbPrefix':  'ctb',
+                    'ctbNum':     10000,
+                    'startTs':    1640966400000,  # 2022-01-01 00:00:00.000
+                    "rowsPerTbl": 10000,
+                    "batchNum": 5000
+                    }
+
+        dnodeNumbers=int(dnodeNumbers)
+        mnodeNums=int(mnodeNums)
+        vnodeNumbers = int(dnodeNumbers-mnodeNums)
+        allctbNumbers=(paraDict['stbNumbers']*paraDict["ctbNum"])
+        rowsPerStb=paraDict["ctbNum"]*paraDict["rowsPerTbl"]
+        rowsall=rowsPerStb*paraDict['stbNumbers']
+        dbNumbers = 1
+
+        tdLog.info("first check dnode and mnode")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        tdSql.checkData(0,1,'%s:6030'%self.host)
+        tdSql.checkData(4,1,'%s:6430'%self.host)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        
+        #check mnode status
+        tdLog.info("check mnode status")
+        clusterComCheck.checkMnodeStatus(mnodeNums)
+
+        # run an operation that is expected to fail, then confirm the dnode status again
+        tdLog.info("Confirm the status of the dnode again")
+        tdSql.error("create mnode on dnode 2")
+        tdSql.query("select * from information_schema.ins_dnodes;")
+        print(tdSql.queryResult)
+        clusterComCheck.checkDnodes(dnodeNumbers)
+
+        # create database and stable
+        tdLog.info("Take turns stopping Mnodes ")
+
+        tdDnodes=cluster.dnodes
+        stopcount =0
+        threads=[]
+
+        # create stable:stb_0
+        threads.append(threading.Thread(target=self.insertData, args=(paraDict["dbName"],paraDict["ctbNum"],paraDict["rowsPerTbl"])))
+        for tr in threads:
+            tr.start()
+        TdSqlEx=tdCom.newTdSql()
+        tdLog.info("alter database db0_0 replica 3")
+        TdSqlEx.execute('alter database db0_0 replica 3')
+        while stopcount < restartNumbers:
+            tdLog.info(" restart loop: %d"%stopcount )
+            if stopRole == "mnode":
+                for i in range(mnodeNums):
+                    tdDnodes[i].stoptaosd()
+                    # sleep(10)
+                    tdDnodes[i].starttaosd()
+                    # sleep(10)
+            elif stopRole == "vnode":
+                for i in range(vnodeNumbers):
+                    tdDnodes[i+mnodeNums].stoptaosd()
+                    # sleep(10)
+                    tdDnodes[i+mnodeNums].starttaosd()
+                    # sleep(10)
+            elif stopRole == "dnode":
+                for i in range(dnodeNumbers):
+                    tdDnodes[i].stoptaosd()
+                    # tdLog.info('select  cast(c2 as nchar(10)) from db0_0.stb_1;')
+                    # TdSqlEx.execute('select  cast(c2 as nchar(10)) from db0_0.stb_1;')
+                    # tdLog.info('select  avg(c1)  from db0_0.stb_0 interval(10s);')
+                    # TdSqlEx.execute('select  avg(c1)  from db0_0.stb_0 interval(10s);')
+                    # sleep(10)
+                    tdDnodes[i].starttaosd()
+                    # sleep(10)
+            # dnodeNumbers don't include database of schema
+            if clusterComCheck.checkDnodes(dnodeNumbers):
+                tdLog.info("123")
+            else:
+                print("456")
+
+                self.stopThread(threads)
+                tdLog.exit("one or more of dnodes failed to start ")
+                # self.check3mnode()
+            stopcount+=1
+
+        for tr in threads:
+            tr.join()
+        clusterComCheck.checkDnodes(dnodeNumbers)
+        clusterComCheck.checkDbRows(dbNumbers)
+        # clusterComCheck.checkDb(dbNumbers,1,paraDict["dbName"])
+
+        # tdSql.execute("use %s" %(paraDict["dbName"]))
+        tdSql.query("show %s.stables"%(paraDict["dbName"]))
+        tdSql.checkRows(paraDict["stbNumbers"])
+        for i in range(paraDict['stbNumbers']):
+            stableName= '%s.%s_%d'%(paraDict["dbName"],paraDict['stbName'],i)
+            tdSql.query("select count(*) from %s"%stableName)
+            tdSql.checkData(0,0,rowsPerStb)
+        clusterComCheck.check_vgroups_status(vgroup_numbers=paraDict["vgroups"],db_replica=3,db_name=paraDict["dbName"],count_number=240)             
+    def run(self):
+        # print(self.master_dnode.cfgDict)
+        self.fiveDnodeThreeMnode(dnodeNumbers=6,mnodeNums=3,restartNumbers=4,stopRole='dnode')
+
+    def stop(self):
+        tdSql.close()
+        tdLog.success(f"{__file__} successfully executed")
+
+tdCases.addLinux(__file__, TDTestCase())
+tdCases.addWindows(__file__, TDTestCase())

From 536be66bbe772badbdd11ac098f2e943d4bd1525 Mon Sep 17 00:00:00 2001
From: chenhaoran 
Date: Tue, 9 May 2023 20:34:18 +0800
Subject: [PATCH 102/110] test: add test1

---
 test1 | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 test1

diff --git a/test1 b/test1
new file mode 100644
index 0000000000..e69de29bb2

From ac0ac18cc1577b354e1d924bbe7bc6a17853a2f8 Mon Sep 17 00:00:00 2001
From: muicoder 
Date: Wed, 10 May 2023 11:44:56 +0800
Subject: [PATCH 103/110] Match the status code (#21225)

Signed-off-by: muicoder 
---
 packaging/docker/bin/entrypoint.sh | 2 +-
 packaging/docker/bin/taos-check    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packaging/docker/bin/entrypoint.sh b/packaging/docker/bin/entrypoint.sh
index f2811de7bd..a60254d7ef 100755
--- a/packaging/docker/bin/entrypoint.sh
+++ b/packaging/docker/bin/entrypoint.sh
@@ -55,7 +55,7 @@ else
         exit $?
     fi
     while true; do
-        es=$(taos -h $FIRST_EP_HOST -P $FIRST_EP_PORT --check)
+        es=$(taos -h $FIRST_EP_HOST -P $FIRST_EP_PORT --check | grep "^[0-9]*:")
         echo ${es}
         if [ "${es%%:*}" -eq 2 ]; then
             echo "execute create dnode"
diff --git a/packaging/docker/bin/taos-check b/packaging/docker/bin/taos-check
index 5dc06b6018..349187da9b 100755
--- a/packaging/docker/bin/taos-check
+++ b/packaging/docker/bin/taos-check
@@ -1,5 +1,5 @@
 #!/bin/sh
-es=$(taos --check)
+es=$(taos --check | grep "^[0-9]*:")
 code=${es%%:*}
 if [ "$code" -ne "0" ] && [ "$code" -ne "4" ]; then
     exit 0

From 53ee1af8ba411bd75cc980d32d53657edec83c60 Mon Sep 17 00:00:00 2001
From: Benguang Zhao 
Date: Tue, 9 May 2023 20:48:16 +0800
Subject: [PATCH 104/110] fix: not set to ignore signals in dmLogCrash

---
 source/dnode/mgmt/exe/dmMain.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c
index f0e020edfe..989bff3984 100644
--- a/source/dnode/mgmt/exe/dmMain.c
+++ b/source/dnode/mgmt/exe/dmMain.c
@@ -87,18 +87,6 @@ static void dmStopDnode(int signum, void *sigInfo, void *context) {
 }
 
 void dmLogCrash(int signum, void *sigInfo, void *context) {
-  taosIgnSignal(SIGTERM);
-  taosIgnSignal(SIGHUP);
-  taosIgnSignal(SIGINT);
-  taosIgnSignal(SIGBREAK);
-
-#ifndef WINDOWS
-  taosIgnSignal(SIGBUS);
-#endif
-  taosIgnSignal(SIGABRT);
-  taosIgnSignal(SIGFPE);
-  taosIgnSignal(SIGSEGV);
-
   char       *pMsg = NULL;
   const char *flags = "UTL FATAL ";
   ELogLevel   level = DEBUG_FATAL;

From 8f1ed449890ca24fff6bb02eb5c6b316a141926f Mon Sep 17 00:00:00 2001
From: wangjiaming0909 <604227650@qq.com>
Date: Wed, 10 May 2023 10:46:08 +0800
Subject: [PATCH 105/110]  fix: connect error when login with
 information_schema/performance_schema

---
 source/dnode/mnode/impl/src/mndProfile.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c
index 50e502f4ab..ff6a2f460a 100644
--- a/source/dnode/mnode/impl/src/mndProfile.c
+++ b/source/dnode/mnode/impl/src/mndProfile.c
@@ -256,10 +256,13 @@ static int32_t mndProcessConnectReq(SRpcMsg *pReq) {
     snprintf(db, TSDB_DB_FNAME_LEN, "%d%s%s", pUser->acctId, TS_PATH_DELIMITER, connReq.db);
     pDb = mndAcquireDb(pMnode, db);
     if (pDb == NULL) {
-      terrno = TSDB_CODE_MND_INVALID_DB;
-      mGError("user:%s, failed to login from %s while use db:%s since %s", pReq->info.conn.user, ip, connReq.db,
-              terrstr());
-      goto _OVER;
+      if (0 != strcmp(connReq.db, TSDB_INFORMATION_SCHEMA_DB) &&
+          (0 != strcmp(connReq.db, TSDB_PERFORMANCE_SCHEMA_DB))) {
+        terrno = TSDB_CODE_MND_INVALID_DB;
+        mGError("user:%s, failed to login from %s while use db:%s since %s", pReq->info.conn.user, ip, connReq.db,
+                terrstr());
+        goto _OVER;
+      }
     }
 
     if (mndCheckDbPrivilege(pMnode, pReq->info.conn.user, MND_OPER_READ_OR_WRITE_DB, pDb) != 0) {

From 2a558323be174a9aa032a01d09c42df1f6ce4919 Mon Sep 17 00:00:00 2001
From: Benguang Zhao 
Date: Wed, 10 May 2023 18:02:36 +0800
Subject: [PATCH 106/110] fix: set absent closeTs with mtime of log files
 during walCheckAndRepairMeta

---
 source/libs/wal/src/walMeta.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c
index cda7e35b0f..12008bb31f 100644
--- a/source/libs/wal/src/walMeta.c
+++ b/source/libs/wal/src/walMeta.c
@@ -295,6 +295,36 @@ void walAlignVersions(SWal* pWal) {
   wInfo("vgId:%d, reset commitVer to %" PRId64, pWal->cfg.vgId, pWal->vers.commitVer);
 }
 
+int walRepairLogFileTs(SWal* pWal, bool* updateMeta) {
+  int32_t sz = taosArrayGetSize(pWal->fileInfoSet);
+  int32_t fileIdx = -1;
+  int32_t lastCloseTs = 0;
+  char    fnameStr[WAL_FILE_LEN] = {0};
+
+  while (++fileIdx < sz - 1) {
+    SWalFileInfo* pFileInfo = taosArrayGet(pWal->fileInfoSet, fileIdx);
+    if (pFileInfo->closeTs != -1) {
+      lastCloseTs = pFileInfo->closeTs;
+      continue;
+    }
+
+    walBuildLogName(pWal, pFileInfo->firstVer, fnameStr);
+    int32_t mtime = 0;
+    if (taosStatFile(fnameStr, NULL, &mtime) < 0) {
+      terrno = TAOS_SYSTEM_ERROR(errno);
+      wError("vgId:%d, failed to stat file due to %s, file:%s", pWal->cfg.vgId, strerror(errno), fnameStr);
+      return -1;
+    }
+
+    if (updateMeta != NULL) *updateMeta = true;
+    if (pFileInfo->createTs == -1) pFileInfo->createTs = lastCloseTs;
+    pFileInfo->closeTs = mtime;
+    lastCloseTs = pFileInfo->closeTs;
+  }
+
+  return 0;
+}
+
 bool walLogEntriesComplete(const SWal* pWal) {
   int32_t sz = taosArrayGetSize(pWal->fileInfoSet);
   bool    complete = true;
@@ -460,6 +490,11 @@ int walCheckAndRepairMeta(SWal* pWal) {
   }
   (void)walAlignVersions(pWal);
 
+  // repair ts of files
+  if (walRepairLogFileTs(pWal, &updateMeta) < 0) {
+    return -1;
+  }
+
   // update meta file
   if (updateMeta) {
     (void)walSaveMeta(pWal);

From 29bab82a0c7fbe5c12a8333a2268e604bacfdce5 Mon Sep 17 00:00:00 2001
From: wangmm0220 
Date: Wed, 10 May 2023 22:51:32 +0800
Subject: [PATCH 107/110] fix:compile error in release mode

---
 source/client/src/clientSml.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c
index ce06e0eac4..30c0008f3d 100644
--- a/source/client/src/clientSml.c
+++ b/source/client/src/clientSml.c
@@ -1351,7 +1351,7 @@ static int32_t smlInsertData(SSmlHandle *info) {
     }
     taosArrayPush(info->pRequest->tableList, &pName);
 
-    tstrncpy(pName.tname, tableData->childTableName, strlen(tableData->childTableName) + 1);
+    strcpy(pName.tname, tableData->childTableName);
 
     SRequestConnInfo conn = {0};
     conn.pTrans = info->taos->pAppInfo->pTransporter;

From 7433bad888ae38fa374f5c7aaadf813d0652bb04 Mon Sep 17 00:00:00 2001
From: Benguang Zhao 
Date: Wed, 10 May 2023 20:01:39 +0800
Subject: [PATCH 108/110] enh: tidy up walEndSnapshot

---
 source/libs/wal/src/walWrite.c | 59 ++++++++++++++--------------------
 1 file changed, 25 insertions(+), 34 deletions(-)

diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c
index 848de4f36d..9b7b3dfd50 100644
--- a/source/libs/wal/src/walWrite.c
+++ b/source/libs/wal/src/walWrite.c
@@ -284,15 +284,15 @@ int32_t walEndSnapshot(SWal *pWal) {
   if (ver == -1) {
     code = -1;
     goto END;
-  };
+  }
 
   pWal->vers.snapshotVer = ver;
   int ts = taosGetTimestampSec();
-
   ver = TMAX(ver - pWal->vers.logRetention, pWal->vers.firstVer - 1);
 
+  // compatible mode for refVer
   bool hasTopic = false;
-  int64_t refVer = ver;
+  int64_t refVer = INT64_MAX;
   void *pIter = NULL;
   while (1) {
     pIter = taosHashIterate(pWal->pRefHash, pIter);
@@ -300,54 +300,40 @@ int32_t walEndSnapshot(SWal *pWal) {
     SWalRef *pRef = *(SWalRef **)pIter;
     if (pRef->refVer == -1) continue;
     refVer = TMIN(refVer, pRef->refVer - 1);
-    wDebug("vgId:%d, wal found ref %" PRId64 ", refId %" PRId64, pWal->cfg.vgId, pRef->refVer, pRef->refId);
     hasTopic = true;
   }
-  // compatible mode
   if (pWal->cfg.retentionPeriod == 0 && hasTopic) {
+    wInfo("vgId:%d, wal found refVer:%" PRId64 " in compatible mode, ver:%" PRId64, pWal->cfg.vgId, refVer, ver);
     ver = TMIN(ver, refVer);
   }
 
+  // find files safe to delete
   int          deleteCnt = 0;
   int64_t      newTotSize = pWal->totSize;
-  SWalFileInfo tmp;
+  SWalFileInfo tmp = {0};
   tmp.firstVer = ver;
-  // find files safe to delete
   SWalFileInfo *pInfo = taosArraySearch(pWal->fileInfoSet, &tmp, compareWalFileInfo, TD_LE);
+
   if (pInfo) {
-    SWalFileInfo *pLastFileInfo = taosArrayGetLast(pWal->fileInfoSet);
-    wDebug("vgId:%d, wal search found file info: first:%" PRId64 " last:%" PRId64, pWal->cfg.vgId, pInfo->firstVer,
-           pInfo->lastVer);
-    if (ver >= pInfo->lastVer) {
+    wDebug("vgId:%d, wal search found file info. ver:%" PRId64 ", first:%" PRId64 " last:%" PRId64, pWal->cfg.vgId, ver,
+           pInfo->firstVer, pInfo->lastVer);
+    ASSERT(ver <= pInfo->lastVer);
+    if (ver == pInfo->lastVer) {
       pInfo++;
-      wDebug("vgId:%d, wal remove advance one file: first:%" PRId64 " last:%" PRId64, pWal->cfg.vgId, pInfo->firstVer,
-             pInfo->lastVer);
-    }
-    if (pInfo <= pLastFileInfo) {
-      wDebug("vgId:%d, wal end remove for first:%" PRId64 " last:%" PRId64, pWal->cfg.vgId, pInfo->firstVer,
-             pInfo->lastVer);
-    } else {
-      wDebug("vgId:%d, wal no remove", pWal->cfg.vgId);
     }
 
     // iterate files, until the searched result
+    // delete according to file size or close time
     for (SWalFileInfo *iter = pWal->fileInfoSet->pData; iter < pInfo; iter++) {
-      wDebug("vgId:%d, wal check remove file %" PRId64 "(file size %" PRId64 " close ts %" PRId64
-             "), new tot size %" PRId64,
-             pWal->cfg.vgId, iter->firstVer, iter->fileSize, iter->closeTs, newTotSize);
-      if ((pWal->cfg.retentionSize != -1 && pWal->cfg.retentionSize != 0 && newTotSize > pWal->cfg.retentionSize) ||
-          ((pWal->cfg.retentionPeriod == 0) || (pWal->cfg.retentionPeriod != -1 && iter->closeTs != -1 &&
-                                                iter->closeTs + pWal->cfg.retentionPeriod < ts))) {
-        // delete according to file size or close time
-        wDebug("vgId:%d, check pass", pWal->cfg.vgId);
+      if ((pWal->cfg.retentionSize > 0 && newTotSize > pWal->cfg.retentionSize) ||
+          (pWal->cfg.retentionPeriod == 0 ||
+           pWal->cfg.retentionPeriod > 0 && iter->closeTs >= 0 && iter->closeTs + pWal->cfg.retentionPeriod < ts)) {
         deleteCnt++;
         newTotSize -= iter->fileSize;
         taosArrayPush(pWal->toDeleteFiles, iter);
       }
-      wDebug("vgId:%d, check not pass", pWal->cfg.vgId);
     }
 
-  UPDATE_META:
     // make new array, remove files
     taosArrayPopFrontBatch(pWal->fileInfoSet, deleteCnt);
     if (taosArrayGetSize(pWal->fileInfoSet) == 0) {
@@ -357,11 +343,12 @@ int32_t walEndSnapshot(SWal *pWal) {
       pWal->vers.firstVer = ((SWalFileInfo *)taosArrayGet(pWal->fileInfoSet, 0))->firstVer;
     }
   }
+
+  // update meta
   pWal->writeCur = taosArrayGetSize(pWal->fileInfoSet) - 1;
   pWal->totSize = newTotSize;
   pWal->vers.verInSnapshotting = -1;
 
-  // save snapshot ver, commit ver
   code = walSaveMeta(pWal);
   if (code < 0) {
     goto END;
@@ -369,23 +356,27 @@ int32_t walEndSnapshot(SWal *pWal) {
 
   // delete files
   deleteCnt = taosArrayGetSize(pWal->toDeleteFiles);
-  wDebug("vgId:%d, wal should delete %d files", pWal->cfg.vgId, deleteCnt);
-  char fnameStr[WAL_FILE_LEN];
+  char fnameStr[WAL_FILE_LEN] = {0};
+  pInfo = NULL;
+
   for (int i = 0; i < deleteCnt; i++) {
     pInfo = taosArrayGet(pWal->toDeleteFiles, i);
+
     walBuildLogName(pWal, pInfo->firstVer, fnameStr);
-    wDebug("vgId:%d, wal remove file %s", pWal->cfg.vgId, fnameStr);
     if (taosRemoveFile(fnameStr) < 0 && errno != ENOENT) {
       wError("vgId:%d, failed to remove log file %s due to %s", pWal->cfg.vgId, fnameStr, strerror(errno));
       goto END;
     }
     walBuildIdxName(pWal, pInfo->firstVer, fnameStr);
-    wDebug("vgId:%d, wal remove file %s", pWal->cfg.vgId, fnameStr);
     if (taosRemoveFile(fnameStr) < 0 && errno != ENOENT) {
       wError("vgId:%d, failed to remove idx file %s due to %s", pWal->cfg.vgId, fnameStr, strerror(errno));
       goto END;
     }
   }
+  if (pInfo != NULL) {
+    wInfo("vgId:%d, wal log files recycled. count:%d, until ver:%" PRId64 ", closeTs:%" PRId64, pWal->cfg.vgId,
+          deleteCnt, pInfo->lastVer, pInfo->closeTs);
+  }
   taosArrayClear(pWal->toDeleteFiles);
 
 END:

From b8933389bb55b93824c6b727c0a90c40e137a588 Mon Sep 17 00:00:00 2001
From: Benguang Zhao 
Date: Thu, 11 May 2023 09:39:41 +0800
Subject: [PATCH 109/110] enh: keep the trailing empty wal file in
 walCheckAndRepairMeta

---
 source/libs/wal/src/walMeta.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c
index 12008bb31f..a12f8051ba 100644
--- a/source/libs/wal/src/walMeta.c
+++ b/source/libs/wal/src/walMeta.c
@@ -463,15 +463,8 @@ int walCheckAndRepairMeta(SWal* pWal) {
         wError("failed to scan wal last ver since %s", terrstr());
         return -1;
       }
-      // remove the empty wal log, and its idx
-      wInfo("vgId:%d, wal remove empty file %s", pWal->cfg.vgId, fnameStr);
-      taosRemoveFile(fnameStr);
-      walBuildIdxName(pWal, pFileInfo->firstVer, fnameStr);
-      wInfo("vgId:%d, wal remove empty file %s", pWal->cfg.vgId, fnameStr);
-      taosRemoveFile(fnameStr);
-      // remove its meta entry
-      taosArrayRemove(pWal->fileInfoSet, fileIdx);
-      continue;
+      // empty log file
+      lastVer = pFileInfo->firstVer - 1;
     }
 
     // update lastVer

From c19670ef8d886aaae1152ad878d82ae7ce8ed11d Mon Sep 17 00:00:00 2001
From: wangmm0220 
Date: Thu, 11 May 2023 11:38:46 +0800
Subject: [PATCH 110/110] fix:ci error occasionally

---
 tests/system-test/7-tmq/subscribeDb3.py | 2 +-
 tests/system-test/7-tmq/subscribeStb.py | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/system-test/7-tmq/subscribeDb3.py b/tests/system-test/7-tmq/subscribeDb3.py
index 5b5326cfba..b66334a6a6 100644
--- a/tests/system-test/7-tmq/subscribeDb3.py
+++ b/tests/system-test/7-tmq/subscribeDb3.py
@@ -336,7 +336,7 @@ class TDTestCase:
         for i in range(expectRows):
             totalConsumeRows += resultList[i]
 
-        if totalConsumeRows >= expectrowcnt or totalConsumeRows <= 0:
+        if totalConsumeRows > expectrowcnt or totalConsumeRows <= 0:
             tdLog.info("act consume rows: %d, expect consume rows between %d and 0"%(totalConsumeRows, expectrowcnt))
             tdLog.exit("tmq consume rows error!")
 
diff --git a/tests/system-test/7-tmq/subscribeStb.py b/tests/system-test/7-tmq/subscribeStb.py
index 9dcbf5b351..53f1a34d58 100644
--- a/tests/system-test/7-tmq/subscribeStb.py
+++ b/tests/system-test/7-tmq/subscribeStb.py
@@ -226,12 +226,11 @@ class TDTestCase:
         self.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit)
 
         tdLog.info("start consume processor")
-        pollDelay = 5
+        pollDelay = 10
         showMsg   = 1
         showRow   = 1
         self.startTmqSimProcess(buildPath,cfgPath,pollDelay,parameterDict["dbName"],showMsg, showRow)
 
-        time.sleep(5)
         self.create_ctables(tdSql, parameterDict["dbName"], parameterDict["stbName"], parameterDict["ctbNum"])
         self.insert_data(tdSql,\
                          parameterDict["dbName"],\
@@ -307,7 +306,7 @@ class TDTestCase:
         self.insertConsumerInfo(consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifManualCommit)
 
         tdLog.info("start consume processor")
-        pollDelay = 5
+        pollDelay = 10
         showMsg   = 1
         showRow   = 1
         self.startTmqSimProcess(buildPath,cfgPath,pollDelay,parameterDict["dbName"],showMsg, showRow)