From 4bb25c280f0452c5b078580c1bba8b9b09a94b91 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 17 May 2023 15:54:36 +0800 Subject: [PATCH 01/18] fix:add log --- source/dnode/vnode/src/tq/tq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 9730ad2877..ff715f1828 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -359,7 +359,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } while (tqIsHandleExec(pHandle)) { - tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, subscription is executing, wait for 5ms and retry", consumerId, vgId, req.subKey); + tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, subscription is executing, wait for 5ms and retry, pHandle:%p", consumerId, vgId, req.subKey, pHandle); taosMsleep(5); } @@ -372,6 +372,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return -1; } tqSetHandleExec(pHandle); + tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId, req.subKey, pHandle); taosWUnLockLatch(&pTq->lock); // 3. update the epoch value @@ -389,6 +390,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { int code = tqExtractDataForMq(pTq, pHandle, &req, pMsg); tqSetHandleIdle(pHandle); + tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, , set handle idle, pHandle:%p", consumerId, vgId, req.subKey, pHandle); return code; } @@ -555,7 +557,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg goto end; } else { while (tqIsHandleExec(pHandle)) { - tqDebug("sub req vgId:%d, topic:%s, subscription is executing, wait for 5ms and retry", vgId, pHandle->subKey); + tqDebug("sub req vgId:%d, topic:%s, subscription is executing, wait for 5ms and retry, pHandle:%p", vgId, pHandle->subKey, pHandle); taosMsleep(5); } From 1d9dd153eacdb10e5e1d03d896ca104f1bcaf30b Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 17 May 2023 18:57:45 +0800 Subject: [PATCH 02/18] fix:error in pHandle lock --- source/dnode/vnode/src/inc/tq.h | 1 + source/dnode/vnode/src/tq/tq.c | 64 +++++++++++++++------------- source/dnode/vnode/src/tq/tqMeta.c | 2 - source/dnode/vnode/src/tq/tqOffset.c | 2 - source/dnode/vnode/src/tq/tqRead.c | 2 - 5 files changed, 35 insertions(+), 36 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index ef36b8429a..f0008f04cb 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -93,6 +93,7 @@ typedef struct { typedef enum tq_handle_status{ TMQ_HANDLE_STATUS_IDLE = 0, TMQ_HANDLE_STATUS_EXEC = 1, + TMQ_HANDLE_STATUS_DELETE = 2, }tq_handle_status; typedef struct { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index ff715f1828..b1fca3faa8 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -25,6 +25,7 @@ static int32_t tqInitialize(STQ* pTq); static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; } static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) {pHandle->status = TMQ_HANDLE_STATUS_EXEC;} static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) {pHandle->status = TMQ_HANDLE_STATUS_IDLE;} +static FORCE_INLINE void tqSetHandleDelete(STqHandle* pHandle) {pHandle->status = TMQ_HANDLE_STATUS_DELETE;} int32_t tqInit() { int8_t old; @@ -297,13 +298,10 @@ int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t 
} if (offset.val.type == TMQ_OFFSET__LOG) { - taosWLockLatch(&pTq->lock); STqHandle* pHandle = taosHashGet(pTq->pHandle, offset.subKey, strlen(offset.subKey)); if (pHandle && (walSetRefVer(pHandle->pRef, offset.val.version) < 0)) { - taosWUnLockLatch(&pTq->lock); return -1; } - taosWUnLockLatch(&pTq->lock); } return 0; @@ -337,6 +335,7 @@ int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) { int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { SMqPollReq req = {0}; + int code = 0; if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) { tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen); terrno = TSDB_CODE_INVALID_MSG; @@ -347,20 +346,29 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { int32_t reqEpoch = req.epoch; STqOffsetVal reqOffset = req.reqOffset; int32_t vgId = TD_VID(pTq->pVnode); + STqHandle* pHandle = NULL; - taosWLockLatch(&pTq->lock); - // 1. find handle - STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); - if (pHandle == NULL) { - tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey); - terrno = TSDB_CODE_INVALID_MSG; + while (1) { + taosWLockLatch(&pTq->lock); + // 1. find handle + pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); + if (pHandle == NULL) { + tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey); + terrno = TSDB_CODE_INVALID_MSG; + taosWUnLockLatch(&pTq->lock); + return -1; + } + + bool exec = tqIsHandleExec(pHandle); + if(!exec) { + tqSetHandleExec(pHandle); + taosWUnLockLatch(&pTq->lock); + break; + } taosWUnLockLatch(&pTq->lock); - return -1; - } - while (tqIsHandleExec(pHandle)) { tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, subscription is executing, wait for 5ms and retry, pHandle:%p", consumerId, vgId, req.subKey, pHandle); - taosMsleep(5); + taosMsleep(10); } // 2. check re-balance status @@ -368,12 +376,11 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; - taosWUnLockLatch(&pTq->lock); - return -1; + code = -1; + goto end; } - tqSetHandleExec(pHandle); + tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId, req.subKey, pHandle); - taosWUnLockLatch(&pTq->lock); // 3. 
update the epoch value int32_t savedEpoch = pHandle->epoch; @@ -388,7 +395,9 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64, consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId); - int code = tqExtractDataForMq(pTq, pHandle, &req, pMsg); + code = tqExtractDataForMq(pTq, pHandle, &req, pMsg); + +end: tqSetHandleIdle(pHandle); tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, , set handle idle, pHandle:%p", consumerId, vgId, req.subKey, pHandle); return code; @@ -405,8 +414,8 @@ int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg STqHandle* pHandle = taosHashGet(pTq->pHandle, pReq->subKey, strlen(pReq->subKey)); if (pHandle) { while (tqIsHandleExec(pHandle)) { - tqDebug("vgId:%d, topic:%s, subscription is executing, wait for 5ms and retry", vgId, pHandle->subKey); - taosMsleep(5); + tqDebug("vgId:%d, topic:%s, subscription is executing, wait for 5ms and retry, pHandle:%p", vgId, pHandle->subKey, pHandle); + taosMsleep(10); } if (pHandle->pRef) { walCloseRef(pTq->pVnode->pWal, pHandle->pRef->refId); @@ -473,7 +482,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqDebug("vgId:%d, tq process sub req:%s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pVnode->config.vgId, req.subKey, req.oldConsumerId, req.newConsumerId); - taosWLockLatch(&pTq->lock); STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); if (pHandle == NULL) { if (req.oldConsumerId != -1) { @@ -556,20 +564,14 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); goto end; } else { - while (tqIsHandleExec(pHandle)) { - tqDebug("sub req vgId:%d, topic:%s, subscription is executing, wait for 5ms and retry, pHandle:%p", vgId, pHandle->subKey, pHandle); - taosMsleep(5); - } - if (pHandle->consumerId == req.newConsumerId) { // do nothing tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); - atomic_add_fetch_32(&pHandle->epoch, 1); - +// atomic_add_fetch_32(&pHandle->epoch, 1); } else { tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); - atomic_store_32(&pHandle->epoch, 0); +// atomic_store_32(&pHandle->epoch, 0); } // kill executing task qTaskInfo_t pTaskInfo = pHandle->execHandle.task; @@ -580,13 +582,15 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg qStreamCloseTsdbReader(pTaskInfo); } // remove if it has been register in the push manager, and return one empty block to consumer + taosWLockLatch(&pTq->lock); tqUnregisterPushHandle(pTq, pHandle); + taosWUnLockLatch(&pTq->lock); + ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); goto end; } end: - taosWUnLockLatch(&pTq->lock); taosMemoryFree(req.qmsg); return ret; } diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index 5654147b6d..b3d21887c9 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -352,9 +352,7 @@ int32_t tqMetaRestoreHandle(STQ* pTq) { handle.execHandle.task = qCreateQueueExecTaskInfo(NULL, &reader, vgId, NULL, 0); } tqDebug("tq restore %s consumer %" PRId64 " vgId:%d", handle.subKey, handle.consumerId, vgId); - taosWLockLatch(&pTq->lock); taosHashPut(pTq->pHandle, pKey, kLen, &handle, 
sizeof(STqHandle)); - taosWUnLockLatch(&pTq->lock); } end: diff --git a/source/dnode/vnode/src/tq/tqOffset.c b/source/dnode/vnode/src/tq/tqOffset.c index 377a5d1887..0a9905b544 100644 --- a/source/dnode/vnode/src/tq/tqOffset.c +++ b/source/dnode/vnode/src/tq/tqOffset.c @@ -78,7 +78,6 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname) { // todo remove this if (offset.val.type == TMQ_OFFSET__LOG) { - taosWLockLatch(&pStore->pTq->lock); STqHandle* pHandle = taosHashGet(pStore->pTq->pHandle, offset.subKey, strlen(offset.subKey)); if (pHandle) { if (walSetRefVer(pHandle->pRef, offset.val.version) < 0) { @@ -86,7 +85,6 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname) { // offset.val.version); } } - taosWUnLockLatch(&pStore->pTq->lock); } taosMemoryFree(pMemBuf); diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 09574a6b62..a26a749af3 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -1035,7 +1035,6 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { int32_t vgId = TD_VID(pTq->pVnode); // update the table list for each consumer handle - taosWLockLatch(&pTq->lock); while (1) { pIter = taosHashIterate(pTq->pHandle, pIter); if (pIter == NULL) { @@ -1092,7 +1091,6 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { } } } - taosWUnLockLatch(&pTq->lock); // update the table list handle for each stream scanner/wal reader taosWLockLatch(&pTq->pStreamMeta->lock); From 405cdfa7bc5637be46072149303bed2a03e5b20c Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 17 May 2023 19:02:16 +0800 Subject: [PATCH 03/18] fix:error in pHandle lock --- source/dnode/vnode/src/inc/tq.h | 1 - source/dnode/vnode/src/tq/tq.c | 1 - 2 files changed, 2 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index f0008f04cb..ef36b8429a 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -93,7 +93,6 @@ typedef struct { typedef enum tq_handle_status{ TMQ_HANDLE_STATUS_IDLE = 0, TMQ_HANDLE_STATUS_EXEC = 1, - TMQ_HANDLE_STATUS_DELETE = 2, }tq_handle_status; typedef struct { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index b1fca3faa8..d8ebf72549 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -25,7 +25,6 @@ static int32_t tqInitialize(STQ* pTq); static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; } static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) {pHandle->status = TMQ_HANDLE_STATUS_EXEC;} static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) {pHandle->status = TMQ_HANDLE_STATUS_IDLE;} -static FORCE_INLINE void tqSetHandleDelete(STqHandle* pHandle) {pHandle->status = TMQ_HANDLE_STATUS_DELETE;} int32_t tqInit() { int8_t old; From 54f48601394cf1dff9f8933d57a3887aaa8ec0d4 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 17 May 2023 19:44:56 +0800 Subject: [PATCH 04/18] fix:rollback [TD-23972] push subscribe msg to vnode even though consumer not change --- source/dnode/mnode/impl/src/mndSubscribe.c | 21 +++------------ source/dnode/vnode/src/tq/tq.c | 30 ++++++++++------------ 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index e62102fa77..2ae4ede25a 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ 
b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -133,10 +133,10 @@ static int32_t mndBuildSubChangeReq(void **pBuf, int32_t *pLen, const SMqSubscri static int32_t mndPersistSubChangeVgReq(SMnode *pMnode, STrans *pTrans, const SMqSubscribeObj *pSub, const SMqRebOutputVg *pRebVg) { -// if (pRebVg->oldConsumerId == pRebVg->newConsumerId) { -// terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; -// return -1; -// } + if (pRebVg->oldConsumerId == pRebVg->newConsumerId) { + terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; + return -1; + } void *buf; int32_t tlen; @@ -269,18 +269,6 @@ static void addUnassignedVgroups(SMqRebOutputObj *pOutput, SHashObj *pHash) { } } -static void putNoTransferToOutput(SMqRebOutputObj *pOutput, SMqConsumerEp *pConsumerEp){ - for(int i = 0; i < taosArrayGetSize(pConsumerEp->vgs); i++){ - SMqVgEp *pVgEp = (SMqVgEp *)taosArrayGetP(pConsumerEp->vgs, i); - SMqRebOutputVg outputVg = { - .oldConsumerId = pConsumerEp->consumerId, - .newConsumerId = pConsumerEp->consumerId, - .pVgEp = pVgEp, - }; - taosArrayPush(pOutput->rebVgs, &outputVg); - } -} - static void transferVgroupsForConsumers(SMqRebOutputObj *pOutput, SHashObj *pHash, int32_t minVgCnt, int32_t imbConsumerNum) { const char *pSubKey = pOutput->pSub->key; @@ -330,7 +318,6 @@ static void transferVgroupsForConsumers(SMqRebOutputObj *pOutput, SHashObj *pHas } } } - putNoTransferToOutput(pOutput, pConsumerEp); } } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index d8ebf72549..e074a2d8a6 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -564,29 +564,27 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg goto end; } else { if (pHandle->consumerId == req.newConsumerId) { // do nothing - tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); + tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs, should not reach here", req.vgId, req.newConsumerId); // atomic_add_fetch_32(&pHandle->epoch, 1); } else { tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); // atomic_store_32(&pHandle->epoch, 0); + // kill executing task + qTaskInfo_t pTaskInfo = pHandle->execHandle.task; + if (pTaskInfo != NULL) { + qKillTask(pTaskInfo, TSDB_CODE_SUCCESS); + } + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + qStreamCloseTsdbReader(pTaskInfo); + } + // remove if it has been register in the push manager, and return one empty block to consumer + taosWLockLatch(&pTq->lock); + tqUnregisterPushHandle(pTq, pHandle); + taosWUnLockLatch(&pTq->lock); + ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); } - // kill executing task - qTaskInfo_t pTaskInfo = pHandle->execHandle.task; - if (pTaskInfo != NULL) { - qKillTask(pTaskInfo, TSDB_CODE_SUCCESS); - } - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - qStreamCloseTsdbReader(pTaskInfo); - } - // remove if it has been register in the push manager, and return one empty block to consumer - taosWLockLatch(&pTq->lock); - tqUnregisterPushHandle(pTq, pHandle); - taosWUnLockLatch(&pTq->lock); - - ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); - goto end; } end: From c7e6883ac8e2f47a2514246abecd675781f88f91 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 18 May 2023 18:10:33 +0800 Subject: [PATCH 05/18] fix:tmq ref err & vg error --- source/client/src/clientTmq.c | 101 +++++++++++++++++++++++++++------- 1 file 
changed, 81 insertions(+), 20 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index b488af9ba1..be547c42b2 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -87,6 +87,7 @@ struct tmq_t { void* commitCbUserParam; // status + SRWLatch lock; int8_t status; int32_t epoch; #if 0 @@ -156,6 +157,7 @@ typedef struct { int8_t tmqRspType; int32_t epoch; // epoch can be used to guard the vgHandle int32_t vgId; + char topicName[TSDB_TOPIC_FNAME_LEN]; SMqClientVg* vgHandle; SMqClientTopic* topicHandle; uint64_t reqId; @@ -168,8 +170,8 @@ typedef struct { } SMqPollRspWrapper; typedef struct { - int64_t refId; - int32_t epoch; +// int64_t refId; +// int32_t epoch; tsem_t rspSem; int32_t rspErr; } SMqSubscribeCbParam; @@ -184,8 +186,9 @@ typedef struct { typedef struct { int64_t refId; int32_t epoch; - SMqClientVg* pVg; - SMqClientTopic* pTopic; + char topicName[TSDB_TOPIC_FNAME_LEN]; +// SMqClientVg* pVg; +// SMqClientTopic* pTopic; int32_t vgId; uint64_t requestId; // request id for debug purpose } SMqPollCbParam; @@ -709,8 +712,8 @@ static void generateTimedTask(int64_t refId, int32_t type) { *pTaskType = type; taosWriteQitem(tmq->delayedTask, pTaskType); tsem_post(&tmq->rspSem); + taosReleaseRef(tmqMgmt.rsetId, refId); } - taosReleaseRef(tmqMgmt.rsetId, refId); } void tmqAssignAskEpTask(void* param, void* tmrId) { @@ -1037,6 +1040,7 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { pTmq->commitCb = conf->commitCb; pTmq->commitCbUserParam = conf->commitCbUserParam; pTmq->resetOffsetCfg = conf->resetOffset; + taosInitRWLatch(&pTmq->lock); pTmq->hbBgEnable = conf->hbBgEnable; @@ -1140,8 +1144,8 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { SMqSubscribeCbParam param = { .rspErr = 0, - .refId = tmq->refId, - .epoch = tmq->epoch, +// .refId = tmq->refId, +// .epoch = tmq->epoch, }; if (tsem_init(¶m.rspSem, 0, 0) != 0) { @@ -1217,12 +1221,40 @@ void tmq_conf_set_auto_commit_cb(tmq_conf_t* conf, tmq_commit_cb* cb, void* para conf->commitCbUserParam = param; } +static SMqClientVg* getVgInfo(tmq_t* tmq, char* topicName, int32_t vgId){ + int32_t topicNumCur = taosArrayGetSize(tmq->clientTopics); + for(int i = 0; i < topicNumCur; i++){ + SMqClientTopic* pTopicCur = taosArrayGet(tmq->clientTopics, i); + if(strcmp(pTopicCur->topicName, topicName) == 0){ + int32_t vgNumCur = taosArrayGetSize(pTopicCur->vgs); + for (int32_t j = 0; j < vgNumCur; j++) { + SMqClientVg* pVgCur = taosArrayGet(pTopicCur->vgs, j); + if(pVgCur->vgId == vgId){ + return pVgCur; + } + } + } + } + return NULL; +} + +static SMqClientTopic* getTopicInfo(tmq_t* tmq, char* topicName){ + int32_t topicNumCur = taosArrayGetSize(tmq->clientTopics); + for(int i = 0; i < topicNumCur; i++){ + SMqClientTopic* pTopicCur = taosArrayGet(tmq->clientTopics, i); + if(strcmp(pTopicCur->topicName, topicName) == 0){ + return pTopicCur; + } + } + return NULL; +} + int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { SMqPollCbParam* pParam = (SMqPollCbParam*)param; int64_t refId = pParam->refId; - SMqClientVg* pVg = pParam->pVg; - SMqClientTopic* pTopic = pParam->pTopic; +// SMqClientVg* pVg = pParam->pVg; +// SMqClientTopic* pTopic = pParam->pTopic; tmq_t* tmq = taosAcquireRef(tmqMgmt.rsetId, refId); if (tmq == NULL) { @@ -1303,11 +1335,12 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { } pRspWrapper->tmqRspType = rspType; - pRspWrapper->vgHandle = pVg; - pRspWrapper->topicHandle = pTopic; +// pRspWrapper->vgHandle = 
pVg; +// pRspWrapper->topicHandle = pTopic; pRspWrapper->reqId = requestId; pRspWrapper->pEpset = pMsg->pEpSet; - pRspWrapper->vgId = pVg->vgId; + pRspWrapper->vgId = vgId; + strcpy(pRspWrapper->topicName, pParam->topicName); pMsg->pEpSet = NULL; if (rspType == TMQ_MSG_TYPE__POLL_RSP) { @@ -1351,7 +1384,12 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { CREATE_MSG_FAIL: if (epoch == tmq->epoch) { - atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); + taosWLockLatch(&tmq->lock); + SMqClientVg* pVg = getVgInfo(tmq, pParam->topicName, vgId); + if(pVg){ + atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); + } + taosWLockLatch(&tmq->lock); } tsem_post(&tmq->rspSem); @@ -1472,11 +1510,13 @@ static bool doUpdateLocalEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) taosHashCleanup(pVgOffsetHashMap); + taosWLockLatch(&tmq->lock); // destroy current buffered existed topics info if (tmq->clientTopics) { taosArrayDestroyEx(tmq->clientTopics, freeClientVgInfo); } tmq->clientTopics = newTopics; + taosWUnLockLatch(&tmq->lock); int8_t flag = (topicNumGet == 0) ? TMQ_CONSUMER_STATUS__NO_TOPIC : TMQ_CONSUMER_STATUS__READY; atomic_store_8(&tmq->status, flag); @@ -1492,7 +1532,7 @@ int32_t askEpCallbackFn(void* param, SDataBuf* pMsg, int32_t code) { if (tmq == NULL) { terrno = TSDB_CODE_TMQ_CONSUMER_CLOSED; - pParam->pUserFn(tmq, terrno, NULL, pParam->pParam); +// pParam->pUserFn(tmq, terrno, NULL, pParam->pParam); taosMemoryFree(pMsg->pData); taosMemoryFree(pMsg->pEpSet); @@ -1652,8 +1692,9 @@ static int32_t doTmqPollImpl(tmq_t* pTmq, SMqClientTopic* pTopic, SMqClientVg* p pParam->refId = pTmq->refId; pParam->epoch = pTmq->epoch; - pParam->pVg = pVg; // pVg may be released,fix it - pParam->pTopic = pTopic; +// pParam->pVg = pVg; // pVg may be released,fix it +// pParam->pTopic = pTopic; + strcpy(pParam->topicName, pTopic->topicName); pParam->vgId = pVg->vgId; pParam->requestId = req.reqId; @@ -1779,8 +1820,14 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { SMqDataRsp* pDataRsp = &pollRspWrapper->dataRsp; if (pDataRsp->head.epoch == consumerEpoch) { - SMqClientVg* pVg = pollRspWrapper->vgHandle; - + SMqClientVg* pVg = getVgInfo(tmq, pollRspWrapper->topicName, pollRspWrapper->vgId); + pollRspWrapper->vgHandle = pVg; + pollRspWrapper->topicHandle = getTopicInfo(tmq, pollRspWrapper->topicName); + if(pollRspWrapper->vgHandle == NULL || pollRspWrapper->topicHandle == NULL){ + tscError("consumer:0x%" PRIx64 " get vg or topic error, topic:%s vgId:%d", tmq->consumerId, + pollRspWrapper->topicName, pollRspWrapper->vgId); + return NULL; + } // update the epset if (pollRspWrapper->pEpset != NULL) { SEp* pEp = GET_ACTIVE_EP(pollRspWrapper->pEpset); @@ -1829,7 +1876,14 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { tscDebug("consumer:0x%" PRIx64 " process meta rsp", tmq->consumerId); if (pollRspWrapper->metaRsp.head.epoch == consumerEpoch) { - SMqClientVg* pVg = pollRspWrapper->vgHandle; + SMqClientVg* pVg = getVgInfo(tmq, pollRspWrapper->topicName, pollRspWrapper->vgId); + pollRspWrapper->vgHandle = pVg; + pollRspWrapper->topicHandle = getTopicInfo(tmq, pollRspWrapper->topicName); + if(pollRspWrapper->vgHandle == NULL || pollRspWrapper->topicHandle == NULL){ + tscError("consumer:0x%" PRIx64 " get vg or topic error, topic:%s vgId:%d", tmq->consumerId, + pollRspWrapper->topicName, pollRspWrapper->vgId); + return NULL; + } if(pollRspWrapper->metaRsp.rspOffset.type != 0){ // if offset is validate pVg->currentOffset = 
pollRspWrapper->metaRsp.rspOffset; } @@ -1849,7 +1903,14 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { int32_t consumerEpoch = atomic_load_32(&tmq->epoch); if (pollRspWrapper->taosxRsp.head.epoch == consumerEpoch) { - SMqClientVg* pVg = pollRspWrapper->vgHandle; + SMqClientVg* pVg = getVgInfo(tmq, pollRspWrapper->topicName, pollRspWrapper->vgId); + pollRspWrapper->vgHandle = pVg; + pollRspWrapper->topicHandle = getTopicInfo(tmq, pollRspWrapper->topicName); + if(pollRspWrapper->vgHandle == NULL || pollRspWrapper->topicHandle == NULL){ + tscError("consumer:0x%" PRIx64 " get vg or topic error, topic:%s vgId:%d", tmq->consumerId, + pollRspWrapper->topicName, pollRspWrapper->vgId); + return NULL; + } if(pollRspWrapper->taosxRsp.rspOffset.type != 0){ // if offset is validate pVg->currentOffset = pollRspWrapper->taosxRsp.rspOffset; } From 7db9f1e58ed471bcd22e5d692bd11c47835140d6 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 18 May 2023 18:21:45 +0800 Subject: [PATCH 06/18] fix:tmq ref err & vg error --- source/client/src/clientTmq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index be547c42b2..433b27d5d9 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1269,8 +1269,6 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { int32_t vgId = pParam->vgId; uint64_t requestId = pParam->requestId; - taosMemoryFree(pParam); - if (code != 0) { if (pMsg->pData) taosMemoryFree(pMsg->pData); if (pMsg->pEpSet) taosMemoryFree(pMsg->pEpSet); @@ -1314,6 +1312,8 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { taosMemoryFree(pMsg->pData); taosMemoryFree(pMsg->pEpSet); + taosMemoryFree(pParam); + return 0; } @@ -1379,6 +1379,7 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { tsem_post(&tmq->rspSem); taosReleaseRef(tmqMgmt.rsetId, refId); + taosMemoryFree(pParam); return 0; @@ -1394,6 +1395,7 @@ CREATE_MSG_FAIL: tsem_post(&tmq->rspSem); taosReleaseRef(tmqMgmt.rsetId, refId); + taosMemoryFree(pParam); return -1; } From 50bdfef7b6e32134a928fc7e6cd0232b2163054e Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 18 May 2023 18:34:42 +0800 Subject: [PATCH 07/18] fix:tmq ref err & vg error --- source/client/src/clientTmq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 433b27d5d9..01667f6dc3 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1390,7 +1390,7 @@ CREATE_MSG_FAIL: if(pVg){ atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); } - taosWLockLatch(&tmq->lock); + taosWUnLockLatch(&tmq->lock); } tsem_post(&tmq->rspSem); From bca1af3f5d7cd00d98096b1b0f5e052b8e8c0790 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 18 May 2023 21:18:44 +0800 Subject: [PATCH 08/18] fix:change tmq subscribe try time to 60mins --- source/client/src/clientTmq.c | 4 ++-- source/dnode/vnode/src/tq/tqPush.c | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 01667f6dc3..c08fbd0adf 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1089,7 +1089,7 @@ _failed: } int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { - const int32_t MAX_RETRY_COUNT = 120 * 2; // let's wait for 2 mins at most + const int32_t MAX_RETRY_COUNT = 120 * 60; // let's wait for 2 mins at most const SArray* 
container = &topic_list->container; int32_t sz = taosArrayGetSize(container); void* buf = NULL; @@ -1186,7 +1186,7 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { while (TSDB_CODE_MND_CONSUMER_NOT_READY == doAskEp(tmq)) { if (retryCnt++ > MAX_RETRY_COUNT) { tscError("consumer:0x%" PRIx64 ", mnd not ready for subscribe, retry:%d in 500ms", tmq->consumerId, retryCnt); - code = TSDB_CODE_TSC_INTERNAL_ERROR; + code = TSDB_CODE_MND_CONSUMER_NOT_READY; goto FAIL; } diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 50d94c5ed5..42c60e0007 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -73,8 +73,11 @@ int32_t tqUnregisterPushHandle(STQ* pTq, void *handle) { STqHandle *pHandle = (STqHandle*)handle; int32_t vgId = TD_VID(pTq->pVnode); + if(taosHashGetSize(pTq->pPushMgr) <= 0) { + return 0; + } int32_t ret = taosHashRemove(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey)); - tqError("vgId:%d remove pHandle:%p,ret:%d consumer Id:0x%" PRIx64, vgId, pHandle, ret, pHandle->consumerId); + tqDebug("vgId:%d remove pHandle:%p,ret:%d consumer Id:0x%" PRIx64, vgId, pHandle, ret, pHandle->consumerId); if(pHandle->msg != NULL) { tqPushDataRsp(pTq, pHandle); From 238254e49ee83723dd274bae5a66d7fe9994d5df Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 19 May 2023 14:06:43 +0800 Subject: [PATCH 09/18] fix:add task status check in doQueueScan & checkout consumer in tqProcessPollReq --- source/dnode/vnode/src/tq/tq.c | 22 +++++++++--------- source/libs/executor/src/scanoperator.c | 30 +------------------------ 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index e074a2d8a6..41b43026ea 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -358,9 +358,19 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return -1; } + // 2. check re-balance status + if (pHandle->consumerId != consumerId) { + tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, + consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); + terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; + taosWUnLockLatch(&pTq->lock); + return -1; + } + bool exec = tqIsHandleExec(pHandle); if(!exec) { tqSetHandleExec(pHandle); + tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId, req.subKey, pHandle); taosWUnLockLatch(&pTq->lock); break; } @@ -370,17 +380,6 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { taosMsleep(10); } - // 2. check re-balance status - if (pHandle->consumerId != consumerId) { - tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, - consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); - terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; - code = -1; - goto end; - } - - tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId, req.subKey, pHandle); - // 3. 
update the epoch value int32_t savedEpoch = pHandle->epoch; if (savedEpoch < reqEpoch) { @@ -396,7 +395,6 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { code = tqExtractDataForMq(pTq, pHandle, &req, pMsg); -end: tqSetHandleIdle(pHandle); tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, , set handle idle, pHandle:%p", consumerId, vgId, req.subKey, pHandle); return code; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 7d3165237e..638662aeb5 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1665,37 +1665,9 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { qDebug("start to exec queue scan, %s", id); -#if 0 - if (pTaskInfo->streamInfo.submit.msgStr != NULL) { - if (pInfo->tqReader->msg.msgStr == NULL) { - SPackedData submit = pTaskInfo->streamInfo.submit; - if (tqReaderSetSubmitMsg(pInfo->tqReader, submit.msgStr, submit.msgLen, submit.ver) < 0) { - qError("submit msg messed up when initing stream submit block %p", submit.msgStr); - return NULL; - } - } - - blockDataCleanup(pInfo->pRes); - SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; - - while (tqNextBlockImpl(pInfo->tqReader)) { - int32_t code = tqRetrieveDataBlock(pInfo->tqReader, NULL); - if (code != TSDB_CODE_SUCCESS || pInfo->tqReader->pResBlock->info.rows == 0) { - continue; - } - - setBlockIntoRes(pInfo, pInfo->tqReader->pResBlock, true); - - if (pBlockInfo->rows > 0) { - return pInfo->pRes; - } - } - - pInfo->tqReader->msg = (SPackedData){0}; - pTaskInfo->streamInfo.submit = (SPackedData){0}; + if (isTaskKilled(pTaskInfo)) { return NULL; } -#endif if (pTaskInfo->streamInfo.currentOffset.type == TMQ_OFFSET__SNAPSHOT_DATA) { SSDataBlock* pResult = doTableScan(pInfo->pTableScanOp); From f7ab8dabf086ef3740040635059aa36627bc1525 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 19 May 2023 15:14:39 +0800 Subject: [PATCH 10/18] fix:[TD-23972] push subscribe msg to vnode even though consumer not change --- source/dnode/mnode/impl/src/mndSubscribe.c | 21 ++++++++++++---- source/dnode/vnode/src/tq/tq.c | 28 ++++++++++++---------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 2ae4ede25a..e62102fa77 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -133,10 +133,10 @@ static int32_t mndBuildSubChangeReq(void **pBuf, int32_t *pLen, const SMqSubscri static int32_t mndPersistSubChangeVgReq(SMnode *pMnode, STrans *pTrans, const SMqSubscribeObj *pSub, const SMqRebOutputVg *pRebVg) { - if (pRebVg->oldConsumerId == pRebVg->newConsumerId) { - terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; - return -1; - } +// if (pRebVg->oldConsumerId == pRebVg->newConsumerId) { +// terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; +// return -1; +// } void *buf; int32_t tlen; @@ -269,6 +269,18 @@ static void addUnassignedVgroups(SMqRebOutputObj *pOutput, SHashObj *pHash) { } } +static void putNoTransferToOutput(SMqRebOutputObj *pOutput, SMqConsumerEp *pConsumerEp){ + for(int i = 0; i < taosArrayGetSize(pConsumerEp->vgs); i++){ + SMqVgEp *pVgEp = (SMqVgEp *)taosArrayGetP(pConsumerEp->vgs, i); + SMqRebOutputVg outputVg = { + .oldConsumerId = pConsumerEp->consumerId, + .newConsumerId = pConsumerEp->consumerId, + .pVgEp = pVgEp, + }; + taosArrayPush(pOutput->rebVgs, &outputVg); + } +} + static void transferVgroupsForConsumers(SMqRebOutputObj *pOutput, SHashObj *pHash, int32_t 
minVgCnt, int32_t imbConsumerNum) { const char *pSubKey = pOutput->pSub->key; @@ -318,6 +330,7 @@ static void transferVgroupsForConsumers(SMqRebOutputObj *pOutput, SHashObj *pHas } } } + putNoTransferToOutput(pOutput, pConsumerEp); } } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 41b43026ea..6f619e9bbe 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -569,20 +569,22 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); // atomic_store_32(&pHandle->epoch, 0); - // kill executing task - qTaskInfo_t pTaskInfo = pHandle->execHandle.task; - if (pTaskInfo != NULL) { - qKillTask(pTaskInfo, TSDB_CODE_SUCCESS); - } - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - qStreamCloseTsdbReader(pTaskInfo); - } - // remove if it has been register in the push manager, and return one empty block to consumer - taosWLockLatch(&pTq->lock); - tqUnregisterPushHandle(pTq, pHandle); - taosWUnLockLatch(&pTq->lock); - ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); } + + // kill executing task + qTaskInfo_t pTaskInfo = pHandle->execHandle.task; + if (pTaskInfo != NULL) { + qKillTask(pTaskInfo, TSDB_CODE_SUCCESS); + } + + taosWLockLatch(&pTq->lock); + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + qStreamCloseTsdbReader(pTaskInfo); + } + // remove if it has been register in the push manager, and return one empty block to consumer + tqUnregisterPushHandle(pTq, pHandle); + taosWUnLockLatch(&pTq->lock); + ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); } end: From ff2b545b27afa6f28f704d896910c04e727d19c8 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 19 May 2023 17:16:47 +0800 Subject: [PATCH 11/18] fix:set task status killed when vnode receive subscribe msg from mnode --- include/libs/executor/executor.h | 2 ++ source/dnode/vnode/src/tq/tq.c | 4 ++-- source/libs/executor/src/executor.c | 5 +++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index b7e6c42e3b..2598b5c28c 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -90,6 +90,8 @@ qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* pReaderHandle, int3 */ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId); +void qSetTaskCode(qTaskInfo_t tinfo, int32_t code); + int32_t qSetStreamOpOpen(qTaskInfo_t tinfo); // todo refactor diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 6f619e9bbe..9e43c4a944 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -394,7 +394,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId); code = tqExtractDataForMq(pTq, pHandle, &req, pMsg); - + qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS); tqSetHandleIdle(pHandle); tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, , set handle idle, pHandle:%p", consumerId, vgId, req.subKey, pHandle); return code; @@ -574,7 +574,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg // kill executing task qTaskInfo_t pTaskInfo = pHandle->execHandle.task; if (pTaskInfo != NULL) { - qKillTask(pTaskInfo, TSDB_CODE_SUCCESS); + qKillTask(pTaskInfo, TSDB_CODE_TSC_QUERY_KILLED); } taosWLockLatch(&pTq->lock); diff --git a/source/libs/executor/src/executor.c 
b/source/libs/executor/src/executor.c index c4a56d78ae..eea542e042 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -180,6 +180,11 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId) { doSetTaskId(pTaskInfo->pRoot); } +void qSetTaskCode(qTaskInfo_t tinfo, int32_t code) { + SExecTaskInfo* pTaskInfo = tinfo; + pTaskInfo->code = code; +} + int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) { if (tinfo == NULL) { return TSDB_CODE_APP_ERROR; From c7c255e967aa250741bcb839f329634a99834b29 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 19 May 2023 18:06:05 +0800 Subject: [PATCH 12/18] fix:set task status killed when vnode receive subscribe msg from mnode --- source/dnode/vnode/src/tq/tq.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 9e43c4a944..b1e412d5ec 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -370,6 +370,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { bool exec = tqIsHandleExec(pHandle); if(!exec) { tqSetHandleExec(pHandle); + qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS); tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId, req.subKey, pHandle); taosWUnLockLatch(&pTq->lock); break; @@ -394,7 +395,6 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId); code = tqExtractDataForMq(pTq, pHandle, &req, pMsg); - qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS); tqSetHandleIdle(pHandle); tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, , set handle idle, pHandle:%p", consumerId, vgId, req.subKey, pHandle); return code; @@ -571,15 +571,17 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg // atomic_store_32(&pHandle->epoch, 0); } - // kill executing task - qTaskInfo_t pTaskInfo = pHandle->execHandle.task; - if (pTaskInfo != NULL) { - qKillTask(pTaskInfo, TSDB_CODE_TSC_QUERY_KILLED); - } - taosWLockLatch(&pTq->lock); - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - qStreamCloseTsdbReader(pTaskInfo); + // kill executing task + if(tqIsHandleExec(pHandle)) { + qTaskInfo_t pTaskInfo = pHandle->execHandle.task; + if (pTaskInfo != NULL) { + qKillTask(pTaskInfo, TSDB_CODE_TSC_QUERY_KILLED); + } + + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + qStreamCloseTsdbReader(pTaskInfo); + } } // remove if it has been register in the push manager, and return one empty block to consumer tqUnregisterPushHandle(pTq, pHandle); From 33d6df8717875f1f2001d8ea8dde8339beee279d Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 19 May 2023 19:11:49 +0800 Subject: [PATCH 13/18] fix:set task status killed when vnode receive subscribe msg from mnode --- source/dnode/vnode/src/tq/tq.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index b1e412d5ec..d9961202b1 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -367,6 +367,20 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return -1; } + // 3. 
update the epoch value + int32_t savedEpoch = pHandle->epoch; + if (savedEpoch <= reqEpoch) { + tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, + reqEpoch); + pHandle->epoch = reqEpoch; + }else { + tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, savedEpoch:%d > reqEpoch:%d ", + consumerId, TD_VID(pTq->pVnode), req.subKey, savedEpoch, reqEpoch); + terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; + taosWUnLockLatch(&pTq->lock); + return -1; + } + bool exec = tqIsHandleExec(pHandle); if(!exec) { tqSetHandleExec(pHandle); @@ -381,14 +395,6 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { taosMsleep(10); } - // 3. update the epoch value - int32_t savedEpoch = pHandle->epoch; - if (savedEpoch < reqEpoch) { - tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, - reqEpoch); - pHandle->epoch = reqEpoch; - } - char buf[80]; tFormatOffset(buf, 80, &reqOffset); tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64, @@ -396,6 +402,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { code = tqExtractDataForMq(pTq, pHandle, &req, pMsg); tqSetHandleIdle(pHandle); + tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, , set handle idle, pHandle:%p", consumerId, vgId, req.subKey, pHandle); return code; } @@ -561,17 +568,17 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg ret = tqMetaSaveHandle(pTq, req.subKey, pHandle); goto end; } else { + taosWLockLatch(&pTq->lock); + if (pHandle->consumerId == req.newConsumerId) { // do nothing tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs, should not reach here", req.vgId, req.newConsumerId); -// atomic_add_fetch_32(&pHandle->epoch, 1); } else { tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); -// atomic_store_32(&pHandle->epoch, 0); } + atomic_add_fetch_32(&pHandle->epoch, 1); - taosWLockLatch(&pTq->lock); // kill executing task if(tqIsHandleExec(pHandle)) { qTaskInfo_t pTaskInfo = pHandle->execHandle.task; From 1ddbdad33cf487e94d2edf6a3138324f123723e5 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 19 May 2023 21:40:33 +0800 Subject: [PATCH 14/18] fix:set task status killed when vnode receive subscribe msg from mnode --- source/dnode/vnode/src/tq/tq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index d9961202b1..da366c1610 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -362,7 +362,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { if (pHandle->consumerId != consumerId) { tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); - terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; + terrno = TSDB_CODE_TMQ_INVALID_MSG; taosWUnLockLatch(&pTq->lock); return -1; } From f8de38e5327ca92025c6f7a00fa6e02c08b6e710 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 19 May 2023 22:52:58 +0800 Subject: [PATCH 15/18] fix:core dump in tsdbreader is null when tsdbreader is closed by subscribe msg and current offset equal to saved --- source/libs/executor/src/executor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index eea542e042..7d251fb074 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -1080,7 +1080,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT const char* id = GET_TASKID(pTaskInfo); // if pOffset equal to current offset, means continue consume - if (tOffsetEqual(pOffset, &pTaskInfo->streamInfo.currentOffset)) { + if (tOffsetEqual(pOffset, &pTaskInfo->streamInfo.currentOffset) && pOffset->type != TMQ_OFFSET__SNAPSHOT_DATA) { return 0; } From 04858fae22b4b2f0fa6207b56e84dab0fc679ea6 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 20 May 2023 12:20:54 +0800 Subject: [PATCH 16/18] fix:tsdbreader is free by mistake --- include/libs/executor/executor.h | 2 +- source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 2 +- source/dnode/vnode/src/tq/tq.c | 8 ++++---- source/dnode/vnode/src/tq/tqScan.c | 12 ++++++++---- source/libs/executor/src/executor.c | 10 +++++----- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 2598b5c28c..91db78e9cd 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -90,7 +90,7 @@ qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* pReaderHandle, int3 */ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId); -void qSetTaskCode(qTaskInfo_t tinfo, int32_t code); +//void qSetTaskCode(qTaskInfo_t tinfo, int32_t code); int32_t qSetStreamOpOpen(qTaskInfo_t tinfo); diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index a318b9886e..922a85c094 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -75,7 +75,7 @@ static void vmProcessQueryQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { int32_t code = vnodeProcessQueryMsg(pVnode->pImpl, pMsg); if (code != 0) { if (terrno != 0) code = terrno; - dGError("vgId:%d, msg:%p failed to query since %s", pVnode->vgId, pMsg, terrstr(code)); + dGError("vgId:%d, msg:%p failed to query since %s", pVnode->vgId, pMsg, tstrerror(code)); vmSendRsp(pMsg, code); } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index da366c1610..ce52277d71 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -384,7 +384,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { bool exec = tqIsHandleExec(pHandle); if(!exec) { tqSetHandleExec(pHandle); - qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS); +// qSetTaskCode(pHandle->execHandle.task, TDB_CODE_SUCCESS); tqDebug("tmq poll: consumer:0x%" PRIx64 "vgId:%d, topic:%s, set handle exec, pHandle:%p", consumerId, vgId, req.subKey, pHandle); taosWUnLockLatch(&pTq->lock); break; @@ -586,9 +586,9 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg qKillTask(pTaskInfo, TSDB_CODE_TSC_QUERY_KILLED); } - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - qStreamCloseTsdbReader(pTaskInfo); - } +// if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { +// qStreamCloseTsdbReader(pTaskInfo); +// } } // remove if it has been register in the push manager, and return one empty block to consumer tqUnregisterPushHandle(pTq, pHandle); diff --git a/source/dnode/vnode/src/tq/tqScan.c b/source/dnode/vnode/src/tq/tqScan.c index e4b2fa8821..2f470e2221 100644 --- a/source/dnode/vnode/src/tq/tqScan.c +++ 
b/source/dnode/vnode/src/tq/tqScan.c @@ -84,8 +84,10 @@ int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffs qStreamSetOpen(task); tqDebug("consumer:0x%" PRIx64 " vgId:%d, tmq one task start execute", pHandle->consumerId, vgId); - if (qExecTask(task, &pDataBlock, &ts) != TSDB_CODE_SUCCESS) { - tqError("consumer:0x%" PRIx64 " vgId:%d, task exec error since %s", pHandle->consumerId, vgId, terrstr()); + code = qExecTask(task, &pDataBlock, &ts); + if (code != TSDB_CODE_SUCCESS) { + tqError("consumer:0x%" PRIx64 " vgId:%d, task exec error since %s", pHandle->consumerId, vgId, tstrerror(code)); + terrno = code; return -1; } @@ -128,8 +130,10 @@ int32_t tqScanTaosx(STQ* pTq, const STqHandle* pHandle, STaosxRsp* pRsp, SMqMeta SSDataBlock* pDataBlock = NULL; uint64_t ts = 0; tqDebug("tmqsnap task start to execute"); - if (qExecTask(task, &pDataBlock, &ts) < 0) { - tqError("vgId:%d, task exec error since %s", pTq->pVnode->config.vgId, terrstr()); + int code = qExecTask(task, &pDataBlock, &ts); + if (code != 0) { + tqError("vgId:%d, task exec error since %s", pTq->pVnode->config.vgId, tstrerror(code)); + terrno = code; return -1; } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 7d251fb074..a73deffa52 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -180,10 +180,10 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId) { doSetTaskId(pTaskInfo->pRoot); } -void qSetTaskCode(qTaskInfo_t tinfo, int32_t code) { - SExecTaskInfo* pTaskInfo = tinfo; - pTaskInfo->code = code; -} +//void qSetTaskCode(qTaskInfo_t tinfo, int32_t code) { +// SExecTaskInfo* pTaskInfo = tinfo; +// pTaskInfo->code = code; +//} int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) { if (tinfo == NULL) { @@ -1080,7 +1080,7 @@ int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subT const char* id = GET_TASKID(pTaskInfo); // if pOffset equal to current offset, means continue consume - if (tOffsetEqual(pOffset, &pTaskInfo->streamInfo.currentOffset) && pOffset->type != TMQ_OFFSET__SNAPSHOT_DATA) { + if (tOffsetEqual(pOffset, &pTaskInfo->streamInfo.currentOffset)) { return 0; } From 8183be4afff416a66d9ec07879ac5e620d1e43d9 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 20 May 2023 16:13:52 +0800 Subject: [PATCH 17/18] fix:move sleep logic from rpc thread to tmq thread & fix some error --- source/client/src/clientTmq.c | 9 ++++++--- source/dnode/vnode/src/tq/tq.c | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index c08fbd0adf..b57ecfa845 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1275,7 +1275,7 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { // in case of consumer mismatch, wait for 500ms and retry if (code == TSDB_CODE_TMQ_CONSUMER_MISMATCH) { - taosMsleep(500); +// taosMsleep(500); atomic_store_8(&tmq->status, TMQ_CONSUMER_STATUS__RECOVER); tscDebug("consumer:0x%" PRIx64 " wait for the re-balance, wait for 500ms and set status to be RECOVER", tmq->consumerId); @@ -1289,8 +1289,8 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { pRspWrapper->tmqRspType = TMQ_MSG_TYPE__END_RSP; taosWriteQitem(tmq->mqueue, pRspWrapper); - } else if (code == TSDB_CODE_WAL_LOG_NOT_EXIST) { // poll data while insert - taosMsleep(500); +// } else if (code == TSDB_CODE_WAL_LOG_NOT_EXIST) { // poll data while insert +// 
taosMsleep(5); } else{ tscError("consumer:0x%" PRIx64 " msg from vgId:%d discarded, epoch %d, since %s, reqId:0x%" PRIx64, tmq->consumerId, vgId, epoch, tstrerror(code), requestId); @@ -1731,6 +1731,9 @@ static int32_t doTmqPollImpl(tmq_t* pTmq, SMqClientTopic* pTopic, SMqClientVg* p // broadcast the poll request to all related vnodes static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { + if(atomic_load_8(&tmq->status) == TMQ_CONSUMER_STATUS__RECOVER){ + return 0; + } int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics); tscDebug("consumer:0x%" PRIx64 " start to poll data, numOfTopics:%d", tmq->consumerId, numOfTopics); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index ce52277d71..91524f7a95 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -360,22 +360,17 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { // 2. check re-balance status if (pHandle->consumerId != consumerId) { - tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, + tqError("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); - terrno = TSDB_CODE_TMQ_INVALID_MSG; + terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; taosWUnLockLatch(&pTq->lock); return -1; } // 3. update the epoch value - int32_t savedEpoch = pHandle->epoch; - if (savedEpoch <= reqEpoch) { - tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, - reqEpoch); - pHandle->epoch = reqEpoch; - }else { - tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, savedEpoch:%d > reqEpoch:%d ", - consumerId, TD_VID(pTq->pVnode), req.subKey, savedEpoch, reqEpoch); + if (pHandle->epoch > reqEpoch) { + tqError("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, savedEpoch:%d > reqEpoch:%d ", + consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->epoch, reqEpoch); terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; taosWUnLockLatch(&pTq->lock); return -1; @@ -395,6 +390,11 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { taosMsleep(10); } + if (pHandle->epoch < reqEpoch) { + tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, pHandle->epoch, reqEpoch); + pHandle->epoch = reqEpoch; + } + char buf[80]; tFormatOffset(buf, 80, &reqOffset); tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64, @@ -577,7 +577,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg req.newConsumerId); atomic_store_64(&pHandle->consumerId, req.newConsumerId); } - atomic_add_fetch_32(&pHandle->epoch, 1); +// atomic_add_fetch_32(&pHandle->epoch, 1); // kill executing task if(tqIsHandleExec(pHandle)) { From 7b735c67d0a84f001c41c8f1145655cd3ddb40b4 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 20 May 2023 16:36:25 +0800 Subject: [PATCH 18/18] fix:move sleep logic from rpc thread to tmq thread & fix some error --- source/dnode/vnode/src/tq/tq.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 91524f7a95..db03e97556 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -367,15 +367,6 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return -1; } - // 3. 
update the epoch value - if (pHandle->epoch > reqEpoch) { - tqError("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, savedEpoch:%d > reqEpoch:%d ", - consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->epoch, reqEpoch); - terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; - taosWUnLockLatch(&pTq->lock); - return -1; - } - bool exec = tqIsHandleExec(pHandle); if(!exec) { tqSetHandleExec(pHandle); @@ -390,6 +381,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { taosMsleep(10); } + // 3. update the epoch value if (pHandle->epoch < reqEpoch) { tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, pHandle->epoch, reqEpoch); pHandle->epoch = reqEpoch;
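The recurring idea in patches 02, 09, 13 and 17/18 above is that a poll request must claim the subscribe handle (IDLE -> EXEC) while holding the vnode latch, spin with a short sleep when another request is still executing, and re-check the consumer id on every attempt because a rebalance can reassign the vgroup in between. Below is a minimal, self-contained sketch of that guard, not the actual TDengine API: the struct, field names and the pthread rwlock are simplified stand-ins for STqHandle, STQ and taosWLockLatch.

/* Illustrative sketch only; simplified stand-ins for the TMQ poll-side guard. */
#include <pthread.h>
#include <unistd.h>

typedef enum { HANDLE_IDLE = 0, HANDLE_EXEC = 1 } handle_status_t;

typedef struct {
  handle_status_t status;       /* stands in for STqHandle.status       */
  long            consumerId;   /* stands in for STqHandle.consumerId   */
} tq_handle_t;

typedef struct {
  pthread_rwlock_t lock;        /* stands in for pTq->lock (write latch) */
  tq_handle_t      handle;      /* stands in for the entry in pTq->pHandle */
} tq_t;

/* Claim the handle for one poll request, retrying while another request
 * is executing; mirrors the loop the series adds to tqProcessPollReq. */
static int poll_acquire(tq_t *tq, long consumerId) {
  for (;;) {
    pthread_rwlock_wrlock(&tq->lock);
    if (tq->handle.consumerId != consumerId) {  /* rebalance moved the vgroup */
      pthread_rwlock_unlock(&tq->lock);
      return -1;
    }
    if (tq->handle.status == HANDLE_IDLE) {
      tq->handle.status = HANDLE_EXEC;          /* claimed while holding the lock */
      pthread_rwlock_unlock(&tq->lock);
      return 0;
    }
    pthread_rwlock_unlock(&tq->lock);
    usleep(10 * 1000);                          /* back off 10 ms and retry */
  }
}

static void poll_release(tq_t *tq) {
  tq->handle.status = HANDLE_IDLE;              /* set idle after extracting data */
}

int main(void) {
  tq_t tq = { .handle = { .status = HANDLE_IDLE, .consumerId = 42 } };
  pthread_rwlock_init(&tq.lock, NULL);
  if (poll_acquire(&tq, 42) == 0) {
    /* ... extract data for the consumer here ... */
    poll_release(&tq);
  }
  pthread_rwlock_destroy(&tq.lock);
  return 0;
}

The same status field is what the subscribe and delete-subscription paths consult before killing or dropping an executing task later in the series.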
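Patch 05 stops caching SMqClientVg/SMqClientTopic pointers in the poll callback parameters ("pVg may be released") and instead re-resolves them by topic name and vgId once the response is handled, since an epoch update may have rebuilt tmq->clientTopics in the meantime. A minimal sketch of that lookup follows; the structures are simplified stand-ins for the real client types (which keep these lists in SArray containers), so the names here are illustrative only.

/* Illustrative sketch only; simplified stand-ins for the client-side lookup. */
#include <string.h>

#define TOPIC_NAME_LEN 193

typedef struct {
  int vgId;
  int vgStatus;
} client_vg_t;

typedef struct {
  char         topicName[TOPIC_NAME_LEN];
  client_vg_t *vgs;
  int          numOfVgs;
} client_topic_t;

typedef struct {
  client_topic_t *topics;
  int             numOfTopics;
} tmq_client_t;

/* Re-resolve the vgroup from the current topic list; returns NULL if the
 * topic or vgroup was dropped by a rebalance between request and response. */
static client_vg_t *get_vg_info(tmq_client_t *tmq, const char *topicName, int vgId) {
  for (int i = 0; i < tmq->numOfTopics; i++) {
    client_topic_t *t = &tmq->topics[i];
    if (strcmp(t->topicName, topicName) != 0) continue;
    for (int j = 0; j < t->numOfVgs; j++) {
      if (t->vgs[j].vgId == vgId) return &t->vgs[j];
    }
  }
  return NULL;
}

int main(void) {
  client_vg_t    vgs[1] = { { .vgId = 3, .vgStatus = 0 } };
  client_topic_t topic  = { .topicName = "db1.topic1", .vgs = vgs, .numOfVgs = 1 };
  tmq_client_t   tmq    = { .topics = &topic, .numOfTopics = 1 };

  client_vg_t *pVg = get_vg_info(&tmq, "db1.topic1", 3);
  return pVg == NULL;  /* 0 when the vgroup is still present */
}

Returning NULL when the topic or vgroup is gone lets the response handler drop the message instead of dereferencing a freed pointer, which appears to be the failure the patch is addressing.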