From 11e690cb67f53c8c983ba2152c6c162e41bd3f11 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Jul 2023 09:57:52 +0800 Subject: [PATCH 1/5] fix:semaphore always wait in tmq commit logic --- source/client/src/clientTmq.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index b2c901c800..b0fae1083f 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -650,7 +650,7 @@ static void asyncCommitFromResult(tmq_t* tmq, const TAOS_RES* pRes, tmq_commit_c code = asyncCommitOffset(tmq, pTopicName, vgId, &offsetVal, pCommitFp, userParam); end: - if(code != TSDB_CODE_SUCCESS){ + if(code != TSDB_CODE_SUCCESS && pCommitFp != NULL){ pCommitFp(tmq, code, userParam); } } @@ -2348,7 +2348,7 @@ int32_t tmq_commit_sync(tmq_t* tmq, const TAOS_RES* pRes) { tsem_destroy(&pInfo->sem); taosMemoryFree(pInfo); - tscDebug("consumer:0x%" PRIx64 " sync commit done, code:%s", tmq->consumerId, tstrerror(code)); + tscInfo("consumer:0x%" PRIx64 " sync res commit done, code:%s", tmq->consumerId, tstrerror(code)); return code; } @@ -2404,15 +2404,16 @@ int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, tsem_init(&pInfo->sem, 0, 0); pInfo->code = 0; - asyncCommitOffset(tmq, tname, vgId, &offsetVal, commitCallBackFn, pInfo); - - tsem_wait(&pInfo->sem); - code = pInfo->code; + code = asyncCommitOffset(tmq, tname, vgId, &offsetVal, commitCallBackFn, pInfo); + if(code == 0){ + tsem_wait(&pInfo->sem); + code = pInfo->code; + } tsem_destroy(&pInfo->sem); taosMemoryFree(pInfo); - tscInfo("consumer:0x%" PRIx64 " sync send seek to vgId:%d, offset:%" PRId64" code:%s", tmq->consumerId, vgId, offset, tstrerror(code)); + tscInfo("consumer:0x%" PRIx64 " sync send commit to vgId:%d, offset:%" PRId64" code:%s", tmq->consumerId, vgId, offset, tstrerror(code)); return code; } @@ -2449,7 +2450,7 @@ void tmq_commit_offset_async(tmq_t *tmq, const char *pTopicName, int32_t vgId, i code = asyncCommitOffset(tmq, tname, vgId, &offsetVal, cb, param); - tscInfo("consumer:0x%" PRIx64 " async send seek to vgId:%d, offset:%" PRId64" code:%s", tmq->consumerId, vgId, offset, tstrerror(code)); + tscInfo("consumer:0x%" PRIx64 " async send commit to vgId:%d, offset:%" PRId64" code:%s", tmq->consumerId, vgId, offset, tstrerror(code)); end: if(code != 0 && cb != NULL){ From 68eb1cfc1ec3b359bf7b66f1e870674da54f695f Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Jul 2023 10:03:18 +0800 Subject: [PATCH 2/5] fix:semaphore always wait in tmq commit logic --- source/client/src/clientTmq.c | 48 ++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index b0fae1083f..aba127c36e 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -586,30 +586,32 @@ static int32_t asyncCommitOffset(tmq_t* tmq, char* pTopicName, int32_t vgId, STq if(code != 0){ goto end; } - if (offsetVal->type > 0 && !tOffsetEqual(offsetVal, &pVg->offsetInfo.committedOffset)) { - char offsetBuf[TSDB_OFFSET_LEN] = {0}; - tFormatOffset(offsetBuf, tListLen(offsetBuf), offsetVal); - - char commitBuf[TSDB_OFFSET_LEN] = {0}; - tFormatOffset(commitBuf, tListLen(commitBuf), &pVg->offsetInfo.committedOffset); - - SMqCommitCbParamSet* pParamSet = prepareCommitCbParamSet(tmq, pCommitFp, userParam, 0); - if (pParamSet == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto end; - } - code = doSendCommitMsg(tmq, pVg->vgId, &pVg->epSet, offsetVal, pTopicName, pParamSet); - if (code != TSDB_CODE_SUCCESS) { - tscError("consumer:0x%" PRIx64 " topic:%s on vgId:%d end commit msg failed, send offset:%s committed:%s, code:%s", - tmq->consumerId, pTopicName, pVg->vgId, offsetBuf, commitBuf, tstrerror(terrno)); - taosMemoryFree(pParamSet); - goto end; - } - - tscInfo("consumer:0x%" PRIx64 " topic:%s on vgId:%d send commit msg success, send offset:%s committed:%s", - tmq->consumerId, pTopicName, pVg->vgId, offsetBuf, commitBuf); - pVg->offsetInfo.committedOffset = *offsetVal; + if (offsetVal->type <= 0 || tOffsetEqual(offsetVal, &pVg->offsetInfo.committedOffset)) { + code = TSDB_CODE_TMQ_INVALID_MSG; + goto end; } + char offsetBuf[TSDB_OFFSET_LEN] = {0}; + tFormatOffset(offsetBuf, tListLen(offsetBuf), offsetVal); + + char commitBuf[TSDB_OFFSET_LEN] = {0}; + tFormatOffset(commitBuf, tListLen(commitBuf), &pVg->offsetInfo.committedOffset); + + SMqCommitCbParamSet* pParamSet = prepareCommitCbParamSet(tmq, pCommitFp, userParam, 0); + if (pParamSet == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto end; + } + code = doSendCommitMsg(tmq, pVg->vgId, &pVg->epSet, offsetVal, pTopicName, pParamSet); + if (code != TSDB_CODE_SUCCESS) { + tscError("consumer:0x%" PRIx64 " topic:%s on vgId:%d end commit msg failed, send offset:%s committed:%s, code:%s", + tmq->consumerId, pTopicName, pVg->vgId, offsetBuf, commitBuf, tstrerror(terrno)); + taosMemoryFree(pParamSet); + goto end; + } + + tscInfo("consumer:0x%" PRIx64 " topic:%s on vgId:%d send commit msg success, send offset:%s committed:%s", + tmq->consumerId, pTopicName, pVg->vgId, offsetBuf, commitBuf); + pVg->offsetInfo.committedOffset = *offsetVal; end: taosRUnLockLatch(&tmq->lock); From 5fa580960e8153238aac17d1e1fe94d1cf26033f Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Jul 2023 13:53:42 +0800 Subject: [PATCH 3/5] fix:semaphore always wait in tmq commit logic --- include/util/taoserror.h | 1 + source/client/src/clientTmq.c | 9 ++++++++- source/util/src/terror.c | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index f37402c18c..9c5c633358 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -780,6 +780,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_TMQ_INVALID_TOPIC TAOS_DEF_ERROR_CODE(0, 0x4009) #define TSDB_CODE_TMQ_NEED_INITIALIZED TAOS_DEF_ERROR_CODE(0, 0x4010) #define TSDB_CODE_TMQ_NO_COMMITTED TAOS_DEF_ERROR_CODE(0, 0x4011) +#define TSDB_CODE_TMQ_SAME_COMMITTED_VALUE TAOS_DEF_ERROR_CODE(0, 0x4012) // stream #define TSDB_CODE_STREAM_TASK_NOT_EXIST TAOS_DEF_ERROR_CODE(0, 0x4100) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index aba127c36e..ae4651bd45 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -586,10 +586,14 @@ static int32_t asyncCommitOffset(tmq_t* tmq, char* pTopicName, int32_t vgId, STq if(code != 0){ goto end; } - if (offsetVal->type <= 0 || tOffsetEqual(offsetVal, &pVg->offsetInfo.committedOffset)) { + if (offsetVal->type <= 0) { code = TSDB_CODE_TMQ_INVALID_MSG; goto end; } + if (tOffsetEqual(offsetVal, &pVg->offsetInfo.committedOffset)){ + code = TSDB_CODE_TMQ_SAME_COMMITTED_VALUE; + goto end; + } char offsetBuf[TSDB_OFFSET_LEN] = {0}; tFormatOffset(offsetBuf, tListLen(offsetBuf), offsetVal); @@ -653,6 +657,7 @@ static void asyncCommitFromResult(tmq_t* tmq, const TAOS_RES* pRes, tmq_commit_c end: if(code != TSDB_CODE_SUCCESS && pCommitFp != NULL){ + if(code == TSDB_CODE_TMQ_SAME_COMMITTED_VALUE) code = TSDB_CODE_SUCCESS; pCommitFp(tmq, code, userParam); } } @@ -2412,6 +2417,7 @@ int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, code = pInfo->code; } + if(code == TSDB_CODE_TMQ_SAME_COMMITTED_VALUE) code = TSDB_CODE_SUCCESS; tsem_destroy(&pInfo->sem); taosMemoryFree(pInfo); @@ -2456,6 +2462,7 @@ void tmq_commit_offset_async(tmq_t *tmq, const char *pTopicName, int32_t vgId, i end: if(code != 0 && cb != NULL){ + if(code == TSDB_CODE_TMQ_SAME_COMMITTED_VALUE) code = TSDB_CODE_SUCCESS; cb(tmq, code, param); } } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 83a50f7051..64d757144b 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -642,6 +642,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_CONSUMER_CLOSED, "Consumer closed") TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_CONSUMER_ERROR, "Consumer error, to see log") TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_TOPIC_OUT_OF_RANGE, "Topic num out of range") TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_GROUP_OUT_OF_RANGE, "Group num out of range 100") +TAOS_DEFINE_ERROR(TSDB_CODE_TMQ_SAME_COMMITTED_VALUE, "Same committed value") // stream TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_TASK_NOT_EXIST, "Stream task not exist") From 2f6c56539242fba80f24ce896cbf9f7600fc9f61 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 28 Jul 2023 11:48:53 +0800 Subject: [PATCH 4/5] fix:pHandle not load if taosd restart --- source/dnode/mnode/impl/src/mndConsumer.c | 4 ++-- source/dnode/vnode/src/tq/tq.c | 16 ++++++++++++---- source/dnode/vnode/src/tq/tqMeta.c | 6 +++--- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index 6081b9a530..f9673b67cd 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -94,7 +94,7 @@ void mndDropConsumerFromSdb(SMnode *pMnode, int64_t consumerId){ bool mndRebTryStart() { int32_t old = atomic_val_compare_exchange_32(&mqRebInExecCnt, 0, 1); - mInfo("tq timer, rebalance counter old val:%d", old); + mDebug("tq timer, rebalance counter old val:%d", old); return old == 0; } @@ -116,7 +116,7 @@ void mndRebCntDec() { int32_t newVal = val - 1; int32_t oldVal = atomic_val_compare_exchange_32(&mqRebInExecCnt, val, newVal); if (oldVal == val) { - mInfo("rebalance trans end, rebalance counter:%d", newVal); + mDebug("rebalance trans end, rebalance counter:%d", newVal); break; } } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 89ed3ca1c7..d543e2afbd 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -522,10 +522,18 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { // 1. find handle pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); if (pHandle == NULL) { - tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey); - terrno = TSDB_CODE_INVALID_MSG; - taosWUnLockLatch(&pTq->lock); - return -1; + do{ + if (tqMetaGetHandle(pTq, req.subKey) == 0){ + pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); + if(pHandle != NULL){ + break; + } + } + tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey); + terrno = TSDB_CODE_INVALID_MSG; + taosWUnLockLatch(&pTq->lock); + return -1; + }while(0); } // 2. check re-balance status diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index df1c9ca7c9..08019f8a76 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -338,7 +338,7 @@ static int buildHandle(STQ* pTq, STqHandle* handle){ taosArrayDestroy(tbUidList); return -1; } - tqDebug("vgId:%d, tq try to get ctb for stb subscribe, suid:%" PRId64, pVnode->config.vgId, handle->execHandle.execTb.suid); + tqInfo("vgId:%d, tq try to get ctb for stb subscribe, suid:%" PRId64, pVnode->config.vgId, handle->execHandle.execTb.suid); handle->execHandle.pTqReader = tqReaderOpen(pVnode); tqReaderSetTbUidList(handle->execHandle.pTqReader, tbUidList, NULL); taosArrayDestroy(tbUidList); @@ -356,7 +356,7 @@ static int restoreHandle(STQ* pTq, void* pVal, int vLen, STqHandle* handle){ if(buildHandle(pTq, handle) < 0){ return -1; } - tqDebug("tq restore %s consumer %" PRId64 " vgId:%d", handle->subKey, handle->consumerId, vgId); + tqInfo("tq restore %s consumer %" PRId64 " vgId:%d", handle->subKey, handle->consumerId, vgId); return taosHashPut(pTq->pHandle, handle->subKey, strlen(handle->subKey), handle, sizeof(STqHandle)); } @@ -384,7 +384,7 @@ int32_t tqCreateHandle(STQ* pTq, SMqRebVgReq* req, STqHandle* handle){ if(buildHandle(pTq, handle) < 0){ return -1; } - tqDebug("tq restore %s consumer %" PRId64 " vgId:%d", handle->subKey, handle->consumerId, vgId); + tqInfo("tq restore %s consumer %" PRId64 " vgId:%d", handle->subKey, handle->consumerId, vgId); return taosHashPut(pTq->pHandle, handle->subKey, strlen(handle->subKey), handle, sizeof(STqHandle)); } From ce7db7bf903aad7598ca914d478f9d705c5cc1d5 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 28 Jul 2023 15:07:15 +0800 Subject: [PATCH 5/5] fix:subscribe time & return -1 if wal not exist --- source/dnode/mnode/impl/src/mndConsumer.c | 2 +- source/libs/wal/src/walRead.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index f9673b67cd..b2affacaa1 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -898,7 +898,7 @@ static int32_t mndConsumerActionInsert(SSdb *pSdb, SMqConsumerObj *pConsumer) { mInfo("consumer:0x%" PRIx64 " sub insert, cgroup:%s status:%d(%s) epoch:%d", pConsumer->consumerId, pConsumer->cgroup, pConsumer->status, mndConsumerStatusName(pConsumer->status), pConsumer->epoch); - pConsumer->subscribeTime = taosGetTimestampMs(); + pConsumer->subscribeTime = pConsumer->createTime; return 0; } diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 7ff7fe748e..038fbe444b 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -371,9 +371,9 @@ int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead) { pRead->pWal->vers.appliedVer); // TODO: valid ver -// if (ver > pRead->pWal->vers.appliedVer) { -// return -1; -// } + if (ver > pRead->pWal->vers.commitVer) { + return -1; + } if (pRead->curVersion != ver) { code = walReaderSeekVer(pRead, ver);