From 53e6d49e8c5762d53d5996b2d253198e9bf7b18d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Mar 2023 22:29:42 +0800 Subject: [PATCH 01/26] other: add some logs. --- source/libs/executor/src/scanoperator.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 7961d5518a..b6b9fb6b90 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -984,6 +984,7 @@ void resetTableScanInfo(STableScanInfo* pTableScanInfo, STimeWindow* pWin) { pTableScanInfo->scanTimes = 0; pTableScanInfo->currentGroupId = -1; tsdbReaderClose(pTableScanInfo->base.dataReader); + qDebug("1"); pTableScanInfo->base.dataReader = NULL; } @@ -1142,6 +1143,7 @@ static SSDataBlock* doRangeScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32 pInfo->updateWin = (STimeWindow){.skey = INT64_MIN, .ekey = INT64_MAX}; STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; tsdbReaderClose(pTableScanInfo->base.dataReader); + qDebug("2"); pTableScanInfo->base.dataReader = NULL; return NULL; } @@ -1615,6 +1617,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { if (!pTaskInfo->streamInfo.returned) { STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; tsdbReaderClose(pTSInfo->base.dataReader); + qDebug("3"); pTSInfo->base.dataReader = NULL; tqOffsetResetToLog(&pTaskInfo->streamInfo.prepareStatus, pTaskInfo->streamInfo.snapshotVer); qDebug("queue scan tsdb over, switch to wal ver %" PRId64 "", pTaskInfo->streamInfo.snapshotVer + 1); @@ -1760,6 +1763,8 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { /*resetTableScanInfo(pTSInfo, pWin);*/ tsdbReaderClose(pTSInfo->base.dataReader); + qDebug("4"); + pTSInfo->base.dataReader = NULL; pInfo->pTableScanOp->status = OP_OPENED; @@ -1805,6 +1810,8 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__NONE; STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; tsdbReaderClose(pTSInfo->base.dataReader); + qDebug("5"); + pTSInfo->base.dataReader = NULL; pTSInfo->base.cond.startVersion = -1; @@ -2601,6 +2608,8 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { return pBlock; } + qDebug("8"); + tsdbReaderClose(pInfo->base.dataReader); pInfo->base.dataReader = NULL; return NULL; From d54ae0c840991998f4fcf8846383179dd2130885 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Mar 2023 17:49:20 +0800 Subject: [PATCH 02/26] fix(tmq): free reader after check the reader status. 
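For context, the central pattern this patch introduces (see the executorimpl.c hunk further below) is to stop the scan operator and wait until no thread owns the task before the tsdb reader is freed. A minimal sketch of that pattern, condensed from the hunks in this patch; the helper function itself is illustrative, only the names it uses (setOperatorCompleted, taosMsleep, tsdbReaderClose, pTaskInfo->owner) come from the actual diff:

// Illustrative sketch, not the committed code: close the tsdb reader only after the
// owning thread has released the task, so a concurrent poll cannot touch a freed reader.
static void closeReaderWhenIdle(SExecTaskInfo* pTaskInfo, SOperatorInfo* pScanOp) {
  STableScanInfo* pTSInfo = pScanOp->info;
  setOperatorCompleted(pScanOp);              // ask the table scan to stop producing blocks
  while (pTaskInfo->owner != 0) {             // another thread is still executing this task
    taosMsleep(100);                          // back off and re-check ownership
  }
  tsdbReaderClose(pTSInfo->base.dataReader);  // safe now: nobody else can use the reader
  pTSInfo->base.dataReader = NULL;            // avoid a double close later on
}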
--- source/dnode/vnode/src/inc/tq.h | 31 +++++++++---------------- source/dnode/vnode/src/tq/tq.c | 20 ++++++++++++---- source/dnode/vnode/src/tq/tqExec.c | 15 +++++++++++- source/libs/executor/src/executor.c | 2 -- source/libs/executor/src/executorimpl.c | 21 +++++++++++++++-- source/libs/executor/src/scanoperator.c | 2 +- 6 files changed, 61 insertions(+), 30 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 792fed2309..1172062e9b 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -79,8 +79,7 @@ typedef struct { } STqExecDb; typedef struct { - int8_t subType; - + int8_t subType; STqReader* pExecReader; qTaskInfo_t task; union { @@ -89,27 +88,19 @@ typedef struct { STqExecDb execDb; }; int32_t numOfCols; // number of out pout column, temporarily used + bool stop; // denote if needs to be stopped or not } STqExecHandle; typedef struct { - // info - char subKey[TSDB_SUBSCRIBE_KEY_LEN]; - int64_t consumerId; - int32_t epoch; - int8_t fetchMeta; - - int64_t snapshotVer; - - SWalReader* pWalReader; - - SWalRef* pRef; - - // push - STqPushHandle pushHandle; - - // exec - STqExecHandle execHandle; - + char subKey[TSDB_SUBSCRIBE_KEY_LEN]; + int64_t consumerId; + int32_t epoch; + int8_t fetchMeta; + int64_t snapshotVer; + SWalReader* pWalReader; + SWalRef* pRef; + STqPushHandle pushHandle; // push + STqExecHandle execHandle; // exec } STqHandle; typedef struct { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 4d21a2e7f3..026acd0e64 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -569,16 +569,19 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { SMqDataRsp dataRsp = {0}; tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); + // lock taosWLockLatch(&pTq->pushLock); if (tqScanData(pTq, pHandle, &dataRsp, &fetchOffsetNew) < 0) { return -1; } + // todo handle the case where re-balance occurs. // till now, all data has been rsp to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && - dataRsp.reqOffset.version == dataRsp.rspOffset.version) { + dataRsp.reqOffset.version == dataRsp.rspOffset.version && (pHandle->execHandle.stop != false)) { STqPushEntry* pPushEntry = taosMemoryCalloc(1, sizeof(STqPushEntry)); + if (pPushEntry != NULL) { pPushEntry->pInfo = pMsg->info; memcpy(pPushEntry->subKey, pHandle->subKey, TSDB_SUBSCRIBE_KEY_LEN); @@ -591,17 +594,21 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s offset:%" PRId64 ", vgId:%d save handle to push mgr", consumerId, pHandle->subKey, dataRsp.reqOffset.version, TD_VID(pTq->pVnode)); + // unlock taosWUnLockLatch(&pTq->pushLock); return 0; } } - taosWUnLockLatch(&pTq->pushLock); + taosWUnLockLatch(&pTq->pushLock); if (tqSendDataRsp(pTq, pMsg, &req, &dataRsp) < 0) { code = -1; } + pHandle->execHandle.stop = false; + + //NOTE: this pHandle->consumerId may have been changed already. tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 ", ts:%" PRId64 "", consumerId, pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.blockNum, dataRsp.rspOffset.type, dataRsp.rspOffset.uid, dataRsp.rspOffset.ts); @@ -610,6 +617,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return code; } + // todo handle the case where re-balance occurs. 
// for taosx SMqMetaRsp metaRsp = {0}; STaosxRsp taosxRsp = {0}; @@ -894,11 +902,14 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } } else { // TODO handle qmsg and exec modification - tqInfo("update the consumer info, old consumer id:0x%"PRIx64", new Id:0x%"PRIx64, pHandle->consumerId, req.newConsumerId); + tqInfo("vgId:%d switch consumer from Id:0x%"PRIx64" to Id:0x%"PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); atomic_store_32(&pHandle->epoch, -1); atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); taosMemoryFree(req.qmsg); + + pHandle->execHandle.stop = true; + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { qStreamCloseTsdbReader(pHandle->execHandle.task); } @@ -906,7 +917,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { return -1; } - // close handle + + pHandle->execHandle.stop = false; } return 0; diff --git a/source/dnode/vnode/src/tq/tqExec.c b/source/dnode/vnode/src/tq/tqExec.c index f97c5ce93c..81b68f8473 100644 --- a/source/dnode/vnode/src/tq/tqExec.c +++ b/source/dnode/vnode/src/tq/tqExec.c @@ -46,11 +46,13 @@ static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, STaosxRsp* pRsp static int32_t tqAddTbNameToRsp(const STQ* pTq, int64_t uid, STaosxRsp* pRsp, int32_t n) { SMetaReader mr = {0}; metaReaderInit(&mr, pTq->pVnode->pMeta, 0); + // TODO add reference to gurantee success if (metaGetTableEntryByUidCache(&mr, uid) < 0) { metaReaderClear(&mr); return -1; } + for (int32_t i = 0; i < n; i++) { char* tbName = taosStrdup(mr.me.name); taosArrayPush(pRsp->blockTbName, &tbName); @@ -83,13 +85,16 @@ int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffs while (1) { SSDataBlock* pDataBlock = NULL; uint64_t ts = 0; + tqDebug("vgId:%d, tmq task start to execute", pTq->pVnode->config.vgId); if (qExecTask(task, &pDataBlock, &ts) < 0) { tqError("vgId:%d, task exec error since %s", pTq->pVnode->config.vgId, terrstr()); return -1; } + tqDebug("vgId:%d, tmq task executed, get %p", pTq->pVnode->config.vgId, pDataBlock); + // current scan should be stopped asap, since the rebalance occurs. 
if (pDataBlock == NULL) { break; } @@ -99,7 +104,15 @@ int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffs if (pOffset->type == TMQ_OFFSET__SNAPSHOT_DATA) { rowCnt += pDataBlock->info.rows; - if (rowCnt >= 4096) break; + if (rowCnt >= 4096) { + break; + } + } + + if (pExec->stop) { + tqDebug("vgId:%d, current vgroups has been transferred to other consumer, return results asap", + TD_VID(pTq->pVnode)); + break; } } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 9fe0f4f8a7..d00f9d9843 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -709,7 +709,6 @@ void qStopTaskOperators(SExecTaskInfo* pTaskInfo) { int32_t qAsyncKillTask(qTaskInfo_t qinfo, int32_t rspCode) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)qinfo; - if (pTaskInfo == NULL) { return TSDB_CODE_QRY_INVALID_QHANDLE; } @@ -717,7 +716,6 @@ int32_t qAsyncKillTask(qTaskInfo_t qinfo, int32_t rspCode) { qDebug("%s execTask async killed", GET_TASKID(pTaskInfo)); setTaskKilled(pTaskInfo, rspCode); - qStopTaskOperators(pTaskInfo); return TSDB_CODE_SUCCESS; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 0dcefec93d..54e443d1da 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2771,11 +2771,17 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, SStreamState* pSta } void qStreamCloseTsdbReader(void* task) { - if (task == NULL) return; + if (task == NULL) { + return; + } + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)task; SOperatorInfo* pOp = pTaskInfo->pRoot; - qDebug("stream close tsdb reader, reset status uid %" PRId64 " ts %" PRId64, pTaskInfo->streamInfo.lastStatus.uid, + + qDebug("stream close tsdb reader, reset status uid:%" PRId64 " ts:%" PRId64, pTaskInfo->streamInfo.lastStatus.uid, pTaskInfo->streamInfo.lastStatus.ts); + + // todo refactor, other thread may already use this read to extract data. pTaskInfo->streamInfo.lastStatus = (STqOffsetVal){0}; while (pOp->numOfDownstream == 1 && pOp->pDownstream[0]) { SOperatorInfo* pDownstreamOp = pOp->pDownstream[0]; @@ -2783,8 +2789,19 @@ void qStreamCloseTsdbReader(void* task) { SStreamScanInfo* pInfo = pDownstreamOp->info; if (pInfo->pTableScanOp) { STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; + + setOperatorCompleted(pInfo->pTableScanOp); + while(pTaskInfo->owner != 0) { + taosMsleep(100); + qDebug("wait for the reader stopping"); + } + tsdbReaderClose(pTSInfo->base.dataReader); pTSInfo->base.dataReader = NULL; + + // restore the status, todo refactor. + pInfo->pTableScanOp->status = OP_OPENED; + pTaskInfo->status = TASK_NOT_COMPLETED; return; } } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 8ccd0c78fc..e98c27fb88 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -751,7 +751,7 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) { while (1) { SSDataBlock* result = doGroupedTableScan(pOperator); - if (result) { + if (result || (pOperator->status == OP_EXEC_DONE)) { return result; } From 41f26148a20f3cc900152974a91bb3fd04737369 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 8 Mar 2023 12:09:54 +0800 Subject: [PATCH 03/26] fix(mq): re-balance consumer if it is in an in-balance status. 
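In short: mndProcessMqTimerMsg() previously only collected consumers in MQ_CONSUMER_STATUS__MODIFY when building the re-balance set, so a consumer left in the transitional MQ_CONSUMER_STATUS__MODIFY_IN_REB state was never re-balanced again. The hunk below widens that check; a hedged condensation of the new condition (the helper name is illustrative, not part of the patch):

// Sketch only: the broadened status test applied inside mndProcessMqTimerMsg().
static bool consumerNeedsRebalance(int32_t status) {
  return status == MQ_CONSUMER_STATUS__MODIFY || status == MQ_CONSUMER_STATUS__MODIFY_IN_REB;
}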
--- source/dnode/mnode/impl/inc/mndConsumer.h | 4 ++-- source/dnode/mnode/impl/src/mndConsumer.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndConsumer.h b/source/dnode/mnode/impl/inc/mndConsumer.h index 1176e1af0b..99b0d06936 100644 --- a/source/dnode/mnode/impl/inc/mndConsumer.h +++ b/source/dnode/mnode/impl/inc/mndConsumer.h @@ -24,10 +24,10 @@ extern "C" { enum { MQ_CONSUMER_STATUS__MODIFY = 1, - MQ_CONSUMER_STATUS__MODIFY_IN_REB, + MQ_CONSUMER_STATUS__MODIFY_IN_REB, // this value is not used anymore MQ_CONSUMER_STATUS__READY, MQ_CONSUMER_STATUS__LOST, - MQ_CONSUMER_STATUS__LOST_IN_REB, + MQ_CONSUMER_STATUS__LOST_IN_REB, // this value is not used anymore MQ_CONSUMER_STATUS__LOST_REBD, MQ_CONSUMER_STATUS__REMOVED, }; diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index f1ef83aca5..4b6de316a1 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -317,7 +317,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId); } taosRUnLockLatch(&pConsumer->lock); - } else if (status == MQ_CONSUMER_STATUS__MODIFY) { + } else if (status == MQ_CONSUMER_STATUS__MODIFY || status == MQ_CONSUMER_STATUS__MODIFY_IN_REB) { taosRLockLatch(&pConsumer->lock); int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics); From 03aa391859686b32d00bc97a414ad7ee082472f2 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 9 Mar 2023 23:03:51 +0800 Subject: [PATCH 04/26] fix(tmq): add lock before close the tsdbreader. --- source/dnode/vnode/src/inc/tq.h | 1 + source/dnode/vnode/src/inc/vnodeInt.h | 3 + source/dnode/vnode/src/tq/tq.c | 340 +++++++++++++++----------- source/dnode/vnode/src/tq/tqMeta.c | 2 +- source/dnode/vnode/src/tq/tqPush.c | 48 ++++ 5 files changed, 246 insertions(+), 148 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 1172062e9b..62d6a40333 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -101,6 +101,7 @@ typedef struct { SWalRef* pRef; STqPushHandle pushHandle; // push STqExecHandle execHandle; // exec + int8_t execStatus; // this handle is used to handle the poll requirement } STqHandle; typedef struct { diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index c0d017e350..09ff844d95 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -192,6 +192,9 @@ void tqCleanUp(); STQ* tqOpen(const char* path, SVnode* pVnode); void tqClose(STQ*); int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); +int tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp); +int tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); + int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 026acd0e64..14bc51eb2c 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -463,152 +463,120 @@ static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, const SMqPollReq* pReq) { return 0; } -int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { - SMqPollReq req = {0}; - int32_t code = 0; - STqOffsetVal 
fetchOffsetNew; - SWalCkHead* pCkHead = NULL; +static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, + SRpcMsg* pMsg, bool* pBlockReturned) { + uint64_t consumerId = pRequest->consumerId; + STqOffsetVal reqOffset = pRequest->reqOffset; + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, pRequest->subKey); + *pBlockReturned = false; - if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) { - tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen); - return -1; - } + // In this vnode, data has been polled by consumer for this topic, so let's continue from the last offset value. + if (pOffset != NULL) { + *pOffsetVal = pOffset->val; - int64_t consumerId = req.consumerId; - int32_t reqEpoch = req.epoch; - STqOffsetVal reqOffset = req.reqOffset; - - // 1. find handle - STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); - if (pHandle == NULL) { - tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s not found", consumerId, TD_VID(pTq->pVnode), - req.subKey); - return -1; - } - - // 2. check rebalance - if (pHandle->consumerId != consumerId) { - tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, - consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); - terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; - return -1; - } - - // update epoch if need - int32_t savedEpoch = atomic_load_32(&pHandle->epoch); - while (savedEpoch < reqEpoch) { - tqDebug("tmq poll: consumer:0x%"PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); - savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); - } - - char buf[80]; - tFormatOffset(buf, 80, &reqOffset); - tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s", consumerId, - req.epoch, pHandle->subKey, TD_VID(pTq->pVnode), buf); - - // 2.reset offset if needed - if (reqOffset.type > 0) { - fetchOffsetNew = reqOffset; + char formatBuf[80]; + tFormatOffset(formatBuf, 80, pOffsetVal); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, prev offset found, offset reset to %s and continue.", + consumerId, pHandle->subKey, TD_VID(pTq->pVnode), formatBuf); + return 0; } else { - STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, req.subKey); - if (pOffset != NULL) { - fetchOffsetNew = pOffset->val; - char formatBuf[80]; - tFormatOffset(formatBuf, 80, &fetchOffsetNew); - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vg %d, offset reset to %s", consumerId, pHandle->subKey, - TD_VID(pTq->pVnode), formatBuf); - } else { - if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) { - if (req.useSnapshot) { - if (pHandle->fetchMeta) { - tqOffsetResetToMeta(&fetchOffsetNew, 0); - } else { - tqOffsetResetToData(&fetchOffsetNew, 0, 0); - } + // no poll occurs in this vnode for this topic, let's seek to the right offset value. 
+ if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) { + if (pRequest->useSnapshot) { + if (pHandle->fetchMeta) { + tqOffsetResetToMeta(pOffsetVal, 0); } else { - pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); - if (pHandle->pRef == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - tqOffsetResetToLog(&fetchOffsetNew, pHandle->pRef->refVer - 1); + tqOffsetResetToData(pOffsetVal, 0, 0); + } + } else { + pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); + if (pHandle->pRef == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; } - } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); - tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, offset reset to %" PRId64, consumerId, - pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.rspOffset.version); - if (tqSendDataRsp(pTq, pMsg, &req, &dataRsp) < 0) { - code = -1; - } - tDeleteSMqDataRsp(&dataRsp); - return code; - } else { - STaosxRsp taosxRsp = {0}; - tqInitTaosxRsp(&taosxRsp, &req); - tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - if (tqSendTaosxRsp(pTq, pMsg, &req, &taosxRsp) < 0) { - code = -1; - } - tDeleteSTaosxRsp(&taosxRsp); - return code; - } - } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) { - tqError("tmq poll: subkey %s, no offset committed for consumer:0x%" PRIx64 - " in vg %d, subkey %s, reset none failed", - pHandle->subKey, consumerId, TD_VID(pTq->pVnode), req.subKey); - terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; - return -1; + tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1); } + } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + SMqDataRsp dataRsp = {0}; + tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); + + tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, offset reset to %" PRId64, consumerId, + pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.rspOffset.version); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); + tDeleteSMqDataRsp(&dataRsp); + + *pBlockReturned = true; + return code; + } else { + STaosxRsp taosxRsp = {0}; + tqInitTaosxRsp(&taosxRsp, pRequest); + tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); + int32_t code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); + tDeleteSTaosxRsp(&taosxRsp); + + *pBlockReturned = true; + return code; + } + } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) { + tqError("tmq poll: subkey %s, no offset committed for consumer:0x%" PRIx64 + " in vg %d, subkey %s, reset none failed", + pHandle->subKey, consumerId, TD_VID(pTq->pVnode), pRequest->subKey); + terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; + return -1; } } + return 0; +} + +static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) { + int32_t code = -1; + STqOffsetVal reqOffset = pRequest->reqOffset; + STqOffsetVal fetchOffsetNew; + SWalCkHead* pCkHead = NULL; + uint64_t consumerId = pRequest->consumerId; + + // 1. reset the offset if needed + if (reqOffset.type > 0) { + fetchOffsetNew = reqOffset; + } else { // handle the reset offset cases, according to the consumer's choice. 
+ bool blockReturned = false; + code = extractResetOffsetVal(&fetchOffsetNew, pTq, pHandle, pRequest, pMsg, &blockReturned); + if (code != 0) { + return code; + } + + // empty block returned, quit + if (blockReturned) { + return 0; + } + } + + // this is a normal subscription requirement if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); + tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); // lock taosWLockLatch(&pTq->pushLock); - if (tqScanData(pTq, pHandle, &dataRsp, &fetchOffsetNew) < 0) { - return -1; - } + code = tqScanData(pTq, pHandle, &dataRsp, &fetchOffsetNew); - // todo handle the case where re-balance occurs. - // till now, all data has been rsp to consumer, new data needs to push client once arrived. + // till now, all data has been transferred to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && - dataRsp.reqOffset.version == dataRsp.rspOffset.version && (pHandle->execHandle.stop != false)) { - STqPushEntry* pPushEntry = taosMemoryCalloc(1, sizeof(STqPushEntry)); - - if (pPushEntry != NULL) { - pPushEntry->pInfo = pMsg->info; - memcpy(pPushEntry->subKey, pHandle->subKey, TSDB_SUBSCRIBE_KEY_LEN); - dataRsp.withTbName = 0; - memcpy(&pPushEntry->dataRsp, &dataRsp, sizeof(SMqDataRsp)); - pPushEntry->dataRsp.head.consumerId = consumerId; - pPushEntry->dataRsp.head.epoch = reqEpoch; - pPushEntry->dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP; - taosHashPut(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey) + 1, &pPushEntry, sizeof(void*)); - - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s offset:%" PRId64 ", vgId:%d save handle to push mgr", - consumerId, pHandle->subKey, dataRsp.reqOffset.version, TD_VID(pTq->pVnode)); - - // unlock - taosWUnLockLatch(&pTq->pushLock); - return 0; - } + dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { + code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp); + taosWUnLockLatch(&pTq->pushLock); + return code; } taosWUnLockLatch(&pTq->pushLock); - if (tqSendDataRsp(pTq, pMsg, &req, &dataRsp) < 0) { - code = -1; - } - + code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); pHandle->execHandle.stop = false; - //NOTE: this pHandle->consumerId may have been changed already. + // NOTE: this pHandle->consumerId may have been changed already. 
tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 ", ts:%" PRId64 "", consumerId, pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.blockNum, dataRsp.rspOffset.type, dataRsp.rspOffset.uid, dataRsp.rspOffset.ts); @@ -621,7 +589,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { // for taosx SMqMetaRsp metaRsp = {0}; STaosxRsp taosxRsp = {0}; - tqInitTaosxRsp(&taosxRsp, &req); + tqInitTaosxRsp(&taosxRsp, pRequest); if (fetchOffsetNew.type != TMQ_OFFSET__LOG) { if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, &fetchOffsetNew) < 0) { @@ -629,11 +597,9 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } if (metaRsp.metaRspLen > 0) { - if (tqSendMetaPollRsp(pTq, pMsg, &req, &metaRsp) < 0) { - code = -1; - } + code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send meta offset type:%d,uid:%" PRId64 - ",version:%" PRId64, + ",version:%" PRId64, consumerId, pHandle->subKey, TD_VID(pTq->pVnode), metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.version); taosMemoryFree(metaRsp.metaRsp); @@ -642,9 +608,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } if (taosxRsp.blockNum > 0) { - if (tqSendTaosxRsp(pTq, pMsg, &req, &taosxRsp) < 0) { - code = -1; - } + code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); tDeleteSTaosxRsp(&taosxRsp); return code; } else { @@ -668,17 +632,19 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { walSetReaderCapacity(pHandle->pWalReader, 2048); while (1) { - savedEpoch = atomic_load_32(&pHandle->epoch); - if (savedEpoch > reqEpoch) { + // todo refactor: this is not correct. + int32_t savedEpoch = atomic_load_32(&pHandle->epoch); + if (savedEpoch > pRequest->epoch) { tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, vg %d offset %" PRId64 ", found new consumer epoch %d, discard req epoch %d", - consumerId, req.epoch, pHandle->subKey, TD_VID(pTq->pVnode), fetchVer, savedEpoch, reqEpoch); + consumerId, pRequest->epoch, pHandle->subKey, TD_VID(pTq->pVnode), fetchVer, savedEpoch, + pRequest->epoch); break; } if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead) < 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - if (tqSendTaosxRsp(pTq, pMsg, &req, &taosxRsp) < 0) { + if (tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp) < 0) { code = -1; } tDeleteSTaosxRsp(&taosxRsp); @@ -689,7 +655,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { SWalCont* pHead = &pCkHead->head; tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", consumerId, - req.epoch, TD_VID(pTq->pVnode), fetchVer, pHead->msgType); + pRequest->epoch, TD_VID(pTq->pVnode), fetchVer, pHead->msgType); if (pHead->msgType == TDMT_VND_SUBMIT) { SPackedData submit = { @@ -699,12 +665,12 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { }; if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp) < 0) { tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", consumerId, TD_VID(pTq->pVnode), - req.subKey); + pRequest->subKey); return -1; } if (taosxRsp.blockNum > 0 /* threshold */) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - if (tqSendTaosxRsp(pTq, pMsg, &req, &taosxRsp) < 0) { + if (tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp) < 0) { code = -1; } tDeleteSTaosxRsp(&taosxRsp); @@ -722,7 +688,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { metaRsp.resMsgType = pHead->msgType; metaRsp.metaRspLen = pHead->bodyLen; metaRsp.metaRsp = pHead->body; - if 
(tqSendMetaPollRsp(pTq, pMsg, &req, &metaRsp) < 0) { + if (tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp) < 0) { code = -1; taosMemoryFreeClear(pCkHead); tDeleteSTaosxRsp(&taosxRsp); @@ -741,6 +707,68 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return 0; } +int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { + SMqPollReq req = {0}; + if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) { + tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen); + terrno = TSDB_CODE_INVALID_MSG; + return -1; + } + + int64_t consumerId = req.consumerId; + int32_t reqEpoch = req.epoch; + STqOffsetVal reqOffset = req.reqOffset; + int32_t vgId = TD_VID(pTq->pVnode); + + // 1. find handle + STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); + if (pHandle == NULL) { + tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey); + terrno = TSDB_CODE_INVALID_MSG; + return -1; + } + + int8_t oldVal = atomic_val_compare_exchange_8(&pHandle->execStatus, 0, 1); + + // other thread has started to re-balance this handle, return empty block for this poll + if (oldVal != 0) { + SMqDataRsp dataRsp = {0}; + tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); + int32_t code = tqSendDataRsp(pTq, pMsg, &req, &dataRsp); // here we return an empty block to client + tDeleteSMqDataRsp(&dataRsp); + return code; + } + + // pHandle->execStatus == 1, and we are safe now + // keep the epoch in the first plance, this value may be change by other threads. + int32_t savedEpoch = atomic_load_32(&pHandle->epoch); + + // 2. check rebalance status + if (pHandle->consumerId != consumerId) { + tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, + consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); + terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; + atomic_store_8(&pHandle->execStatus, 0); // reset the flag + return -1; + } + + // 3. 
update the epoch value + while (savedEpoch < reqEpoch) { + tqDebug("tmq poll: consumer:0x%"PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); + savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); + } + + char buf[80]; + tFormatOffset(buf, 80, &reqOffset); + tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s", consumerId, + req.epoch, pHandle->subKey, TD_VID(pTq->pVnode), buf); + + int32_t code = extractDataForMq(pTq, pHandle, &req, pMsg); + + atomic_store_8(&pHandle->execStatus, 0); // restore the flag + return code; +} + int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg; @@ -813,7 +841,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tDecodeSMqRebVgReq(msg, &req); // todo lock - tqDebug("vgId:%d, tq process sub req %s", pTq->pVnode->config.vgId, req.subKey); + tqDebug("vgId:%d, tq process sub req %s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey, + req.oldConsumerId, req.newConsumerId); STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); if (pHandle == NULL) { @@ -821,11 +850,13 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId is %" PRId64 "", req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId); } + if (req.newConsumerId == -1) { tqError("vgId:%d, tq invalid rebalance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId); taosMemoryFree(req.qmsg); return 0; } + STqHandle tqHandle = {0}; pHandle = &tqHandle; /*taosInitRWLatch(&pExec->lock);*/ @@ -853,6 +884,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg .initTqReader = true, .version = ver, }; + pHandle->snapshotVer = ver; if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { @@ -867,6 +899,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) { pHandle->pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); pHandle->execHandle.pExecReader = tqOpenReader(pTq->pVnode); + pHandle->execHandle.execDb.pFilterOutTbUid = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); buildSnapContext(handle.meta, handle.version, 0, pHandle->execHandle.subType, pHandle->fetchMeta, @@ -875,7 +908,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg pHandle->execHandle.task = qCreateQueueExecTaskInfo(NULL, &handle, NULL, NULL); } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { pHandle->pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); - pHandle->execHandle.execTb.suid = req.suid; SArray* tbUidList = taosArrayInit(0, sizeof(int64_t)); @@ -895,25 +927,39 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } taosHashPut(pTq->pHandle, req.subKey, strlen(req.subKey), pHandle, sizeof(STqHandle)); - tqDebug("try to persist handle %s consumer:0x%" PRIx64" , old consumer:0x%"PRIx64, req.subKey, pHandle->consumerId, - oldConsumerId); + tqDebug("try to persist handle %s consumer:0x%" PRIx64 " , old consumer:0x%" PRIx64, req.subKey, + pHandle->consumerId, oldConsumerId); if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { return -1; } } else { - // TODO handle qmsg and exec modification - 
tqInfo("vgId:%d switch consumer from Id:0x%"PRIx64" to Id:0x%"PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); +// ASSERT(pHandle->consumerId == req.oldConsumerId || req.oldConsumerId == -1 || pHandle->consumerId == -1); + if (pHandle->consumerId == req.newConsumerId) { // do nothing + tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); + atomic_store_32(&pHandle->epoch, -1); + atomic_add_fetch_32(&pHandle->epoch, 1); + return tqMetaSaveHandle(pTq, req.subKey, pHandle); + } + + tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, + req.newConsumerId); + + taosWLockLatch(&pTq->pushLock); atomic_store_32(&pHandle->epoch, -1); + + // remove if it has been register in the push manager, and return one empty block to consumer + tqRemovePushEntry(pTq, req.subKey, (int32_t) strlen(req.subKey), pHandle->consumerId, true); + atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); taosMemoryFree(req.qmsg); pHandle->execHandle.stop = true; - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { qStreamCloseTsdbReader(pHandle->execHandle.task); } + taosWUnLockLatch(&pTq->pushLock); if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { return -1; } diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index a85e6e0a70..ce0aa144f9 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -283,7 +283,7 @@ int32_t tqMetaRestoreHandle(STQ* pTq) { tdbTbcMoveToFirst(pCur); while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { - STqHandle handle; + STqHandle handle = {0}; tDecoderInit(&decoder, (uint8_t*)pVal, vLen); tDecodeSTqHandle(&decoder, &handle); tDecoderClear(&decoder); diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 5a25d7e894..03bb14e447 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -344,3 +344,51 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) return 0; } + +int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, + SMqDataRsp* pDataRsp) { + uint64_t consumerId = pRequest->consumerId; + int32_t vgId = TD_VID(pTq->pVnode); + STqHandle* pTqHandle = pHandle; + + STqPushEntry* pPushEntry = taosMemoryCalloc(1, sizeof(STqPushEntry)); + if (pPushEntry == NULL) { + tqDebug("tmq poll: consumer:0x%" PRIx64 ", vgId:%d failed to malloc, size:%d", consumerId, vgId, + (int32_t)sizeof(STqPushEntry)); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pPushEntry->pInfo = pRpcMsg->info; + memcpy(pPushEntry->subKey, pTqHandle->subKey, TSDB_SUBSCRIBE_KEY_LEN); + pDataRsp->withTbName = 0; + + memcpy(&pPushEntry->dataRsp, pDataRsp, sizeof(SMqDataRsp)); + pPushEntry->dataRsp.head.consumerId = consumerId; + pPushEntry->dataRsp.head.epoch = pRequest->epoch; + pPushEntry->dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP; + taosHashPut(pTq->pPushMgr, pTqHandle->subKey, strlen(pTqHandle->subKey), &pPushEntry, sizeof(void*)); + + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s offset:%" PRId64 ", vgId:%d save handle to push mgr, total:%d", consumerId, + pTqHandle->subKey, pDataRsp->reqOffset.version, vgId, taosHashGetSize(pTq->pPushMgr)); + return 0; +} + +int32_t tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) { + int32_t vgId = TD_VID(pTq->pVnode); + STqPushEntry** pEntry = 
taosHashGet(pTq->pPushMgr, pKey, keyLen); + if (pEntry != NULL) { + uint64_t cId = (*pEntry)->dataRsp.head.consumerId; + ASSERT(consumerId == cId); + + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s vgId:%d remove from push mgr, remains:%d", consumerId, + (*pEntry)->subKey, vgId, taosHashGetSize(pTq->pPushMgr) - 1); + taosHashRemove(pTq->pPushMgr, pKey, keyLen); + + if (rspConsumer) { // rsp the old consumer with empty block. + tqPushDataRsp(pTq, *pEntry); + } + } + + return 0; +} From 09e84884be2a4bccd90f92915010fae883c84c39 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 9 Mar 2023 23:31:29 +0800 Subject: [PATCH 05/26] refactor: remove invalid assert --- source/dnode/vnode/src/tq/tq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 14bc51eb2c..543fa9dd76 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -933,7 +933,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg return -1; } } else { -// ASSERT(pHandle->consumerId == req.oldConsumerId || req.oldConsumerId == -1 || pHandle->consumerId == -1); if (pHandle->consumerId == req.newConsumerId) { // do nothing tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); atomic_store_32(&pHandle->epoch, -1); From 691cb08e886f63789f9715a5da1b644b6fca64aa Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 09:23:03 +0800 Subject: [PATCH 06/26] refactor: remove some unused attributes. --- source/dnode/vnode/src/inc/tq.h | 10 ++-- source/dnode/vnode/src/tq/tq.c | 90 +++++++++++++----------------- source/dnode/vnode/src/tq/tqExec.c | 6 -- 3 files changed, 43 insertions(+), 63 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 62d6a40333..6cb54c6c18 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -79,16 +79,15 @@ typedef struct { } STqExecDb; typedef struct { - int8_t subType; - STqReader* pExecReader; - qTaskInfo_t task; + int8_t subType; + STqReader* pExecReader; + qTaskInfo_t task; union { STqExecCol execCol; STqExecTb execTb; STqExecDb execDb; }; - int32_t numOfCols; // number of out pout column, temporarily used - bool stop; // denote if needs to be stopped or not + int32_t numOfCols; // number of out pout column, temporarily used } STqExecHandle; typedef struct { @@ -101,7 +100,6 @@ typedef struct { SWalRef* pRef; STqPushHandle pushHandle; // push STqExecHandle execHandle; // exec - int8_t execStatus; // this handle is used to handle the poll requirement } STqHandle; typedef struct { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 543fa9dd76..8d55a7bea1 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -534,17 +534,19 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) { int32_t code = -1; - STqOffsetVal reqOffset = pRequest->reqOffset; - STqOffsetVal fetchOffsetNew; + STqOffsetVal offset = {0}; SWalCkHead* pCkHead = NULL; + int32_t vgId = TD_VID(pTq->pVnode); + + STqOffsetVal reqOffset = pRequest->reqOffset; uint64_t consumerId = pRequest->consumerId; // 1. reset the offset if needed if (reqOffset.type > 0) { - fetchOffsetNew = reqOffset; + offset = reqOffset; } else { // handle the reset offset cases, according to the consumer's choice. 
bool blockReturned = false; - code = extractResetOffsetVal(&fetchOffsetNew, pTq, pHandle, pRequest, pMsg, &blockReturned); + code = extractResetOffsetVal(&offset, pTq, pHandle, pRequest, pMsg, &blockReturned); if (code != 0) { return code; } @@ -562,7 +564,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* // lock taosWLockLatch(&pTq->pushLock); - code = tqScanData(pTq, pHandle, &dataRsp, &fetchOffsetNew); + code = tqScanData(pTq, pHandle, &dataRsp, &offset); // till now, all data has been transferred to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && @@ -574,12 +576,12 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* taosWUnLockLatch(&pTq->pushLock); code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); - pHandle->execHandle.stop = false; // NOTE: this pHandle->consumerId may have been changed already. - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 ", ts:%" PRId64 "", - consumerId, pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.blockNum, dataRsp.rspOffset.type, - dataRsp.rspOffset.uid, dataRsp.rspOffset.ts); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 + ", ts:%" PRId64, + consumerId, pHandle->subKey, vgId, dataRsp.blockNum, dataRsp.rspOffset.type, dataRsp.rspOffset.uid, + dataRsp.rspOffset.ts); tDeleteSMqDataRsp(&dataRsp); return code; @@ -591,16 +593,16 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* STaosxRsp taosxRsp = {0}; tqInitTaosxRsp(&taosxRsp, pRequest); - if (fetchOffsetNew.type != TMQ_OFFSET__LOG) { - if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, &fetchOffsetNew) < 0) { + if (offset.type != TMQ_OFFSET__LOG) { + if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, &offset) < 0) { return -1; } if (metaRsp.metaRspLen > 0) { code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send meta offset type:%d,uid:%" PRId64 - ",version:%" PRId64, - consumerId, pHandle->subKey, TD_VID(pTq->pVnode), metaRsp.rspOffset.type, metaRsp.rspOffset.uid, + ",version:%" PRId64, + consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.version); taosMemoryFree(metaRsp.metaRsp); tDeleteSTaosxRsp(&taosxRsp); @@ -612,17 +614,17 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* tDeleteSTaosxRsp(&taosxRsp); return code; } else { - fetchOffsetNew = taosxRsp.rspOffset; + offset = taosxRsp.rspOffset; } tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send data blockNum:%d, offset type:%d,uid:%" PRId64 ",version:%" PRId64, - consumerId, pHandle->subKey, TD_VID(pTq->pVnode), taosxRsp.blockNum, taosxRsp.rspOffset.type, - taosxRsp.rspOffset.uid, taosxRsp.rspOffset.version); + consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid, + taosxRsp.rspOffset.version); } - if (fetchOffsetNew.type == TMQ_OFFSET__LOG) { - int64_t fetchVer = fetchOffsetNew.version + 1; + if (offset.type == TMQ_OFFSET__LOG) { + int64_t fetchVer = offset.version + 1; pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); if (pCkHead == NULL) { tDeleteSTaosxRsp(&taosxRsp); @@ -637,8 +639,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (savedEpoch > pRequest->epoch) { tqWarn("tmq poll: 
consumer:0x%" PRIx64 " (epoch %d), subkey %s, vg %d offset %" PRId64 ", found new consumer epoch %d, discard req epoch %d", - consumerId, pRequest->epoch, pHandle->subKey, TD_VID(pTq->pVnode), fetchVer, savedEpoch, - pRequest->epoch); + consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch); break; } @@ -655,7 +656,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* SWalCont* pHead = &pCkHead->head; tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", consumerId, - pRequest->epoch, TD_VID(pTq->pVnode), fetchVer, pHead->msgType); + pRequest->epoch, vgId, fetchVer, pHead->msgType); if (pHead->msgType == TDMT_VND_SUBMIT) { SPackedData submit = { @@ -664,7 +665,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* .ver = pHead->version, }; if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp) < 0) { - tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", consumerId, TD_VID(pTq->pVnode), + tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", consumerId, vgId, pRequest->subKey); return -1; } @@ -728,45 +729,35 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return -1; } - int8_t oldVal = atomic_val_compare_exchange_8(&pHandle->execStatus, 0, 1); - - // other thread has started to re-balance this handle, return empty block for this poll - if (oldVal != 0) { - SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); - int32_t code = tqSendDataRsp(pTq, pMsg, &req, &dataRsp); // here we return an empty block to client - tDeleteSMqDataRsp(&dataRsp); - return code; - } - - // pHandle->execStatus == 1, and we are safe now - // keep the epoch in the first plance, this value may be change by other threads. - int32_t savedEpoch = atomic_load_32(&pHandle->epoch); - - // 2. check rebalance status + // 2. check re-balance status + taosRLockLatch(&pTq->pushLock); if (pHandle->consumerId != consumerId) { tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; - atomic_store_8(&pHandle->execStatus, 0); // reset the flag + taosRUnLockLatch(&pTq->pushLock); return -1; } + taosRUnLockLatch(&pTq->pushLock); + + taosWLockLatch(&pTq->pushLock); + int32_t savedEpoch = pHandle->epoch; // 3. 
update the epoch value - while (savedEpoch < reqEpoch) { - tqDebug("tmq poll: consumer:0x%"PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); - savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); - } +// while (savedEpoch < reqEpoch) { + tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); + pHandle->epoch = reqEpoch; +// savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); +// } + + taosWUnLockLatch(&pTq->pushLock); char buf[80]; tFormatOffset(buf, 80, &reqOffset); tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s", consumerId, - req.epoch, pHandle->subKey, TD_VID(pTq->pVnode), buf); + req.epoch, pHandle->subKey, vgId, buf); - int32_t code = extractDataForMq(pTq, pHandle, &req, pMsg); - - atomic_store_8(&pHandle->execStatus, 0); // restore the flag - return code; + return extractDataForMq(pTq, pHandle, &req, pMsg); } int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { @@ -953,7 +944,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg atomic_add_fetch_32(&pHandle->epoch, 1); taosMemoryFree(req.qmsg); - pHandle->execHandle.stop = true; if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { qStreamCloseTsdbReader(pHandle->execHandle.task); } @@ -962,8 +952,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { return -1; } - - pHandle->execHandle.stop = false; } return 0; diff --git a/source/dnode/vnode/src/tq/tqExec.c b/source/dnode/vnode/src/tq/tqExec.c index 81b68f8473..640b2b57fe 100644 --- a/source/dnode/vnode/src/tq/tqExec.c +++ b/source/dnode/vnode/src/tq/tqExec.c @@ -108,12 +108,6 @@ int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffs break; } } - - if (pExec->stop) { - tqDebug("vgId:%d, current vgroups has been transferred to other consumer, return results asap", - TD_VID(pTq->pVnode)); - break; - } } if (qStreamExtractOffset(task, &pRsp->rspOffset) < 0) { From a4e378e138c07bd9f7bc5e45d008def2f80b9b2a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 09:30:59 +0800 Subject: [PATCH 07/26] refactor: update some log. 
--- source/client/test/clientTests.cpp | 4 ++-- source/dnode/mnode/impl/src/mndSubscribe.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 2f3d600019..82d29b37eb 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -925,7 +925,7 @@ TEST(clientCase, subscription_test) { // 创建订阅 topics 列表 tmq_list_t* topicList = tmq_list_new(); -// tmq_list_append(topicList, "topic_t1"); + tmq_list_append(topicList, "topic_t1"); // 启动订阅 tmq_subscribe(tmq, topicList); @@ -954,7 +954,7 @@ TEST(clientCase, subscription_test) { printf("db: %s\n", dbName); printf("vgroup id: %d\n", vgroupId); - if (count ++ > 20) { + if (count ++ > 200) { tmq_unsubscribe(tmq); break; } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 4d19110f31..924216bcbf 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -224,7 +224,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR .pVgEp = pVgEp, }; taosHashPut(pHash, &pVgEp->vgId, sizeof(int32_t), &outputVg, sizeof(SMqRebOutputVg)); - mInfo("sub:%s mq re-balance remove vgId:%d from consumer:%" PRIx64, sub, pVgEp->vgId, consumerId); + mInfo("sub:%s mq re-balance remove vgId:%d from consumer:0x%" PRIx64, sub, pVgEp->vgId, consumerId); } taosArrayDestroy(pConsumerEp->vgs); taosHashRemove(pOutput->pSub->consumerHash, &consumerId, sizeof(int64_t)); @@ -329,7 +329,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR newConsumerEp.vgs = taosArrayInit(0, sizeof(void *)); taosHashPut(pOutput->pSub->consumerHash, &consumerId, sizeof(int64_t), &newConsumerEp, sizeof(SMqConsumerEp)); taosArrayPush(pOutput->newConsumers, &consumerId); - mInfo("sub:%s mq rebalance add new consumer:%" PRIx64, sub, consumerId); + mInfo("sub:%s mq rebalance add new consumer:0x%" PRIx64, sub, consumerId); } } @@ -357,7 +357,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR taosArrayPush(pConsumerEp->vgs, &pRebVg->pVgEp); pRebVg->newConsumerId = pConsumerEp->consumerId; taosArrayPush(pOutput->rebVgs, pRebVg); - mInfo("mq rebalance: add vgId:%d to consumer:%" PRIx64 " (second scan) (not enough)", pRebVg->pVgEp->vgId, + mInfo("mq rebalance: add vgId:%d to consumer:0x%" PRIx64 " (second scan) (not enough)", pRebVg->pVgEp->vgId, pConsumerEp->consumerId); } } @@ -387,12 +387,12 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR taosArrayPush(pConsumerEp->vgs, &pRebVg->pVgEp); pRebVg->newConsumerId = pConsumerEp->consumerId; if (pRebVg->newConsumerId == pRebVg->oldConsumerId) { - mInfo("mq rebalance: skip vg %d for same consumer:%" PRIx64 " (second scan)", pRebVg->pVgEp->vgId, + mInfo("mq rebalance: skip vg %d for same consumer:0x%" PRIx64 " (second scan)", pRebVg->pVgEp->vgId, pConsumerEp->consumerId); continue; } taosArrayPush(pOutput->rebVgs, pRebVg); - mInfo("mq rebalance: add vgId:%d to consumer:%" PRIx64 " (second scan) (unassigned)", pRebVg->pVgEp->vgId, + mInfo("mq rebalance: add vgId:%d to consumer:0x%" PRIx64 " (second scan) (unassigned)", pRebVg->pVgEp->vgId, pConsumerEp->consumerId); } } else { @@ -1019,7 +1019,7 @@ int32_t mndRetrieveSubscribe(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumerEp->consumerId, 
false); - mDebug("mnd show subscriptions: topic %s, consumer:%" PRIx64 " cgroup %s vgid %d", varDataVal(topic), + mDebug("mnd show subscriptions: topic %s, consumer:0x%" PRIx64 " cgroup %s vgid %d", varDataVal(topic), pConsumerEp->consumerId, varDataVal(cgroup), pVgEp->vgId); // offset From b31b7c3f64a7119b1df307fad0869cf37f04dc37 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 10:27:21 +0800 Subject: [PATCH 08/26] fix(tmq): fix the invalid read. --- source/client/src/clientTmq.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 9f24deff94..6a9f88ce15 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1289,9 +1289,11 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { tDecoderClear(&decoder); memcpy(&pRspWrapper->dataRsp, pMsg->pData, sizeof(SMqRspHead)); - tscDebug("consumer:0x%" PRIx64 " recv poll rsp, vgId:%d, req offset:%" PRId64 ", rsp offset:%" PRId64 " type %d, reqId:0x%"PRIx64, - tmq->consumerId, pVg->vgId, pRspWrapper->dataRsp.reqOffset.version, pRspWrapper->dataRsp.rspOffset.version, - rspType, requestId); + tscDebug("consumer:0x%" PRIx64 " recv poll rsp, vgId:%d, req:%" PRId64 ", rsp:%" PRId64 + " type %d, reqId:0x%" PRIx64, + tmq->consumerId, pParam->vgId, pRspWrapper->dataRsp.reqOffset.version, + pRspWrapper->dataRsp.rspOffset.version, rspType, requestId); + } else if (rspType == TMQ_MSG_TYPE__POLL_META_RSP) { SDecoder decoder; tDecoderInit(&decoder, POINTER_SHIFT(pMsg->pData, sizeof(SMqRspHead)), pMsg->len - sizeof(SMqRspHead)); @@ -1378,6 +1380,7 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { int32_t vgNumGet = taosArrayGetSize(pTopicEp->vgs); topic.vgs = taosArrayInit(vgNumGet, sizeof(SMqClientVg)); + for (int32_t j = 0; j < vgNumGet; j++) { SMqSubVgEp* pVgEp = taosArrayGet(pTopicEp->vgs, j); sprintf(vgKey, "%s:%d", topic.topicName, pVgEp->vgId); From 0d1b2e4b5af07a8c1569e7cda1ee444143f1fd2e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 10:31:53 +0800 Subject: [PATCH 09/26] fix(tmq): use before remove hash item. --- source/dnode/vnode/src/tq/tqPush.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 03bb14e447..c0ed787176 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -383,11 +383,11 @@ int32_t tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t c tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s vgId:%d remove from push mgr, remains:%d", consumerId, (*pEntry)->subKey, vgId, taosHashGetSize(pTq->pPushMgr) - 1); - taosHashRemove(pTq->pPushMgr, pKey, keyLen); - if (rspConsumer) { // rsp the old consumer with empty block. tqPushDataRsp(pTq, *pEntry); } + + taosHashRemove(pTq->pPushMgr, pKey, keyLen); } return 0; From 9ca77572d98ae5fa9c65718434186d20591a6049 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:09:42 +0800 Subject: [PATCH 10/26] fix(tmq): fix race condition. 
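The race addressed here (see the clientTmq.c hunks below): tmqCommitCb() used to keep a raw SMqClientVg* (pMqVg) inside the commit callback parameter, but the client topic list can be rebuilt by the ep-update path while a commit request is still in flight, leaving that pointer dangling. The patch stores only the topic name and vgId in the parameter, adds a tmq->lock mutex, and re-resolves the vgroup when the response arrives. A condensed sketch of that lookup, using the names from the hunks (simplified, not the exact committed code):

// Illustrative condensation of the epset-update path in tmqCommitCb(): resolve the vgroup
// by (topicName, vgId) under tmq->lock instead of dereferencing a cached SMqClientVg*.
static void updateVgEpSet(tmq_t* tmq, const char* topicName, int32_t vgId, const SEpSet* pEpSet) {
  taosThreadMutexLock(&tmq->lock);          // clientTopics may be swapped out concurrently
  int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics);
  for (int32_t i = 0; i < numOfTopics; ++i) {
    SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i);
    if (strcmp(pTopic->topicName, topicName) != 0) {
      continue;
    }
    int32_t numOfVgs = taosArrayGetSize(pTopic->vgs);
    for (int32_t j = 0; j < numOfVgs; ++j) {
      SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j);
      if (pVg->vgId == vgId) {
        pVg->epSet = *pEpSet;               // safe: pVg points into the currently live array
        break;
      }
    }
    break;                                  // topic found, stop searching
  }
  taosThreadMutexUnlock(&tmq->lock);
}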
--- source/client/src/clientTmq.c | 197 ++++++++++++++++++++-------------- 1 file changed, 114 insertions(+), 83 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 6a9f88ce15..3748142b0d 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -106,17 +106,13 @@ struct tmq_t { tmr_h reportTimer; tmr_h commitTimer; - // connection - STscObj* pTscObj; - - // container - SArray* clientTopics; // SArray - STaosQueue* mqueue; // queue of rsp - STaosQall* qall; - STaosQueue* delayedTask; // delayed task queue for heartbeat and auto commit - - // ctl - tsem_t rspSem; + STscObj* pTscObj; // connection + SArray* clientTopics; // SArray + STaosQueue* mqueue; // queue of rsp + STaosQall* qall; + STaosQueue* delayedTask; // delayed task queue for heartbeat and auto commit + TdThreadMutex lock; // used to protect the operation on each topic, when updating the epsets. + tsem_t rspSem; }; enum { @@ -213,9 +209,9 @@ typedef struct { typedef struct { SMqCommitCbParamSet* params; STqOffset* pOffset; - SMqClientVg* pMqVg; - /*char topicName[TSDB_TOPIC_FNAME_LEN];*/ - /*int32_t vgId;*/ + char topicName[TSDB_TOPIC_FNAME_LEN]; + int32_t vgId; + tmq_t* pTmq; } SMqCommitCbParam; static int32_t tmqAskEp(tmq_t* tmq, bool async); @@ -431,6 +427,7 @@ static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet) { int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam* pParam = (SMqCommitCbParam*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; + // push into array #if 0 if (code == 0) { @@ -440,15 +437,34 @@ int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { } #endif - // there may be race condition. fix it - if (pBuf->pEpSet != NULL && pParam->pMqVg != NULL) { - SMqClientVg* pMqVg = pParam->pMqVg; + if (pBuf->pEpSet != NULL) { + // todo extract method + taosThreadMutexLock(&pParam->pTmq->lock); - SEp* pEp = GET_ACTIVE_EP(pBuf->pEpSet); - SEp* pOld = GET_ACTIVE_EP(&(pMqVg->epSet)); - uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pMqVg->vgId, - pEp->fqdn, pEp->port, pOld->fqdn, pOld->port); - pParam->pMqVg->epSet = *pBuf->pEpSet; + int32_t numOfTopics = taosArrayGetSize(pParam->pTmq->clientTopics); + for(int32_t i = 0; i < numOfTopics; ++i) { + SMqClientTopic* pTopic = taosArrayGet(pParam->pTmq->clientTopics, i); + if (strcmp(pTopic->topicName, pParam->topicName) != 0) { + continue; + } + + int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); + for(int32_t j = 0; j < numOfVgs; ++j) { + SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); + if (pClientVg->vgId == pParam->vgId) { + SEp* pEp = GET_ACTIVE_EP(pBuf->pEpSet); + SEp* pOld = GET_ACTIVE_EP(&(pClientVg->epSet)); + uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pParam->vgId, + pEp->fqdn, pEp->port, pOld->fqdn, pOld->port); + pClientVg->epSet = *pBuf->pEpSet; + break; + } + } + + break; + } + + taosThreadMutexUnlock(&pParam->pTmq->lock); } taosMemoryFree(pParam->pOffset); @@ -508,7 +524,10 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT pParam->params = pParamSet; pParam->pOffset = pOffset; - pParam->pMqVg = pVg; // there may be an race condition + pParam->vgId = pVg->vgId; + pParam->pTmq = tmq; + + tstrncpy(pParam->topicName, pTopic->topicName, tListLen(pParam->topicName)); // build send info SMsgSendInfo* pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); @@ -659,6 +678,7 @@ static 
int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, for (int32_t i = 0; i < numOfTopics; i++) { SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); + // todo race condition: fix it int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); for (int32_t j = 0; j < numOfVgroups; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); @@ -944,10 +964,14 @@ void tmqFreeImpl(void* handle) { tmqClearUnhandleMsg(tmq); taosCloseQueue(tmq->mqueue); } - if (tmq->delayedTask) taosCloseQueue(tmq->delayedTask); - taosFreeQall(tmq->qall); + if (tmq->delayedTask) { + taosCloseQueue(tmq->delayedTask); + } + + taosFreeQall(tmq->qall); tsem_destroy(&tmq->rspSem); + taosThreadMutexDestroy(&tmq->lock); int32_t sz = taosArrayGetSize(tmq->clientTopics); for (int32_t i = 0; i < sz; i++) { @@ -955,6 +979,7 @@ void tmqFreeImpl(void* handle) { taosMemoryFreeClear(pTopic->schema.pSchema); taosArrayDestroy(pTopic->vgs); } + taosArrayDestroy(tmq->clientTopics); taos_close_internal(tmq->pTscObj); taosMemoryFree(tmq); @@ -993,9 +1018,10 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { pTmq->clientTopics = taosArrayInit(0, sizeof(SMqClientTopic)); pTmq->mqueue = taosOpenQueue(); - pTmq->qall = taosAllocateQall(); pTmq->delayedTask = taosOpenQueue(); + pTmq->qall = taosAllocateQall(); + taosThreadMutexInit(&pTmq->lock, NULL); if (pTmq->clientTopics == NULL || pTmq->mqueue == NULL || pTmq->qall == NULL || pTmq->delayedTask == NULL || conf->groupId[0] == 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1193,7 +1219,6 @@ FAIL: } void tmq_conf_set_auto_commit_cb(tmq_conf_t* conf, tmq_commit_cb* cb, void* param) { - // conf->commitCb = cb; conf->commitCbUserParam = param; } @@ -1328,7 +1353,53 @@ CREATE_MSG_FAIL: return -1; } -bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { +static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopicEp, SHashObj* pVgOffsetHashMap, + tmq_t* tmq) { + pTopic->schema = pTopicEp->schema; + pTopicEp->schema.nCols = 0; + pTopicEp->schema.pSchema = NULL; + + char vgKey[TSDB_TOPIC_FNAME_LEN + 22]; + int32_t vgNumGet = taosArrayGetSize(pTopicEp->vgs); + + tstrncpy(pTopic->topicName, pTopicEp->topic, TSDB_TOPIC_FNAME_LEN); + tstrncpy(pTopic->db, pTopicEp->db, TSDB_DB_FNAME_LEN); + + tscDebug("consumer:0x%" PRIx64 ", update topic:%s, numOfVgs:%d", tmq->consumerId, pTopic->topicName, vgNumGet); + pTopic->vgs = taosArrayInit(vgNumGet, sizeof(SMqClientVg)); + + for (int32_t j = 0; j < vgNumGet; j++) { + SMqSubVgEp* pVgEp = taosArrayGet(pTopicEp->vgs, j); + sprintf(vgKey, "%s:%d", pTopic->topicName, pVgEp->vgId); + STqOffsetVal* pOffset = taosHashGet(pVgOffsetHashMap, vgKey, strlen(vgKey)); + STqOffsetVal offsetNew = {.type = tmq->resetOffsetCfg}; + if (pOffset != NULL) { + offsetNew = *pOffset; + } + + SMqClientVg clientVg = { + .pollCnt = 0, + .currentOffset = offsetNew, + .vgId = pVgEp->vgId, + .epSet = pVgEp->epSet, + .vgStatus = TMQ_VG_STATUS__IDLE, + .vgSkipCnt = 0, + }; + + taosArrayPush(pTopic->vgs, &clientVg); + } +} + +static void freeClientVgInfo(void* param) { + SMqClientTopic* pTopic = param; + if (pTopic->schema.nCols) { + taosMemoryFreeClear(pTopic->schema.pSchema); + } + + taosArrayDestroy(pTopic->vgs); +} + +static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { bool set = false; int32_t topicNumCur = taosArrayGetSize(tmq->clientTopics); @@ -1343,12 +1414,13 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { return false; } - SHashObj* pHash = 
taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK); - if (pHash == NULL) { + SHashObj* pVgOffsetHashMap = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK); + if (pVgOffsetHashMap == NULL) { taosArrayDestroy(newTopics); return false; } + // todo extract method for (int32_t i = 0; i < topicNumCur; i++) { // find old topic SMqClientTopic* pTopicCur = taosArrayGet(tmq->clientTopics, i); @@ -1362,7 +1434,7 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { tFormatOffset(buf, 80, &pVgCur->currentOffset); tscDebug("consumer:0x%" PRIx64 ", epoch:%d vgId:%d vgKey:%s, offset:%s", tmq->consumerId, epoch, pVgCur->vgId, vgKey, buf); - taosHashPut(pHash, vgKey, strlen(vgKey), &pVgCur->currentOffset, sizeof(STqOffsetVal)); + taosHashPut(pVgOffsetHashMap, vgKey, strlen(vgKey), &pVgCur->currentOffset, sizeof(STqOffsetVal)); } } } @@ -1370,62 +1442,25 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { for (int32_t i = 0; i < topicNumGet; i++) { SMqClientTopic topic = {0}; SMqSubTopicEp* pTopicEp = taosArrayGet(pRsp->topics, i); - topic.schema = pTopicEp->schema; - pTopicEp->schema.nCols = 0; - pTopicEp->schema.pSchema = NULL; - tstrncpy(topic.topicName, pTopicEp->topic, TSDB_TOPIC_FNAME_LEN); - tstrncpy(topic.db, pTopicEp->db, TSDB_DB_FNAME_LEN); - - tscDebug("consumer:0x%" PRIx64 ", update topic: %s", tmq->consumerId, topic.topicName); - - int32_t vgNumGet = taosArrayGetSize(pTopicEp->vgs); - topic.vgs = taosArrayInit(vgNumGet, sizeof(SMqClientVg)); - - for (int32_t j = 0; j < vgNumGet; j++) { - SMqSubVgEp* pVgEp = taosArrayGet(pTopicEp->vgs, j); - sprintf(vgKey, "%s:%d", topic.topicName, pVgEp->vgId); - STqOffsetVal* pOffset = taosHashGet(pHash, vgKey, strlen(vgKey)); - STqOffsetVal offsetNew = {.type = tmq->resetOffsetCfg}; - if (pOffset != NULL) { - offsetNew = *pOffset; - } - - SMqClientVg clientVg = { - .pollCnt = 0, - .currentOffset = offsetNew, - .vgId = pVgEp->vgId, - .epSet = pVgEp->epSet, - .vgStatus = TMQ_VG_STATUS__IDLE, - .vgSkipCnt = 0, - }; - taosArrayPush(topic.vgs, &clientVg); - set = true; - } + initClientTopicFromRsp(&topic, pTopicEp, pVgOffsetHashMap, tmq); taosArrayPush(newTopics, &topic); } // destroy current buffered existed topics info if (tmq->clientTopics) { - int32_t sz = taosArrayGetSize(tmq->clientTopics); - for (int32_t i = 0; i < sz; i++) { - SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); - if (pTopic->schema.nCols) taosMemoryFreeClear(pTopic->schema.pSchema); - taosArrayDestroy(pTopic->vgs); - } - - taosArrayDestroy(tmq->clientTopics); + taosArrayDestroyEx(tmq->clientTopics, freeClientVgInfo); } - taosHashCleanup(pHash); + taosHashCleanup(pVgOffsetHashMap); + + taosThreadMutexLock(&tmq->lock); tmq->clientTopics = newTopics; + taosThreadMutexUnlock(&tmq->lock); - if (taosArrayGetSize(tmq->clientTopics) == 0) { - atomic_store_8(&tmq->status, TMQ_CONSUMER_STATUS__NO_TOPIC); - } else { - atomic_store_8(&tmq->status, TMQ_CONSUMER_STATUS__READY); - } - + int8_t flag = (topicNumGet == 0)? 
TMQ_CONSUMER_STATUS__NO_TOPIC:TMQ_CONSUMER_STATUS__READY; + atomic_store_8(&tmq->status, flag); atomic_store_32(&tmq->epoch, epoch); + tscDebug("consumer:0x%" PRIx64 " update topic info completed", tmq->consumerId); return set; } @@ -1448,7 +1483,7 @@ int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { } pParam->code = code; - if (code != 0) { + if (code != TSDB_CODE_SUCCESS) { tscError("consumer:0x%" PRIx64 ", get topic endpoint error, async:%d, code:%s", tmq->consumerId, pParam->async, tstrerror(code)); goto END; @@ -1471,8 +1506,6 @@ int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { if (!async) { SMqAskEpRsp rsp; tDecodeSMqAskEpRsp(POINTER_SHIFT(pMsg->pData, sizeof(SMqRspHead)), &rsp); - /*printf("rsp epoch %" PRId64 " sz %" PRId64 "\n", rsp.epoch, rsp.topics->size);*/ - /*printf("tmq epoch %" PRId64 " sz %" PRId64 "\n", tmq->epoch, tmq->clientTopics->size);*/ tmqUpdateEp(tmq, head->epoch, &rsp); tDeleteSMqAskEpRsp(&rsp); } else { @@ -1493,7 +1526,6 @@ int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { } END: - /*atomic_store_8(&tmq->epStatus, 0);*/ if (!async) { tsem_post(&pParam->rspSem); } else { @@ -1545,7 +1577,6 @@ int32_t tmqAskEp(tmq_t* tmq, bool async) { if (pParam == NULL) { tscError("consumer:0x%" PRIx64 ", failed to malloc subscribe param", tmq->consumerId); taosMemoryFree(pReq); - /*atomic_store_8(&tmq->epStatus, 0);*/ return -1; } @@ -1559,7 +1590,6 @@ int32_t tmqAskEp(tmq_t* tmq, bool async) { tsem_destroy(&pParam->rspSem); taosMemoryFree(pParam); taosMemoryFree(pReq); - /*atomic_store_8(&tmq->epStatus, 0);*/ return -1; } @@ -1995,6 +2025,7 @@ int32_t tmq_consumer_close(tmq_t* tmq) { tmq_list_destroy(lst); } + taosRemoveRef(tmqMgmt.rsetId, tmq->refId); return 0; } From 3513de5ab46a01961054b291575296337cbf33a4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:11:58 +0800 Subject: [PATCH 11/26] fix(tmq): fix race condition. --- source/client/src/clientTmq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 3748142b0d..360e1eb569 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1446,15 +1446,16 @@ static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { taosArrayPush(newTopics, &topic); } + taosHashCleanup(pVgOffsetHashMap); + + taosThreadMutexLock(&tmq->lock); // destroy current buffered existed topics info if (tmq->clientTopics) { taosArrayDestroyEx(tmq->clientTopics, freeClientVgInfo); } - taosHashCleanup(pVgOffsetHashMap); - - taosThreadMutexLock(&tmq->lock); tmq->clientTopics = newTopics; + taosThreadMutexUnlock(&tmq->lock); int8_t flag = (topicNumGet == 0)? TMQ_CONSUMER_STATUS__NO_TOPIC:TMQ_CONSUMER_STATUS__READY; From 1f71cf3a8bc7d735f68a050e15637cc3b9e15450 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:18:55 +0800 Subject: [PATCH 12/26] fix(tmq): fix invalid read. 
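The invalid read: the poll-response debug log in tmqPollCb read pParam->vgId at a point where the callback parameter is no longer guaranteed to be valid, so the log now uses a plain vgId value instead. The sketch below assumes vgId is copied into a local while pParam is still valid (that copy is not visible in this hunk):

    int32_t vgId = pParam->vgId;                  // assumed: taken while pParam is still valid
    /* ... pParam/wrapper may be handed off or freed here ... */
    tscDebug("recv poll rsp, vgId:%d", vgId);     // log from the local copy, not from pParam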
--- source/client/src/clientTmq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 360e1eb569..54cacfe156 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1316,8 +1316,7 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { tscDebug("consumer:0x%" PRIx64 " recv poll rsp, vgId:%d, req:%" PRId64 ", rsp:%" PRId64 " type %d, reqId:0x%" PRIx64, - tmq->consumerId, pParam->vgId, pRspWrapper->dataRsp.reqOffset.version, - pRspWrapper->dataRsp.rspOffset.version, rspType, requestId); + tmq->consumerId, vgId, pRspWrapper->dataRsp.reqOffset.version, pRspWrapper->dataRsp.rspOffset.version, rspType, requestId); } else if (rspType == TMQ_MSG_TYPE__POLL_META_RSP) { SDecoder decoder; From d8807d3d5a112d64549c8c2b4b17685946d503e5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:23:16 +0800 Subject: [PATCH 13/26] fix(tmq): fix memory leak. --- source/dnode/vnode/src/tq/tq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 8d55a7bea1..a8ed470f50 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -830,7 +830,6 @@ int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SMqRebVgReq req = {0}; tDecodeSMqRebVgReq(msg, &req); - // todo lock tqDebug("vgId:%d, tq process sub req %s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey, req.oldConsumerId, req.newConsumerId); @@ -863,8 +862,10 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg // TODO version should be assigned and refed during preprocess SWalRef* pRef = walRefCommittedVer(pTq->pVnode->pWal); if (pRef == NULL) { + taosMemoryFree(req.qmsg); return -1; } + int64_t ver = pRef->refVer; pHandle->pRef = pRef; @@ -921,6 +922,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqDebug("try to persist handle %s consumer:0x%" PRIx64 " , old consumer:0x%" PRIx64, req.subKey, pHandle->consumerId, oldConsumerId); if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { + taosMemoryFree(req.qmsg); return -1; } } else { @@ -928,6 +930,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); atomic_store_32(&pHandle->epoch, -1); atomic_add_fetch_32(&pHandle->epoch, 1); + taosMemoryFree(req.qmsg); return tqMetaSaveHandle(pTq, req.subKey, pHandle); } @@ -950,10 +953,12 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg taosWUnLockLatch(&pTq->pushLock); if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { + taosMemoryFree(req.qmsg); return -1; } } + taosMemoryFree(req.qmsg); return 0; } From 05ddf68247e6fc058cc0c2a6e5a1d8907175def8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:27:34 +0800 Subject: [PATCH 14/26] fix(tmq): fix invalid free. 
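The invalid free: the previous patch added taosMemoryFree(req.qmsg) on the early-return paths of tqProcessSubscribeReq, but the call removed here sat on a path that falls through to the end of the function, where req.qmsg is freed again before the final return, i.e. a double free. Shape of the bug (sketch, not the exact code):

    atomic_add_fetch_32(&pHandle->epoch, 1);
    taosMemoryFree(req.qmsg);      // removed by this patch: the path keeps going ...
    /* ... more work on the same path ... */
    taosMemoryFree(req.qmsg);      // ... and ends at this free -> invalid (double) free
    return 0;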
--- source/dnode/vnode/src/tq/tq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index a8ed470f50..883816576d 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -945,7 +945,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); - taosMemoryFree(req.qmsg); if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { qStreamCloseTsdbReader(pHandle->execHandle.task); From 9d680a0995eecb35a39ae14a7b44ddebd057519c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 09:09:03 +0800 Subject: [PATCH 15/26] fix(tmq): fix race condition. --- source/client/src/clientTmq.c | 57 +++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 54cacfe156..825f7ef182 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -147,7 +147,6 @@ typedef struct { } SMqClientVg; typedef struct { - // subscribe info char topicName[TSDB_TOPIC_FNAME_LEN]; char db[TSDB_DB_FNAME_LEN]; SArray* vgs; // SArray @@ -381,7 +380,7 @@ char** tmq_list_to_c_array(const tmq_list_t* list) { return container->pData; } -static int32_t tmqMakeTopicVgKey(char* dst, const char* topicName, int32_t vg) { +static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg) { return sprintf(dst, "%s:%d", topicName, vg); } @@ -424,7 +423,7 @@ static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet) { } } -int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { +static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam* pParam = (SMqCommitCbParam*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; @@ -478,7 +477,7 @@ int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { return 0; } -static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pTopic, SMqCommitCbParamSet* pParamSet) { +static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet) { STqOffset* pOffset = taosMemoryCalloc(1, sizeof(STqOffset)); if (pOffset == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -490,7 +489,7 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT int32_t groupLen = strlen(tmq->groupId); memcpy(pOffset->subKey, tmq->groupId, groupLen); pOffset->subKey[groupLen] = TMQ_SEPARATOR; - strcpy(pOffset->subKey + groupLen + 1, pTopic->topicName); + strcpy(pOffset->subKey + groupLen + 1, pTopicName); int32_t len; int32_t code; @@ -527,7 +526,7 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT pParam->vgId = pVg->vgId; pParam->pTmq = tmq; - tstrncpy(pParam->topicName, pTopic->topicName, tListLen(pParam->topicName)); + tstrncpy(pParam->topicName, pTopicName, tListLen(pParam->topicName)); // build send info SMsgSendInfo* pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); @@ -544,7 +543,7 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT .handle = NULL, }; - SEp* pEp = &pVg->epSet.eps[pVg->epSet.inUse]; + SEp* pEp = GET_ACTIVE_EP(&pVg->epSet); tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d offset:%" PRId64 " prev:%" PRId64 ", ep:%s:%d", tmq->consumerId, pOffset->subKey, pVg->vgId, pOffset->val.version, 
pVg->committedOffset.version, pEp->fqdn, pEp->port); @@ -567,7 +566,7 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT return 0; } -int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, tmq_commit_cb* userCb, void* userParam) { +static int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, tmq_commit_cb* userCb, void* userParam) { char* topic; int32_t vgId; if (TD_RES_TMQ(msg)) { @@ -591,6 +590,7 @@ int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, tmq_comm terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } + pParamSet->refId = tmq->refId; pParamSet->epoch = tmq->epoch; pParamSet->automatic = 0; @@ -601,15 +601,18 @@ int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, tmq_comm int32_t code = -1; + taosThreadMutexLock(&tmq->lock); for (int32_t i = 0; i < taosArrayGetSize(tmq->clientTopics); i++) { SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); if (strcmp(pTopic->topicName, topic) != 0) continue; for (int32_t j = 0; j < taosArrayGetSize(pTopic->vgs); j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); - if (pVg->vgId != vgId) continue; + if (pVg->vgId != vgId) { + continue; + } if (pVg->currentOffset.type > 0 && !tOffsetEqual(&pVg->currentOffset, &pVg->committedOffset)) { - if (tmqSendCommitReq(tmq, pVg, pTopic, pParamSet) < 0) { + if (tmqSendCommitReq(tmq, pVg, pTopic->topicName, pParamSet) < 0) { tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); goto FAIL; @@ -623,6 +626,7 @@ HANDLE_RSP: if (pParamSet->totalRspNum == 0) { tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); + taosThreadMutexUnlock(&tmq->lock); return 0; } @@ -631,15 +635,18 @@ HANDLE_RSP: code = pParamSet->rspErr; tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); + taosThreadMutexUnlock(&tmq->lock); return code; } else { code = 0; } FAIL: + taosThreadMutexUnlock(&tmq->lock); if (code != 0 && async) { userCb(tmq, code, userParam); } + return 0; } @@ -672,7 +679,9 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, // init as 1 to prevent concurrency issue pParamSet->waitingRspNum = 1; + taosThreadMutexLock(&tmq->lock); int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics); + tscDebug("consumer:0x%" PRIx64 " start to commit offset for %d topics", tmq->consumerId, numOfTopics); for (int32_t i = 0; i < numOfTopics; i++) { @@ -681,18 +690,20 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, // todo race condition: fix it int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); for (int32_t j = 0; j < numOfVgroups; j++) { - SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); - if (pVg->currentOffset.type > 0 && !tOffsetEqual(&pVg->currentOffset, &pVg->committedOffset)) { - if (tmqSendCommitReq(tmq, pVg, pTopic, pParamSet) < 0) { + SMqClientVg clientVg = *(SMqClientVg*)taosArrayGet(pTopic->vgs, j); + if (clientVg.currentOffset.type > 0 && !tOffsetEqual(&clientVg.currentOffset, &clientVg.committedOffset)) { + if (tmqSendCommitReq(tmq, &clientVg, pTopic->topicName, pParamSet) < 0) { continue; } } else { tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d, not commit, current:%" PRId64 ", ordinal:%d/%d", - tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->currentOffset.version, j + 1, numOfVgroups); + tmq->consumerId, pTopic->topicName, clientVg.vgId, clientVg.currentOffset.version, j + 1, numOfVgroups); } } } + taosThreadMutexUnlock(&tmq->lock); + // no request is sent if (pParamSet->totalRspNum == 0) { 
tsem_destroy(&pParamSet->rspSem); @@ -1369,8 +1380,10 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic for (int32_t j = 0; j < vgNumGet; j++) { SMqSubVgEp* pVgEp = taosArrayGet(pTopicEp->vgs, j); - sprintf(vgKey, "%s:%d", pTopic->topicName, pVgEp->vgId); + + makeTopicVgroupKey(vgKey, pTopic->topicName, pVgEp->vgId); STqOffsetVal* pOffset = taosHashGet(pVgOffsetHashMap, vgKey, strlen(vgKey)); + STqOffsetVal offsetNew = {.type = tmq->resetOffsetCfg}; if (pOffset != NULL) { offsetNew = *pOffset; @@ -1428,7 +1441,8 @@ static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { tscDebug("consumer:0x%" PRIx64 ", new vg num: %d", tmq->consumerId, vgNumCur); for (int32_t j = 0; j < vgNumCur; j++) { SMqClientVg* pVgCur = taosArrayGet(pTopicCur->vgs, j); - sprintf(vgKey, "%s:%d", pTopicCur->topicName, pVgCur->vgId); + makeTopicVgroupKey(vgKey, pTopicCur->topicName, pVgCur->vgId); + char buf[80]; tFormatOffset(buf, 80, &pVgCur->currentOffset); tscDebug("consumer:0x%" PRIx64 ", epoch:%d vgId:%d vgKey:%s, offset:%s", tmq->consumerId, epoch, @@ -1454,7 +1468,6 @@ static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { } tmq->clientTopics = newTopics; - taosThreadMutexUnlock(&tmq->lock); int8_t flag = (topicNumGet == 0)? TMQ_CONSUMER_STATUS__NO_TOPIC:TMQ_CONSUMER_STATUS__READY; @@ -1465,7 +1478,7 @@ static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { return set; } -int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { +static int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { SMqAskEpCbParam* pParam = (SMqAskEpCbParam*)param; int8_t async = pParam->async; tmq_t* tmq = taosAcquireRef(tmqMgmt.rsetId, pParam->refId); @@ -1757,7 +1770,7 @@ static int32_t doTmqPollImpl(tmq_t* pTmq, SMqClientTopic* pTopic, SMqClientVg* p } // broadcast the poll request to all related vnodes -int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { +static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics); tscDebug("consumer:0x%" PRIx64 " start to poll data, numOfTopics:%d", tmq->consumerId, numOfTopics); @@ -1793,7 +1806,7 @@ int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { return 0; } -int32_t tmqHandleNoPollRsp(tmq_t* tmq, SMqRspWrapper* rspWrapper, bool* pReset) { +static int32_t tmqHandleNoPollRsp(tmq_t* tmq, SMqRspWrapper* rspWrapper, bool* pReset) { if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__EP_RSP) { /*printf("ep %d %d\n", rspMsg->head.epoch, tmq->epoch);*/ if (rspWrapper->epoch > atomic_load_32(&tmq->epoch)) { @@ -1813,7 +1826,7 @@ int32_t tmqHandleNoPollRsp(tmq_t* tmq, SMqRspWrapper* rspWrapper, bool* pReset) return 0; } -void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { +static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { tscDebug("consumer:0x%" PRIx64 " start to handle the rsp, total:%d", tmq->consumerId, tmq->qall->numOfItems); while (1) { @@ -2117,11 +2130,9 @@ const char* tmq_get_table_name(TAOS_RES* res) { } void tmq_commit_async(tmq_t* tmq, const TAOS_RES* msg, tmq_commit_cb* cb, void* param) { - // tmqCommitInner(tmq, msg, 0, 1, cb, param); } int32_t tmq_commit_sync(tmq_t* tmq, const TAOS_RES* msg) { - // return tmqCommitInner(tmq, msg, 0, 0, NULL, NULL); } From 1e01f902833515ef2f3ca3f027ee0b216774efe4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 09:13:26 +0800 Subject: [PATCH 16/26] refactor(tmq): do some internal refactor. 
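Code motion only: tmqAskEp, makeTopicVgroupKey and tmqCommitDone are moved to the end of clientTmq.c, tmqCommitInner becomes static, and forward declarations are added next to the existing one for tmqAskEp so every call site keeps compiling unchanged:

    static int32_t tmqAskEp(tmq_t* tmq, bool async);                                  // already present
    static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg);  // added
    static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet);                     // added

No behavior change is intended by this patch.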
--- source/client/src/clientTmq.c | 246 +++++++++++++++++----------------- 1 file changed, 124 insertions(+), 122 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 825f7ef182..ba223984e3 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -214,6 +214,8 @@ typedef struct { } SMqCommitCbParam; static int32_t tmqAskEp(tmq_t* tmq, bool async); +static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg); +static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet); tmq_conf_t* tmq_conf_new() { tmq_conf_t* conf = taosMemoryCalloc(1, sizeof(tmq_conf_t)); @@ -380,42 +382,6 @@ char** tmq_list_to_c_array(const tmq_list_t* list) { return container->pData; } -static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg) { - return sprintf(dst, "%s:%d", topicName, vg); -} - -int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { - tmq_t* tmq = taosAcquireRef(tmqMgmt.rsetId, pParamSet->refId); - if (tmq == NULL) { - if (!pParamSet->async) { - tsem_destroy(&pParamSet->rspSem); - } - taosMemoryFree(pParamSet); - terrno = TSDB_CODE_TMQ_CONSUMER_CLOSED; - return -1; - } - - // if no more waiting rsp - if (pParamSet->async) { - // call async cb func - if (pParamSet->automatic && tmq->commitCb) { - tmq->commitCb(tmq, pParamSet->rspErr, tmq->commitCbUserParam); - } else if (!pParamSet->automatic && pParamSet->userCb) { - // sem post - pParamSet->userCb(tmq, pParamSet->rspErr, pParamSet->userParam); - } - taosMemoryFree(pParamSet); - } else { - tsem_post(&pParamSet->rspSem); - } - -#if 0 - taosArrayDestroyP(pParamSet->successfulOffsets, taosMemoryFree); - taosArrayDestroyP(pParamSet->failedOffsets, taosMemoryFree); -#endif - return 0; -} - static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet) { int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); if (waitingRspNum == 0) { @@ -728,8 +694,8 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, return code; } -int32_t tmqCommitInner(tmq_t* tmq, const TAOS_RES* msg, int8_t automatic, int8_t async, tmq_commit_cb* userCb, - void* userParam) { +static int32_t tmqCommitInner(tmq_t* tmq, const TAOS_RES* msg, int8_t automatic, int8_t async, tmq_commit_cb* userCb, + void* userParam) { if (msg) { return tmqCommitMsgImpl(tmq, msg, async, userCb, userParam); } else { @@ -1550,90 +1516,6 @@ END: return code; } -int32_t tmqAskEp(tmq_t* tmq, bool async) { - int32_t code = TSDB_CODE_SUCCESS; -#if 0 - int8_t epStatus = atomic_val_compare_exchange_8(&tmq->epStatus, 0, 1); - if (epStatus == 1) { - int32_t epSkipCnt = atomic_add_fetch_32(&tmq->epSkipCnt, 1); - tscTrace("consumer:0x%" PRIx64 ", skip ask ep cnt %d", tmq->consumerId, epSkipCnt); - if (epSkipCnt < 5000) return 0; - } - atomic_store_32(&tmq->epSkipCnt, 0); -#endif - - SMqAskEpReq req = {0}; - req.consumerId = tmq->consumerId; - req.epoch = tmq->epoch; - strcpy(req.cgroup, tmq->groupId); - - int32_t tlen = tSerializeSMqAskEpReq(NULL, 0, &req); - if (tlen < 0) { - tscError("consumer:0x%" PRIx64 ", tSerializeSMqAskEpReq failed", tmq->consumerId); - return -1; - } - - void* pReq = taosMemoryCalloc(1, tlen); - if (pReq == NULL) { - tscError("consumer:0x%" PRIx64 ", failed to malloc askEpReq msg, size:%d", tmq->consumerId, tlen); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - if (tSerializeSMqAskEpReq(pReq, tlen, &req) < 0) { - tscError("consumer:0x%" PRIx64 ", tSerializeSMqAskEpReq %d failed", tmq->consumerId, tlen); - taosMemoryFree(pReq); 
- return -1; - } - - SMqAskEpCbParam* pParam = taosMemoryCalloc(1, sizeof(SMqAskEpCbParam)); - if (pParam == NULL) { - tscError("consumer:0x%" PRIx64 ", failed to malloc subscribe param", tmq->consumerId); - taosMemoryFree(pReq); - return -1; - } - - pParam->refId = tmq->refId; - pParam->epoch = tmq->epoch; - pParam->async = async; - tsem_init(&pParam->rspSem, 0, 0); - - SMsgSendInfo* sendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); - if (sendInfo == NULL) { - tsem_destroy(&pParam->rspSem); - taosMemoryFree(pParam); - taosMemoryFree(pReq); - return -1; - } - - sendInfo->msgInfo = (SDataBuf){ - .pData = pReq, - .len = tlen, - .handle = NULL, - }; - - sendInfo->requestId = generateRequestId(); - sendInfo->requestObjRefId = 0; - sendInfo->param = pParam; - sendInfo->fp = tmqAskEpCb; - sendInfo->msgType = TDMT_MND_TMQ_ASK_EP; - - SEpSet epSet = getEpSet_s(&tmq->pTscObj->pAppInfo->mgmtEp); - tscDebug("consumer:0x%" PRIx64 " ask ep from mnode, async:%d, reqId:0x%" PRIx64, tmq->consumerId, async, - sendInfo->requestId); - - int64_t transporterId = 0; - asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &epSet, &transporterId, sendInfo); - - if (!async) { - tsem_wait(&pParam->rspSem); - code = pParam->code; - taosMemoryFree(pParam); - } - - return code; -} - void tmqBuildConsumeReqImpl(SMqPollReq* pReq, tmq_t* tmq, int64_t timeout, SMqClientTopic* pTopic, SMqClientVg* pVg) { int32_t groupLen = strlen(tmq->groupId); memcpy(pReq->subKey, tmq->groupId, groupLen); @@ -2136,3 +2018,123 @@ void tmq_commit_async(tmq_t* tmq, const TAOS_RES* msg, tmq_commit_cb* cb, void* int32_t tmq_commit_sync(tmq_t* tmq, const TAOS_RES* msg) { return tmqCommitInner(tmq, msg, 0, 0, NULL, NULL); } + +int32_t tmqAskEp(tmq_t* tmq, bool async) { + int32_t code = TSDB_CODE_SUCCESS; +#if 0 + int8_t epStatus = atomic_val_compare_exchange_8(&tmq->epStatus, 0, 1); + if (epStatus == 1) { + int32_t epSkipCnt = atomic_add_fetch_32(&tmq->epSkipCnt, 1); + tscTrace("consumer:0x%" PRIx64 ", skip ask ep cnt %d", tmq->consumerId, epSkipCnt); + if (epSkipCnt < 5000) return 0; + } + atomic_store_32(&tmq->epSkipCnt, 0); +#endif + + SMqAskEpReq req = {0}; + req.consumerId = tmq->consumerId; + req.epoch = tmq->epoch; + strcpy(req.cgroup, tmq->groupId); + + int32_t tlen = tSerializeSMqAskEpReq(NULL, 0, &req); + if (tlen < 0) { + tscError("consumer:0x%" PRIx64 ", tSerializeSMqAskEpReq failed", tmq->consumerId); + return -1; + } + + void* pReq = taosMemoryCalloc(1, tlen); + if (pReq == NULL) { + tscError("consumer:0x%" PRIx64 ", failed to malloc askEpReq msg, size:%d", tmq->consumerId, tlen); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + if (tSerializeSMqAskEpReq(pReq, tlen, &req) < 0) { + tscError("consumer:0x%" PRIx64 ", tSerializeSMqAskEpReq %d failed", tmq->consumerId, tlen); + taosMemoryFree(pReq); + return -1; + } + + SMqAskEpCbParam* pParam = taosMemoryCalloc(1, sizeof(SMqAskEpCbParam)); + if (pParam == NULL) { + tscError("consumer:0x%" PRIx64 ", failed to malloc subscribe param", tmq->consumerId); + taosMemoryFree(pReq); + return -1; + } + + pParam->refId = tmq->refId; + pParam->epoch = tmq->epoch; + pParam->async = async; + tsem_init(&pParam->rspSem, 0, 0); + + SMsgSendInfo* sendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); + if (sendInfo == NULL) { + tsem_destroy(&pParam->rspSem); + taosMemoryFree(pParam); + taosMemoryFree(pReq); + return -1; + } + + sendInfo->msgInfo = (SDataBuf){ + .pData = pReq, + .len = tlen, + .handle = NULL, + }; + + sendInfo->requestId = generateRequestId(); + sendInfo->requestObjRefId 
= 0; + sendInfo->param = pParam; + sendInfo->fp = tmqAskEpCb; + sendInfo->msgType = TDMT_MND_TMQ_ASK_EP; + + SEpSet epSet = getEpSet_s(&tmq->pTscObj->pAppInfo->mgmtEp); + tscDebug("consumer:0x%" PRIx64 " ask ep from mnode, async:%d, reqId:0x%" PRIx64, tmq->consumerId, async, + sendInfo->requestId); + + int64_t transporterId = 0; + asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &epSet, &transporterId, sendInfo); + + if (!async) { + tsem_wait(&pParam->rspSem); + code = pParam->code; + taosMemoryFree(pParam); + } + + return code; +} + +int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg) { + return sprintf(dst, "%s:%d", topicName, vg); +} + +int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { + tmq_t* tmq = taosAcquireRef(tmqMgmt.rsetId, pParamSet->refId); + if (tmq == NULL) { + if (!pParamSet->async) { + tsem_destroy(&pParamSet->rspSem); + } + taosMemoryFree(pParamSet); + terrno = TSDB_CODE_TMQ_CONSUMER_CLOSED; + return -1; + } + + // if no more waiting rsp + if (pParamSet->async) { + // call async cb func + if (pParamSet->automatic && tmq->commitCb) { + tmq->commitCb(tmq, pParamSet->rspErr, tmq->commitCbUserParam); + } else if (!pParamSet->automatic && pParamSet->userCb) { + // sem post + pParamSet->userCb(tmq, pParamSet->rspErr, pParamSet->userParam); + } + taosMemoryFree(pParamSet); + } else { + tsem_post(&pParamSet->rspSem); + } + +#if 0 + taosArrayDestroyP(pParamSet->successfulOffsets, taosMemoryFree); + taosArrayDestroyP(pParamSet->failedOffsets, taosMemoryFree); +#endif + return 0; +} \ No newline at end of file From 9e860f02ac942e607db2b1167b92591877fafcf8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 11:25:37 +0800 Subject: [PATCH 17/26] refactor: do some internal refactor. --- include/common/tcommon.h | 7 -- source/client/src/clientTmq.c | 205 +++++++++++++++++----------------- source/common/src/tmsg.c | 3 +- 3 files changed, 104 insertions(+), 111 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 2a40976a8b..9e928a79ac 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -25,13 +25,6 @@ extern "C" { #endif -// TODO remove it -enum { - TMQ_CONF__RESET_OFFSET__NONE = -3, - TMQ_CONF__RESET_OFFSET__EARLIEAST = -2, - TMQ_CONF__RESET_OFFSET__LATEST = -1, -}; - // clang-format off #define IS_META_MSG(x) ( \ x == TDMT_VND_CREATE_STB \ diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index ba223984e3..0d3d37dc29 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -20,18 +20,10 @@ #include "tdatablock.h" #include "tdef.h" #include "tglobal.h" -#include "tmsgtype.h" #include "tqueue.h" #include "tref.h" #include "ttimer.h" -#if 0 -#undef tsem_post -#define tsem_post(x) \ - tscInfo("call sem post at %s %d", __FUNCTION__, __LINE__); \ - sem_post(x) -#endif - struct SMqMgmt { int8_t inited; tmr_h timer; @@ -216,6 +208,7 @@ typedef struct { static int32_t tmqAskEp(tmq_t* tmq, bool async); static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg); static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet); +static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId); tmq_conf_t* tmq_conf_new() { tmq_conf_t* conf = taosMemoryCalloc(1, sizeof(tmq_conf_t)); @@ -227,7 +220,7 @@ tmq_conf_t* tmq_conf_new() { conf->withTbName = false; conf->autoCommit = true; conf->autoCommitInterval = 5000; - conf->resetOffset = TMQ_CONF__RESET_OFFSET__EARLIEAST; + 
conf->resetOffset = TMQ_OFFSET__RESET_EARLIEAST; conf->hbBgEnable = true; return conf; @@ -235,29 +228,35 @@ tmq_conf_t* tmq_conf_new() { void tmq_conf_destroy(tmq_conf_t* conf) { if (conf) { - if (conf->ip) taosMemoryFree(conf->ip); - if (conf->user) taosMemoryFree(conf->user); - if (conf->pass) taosMemoryFree(conf->pass); + if (conf->ip) { + taosMemoryFree(conf->ip); + } + if (conf->user) { + taosMemoryFree(conf->user); + } + if (conf->pass) { + taosMemoryFree(conf->pass); + } taosMemoryFree(conf); } } tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value) { - if (strcmp(key, "group.id") == 0) { + if (strcasecmp(key, "group.id") == 0) { tstrncpy(conf->groupId, value, TSDB_CGROUP_LEN); return TMQ_CONF_OK; } - if (strcmp(key, "client.id") == 0) { + if (strcasecmp(key, "client.id") == 0) { tstrncpy(conf->clientId, value, 256); return TMQ_CONF_OK; } - if (strcmp(key, "enable.auto.commit") == 0) { - if (strcmp(value, "true") == 0) { + if (strcasecmp(key, "enable.auto.commit") == 0) { + if (strcasecmp(value, "true") == 0) { conf->autoCommit = true; return TMQ_CONF_OK; - } else if (strcmp(value, "false") == 0) { + } else if (strcasecmp(value, "false") == 0) { conf->autoCommit = false; return TMQ_CONF_OK; } else { @@ -265,31 +264,31 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } } - if (strcmp(key, "auto.commit.interval.ms") == 0) { + if (strcasecmp(key, "auto.commit.interval.ms") == 0) { conf->autoCommitInterval = atoi(value); return TMQ_CONF_OK; } - if (strcmp(key, "auto.offset.reset") == 0) { - if (strcmp(value, "none") == 0) { - conf->resetOffset = TMQ_CONF__RESET_OFFSET__NONE; + if (strcasecmp(key, "auto.offset.reset") == 0) { + if (strcasecmp(value, "none") == 0) { + conf->resetOffset = TMQ_OFFSET__RESET_NONE; return TMQ_CONF_OK; - } else if (strcmp(value, "earliest") == 0) { - conf->resetOffset = TMQ_CONF__RESET_OFFSET__EARLIEAST; + } else if (strcasecmp(value, "earliest") == 0) { + conf->resetOffset = TMQ_OFFSET__RESET_EARLIEAST; return TMQ_CONF_OK; - } else if (strcmp(value, "latest") == 0) { - conf->resetOffset = TMQ_CONF__RESET_OFFSET__LATEST; + } else if (strcasecmp(value, "latest") == 0) { + conf->resetOffset = TMQ_OFFSET__RESET_LATEST; return TMQ_CONF_OK; } else { return TMQ_CONF_INVALID; } } - if (strcmp(key, "msg.with.table.name") == 0) { - if (strcmp(value, "true") == 0) { + if (strcasecmp(key, "msg.with.table.name") == 0) { + if (strcasecmp(value, "true") == 0) { conf->withTbName = true; return TMQ_CONF_OK; - } else if (strcmp(value, "false") == 0) { + } else if (strcasecmp(value, "false") == 0) { conf->withTbName = false; return TMQ_CONF_OK; } else { @@ -297,11 +296,11 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } } - if (strcmp(key, "experimental.snapshot.enable") == 0) { - if (strcmp(value, "true") == 0) { + if (strcasecmp(key, "experimental.snapshot.enable") == 0) { + if (strcasecmp(value, "true") == 0) { conf->snapEnable = true; return TMQ_CONF_OK; - } else if (strcmp(value, "false") == 0) { + } else if (strcasecmp(value, "false") == 0) { conf->snapEnable = false; return TMQ_CONF_OK; } else { @@ -309,42 +308,40 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } } - if (strcmp(key, "experimental.snapshot.batch.size") == 0) { + if (strcasecmp(key, "experimental.snapshot.batch.size") == 0) { conf->snapBatchSize = atoi(value); return TMQ_CONF_OK; } - if (strcmp(key, "enable.heartbeat.background") == 0) { - if (strcmp(value, "true") == 0) 
{ + if (strcasecmp(key, "enable.heartbeat.background") == 0) { + if (strcasecmp(value, "true") == 0) { conf->hbBgEnable = true; return TMQ_CONF_OK; - } else if (strcmp(value, "false") == 0) { + } else if (strcasecmp(value, "false") == 0) { conf->hbBgEnable = false; return TMQ_CONF_OK; } else { return TMQ_CONF_INVALID; } - return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.ip") == 0) { + if (strcasecmp(key, "td.connect.ip") == 0) { conf->ip = taosStrdup(value); return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.user") == 0) { + if (strcasecmp(key, "td.connect.user") == 0) { conf->user = taosStrdup(value); return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.pass") == 0) { + if (strcasecmp(key, "td.connect.pass") == 0) { conf->pass = taosStrdup(value); return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.port") == 0) { + if (strcasecmp(key, "td.connect.port") == 0) { conf->port = atoi(value); return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.db") == 0) { - /*conf->db = taosStrdup(value);*/ + if (strcasecmp(key, "td.connect.db") == 0) { return TMQ_CONF_OK; } @@ -352,7 +349,6 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } tmq_list_t* tmq_list_new() { - // return (tmq_list_t*)taosArrayInit(0, sizeof(void*)); } @@ -382,13 +378,31 @@ char** tmq_list_to_c_array(const tmq_list_t* list) { return container->pData; } -static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet) { - int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); - if (waitingRspNum == 0) { - tmqCommitDone(pParamSet); +static void updateVgEpset(tmq_t* pTmq, SMqCommitCbParam* pParam, SEpSet* pEpSet) { + int32_t numOfTopics = taosArrayGetSize(pTmq->clientTopics); + for(int32_t i = 0; i < numOfTopics; ++i) { + SMqClientTopic* pTopic = taosArrayGet(pTmq->clientTopics, i); + if (strcmp(pTopic->topicName, pParam->topicName) != 0) { + continue; + } + + int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); + for(int32_t j = 0; j < numOfVgs; ++j) { + SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); + if (pClientVg->vgId == pParam->vgId) { + SEp* pEp = GET_ACTIVE_EP(pEpSet); + SEp* pOld = GET_ACTIVE_EP(&(pClientVg->epSet)); + uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pParam->vgId, + pEp->fqdn, pEp->port, pOld->fqdn, pOld->port); + pClientVg->epSet = *pEpSet; + break; + } + } + + break; } } - +// todo retry to send the commit if failed static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam* pParam = (SMqCommitCbParam*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; @@ -402,33 +416,10 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { } #endif + // update the epset if needed if (pBuf->pEpSet != NULL) { - // todo extract method taosThreadMutexLock(&pParam->pTmq->lock); - - int32_t numOfTopics = taosArrayGetSize(pParam->pTmq->clientTopics); - for(int32_t i = 0; i < numOfTopics; ++i) { - SMqClientTopic* pTopic = taosArrayGet(pParam->pTmq->clientTopics, i); - if (strcmp(pTopic->topicName, pParam->topicName) != 0) { - continue; - } - - int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); - for(int32_t j = 0; j < numOfVgs; ++j) { - SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); - if (pClientVg->vgId == pParam->vgId) { - SEp* pEp = GET_ACTIVE_EP(pBuf->pEpSet); - SEp* pOld = GET_ACTIVE_EP(&(pClientVg->epSet)); - uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pParam->vgId, - pEp->fqdn, 
pEp->port, pOld->fqdn, pOld->port); - pClientVg->epSet = *pBuf->pEpSet; - break; - } - } - - break; - } - + updateVgEpset(pParam->pTmq, pParam, pBuf->pEpSet); taosThreadMutexUnlock(&pParam->pTmq->lock); } @@ -436,10 +427,7 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { taosMemoryFree(pBuf->pData); taosMemoryFree(pBuf->pEpSet); - /*tscDebug("receive offset commit cb of %s on vgId:%d, offset is %" PRId64, pParam->pOffset->subKey, pParam->->vgId, - * pOffset->version);*/ - - tmqCommitRspCountDown(pParamSet); + tmqCommitRspCountDown(pParamSet, pParam->pTmq->consumerId, pParam->topicName, pParam->vgId); return 0; } @@ -522,7 +510,6 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, const char* pTopic pMsgSendInfo->paramFreeFp = taosMemoryFree; pMsgSendInfo->fp = tmqCommitCb; pMsgSendInfo->msgType = TDMT_VND_TMQ_COMMIT_OFFSET; - // send msg atomic_add_fetch_32(&pParamSet->waitingRspNum, 1); atomic_add_fetch_32(&pParamSet->totalRspNum, 1); @@ -652,9 +639,10 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, for (int32_t i = 0; i < numOfTopics; i++) { SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); + int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); - // todo race condition: fix it - int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); + tscDebug("consumer:0x%" PRIx64 " commit offset for topics:%s, numOfVgs:%d", tmq->consumerId, pTopic->topicName, + numOfVgroups); for (int32_t j = 0; j < numOfVgroups; j++) { SMqClientVg clientVg = *(SMqClientVg*)taosArrayGet(pTopic->vgs, j); if (clientVg.currentOffset.type > 0 && !tOffsetEqual(&clientVg.currentOffset, &clientVg.committedOffset)) { @@ -668,6 +656,8 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, } } + tscDebug("consumer:0x%" PRIx64 " total commit:%d for %d topics", tmq->consumerId, pParamSet->waitingRspNum, + numOfTopics); taosThreadMutexUnlock(&tmq->lock); // no request is sent @@ -678,7 +668,7 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, } // count down since waiting rsp num init as 1 - tmqCommitRspCountDown(pParamSet); + tmqCommitRspCountDown(pParamSet, tmq->consumerId, "", 0); if (!async) { tsem_wait(&pParamSet->rspSem); @@ -696,9 +686,9 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, static int32_t tmqCommitInner(tmq_t* tmq, const TAOS_RES* msg, int8_t automatic, int8_t async, tmq_commit_cb* userCb, void* userParam) { - if (msg) { + if (msg) { // user invoked commit? 
return tmqCommitMsgImpl(tmq, msg, async, userCb, userParam); - } else { + } else { // this for auto commit return tmqCommitConsumerImpl(tmq, automatic, async, userCb, userParam); } } @@ -986,7 +976,7 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { tmq_t* pTmq = taosMemoryCalloc(1, sizeof(tmq_t)); if (pTmq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - tscError("failed to create consumer, consumer group %s, code:%s", conf->groupId, terrstr()); + tscError("failed to create consumer, groupId:%s, code:%s", conf->groupId, terrstr()); return NULL; } @@ -1002,9 +992,9 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { if (pTmq->clientTopics == NULL || pTmq->mqueue == NULL || pTmq->qall == NULL || pTmq->delayedTask == NULL || conf->groupId[0] == 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; - tscError("consumer:0x%" PRIx64 " setup failed since %s, consumer group %s", pTmq->consumerId, terrstr(), + tscError("consumer:0x%" PRIx64 " setup failed since %s, groupId:%s", pTmq->consumerId, terrstr(), pTmq->groupId); - goto FAIL; + goto _failed; } // init status @@ -1034,22 +1024,20 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { if (tsem_init(&pTmq->rspSem, 0, 0) != 0) { tscError("consumer:0x %" PRIx64 " setup failed since %s, consumer group %s", pTmq->consumerId, terrstr(), pTmq->groupId); - goto FAIL; + goto _failed; } // init connection pTmq->pTscObj = taos_connect_internal(conf->ip, user, pass, NULL, NULL, conf->port, CONN_TYPE__TMQ); if (pTmq->pTscObj == NULL) { - tscError("consumer:0x %" PRIx64 " setup failed since %s, consumer group %s", pTmq->consumerId, terrstr(), - pTmq->groupId); + tscError("consumer:0x%" PRIx64 " setup failed since %s, groupId:%s", pTmq->consumerId, terrstr(), pTmq->groupId); tsem_destroy(&pTmq->rspSem); - goto FAIL; + goto _failed; } pTmq->refId = taosAddRef(tmqMgmt.rsetId, pTmq); if (pTmq->refId < 0) { - tmqFreeImpl(pTmq); - return NULL; + goto _failed; } if (pTmq->hbBgEnable) { @@ -1058,16 +1046,17 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { pTmq->hbLiveTimer = taosTmrStart(tmqSendHbReq, 1000, pRefId, tmqMgmt.timer); } - tscInfo("consumer:0x%" PRIx64 " is setup, groupId:%s", pTmq->consumerId, pTmq->groupId); + char buf[80] = {0}; + STqOffsetVal offset = {.type = pTmq->resetOffsetCfg}; + tFormatOffset(buf, tListLen(buf), &offset); + tscInfo("consumer:0x%" PRIx64 " is setup, groupId:%s, snapshot:%d, autoCommit:%d, commitInterval:%dms, offset:%s, backgroudHB:%d", + pTmq->consumerId, pTmq->groupId, pTmq->useSnapshot, pTmq->autoCommit, pTmq->autoCommitInterval, buf, + pTmq->hbBgEnable); + return pTmq; -FAIL: - if (pTmq->clientTopics) taosArrayDestroy(pTmq->clientTopics); - if (pTmq->mqueue) taosCloseQueue(pTmq->mqueue); - if (pTmq->delayedTask) taosCloseQueue(pTmq->delayedTask); - if (pTmq->qall) taosFreeQall(pTmq->qall); - taosMemoryFree(pTmq); - +_failed: + tmqFreeImpl(pTmq); return NULL; } @@ -2123,10 +2112,10 @@ int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { // call async cb func if (pParamSet->automatic && tmq->commitCb) { tmq->commitCb(tmq, pParamSet->rspErr, tmq->commitCbUserParam); - } else if (!pParamSet->automatic && pParamSet->userCb) { - // sem post + } else if (!pParamSet->automatic && pParamSet->userCb) { // sem post pParamSet->userCb(tmq, pParamSet->rspErr, pParamSet->userParam); } + taosMemoryFree(pParamSet); } else { tsem_post(&pParamSet->rspSem); @@ -2137,4 +2126,14 @@ int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { 
taosArrayDestroyP(pParamSet->failedOffsets, taosMemoryFree); #endif return 0; -} \ No newline at end of file +} + +void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId) { + int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); + tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d commit-rsp received, remain:%d", consumerId, pTopic, vgId, + waitingRspNum); + + if (waitingRspNum == 0) { + tmqCommitDone(pParamSet); + } +} diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 01a0ee7306..1f7e42e9d2 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -6586,8 +6586,9 @@ int32_t tFormatOffset(char *buf, int32_t maxLen, const STqOffsetVal *pVal) { } else if (pVal->type == TMQ_OFFSET__SNAPSHOT_DATA || pVal->type == TMQ_OFFSET__SNAPSHOT_META) { snprintf(buf, maxLen, "offset(snapshot) uid:%" PRId64 " ts:%" PRId64, pVal->uid, pVal->ts); } else { - ASSERT(0); + return TSDB_CODE_INVALID_PARA; } + return 0; } From af423685ca1c6d2f15163b9d73ff78c9e6e88446 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 17:42:01 +0800 Subject: [PATCH 18/26] fix(tmq): handle the commit failure. --- source/client/src/clientTmq.c | 137 ++++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 46 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 0d3d37dc29..14d056224a 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -208,6 +208,8 @@ typedef struct { static int32_t tmqAskEp(tmq_t* tmq, bool async); static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg); static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet); +static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet, + int32_t index, int32_t totalVgroups); static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId); tmq_conf_t* tmq_conf_new() { @@ -378,51 +380,90 @@ char** tmq_list_to_c_array(const tmq_list_t* list) { return container->pData; } -static void updateVgEpset(tmq_t* pTmq, SMqCommitCbParam* pParam, SEpSet* pEpSet) { - int32_t numOfTopics = taosArrayGetSize(pTmq->clientTopics); +static SMqClientVg* foundClientVg(SArray* pTopicList, const char* pName, int32_t vgId, int32_t* index, int32_t* numOfVgroups) { + int32_t numOfTopics = taosArrayGetSize(pTopicList); + *index = -1; + *numOfVgroups = 0; + for(int32_t i = 0; i < numOfTopics; ++i) { - SMqClientTopic* pTopic = taosArrayGet(pTmq->clientTopics, i); - if (strcmp(pTopic->topicName, pParam->topicName) != 0) { + SMqClientTopic* pTopic = taosArrayGet(pTopicList, i); + if (strcmp(pTopic->topicName, pName) != 0) { continue; } - int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); - for(int32_t j = 0; j < numOfVgs; ++j) { + *numOfVgroups = taosArrayGetSize(pTopic->vgs); + for (int32_t j = 0; j < (*numOfVgroups); ++j) { SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); - if (pClientVg->vgId == pParam->vgId) { - SEp* pEp = GET_ACTIVE_EP(pEpSet); - SEp* pOld = GET_ACTIVE_EP(&(pClientVg->epSet)); - uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pParam->vgId, - pEp->fqdn, pEp->port, pOld->fqdn, pOld->port); - pClientVg->epSet = *pEpSet; - break; + if (pClientVg->vgId == vgId) { + *index = j; + return pClientVg; } } - - break; } + + return NULL; } -// todo retry to send the commit if failed + static int32_t 
tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam* pParam = (SMqCommitCbParam*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; - // push into array -#if 0 - if (code == 0) { - taosArrayPush(pParamSet->failedOffsets, &pParam->pOffset); - } else { - taosArrayPush(pParamSet->successfulOffsets, &pParam->pOffset); - } -#endif - - // update the epset if needed - if (pBuf->pEpSet != NULL) { + if (code != TSDB_CODE_SUCCESS) { // if commit offset failed, let's try again taosThreadMutexLock(&pParam->pTmq->lock); - updateVgEpset(pParam->pTmq, pParam, pBuf->pEpSet); + int32_t numOfVgroups, index; + SMqClientVg* pVg = foundClientVg(pParam->pTmq->clientTopics, pParam->topicName, pParam->vgId, &index, &numOfVgroups); + + if (pVg == NULL) { + tscDebug("consumer:0x%" PRIx64 + " subKey:%s vgId:%d commit failed, code:%s has been transferred to other consumer, no need retry ordinal:%d/%d", + pParam->pTmq->consumerId, pParam->pOffset->subKey, pParam->vgId, tstrerror(code), index + 1, numOfVgroups); + } else { // let's retry the commit + int32_t code1 = doSendCommitMsg(pParam->pTmq, pVg, pParam->topicName, pParamSet, index, numOfVgroups); + if (code1 != TSDB_CODE_SUCCESS) { // retry failed. + tscError("consumer:0x%" PRIx64 " topic:%s vgId:%d offset:%" PRId64 + " retry failed, ignore this commit. code:%s ordinal:%d/%d", + pParam->pTmq->consumerId, pParam->topicName, pVg->vgId, pVg->committedOffset.version, + tstrerror(terrno), index + 1, numOfVgroups); + } + } + taosThreadMutexUnlock(&pParam->pTmq->lock); + + taosMemoryFree(pParam->pOffset); + taosMemoryFree(pBuf->pData); + taosMemoryFree(pBuf->pEpSet); + + tmqCommitRspCountDown(pParamSet, pParam->pTmq->consumerId, pParam->topicName, pParam->vgId); + return 0; } + // todo replace the pTmq with refId + taosThreadMutexLock(&pParam->pTmq->lock); + tmq_t* pTmq = pParam->pTmq; + int32_t index = 0, numOfVgroups = 0; + + SMqClientVg* pVg = foundClientVg(pTmq->clientTopics, pParam->topicName, pParam->vgId, &index, &numOfVgroups); + if (pVg == NULL) { + tscDebug("consumer:0x%" PRIx64 " subKey:%s vgId:%d has been transferred to other consumer, ordinal:%d/%d", + pParam->pTmq->consumerId, pParam->pOffset->subKey, pParam->vgId, index + 1, numOfVgroups); + } else { // update the epset if needed + if (pBuf->pEpSet != NULL) { + SEp* pEp = GET_ACTIVE_EP(pBuf->pEpSet); + SEp* pOld = GET_ACTIVE_EP(&(pVg->epSet)); + + tscDebug("consumer:0x%" PRIx64 " subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d, ordinal:%d/%d", + pTmq->consumerId, pParam->pOffset->subKey, pParam->vgId, pEp->fqdn, pEp->port, pOld->fqdn, pOld->port, + index + 1, numOfVgroups); + + pVg->epSet = *pBuf->pEpSet; + } + + // update the offset value. 
+ pVg->committedOffset = pVg->currentOffset; + } + + taosThreadMutexUnlock(&pParam->pTmq->lock); + taosMemoryFree(pParam->pOffset); taosMemoryFree(pBuf->pData); taosMemoryFree(pBuf->pEpSet); @@ -431,7 +472,8 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { return 0; } -static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet) { +static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet, + int32_t index, int32_t totalVgroups) { STqOffset* pOffset = taosMemoryCalloc(1, sizeof(STqOffset)); if (pOffset == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -498,11 +540,9 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, const char* pTopic }; SEp* pEp = GET_ACTIVE_EP(&pVg->epSet); - tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d offset:%" PRId64 " prev:%" PRId64 ", ep:%s:%d", tmq->consumerId, - pOffset->subKey, pVg->vgId, pOffset->val.version, pVg->committedOffset.version, pEp->fqdn, pEp->port); - - // TODO: put into cb, the commit offset should be move to the callback function - pVg->committedOffset = pVg->currentOffset; + tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d offset:%" PRId64 " prev:%" PRId64 ", ep:%s:%d, ordinal:%d/%d", + tmq->consumerId, pOffset->subKey, pVg->vgId, pOffset->val.version, pVg->committedOffset.version, pEp->fqdn, + pEp->port, index + 1, totalVgroups); pMsgSendInfo->requestId = generateRequestId(); pMsgSendInfo->requestObjRefId = 0; @@ -557,15 +597,19 @@ static int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, t taosThreadMutexLock(&tmq->lock); for (int32_t i = 0; i < taosArrayGetSize(tmq->clientTopics); i++) { SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); - if (strcmp(pTopic->topicName, topic) != 0) continue; - for (int32_t j = 0; j < taosArrayGetSize(pTopic->vgs); j++) { + if (strcmp(pTopic->topicName, topic) != 0) { + continue; + } + + int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); + for (int32_t j = 0; j < numOfVgroups; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); if (pVg->vgId != vgId) { continue; } if (pVg->currentOffset.type > 0 && !tOffsetEqual(&pVg->currentOffset, &pVg->committedOffset)) { - if (tmqSendCommitReq(tmq, pVg, pTopic->topicName, pParamSet) < 0) { + if (doSendCommitMsg(tmq, pVg, pTopic->topicName, pParamSet, j, numOfVgroups) < 0) { tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); goto FAIL; @@ -634,7 +678,6 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, taosThreadMutexLock(&tmq->lock); int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics); - tscDebug("consumer:0x%" PRIx64 " start to commit offset for %d topics", tmq->consumerId, numOfTopics); for (int32_t i = 0; i < numOfTopics; i++) { @@ -644,14 +687,19 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, tscDebug("consumer:0x%" PRIx64 " commit offset for topics:%s, numOfVgs:%d", tmq->consumerId, pTopic->topicName, numOfVgroups); for (int32_t j = 0; j < numOfVgroups; j++) { - SMqClientVg clientVg = *(SMqClientVg*)taosArrayGet(pTopic->vgs, j); - if (clientVg.currentOffset.type > 0 && !tOffsetEqual(&clientVg.currentOffset, &clientVg.committedOffset)) { - if (tmqSendCommitReq(tmq, &clientVg, pTopic->topicName, pParamSet) < 0) { + SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); + + if (pVg->currentOffset.type > 0 && !tOffsetEqual(&pVg->currentOffset, &pVg->committedOffset)) { + code = 
doSendCommitMsg(tmq, pVg, pTopic->topicName, pParamSet, j, numOfVgroups); + if (code != TSDB_CODE_SUCCESS) { + tscError("consumer:0x%" PRIx64 " topic:%s vgId:%d offset:%" PRId64 " failed, code:%s ordinal:%d/%d", + tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->committedOffset.version, tstrerror(terrno), + j + 1, numOfVgroups); continue; } } else { - tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d, not commit, current:%" PRId64 ", ordinal:%d/%d", - tmq->consumerId, pTopic->topicName, clientVg.vgId, clientVg.currentOffset.version, j + 1, numOfVgroups); + tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d, no commit, current:%" PRId64 ", ordinal:%d/%d", + tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->currentOffset.version, j + 1, numOfVgroups); } } } @@ -1804,7 +1852,6 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { taosFreeQitem(pollRspWrapper); } } else { - /*printf("handle ep rsp %d\n", rspMsg->head.mqMsgType);*/ bool reset = false; tmqHandleNoPollRsp(tmq, rspWrapper, &reset); taosFreeQitem(rspWrapper); @@ -1814,8 +1861,6 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } } } - - tscDebug("consumer:0x%" PRIx64 " handle the rsp completed", tmq->consumerId); } TAOS_RES* tmq_consumer_poll(tmq_t* tmq, int64_t timeout) { From 85ee1318d2f3138fba4ac0d9560a7d2ed97f990a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 17:44:35 +0800 Subject: [PATCH 19/26] refactor: add some logs. --- source/client/src/clientTmq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 14d056224a..a4f8b46395 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -460,6 +460,8 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { // update the offset value. pVg->committedOffset = pVg->currentOffset; + tscDebug("consumer:0x%" PRIx64 " subKey:%s vgId:%d, commit offset success. ordinal:%d/%d", pTmq->consumerId, + pParam->pOffset->subKey, pParam->vgId, index + 1, numOfVgroups); } taosThreadMutexUnlock(&pParam->pTmq->lock); From 26925a1c133c53032ca888d77050091d21b6dfc6 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 18:01:56 +0800 Subject: [PATCH 20/26] refactor: add some logs for tmq. 
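This patch only touches logging in the batched-commit path. The "total commit" debug line now prints waitingRspNum - 1, which suggests the counter holds one extra sentinel reference until all per-vgroup requests have been issued (doSendCommitMsg bumps it with atomic_add_fetch_32 for every request it sends), and tmqCommitRspCountDown() now prints "commit completed" only on the response that drives the counter to zero. A self-contained sketch of this sentinel count-down pattern, written with plain C11 atomics instead of the TDengine helpers, so the names below are illustrative only:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { atomic_int waitingRspNum; } CommitSet;

    static void commitDone(CommitSet *pSet) {        // every response has arrived
      printf("commit completed\n");
      free(pSet);
    }

    static void countDown(CommitSet *pSet) {
      // atomic_fetch_sub returns the previous value, so subtract one more
      int remain = atomic_fetch_sub(&pSet->waitingRspNum, 1) - 1;
      if (remain == 0) {
        commitDone(pSet);                            // last reference, finish the commit
      } else {
        printf("commit-rsp received, remain:%d\n", remain);
      }
    }

    int main(void) {
      CommitSet *pSet = malloc(sizeof(*pSet));
      atomic_init(&pSet->waitingRspNum, 1);          // sentinel, so nothing completes early
      for (int i = 0; i < 3; ++i) atomic_fetch_add(&pSet->waitingRspNum, 1);  // 3 requests sent
      for (int i = 0; i < 3; ++i) countDown(pSet);                            // 3 responses handled
      countDown(pSet);                               // drop the sentinel; completes here
      return 0;
    }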
--- source/client/src/clientTmq.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index a4f8b46395..e708b64a92 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -706,7 +706,7 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, } } - tscDebug("consumer:0x%" PRIx64 " total commit:%d for %d topics", tmq->consumerId, pParamSet->waitingRspNum, + tscDebug("consumer:0x%" PRIx64 " total commit:%d for %d topics", tmq->consumerId, pParamSet->waitingRspNum - 1, numOfTopics); taosThreadMutexUnlock(&tmq->lock); @@ -2177,10 +2177,12 @@ int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId) { int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); - tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d commit-rsp received, remain:%d", consumerId, pTopic, vgId, - waitingRspNum); - if (waitingRspNum == 0) { + tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d all commit-rsp received, commit completed", consumerId, pTopic, + vgId); tmqCommitDone(pParamSet); + } else { + tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d commit-rsp received, remain:%d", consumerId, pTopic, vgId, + waitingRspNum); } } From 7f2aae69d4bcea30e0e275aed1e9df64f66a524b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 13 Mar 2023 01:54:29 +0800 Subject: [PATCH 21/26] fix(tmq): fix error in taosx --- source/client/src/clientTmq.c | 47 +++++++++++++++++++----------- source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/tq/tq.c | 35 ++++++++++------------ source/dnode/vnode/src/tq/tqRead.c | 6 ++-- utils/test/c/tmqSim.c | 22 ++++++++------ 5 files changed, 63 insertions(+), 49 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index e708b64a92..55d0453fbf 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -150,6 +150,7 @@ typedef struct { int32_t epoch; SMqClientVg* vgHandle; SMqClientTopic* topicHandle; + uint64_t reqId; union { SMqDataRsp dataRsp; SMqMetaRsp metaRsp; @@ -458,8 +459,6 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { pVg->epSet = *pBuf->pEpSet; } - // update the offset value. - pVg->committedOffset = pVg->currentOffset; tscDebug("consumer:0x%" PRIx64 " subKey:%s vgId:%d, commit offset success. ordinal:%d/%d", pTmq->consumerId, pParam->pOffset->subKey, pParam->vgId, index + 1, numOfVgroups); } @@ -699,6 +698,9 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, j + 1, numOfVgroups); continue; } + + // update the offset value. 
+ pVg->committedOffset = pVg->currentOffset; } else { tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d, no commit, current:%" PRId64 ", ordinal:%d/%d", tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->currentOffset.version, j + 1, numOfVgroups); @@ -1261,8 +1263,8 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { taosMemoryFree(pParam); if (code != 0) { - tscWarn("consumer:0x%"PRIx64" msg from vgId:%d discarded, epoch %d, since %s, reqId:0x%"PRIx64, tmq->consumerId, vgId, - epoch, tstrerror(code), requestId); + tscWarn("consumer:0x%" PRIx64 " msg from vgId:%d discarded, epoch %d, since %s, reqId:0x%" PRIx64, tmq->consumerId, + vgId, epoch, tstrerror(code), requestId); if (pMsg->pData) taosMemoryFree(pMsg->pData); if (pMsg->pEpSet) taosMemoryFree(pMsg->pEpSet); @@ -1281,8 +1283,6 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { } pRspWrapper->tmqRspType = TMQ_MSG_TYPE__END_RSP; - /*pRspWrapper->vgHandle = pVg;*/ - /*pRspWrapper->topicHandle = pTopic;*/ taosWriteQitem(tmq->mqueue, pRspWrapper); tsem_post(&tmq->rspSem); } @@ -1304,8 +1304,8 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { } if (msgEpoch != tmqEpoch) { - tscWarn("consumer:0x%"PRIx64" mismatch rsp from vgId:%d, epoch %d, current epoch %d, reqId:0x%"PRIx64, tmq->consumerId, vgId, - msgEpoch, tmqEpoch, requestId); + tscWarn("consumer:0x%" PRIx64 " mismatch rsp from vgId:%d, epoch %d, current epoch %d, reqId:0x%" PRIx64, + tmq->consumerId, vgId, msgEpoch, tmqEpoch, requestId); } // handle meta rsp @@ -1322,6 +1322,7 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { pRspWrapper->tmqRspType = rspType; pRspWrapper->vgHandle = pVg; pRspWrapper->topicHandle = pTopic; + pRspWrapper->reqId = requestId; if (rspType == TMQ_MSG_TYPE__POLL_RSP) { SDecoder decoder; @@ -1350,13 +1351,11 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { taosMemoryFree(pMsg->pData); taosMemoryFree(pMsg->pEpSet); + taosWriteQitem(tmq->mqueue, pRspWrapper); tscDebug("consumer:0x%" PRIx64 ", put poll res into mqueue, total in queue:%d, reqId:0x%" PRIx64, tmq->consumerId, tmq->mqueue->numOfItems, requestId); - - taosWriteQitem(tmq->mqueue, pRspWrapper); tsem_post(&tmq->rspSem); - return 0; CREATE_MSG_FAIL: @@ -1763,6 +1762,8 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } } + tscDebug("consumer:0x%"PRIx64" handle rsp, type:%d", tmq->consumerId, rspWrapper->tmqRspType); + if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__END_RSP) { taosFreeQitem(rspWrapper); terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; @@ -1779,7 +1780,8 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); if (pollRspWrapper->dataRsp.blockNum == 0) { - tscDebug("consumer:0x%" PRIx64 " empty block received, vgId:%d", tmq->consumerId, pVg->vgId); + tscDebug("consumer:0x%" PRIx64 " empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, + pollRspWrapper->reqId); taosFreeQitem(pollRspWrapper); rspWrapper = NULL; continue; @@ -1789,8 +1791,8 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { char buf[80]; tFormatOffset(buf, 80, &pVg->currentOffset); SMqRspObj* pRsp = tmqBuildRspFromWrapper(pollRspWrapper); - tscDebug("consumer:0x%" PRIx64 " process poll rsp, vgId:%d, offset:%s, blocks:%d", tmq->consumerId, - pVg->vgId, buf, pollRspWrapper->dataRsp.blockNum); + tscDebug("consumer:0x%" PRIx64 " process poll rsp, vgId:%d, offset:%s, blocks:%d, reqId:0x%"PRIx64, 
tmq->consumerId, + pVg->vgId, buf, pollRspWrapper->dataRsp.blockNum, pollRspWrapper->reqId); taosFreeQitem(pollRspWrapper); return pRsp; @@ -1824,17 +1826,19 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } } else if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__TAOSX_RSP) { SMqPollRspWrapper* pollRspWrapper = (SMqPollRspWrapper*)rspWrapper; - /*atomic_sub_fetch_32(&tmq->readyRequest, 1);*/ int32_t consumerEpoch = atomic_load_32(&tmq->epoch); + if (pollRspWrapper->taosxRsp.head.epoch == consumerEpoch) { SMqClientVg* pVg = pollRspWrapper->vgHandle; - /*printf("vgId:%d, offset %" PRId64 " up to %" PRId64 "\n", pVg->vgId, pVg->currentOffset, - * rspMsg->msg.rspOffset);*/ pVg->currentOffset = pollRspWrapper->taosxRsp.rspOffset; atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); + if (pollRspWrapper->taosxRsp.blockNum == 0) { taosFreeQitem(pollRspWrapper); rspWrapper = NULL; + + tscDebug("consumer:0x%" PRIx64 " taosx empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, + pollRspWrapper->reqId); continue; } @@ -1845,8 +1849,15 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } else { pRsp = tmqBuildTaosxRspFromWrapper(pollRspWrapper); } + taosFreeQitem(pollRspWrapper); + + char buf[80]; + tFormatOffset(buf, 80, &pVg->currentOffset); + tscDebug("consumer:0x%" PRIx64 " process taosx poll rsp, vgId:%d, offset:%s, blocks:%d, reqId:0x%"PRIx64, tmq->consumerId, pVg->vgId, + buf, pollRspWrapper->dataRsp.blockNum, pollRspWrapper->reqId); return pRsp; + } else { tscDebug("consumer:0x%" PRIx64 " msg discard since epoch mismatch: msg epoch %d, consumer epoch %d", tmq->consumerId, pollRspWrapper->taosxRsp.head.epoch, consumerEpoch); @@ -1854,6 +1865,8 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { taosFreeQitem(pollRspWrapper); } } else { + tscDebug("consumer:0x%" PRIx64 " not data msg received", tmq->consumerId); + bool reset = false; tmqHandleNoPollRsp(tmq, rspWrapper, &reset); taosFreeQitem(rspWrapper); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 6cb54c6c18..a45d6d1dec 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -141,7 +141,7 @@ int32_t tDecodeSTqHandle(SDecoder* pDecoder, STqHandle* pHandle); // tqRead int32_t tqScanTaosx(STQ* pTq, const STqHandle* pHandle, STaosxRsp* pRsp, SMqMetaRsp* pMetaRsp, STqOffsetVal* offset); int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffsetVal* pOffset); -int64_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHead** pHeadWithCkSum); +int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHead** pHeadWithCkSum); // tqExec int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxRsp* pRsp); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index a6919d97a9..76a6a8a840 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -330,15 +330,16 @@ int32_t tqSendTaosxRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, co .contLen = tlen, .code = 0, }; + tmsgSendRsp(&rsp); char buf1[80] = {0}; char buf2[80] = {0}; tFormatOffset(buf1, 80, &pRsp->reqOffset); tFormatOffset(buf2, 80, &pRsp->rspOffset); - tqDebug("taosx rsp, vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) send rsp, numOfBlks:%d, req:%s, rsp:%s", - TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); + tqDebug("taosx rsp, vgId:%d, 
consumer:0x%" PRIx64 " (epoch %d) send rsp, numOfBlks:%d, req:%s, rsp:%s", + TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); return 0; } @@ -600,7 +601,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (metaRsp.metaRspLen > 0) { code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); - tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send meta offset type:%d,uid:%" PRId64 + tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64 ",version:%" PRId64, consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.version); @@ -617,7 +618,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* offset = taosxRsp.rspOffset; } - tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send data blockNum:%d, offset type:%d,uid:%" PRId64 + tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send data blockNum:%d, offset type:%d,uid:%" PRId64 ",version:%" PRId64, consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid, taosxRsp.rspOffset.version); @@ -637,7 +638,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* // todo refactor: this is not correct. int32_t savedEpoch = atomic_load_32(&pHandle->epoch); if (savedEpoch > pRequest->epoch) { - tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, vg %d offset %" PRId64 + tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey:%s vgId:%d offset %" PRId64 ", found new consumer epoch %d, discard req epoch %d", consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch); break; @@ -645,9 +646,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead) < 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - if (tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp) < 0) { - code = -1; - } + code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); tDeleteSTaosxRsp(&taosxRsp); taosMemoryFreeClear(pCkHead); return code; @@ -669,11 +668,10 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest->subKey); return -1; } - if (taosxRsp.blockNum > 0 /* threshold */) { + + if (taosxRsp.blockNum > 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - if (tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp) < 0) { - code = -1; - } + code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); tDeleteSTaosxRsp(&taosxRsp); taosMemoryFreeClear(pCkHead); return code; @@ -741,21 +739,18 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { taosRUnLockLatch(&pTq->pushLock); taosWLockLatch(&pTq->pushLock); - int32_t savedEpoch = pHandle->epoch; - // 3. 
update the epoch value -// while (savedEpoch < reqEpoch) { + int32_t savedEpoch = pHandle->epoch; + if (savedEpoch < reqEpoch) { tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); pHandle->epoch = reqEpoch; -// savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); -// } - + } taosWUnLockLatch(&pTq->pushLock); char buf[80]; tFormatOffset(buf, 80, &reqOffset); - tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s", consumerId, - req.epoch, pHandle->subKey, vgId, buf); + tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64, + consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId); return extractDataForMq(pTq, pHandle, &req, pMsg); } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index bf73cca925..5bb08df2a0 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -183,14 +183,15 @@ end: return tbSuid == realTbSuid; } -int64_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHead** ppCkHead) { +int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHead** ppCkHead) { int32_t code = 0; + taosThreadMutexLock(&pHandle->pWalReader->mutex); int64_t offset = *fetchOffset; while (1) { if (walFetchHead(pHandle->pWalReader, offset, *ppCkHead) < 0) { - tqDebug("tmq poll: consumer:%" PRId64 ", (epoch %d) vgId:%d offset %" PRId64 ", no more log to return", + tqDebug("tmq poll: consumer:0x%" PRIx64 ", (epoch %d) vgId:%d offset %" PRId64 ", no more log to return", pHandle->consumerId, pHandle->epoch, TD_VID(pTq->pVnode), offset); *fetchOffset = offset - 1; code = -1; @@ -241,6 +242,7 @@ int64_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHea offset++; } } + END: taosThreadMutexUnlock(&pHandle->pWalReader->mutex); return code; diff --git a/utils/test/c/tmqSim.c b/utils/test/c/tmqSim.c index 1acf50a7d8..ce4bafb79b 100644 --- a/utils/test/c/tmqSim.c +++ b/utils/test/c/tmqSim.c @@ -386,7 +386,7 @@ void addRowsToVgroupId(SThreadInfo* pInfo, int32_t vgroupId, int32_t rows) { pInfo->rowsOfPerVgroups[pInfo->numOfVgroups][1] += rows; pInfo->numOfVgroups++; - taosFprintfFile(g_fp, "consume id %d, add one new vogroup id: %d\n", pInfo->consumerId, vgroupId); + taosFprintfFile(g_fp, "consume id %d, add new vgroupId:%d\n", pInfo->consumerId, vgroupId); if (pInfo->numOfVgroups > MAX_VGROUP_CNT) { taosFprintfFile(g_fp, "====consume id %d, vgroup num %d over than 32. new vgroupId: %d\n", pInfo->consumerId, pInfo->numOfVgroups, vgroupId); @@ -578,18 +578,25 @@ static int32_t data_msg_process(TAOS_RES* msg, SThreadInfo* pInfo, int32_t msgIn char buf[1024]; int32_t totalRows = 0; - // printf("topic: %s\n", tmq_get_topic_name(msg)); int32_t vgroupId = tmq_get_vgroup_id(msg); const char* dbName = tmq_get_db_name(msg); taosFprintfFile(g_fp, "consumerId: %d, msg index:%d\n", pInfo->consumerId, msgIndex); - taosFprintfFile(g_fp, "dbName: %s, topic: %s, vgroupId: %d\n", dbName != NULL ? dbName : "invalid table", - tmq_get_topic_name(msg), vgroupId); + int32_t index = 0; + for (index = 0; index < pInfo->numOfVgroups; index++) { + if (vgroupId == pInfo->rowsOfPerVgroups[index][0]) { + break; + } + } + + taosFprintfFile(g_fp, "dbName: %s, topic: %s, vgroupId:%d, currentRows:%d\n", dbName != NULL ? 
dbName : "invalid table", + tmq_get_topic_name(msg), vgroupId, pInfo->rowsOfPerVgroups[index][1]); while (1) { TAOS_ROW row = taos_fetch_row(msg); - - if (row == NULL) break; + if (row == NULL) { + break; + } TAOS_FIELD* fields = taos_fetch_fields(msg); int32_t numOfFields = taos_field_count(msg); @@ -607,7 +614,6 @@ static int32_t data_msg_process(TAOS_RES* msg, SThreadInfo* pInfo, int32_t msgIn #endif dumpToFileForCheck(pInfo->pConsumeRowsFile, row, fields, length, numOfFields, precision); - taos_print_row(buf, row, fields, numOfFields); if (0 != g_stConfInfo.showRowFlag) { @@ -621,7 +627,6 @@ static int32_t data_msg_process(TAOS_RES* msg, SThreadInfo* pInfo, int32_t msgIn } addRowsToVgroupId(pInfo, vgroupId, totalRows); - return totalRows; } @@ -817,7 +822,6 @@ void loop_consume(SThreadInfo* pInfo) { } taos_free_result(tmqMsg); - totalMsgs++; int64_t currentPrintTime = taosGetTimestampMs(); From 731a2138df3cae9b214fd954f1beb8a1e81d62f8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 13 Mar 2023 17:08:17 +0800 Subject: [PATCH 22/26] fix(tmq): fix error in tmq. --- include/common/tmsg.h | 1 + source/client/src/clientTmq.c | 35 ++- source/client/test/clientTests.cpp | 81 +++++- source/common/src/tmsg.c | 3 +- source/dnode/vnode/src/inc/tq.h | 6 +- source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/tq/tq.c | 370 ++++++++++++++---------- source/dnode/vnode/src/tq/tqPush.c | 53 ++-- source/libs/executor/src/executor.c | 2 +- tests/system-test/7-tmq/subscribeDb1.py | 5 +- 10 files changed, 371 insertions(+), 187 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index d9d3c7e297..ee9a08b504 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -3185,6 +3185,7 @@ typedef struct { SArray* blockData; SArray* blockTbName; SArray* blockSchema; + // the following attributes are extended from SMqDataRsp int32_t createTableNum; SArray* createTableLen; SArray* createTableReq; diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 55d0453fbf..3167a4c361 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -24,6 +24,8 @@ #include "tref.h" #include "ttimer.h" +#define VG_POLL_IGNORE_TICK 100 + struct SMqMgmt { int8_t inited; tmr_h timer; @@ -126,16 +128,14 @@ enum { }; typedef struct { - // statistics - int64_t pollCnt; - // offset + int64_t pollCnt; STqOffsetVal committedOffset; STqOffsetVal currentOffset; - // connection info - int32_t vgId; - int32_t vgStatus; - int32_t vgSkipCnt; - SEpSet epSet; + int32_t vgId; + int32_t vgStatus; + int32_t vgSkipCnt; + int32_t vgIgnoreCnt; // once empty block is received, idle for ignoreCnt then start to poll data + SEpSet epSet; } SMqClientVg; typedef struct { @@ -1347,14 +1347,17 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { tDecodeSTaosxRsp(&decoder, &pRspWrapper->taosxRsp); tDecoderClear(&decoder); memcpy(&pRspWrapper->taosxRsp, pMsg->pData, sizeof(SMqRspHead)); + } else { // invalid rspType + tscError("consumer:0x%"PRIx64" invalid rsp msg received, type:%d ignored", tmq->consumerId, rspType); } taosMemoryFree(pMsg->pData); taosMemoryFree(pMsg->pEpSet); taosWriteQitem(tmq->mqueue, pRspWrapper); - tscDebug("consumer:0x%" PRIx64 ", put poll res into mqueue, total in queue:%d, reqId:0x%" PRIx64, tmq->consumerId, - tmq->mqueue->numOfItems, requestId); + tscDebug("consumer:0x%" PRIx64 " put poll res into mqueue, type:%d, vgId:%d, total in queue:%d, reqId:0x%" PRIx64, + tmq->consumerId, rspType, vgId, tmq->mqueue->numOfItems, requestId); + 
tsem_post(&tmq->rspSem); return 0; @@ -1400,6 +1403,7 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic .epSet = pVgEp->epSet, .vgStatus = TMQ_VG_STATUS__IDLE, .vgSkipCnt = 0, + .vgIgnoreCnt = 0, }; taosArrayPush(pTopic->vgs, &clientVg); @@ -1700,6 +1704,13 @@ static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { for (int j = 0; j < numOfVg; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); + if (pVg->vgIgnoreCnt > 0) { + pVg->vgIgnoreCnt -= 1; + tscTrace("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for %d tick before poll", tmq->consumerId, tmq->epoch, + pVg->vgId, pVg->vgIgnoreCnt); + continue; + } + int32_t vgStatus = atomic_val_compare_exchange_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE, TMQ_VG_STATUS__WAIT); if (vgStatus == TMQ_VG_STATUS__WAIT) { int32_t vgSkipCnt = atomic_add_fetch_32(&pVg->vgSkipCnt, 1); @@ -1810,8 +1821,6 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { if (pollRspWrapper->metaRsp.head.epoch == consumerEpoch) { SMqClientVg* pVg = pollRspWrapper->vgHandle; - /*printf("vgId:%d, offset %" PRId64 " up to %" PRId64 "\n", pVg->vgId, pVg->currentOffset, - * rspMsg->msg.rspOffset);*/ pVg->currentOffset = pollRspWrapper->metaRsp.rspOffset; atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); // build rsp @@ -1836,9 +1845,9 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { if (pollRspWrapper->taosxRsp.blockNum == 0) { taosFreeQitem(pollRspWrapper); rspWrapper = NULL; - tscDebug("consumer:0x%" PRIx64 " taosx empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, pollRspWrapper->reqId); + pVg->vgIgnoreCnt = VG_POLL_IGNORE_TICK; continue; } diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 82d29b37eb..0c7e95f8eb 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -898,7 +898,86 @@ TEST(clientCase, update_test) { } } -TEST(clientCase, subscription_test) { +TEST(clientCase, sub_db_test) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + ASSERT_NE(pConn, nullptr); + + // TAOS_RES* pRes = taos_query(pConn, "create topic topic_t1 as select * from t1"); + // if (taos_errno(pRes) != TSDB_CODE_SUCCESS) { + // printf("failed to create topic, code:%s", taos_errstr(pRes)); + // taos_free_result(pRes); + // return; + // } + + tmq_conf_t* conf = tmq_conf_new(); + tmq_conf_set(conf, "enable.auto.commit", "true"); + tmq_conf_set(conf, "auto.commit.interval.ms", "1000"); + tmq_conf_set(conf, "group.id", "cgrpNamedb"); + tmq_conf_set(conf, "td.connect.user", "root"); + tmq_conf_set(conf, "td.connect.pass", "taosdata"); + tmq_conf_set(conf, "auto.offset.reset", "earliest"); + tmq_conf_set(conf, "experimental.snapshot.enable", "true"); + tmq_conf_set(conf, "msg.with.table.name", "true"); + tmq_conf_set_auto_commit_cb(conf, tmq_commit_cb_print, NULL); + + tmq_t* tmq = tmq_consumer_new(conf, NULL, 0); + tmq_conf_destroy(conf); + + // 创建订阅 topics 列表 + tmq_list_t* topicList = tmq_list_new(); + tmq_list_append(topicList, "topic_db1"); + + // 启动订阅 + tmq_subscribe(tmq, topicList); + tmq_list_destroy(topicList); + + TAOS_FIELD* fields = NULL; + int32_t numOfFields = 0; + int32_t precision = 0; + int32_t totalRows = 0; + int32_t msgCnt = 0; + int32_t timeout = 5000; + + int32_t count = 0; + + while (1) { + TAOS_RES* pRes = tmq_consumer_poll(tmq, timeout); + if (pRes) { + char buf[1024]; + int32_t rows = 0; + + const char* topicName = tmq_get_topic_name(pRes); + const char* 
dbName = tmq_get_db_name(pRes); + int32_t vgroupId = tmq_get_vgroup_id(pRes); + + printf("topic: %s\n", topicName); + printf("db: %s\n", dbName); + printf("vgroup id: %d\n", vgroupId); + + if (count ++ > 200) { + tmq_unsubscribe(tmq); + break; + } + + while (1) { + TAOS_ROW row = taos_fetch_row(pRes); + if (row == NULL) break; + + fields = taos_fetch_fields(pRes); + numOfFields = taos_field_count(pRes); + precision = taos_result_precision(pRes); + rows++; + taos_print_row(buf, row, fields, numOfFields); + printf("precision: %d, row content: %s\n", precision, buf); + } + } +// return rows; + } + + fprintf(stderr, "%d msg consumed, include %d rows\n", msgCnt, totalRows); +} + +TEST(clientCase, sub_tb_test) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); ASSERT_NE(pConn, nullptr); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 9c7d68f256..9d3adf8692 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -6822,8 +6822,7 @@ int32_t tDecodeSMqDataRsp(SDecoder *pDecoder, SMqDataRsp *pRsp) { } void tDeleteSMqDataRsp(SMqDataRsp *pRsp) { - taosArrayDestroy(pRsp->blockDataLen); - pRsp->blockDataLen = NULL; + pRsp->blockDataLen = taosArrayDestroy(pRsp->blockDataLen);; taosArrayDestroyP(pRsp->blockData, (FDelete)taosMemoryFree); pRsp->blockData = NULL; taosArrayDestroyP(pRsp->blockSchema, (FDelete)tDeleteSSchemaWrapper); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index a45d6d1dec..ee2bd007ce 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -103,9 +103,9 @@ typedef struct { } STqHandle; typedef struct { - SMqDataRsp dataRsp; + SMqDataRsp* pDataRsp; char subKey[TSDB_SUBSCRIBE_KEY_LEN]; - SRpcHandleInfo pInfo; + SRpcHandleInfo info; } STqPushEntry; struct STQ { @@ -147,7 +147,7 @@ int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHea int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxRsp* pRsp); // int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SSubmitReq* pReq, STaosxRsp* pRsp); int32_t tqAddBlockDataToRsp(const SSDataBlock* pBlock, SMqDataRsp* pRsp, int32_t numOfCols, int8_t precision); -int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp); +int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type); int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry); // tqMeta diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 27d351f915..0fe7f9a773 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -192,7 +192,7 @@ void tqCleanUp(); STQ* tqOpen(const char* path, SVnode* pVnode); void tqClose(STQ*); int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); -int tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp); +int tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, int32_t type); int tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); int tqCommit(STQ*); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 76a6a8a840..9dc862ec63 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -68,7 +68,13 @@ static void destroySTqHandle(void* data) { static void tqPushEntryFree(void* 
data) { STqPushEntry* p = *(void**)data; - tDeleteSMqDataRsp(&p->dataRsp); + if (p->pDataRsp->head.mqMsgType == TMQ_MSG_TYPE__POLL_RSP) { + tDeleteSMqDataRsp(p->pDataRsp); + } else if (p->pDataRsp->head.mqMsgType == TMQ_MSG_TYPE__TAOSX_RSP) { + tDeleteSTaosxRsp((STaosxRsp*)p->pDataRsp); + } + + taosMemoryFree(p->pDataRsp); taosMemoryFree(p); } @@ -166,8 +172,57 @@ int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, return 0; } +static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, + int64_t consumerId, int32_t type) { + int32_t len = 0; + int32_t code = 0; + + if (type == TMQ_MSG_TYPE__POLL_RSP) { + tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); + } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) { + tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code); + } + + if (code < 0) { + return -1; + } + + int32_t tlen = sizeof(SMqRspHead) + len; + void* buf = rpcMallocCont(tlen); + if (buf == NULL) { + return -1; + } + + ((SMqRspHead*)buf)->mqMsgType = type; + ((SMqRspHead*)buf)->epoch = epoch; + ((SMqRspHead*)buf)->consumerId = consumerId; + + void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); + + SEncoder encoder = {0}; + tEncoderInit(&encoder, abuf, len); + + if (type == TMQ_MSG_TYPE__POLL_RSP) { + tEncodeSMqDataRsp(&encoder, pRsp); + } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) { + tEncodeSTaosxRsp(&encoder, (STaosxRsp*) pRsp); + } + + tEncoderClear(&encoder); + + SRpcMsg rsp = { + .info = *pRpcHandleInfo, + .pCont = buf, + .contLen = tlen, + .code = 0, + }; + + tmsgSendRsp(&rsp); + return 0; +} + int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry) { - SMqDataRsp* pRsp = &pPushEntry->dataRsp; + SMqDataRsp* pRsp = pPushEntry->pDataRsp; #if 0 A(taosArrayGetSize(pRsp->blockData) == pRsp->blockNum); @@ -181,37 +236,40 @@ int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry) { } #endif - int32_t len = 0; - int32_t code = 0; - tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); +// int32_t len = 0; +// int32_t code = 0; +// tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); +// if (code < 0) { +// return -1; +// } +// +// int32_t tlen = sizeof(SMqRspHead) + len; +// void* buf = rpcMallocCont(tlen); +// if (buf == NULL) { +// return -1; +// } +// +// memcpy(buf, &pPushEntry->dataRsp.head, sizeof(SMqRspHead)); +// +// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); +// +// SEncoder encoder = {0}; +// tEncoderInit(&encoder, abuf, len); +// tEncodeSMqDataRsp(&encoder, pRsp); +// tEncoderClear(&encoder); +// +// SRpcMsg rsp = { +// .info = pPushEntry->pInfo, +// .pCont = buf, +// .contLen = tlen, +// .code = 0, +// }; +// +// tmsgSendRsp(&rsp); +// - if (code < 0) { - return -1; - } - - int32_t tlen = sizeof(SMqRspHead) + len; - void* buf = rpcMallocCont(tlen); - if (buf == NULL) { - return -1; - } - - memcpy(buf, &pPushEntry->dataRsp.head, sizeof(SMqRspHead)); - - void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); - - SEncoder encoder = {0}; - tEncoderInit(&encoder, abuf, len); - tEncodeSMqDataRsp(&encoder, pRsp); - tEncoderClear(&encoder); - - SRpcMsg rsp = { - .info = pPushEntry->pInfo, - .pCont = buf, - .contLen = tlen, - .code = 0, - }; - - tmsgSendRsp(&rsp); + SMqRspHead* pHeader = &pPushEntry->pDataRsp->head; + doSendDataRsp(&pPushEntry->info, pRsp, pHeader->epoch, pHeader->consumerId, pHeader->mqMsgType); char buf1[80] = {0}; char buf2[80] = {0}; @@ -219,11 +277,10 @@ int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry) { tFormatOffset(buf2, tListLen(buf2), &pRsp->rspOffset); tqDebug("vgId:%d, 
from consumer:0x%" PRIx64 " (epoch %d) push rsp, block num: %d, req:%s, rsp:%s", TD_VID(pTq->pVnode), pRsp->head.consumerId, pRsp->head.epoch, pRsp->blockNum, buf1, buf2); - return 0; } -int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp) { +int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type) { #if 0 A(taosArrayGetSize(pRsp->blockData) == pRsp->blockNum); A(taosArrayGetSize(pRsp->blockDataLen) == pRsp->blockNum); @@ -240,109 +297,127 @@ int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, con } #endif - int32_t len = 0; - int32_t code = 0; - tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); - if (code < 0) { - return -1; - } - int32_t tlen = sizeof(SMqRspHead) + len; - void* buf = rpcMallocCont(tlen); - if (buf == NULL) { - return -1; - } - - ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__POLL_RSP; - ((SMqRspHead*)buf)->epoch = pReq->epoch; - ((SMqRspHead*)buf)->consumerId = pReq->consumerId; - - void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); - - SEncoder encoder = {0}; - tEncoderInit(&encoder, abuf, len); - tEncodeSMqDataRsp(&encoder, pRsp); - tEncoderClear(&encoder); - - SRpcMsg rsp = { - .info = pMsg->info, - .pCont = buf, - .contLen = tlen, - .code = 0, - }; - tmsgSendRsp(&rsp); - - char buf1[80] = {0}; - char buf2[80] = {0}; - tFormatOffset(buf1, 80, &pRsp->reqOffset); - tFormatOffset(buf2, 80, &pRsp->rspOffset); - tqDebug("vgId:%d consumer:0x%" PRIx64 " (epoch %d), block num:%d, req:%s, rsp:%s", - TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); - - return 0; -} - -int32_t tqSendTaosxRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const STaosxRsp* pRsp) { -#if 0 - A(taosArrayGetSize(pRsp->blockData) == pRsp->blockNum); - A(taosArrayGetSize(pRsp->blockDataLen) == pRsp->blockNum); - - if (pRsp->withSchema) { - A(taosArrayGetSize(pRsp->blockSchema) == pRsp->blockNum); - } else { - A(taosArrayGetSize(pRsp->blockSchema) == 0); - } - - if (pRsp->reqOffset.type == TMQ_OFFSET__LOG) { - if (pRsp->blockNum > 0) { - A(pRsp->rspOffset.version > pRsp->reqOffset.version); - } else { - A(pRsp->rspOffset.version >= pRsp->reqOffset.version); - } - } -#endif - - int32_t len = 0; - int32_t code = 0; - tEncodeSize(tEncodeSTaosxRsp, pRsp, len, code); - if (code < 0) { - return -1; - } - int32_t tlen = sizeof(SMqRspHead) + len; - void* buf = rpcMallocCont(tlen); - if (buf == NULL) { - return -1; - } - - ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__TAOSX_RSP; - ((SMqRspHead*)buf)->epoch = pReq->epoch; - ((SMqRspHead*)buf)->consumerId = pReq->consumerId; - - void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); - - SEncoder encoder = {0}; - tEncoderInit(&encoder, abuf, len); - tEncodeSTaosxRsp(&encoder, pRsp); - tEncoderClear(&encoder); - - SRpcMsg rsp = { - .info = pMsg->info, - .pCont = buf, - .contLen = tlen, - .code = 0, - }; - - tmsgSendRsp(&rsp); +// int32_t len = 0; +// int32_t code = 0; +// +// if (type == TMQ_MSG_TYPE__POLL_RSP) { +// tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); +// } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) { +// tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code); +// } +// +// if (code < 0) { +// return -1; +// } +// +// int32_t tlen = sizeof(SMqRspHead) + len; +// void* buf = rpcMallocCont(tlen); +// if (buf == NULL) { +// return -1; +// } +// +// ((SMqRspHead*)buf)->mqMsgType = type; +// ((SMqRspHead*)buf)->epoch = pReq->epoch; +// ((SMqRspHead*)buf)->consumerId = pReq->consumerId; +// 
+// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); +// +// SEncoder encoder = {0}; +// tEncoderInit(&encoder, abuf, len); +// +// if (type == TMQ_MSG_TYPE__POLL_RSP) { +// tEncodeSMqDataRsp(&encoder, pRsp); +// } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) { +// tEncodeSTaosxRsp(&encoder, (STaosxRsp*) pRsp); +// } +// +// tEncoderClear(&encoder); +// +// SRpcMsg rsp = { +// .info = pMsg->info, +// .pCont = buf, +// .contLen = tlen, +// .code = 0, +// }; +// +// tmsgSendRsp(&rsp); + doSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type); char buf1[80] = {0}; char buf2[80] = {0}; tFormatOffset(buf1, 80, &pRsp->reqOffset); tFormatOffset(buf2, 80, &pRsp->rspOffset); - tqDebug("taosx rsp, vgId:%d, consumer:0x%" PRIx64 " (epoch %d) send rsp, numOfBlks:%d, req:%s, rsp:%s", + tqDebug("vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s", TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); + return 0; } +//int32_t tqSendTaosxRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const STaosxRsp* pRsp) { +//#if 0 +// A(taosArrayGetSize(pRsp->blockData) == pRsp->blockNum); +// A(taosArrayGetSize(pRsp->blockDataLen) == pRsp->blockNum); +// +// if (pRsp->withSchema) { +// A(taosArrayGetSize(pRsp->blockSchema) == pRsp->blockNum); +// } else { +// A(taosArrayGetSize(pRsp->blockSchema) == 0); +// } +// +// if (pRsp->reqOffset.type == TMQ_OFFSET__LOG) { +// if (pRsp->blockNum > 0) { +// A(pRsp->rspOffset.version > pRsp->reqOffset.version); +// } else { +// A(pRsp->rspOffset.version >= pRsp->reqOffset.version); +// } +// } +//#endif +// +// int32_t len = 0; +// int32_t code = 0; +// tEncodeSize(tEncodeSTaosxRsp, pRsp, len, code); +// if (code < 0) { +// return -1; +// } +// +// int32_t tlen = sizeof(SMqRspHead) + len; +// void* buf = rpcMallocCont(tlen); +// if (buf == NULL) { +// terrno = TSDB_CODE_OUT_OF_MEMORY; +// return -1; +// } +// +// ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__TAOSX_RSP; +// ((SMqRspHead*)buf)->epoch = pReq->epoch; +// ((SMqRspHead*)buf)->consumerId = pReq->consumerId; +// +// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); +// +// SEncoder encoder = {0}; +// tEncoderInit(&encoder, abuf, len); +// tEncodeSTaosxRsp(&encoder, pRsp); +// tEncoderClear(&encoder); +// +// SRpcMsg rsp = { +// .info = pMsg->info, +// .pCont = buf, +// .contLen = tlen, +// .code = 0, +// }; +// +// tmsgSendRsp(&rsp); +// +// char buf1[80] = {0}; +// char buf2[80] = {0}; +// tFormatOffset(buf1, 80, &pRsp->reqOffset); +// tFormatOffset(buf2, 80, &pRsp->rspOffset); +// +// tqDebug("taosx rsp, vgId:%d, consumer:0x%" PRIx64 " (epoch %d) send rsp, numOfBlks:%d, req:%s, rsp:%s", +// TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); +// return 0; +//} + static FORCE_INLINE bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) { return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG && pLeft->val.version <= pRight->val.version; @@ -441,9 +516,7 @@ static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t su } #endif - /*A(subType == TOPIC_SUB_TYPE__COLUMN);*/ pRsp->withSchema = false; - return 0; } @@ -506,7 +579,7 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, offset reset to %" PRId64, consumerId, pHandle->subKey, TD_VID(pTq->pVnode), 
dataRsp.rspOffset.version); - int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); tDeleteSMqDataRsp(&dataRsp); *pBlockReturned = true; @@ -515,7 +588,8 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand STaosxRsp taosxRsp = {0}; tqInitTaosxRsp(&taosxRsp, pRequest); tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - int32_t code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); +// int32_t code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); tDeleteSTaosxRsp(&taosxRsp); *pBlockReturned = true; @@ -570,13 +644,13 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* // till now, all data has been transferred to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { - code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp); + code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); taosWUnLockLatch(&pTq->pushLock); return code; } taosWUnLockLatch(&pTq->pushLock); - code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP); // NOTE: this pHandle->consumerId may have been changed already. tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 @@ -611,7 +685,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* } if (taosxRsp.blockNum > 0) { - code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); tDeleteSTaosxRsp(&taosxRsp); return code; } else { @@ -622,13 +696,14 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* ",version:%" PRId64, consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid, taosxRsp.rspOffset.version); - } + } else { - if (offset.type == TMQ_OFFSET__LOG) { +// if (offset.type == TMQ_OFFSET__LOG) { int64_t fetchVer = offset.version + 1; pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); if (pCkHead == NULL) { tDeleteSTaosxRsp(&taosxRsp); + terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } @@ -646,14 +721,18 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead) < 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return code; +// if (terrno == 0) { // failed to seek to given ver, but no errors happen. 
+// code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, (SMqDataRsp*) &taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); +// return code; +// } else { // error happens, return to consumers + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return code; +// } } SWalCont* pHead = &pCkHead->head; - tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", consumerId, pRequest->epoch, vgId, fetchVer, pHead->msgType); @@ -663,6 +742,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* .msgLen = pHead->bodyLen - sizeof(SSubmitReq2Msg), .ver = pHead->version, }; + if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp) < 0) { tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", consumerId, vgId, pRequest->subKey); @@ -671,7 +751,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (taosxRsp.blockNum > 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); tDeleteSTaosxRsp(&taosxRsp); taosMemoryFreeClear(pCkHead); return code; @@ -1027,15 +1107,15 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->tbSink.vnode = pTq->pVnode; pTask->tbSink.tbSinkFunc = tqSinkToTablePipeline2; - int32_t version = 1; + int32_t ver1 = 1; SMetaInfo info = {0}; int32_t code = metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL); if (code == TSDB_CODE_SUCCESS) { - version = info.skmVer; + ver1 = info.skmVer; } pTask->tbSink.pTSchema = - tBuildTSchema(pTask->tbSink.pSchemaWrapper->pSchema, pTask->tbSink.pSchemaWrapper->nCols, version); + tBuildTSchema(pTask->tbSink.pSchemaWrapper->pSchema, pTask->tbSink.pSchemaWrapper->nCols, ver1); ASSERT(pTask->tbSink.pTSchema); } diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index c0ed787176..1ba44739d6 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -209,6 +209,7 @@ int32_t tqPushMsgNew(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) { void* pReq = POINTER_SHIFT(msg, sizeof(SSubmitReq2Msg)); int32_t len = msgLen - sizeof(SSubmitReq2Msg); + int32_t vgId = TD_VID(pTq->pVnode); if (msgType == TDMT_VND_SUBMIT) { // lock push mgr to avoid potential msg lost @@ -217,7 +218,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) int32_t numOfRegisteredPush = taosHashGetSize(pTq->pPushMgr); if (numOfRegisteredPush > 0) { tqDebug("vgId:%d tq push msg version:%" PRId64 " type:%s, head:%p, body:%p len:%d, numOfPushed consumers:%d", - pTq->pVnode->config.vgId, ver, TMSG_INFO(msgType), msg, pReq, len, numOfRegisteredPush); + vgId, ver, TMSG_INFO(msgType), msg, pReq, len, numOfRegisteredPush); SArray* cachedKeys = taosArrayInit(0, sizeof(void*)); SArray* cachedKeyLens = taosArrayInit(0, sizeof(size_t)); @@ -239,7 +240,10 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) void* pIter = NULL; while (1) { pIter = taosHashIterate(pTq->pPushMgr, pIter); - if (pIter == NULL) break; + if (pIter == NULL) { + break; + } + STqPushEntry* pPushEntry = *(STqPushEntry**)pIter; STqHandle* pHandle = taosHashGet(pTq->pHandle, pPushEntry->subKey, 
strlen(pPushEntry->subKey)); @@ -248,23 +252,23 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) continue; } - if (pPushEntry->dataRsp.reqOffset.version >= ver) { - tqDebug("vgId:%d, push entry req version %" PRId64 ", while push version %" PRId64 ", skip", - pTq->pVnode->config.vgId, pPushEntry->dataRsp.reqOffset.version, ver); + SMqDataRsp* pRsp = pPushEntry->pDataRsp; + if (pRsp->reqOffset.version >= ver) { + tqDebug("vgId:%d, push entry req version %" PRId64 ", while push version %" PRId64 ", skip", vgId, + pRsp->reqOffset.version, ver); continue; } STqExecHandle* pExec = &pHandle->execHandle; qTaskInfo_t task = pExec->task; - SMqDataRsp* pRsp = &pPushEntry->dataRsp; - // prepare scan mem data SPackedData submit = { .msgStr = data, .msgLen = len, .ver = ver, }; + qStreamSetScanMemData(task, submit); // here start to scan submit block to extract the subscribed data @@ -272,7 +276,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) SSDataBlock* pDataBlock = NULL; uint64_t ts = 0; if (qExecTask(task, &pDataBlock, &ts) < 0) { - tqDebug("vgId:%d, tq exec error since %s", pTq->pVnode->config.vgId, terrstr()); + tqDebug("vgId:%d, tq exec error since %s", vgId, terrstr()); } if (pDataBlock == NULL) { @@ -283,11 +287,11 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) pRsp->blockNum++; } - tqDebug("vgId:%d, tq handle push, subkey:%s, block num:%d", pTq->pVnode->config.vgId, pPushEntry->subKey, - pRsp->blockNum); + tqDebug("vgId:%d, tq handle push, subkey:%s, block num:%d", vgId, pPushEntry->subKey, pRsp->blockNum); if (pRsp->blockNum > 0) { // set offset tqOffsetResetToLog(&pRsp->rspOffset, ver); + // remove from hash size_t kLen; void* key = taosHashGetKey(pIter, &kLen); @@ -309,6 +313,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) tqError("vgId:%d, tq push hash remove key error, key: %s", pTq->pVnode->config.vgId, (char*)key); } } + taosArrayDestroyP(cachedKeys, (FDelete)taosMemoryFree); taosArrayDestroy(cachedKeyLens); taosMemoryFree(data); @@ -334,9 +339,9 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) }; tqDebug("tq copy write msg %p %d %" PRId64 " from %p", data, len, ver, pReq); - tqProcessSubmitReq(pTq, submit); } + if (msgType == TDMT_VND_DELETE) { tqProcessDelReq(pTq, POINTER_SHIFT(msg, sizeof(SMsgHead)), msgLen - sizeof(SMsgHead), ver); } @@ -346,7 +351,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) } int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, - SMqDataRsp* pDataRsp) { + SMqDataRsp* pDataRsp, int32_t type) { uint64_t consumerId = pRequest->consumerId; int32_t vgId = TD_VID(pTq->pVnode); STqHandle* pTqHandle = pHandle; @@ -359,14 +364,22 @@ int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, return -1; } - pPushEntry->pInfo = pRpcMsg->info; + pPushEntry->info = pRpcMsg->info; memcpy(pPushEntry->subKey, pTqHandle->subKey, TSDB_SUBSCRIBE_KEY_LEN); - pDataRsp->withTbName = 0; - memcpy(&pPushEntry->dataRsp, pDataRsp, sizeof(SMqDataRsp)); - pPushEntry->dataRsp.head.consumerId = consumerId; - pPushEntry->dataRsp.head.epoch = pRequest->epoch; - pPushEntry->dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP; + if (type == TMQ_MSG_TYPE__TAOSX_RSP) { + pPushEntry->pDataRsp = taosMemoryCalloc(1, sizeof(STaosxRsp)); + memcpy(pPushEntry->pDataRsp, pDataRsp, sizeof(STaosxRsp)); + } else if (type == 
TMQ_MSG_TYPE__POLL_RSP) { + pPushEntry->pDataRsp = taosMemoryCalloc(1, sizeof(SMqDataRsp)); + memcpy(pPushEntry->pDataRsp, pDataRsp, sizeof(SMqDataRsp)); + } + + SMqRspHead* pHead = &pPushEntry->pDataRsp->head; + pHead->consumerId = consumerId; + pHead->epoch = pRequest->epoch; + pHead->mqMsgType = type; + taosHashPut(pTq->pPushMgr, pTqHandle->subKey, strlen(pTqHandle->subKey), &pPushEntry, sizeof(void*)); tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s offset:%" PRId64 ", vgId:%d save handle to push mgr, total:%d", consumerId, @@ -377,12 +390,14 @@ int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, int32_t tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) { int32_t vgId = TD_VID(pTq->pVnode); STqPushEntry** pEntry = taosHashGet(pTq->pPushMgr, pKey, keyLen); + if (pEntry != NULL) { - uint64_t cId = (*pEntry)->dataRsp.head.consumerId; + uint64_t cId = (*pEntry)->pDataRsp->head.consumerId; ASSERT(consumerId == cId); tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s vgId:%d remove from push mgr, remains:%d", consumerId, (*pEntry)->subKey, vgId, taosHashGetSize(pTq->pPushMgr) - 1); + if (rspConsumer) { // rsp the old consumer with empty block. tqPushDataRsp(pTq, *pEntry); } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index baf74a83e4..ffab7d92b8 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -221,12 +221,12 @@ int32_t qSetSMAInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* readers, int32_t* numOfCols, SSchemaWrapper** pSchema) { if (msg == NULL) { // create raw scan - SExecTaskInfo* pTaskInfo = taosMemoryCalloc(1, sizeof(SExecTaskInfo)); if (NULL == pTaskInfo) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } + setTaskStatus(pTaskInfo, TASK_NOT_COMPLETED); pTaskInfo->cost.created = taosGetTimestampUs(); diff --git a/tests/system-test/7-tmq/subscribeDb1.py b/tests/system-test/7-tmq/subscribeDb1.py index c5ae44214a..89bf337a20 100644 --- a/tests/system-test/7-tmq/subscribeDb1.py +++ b/tests/system-test/7-tmq/subscribeDb1.py @@ -123,8 +123,9 @@ class TDTestCase: pre_insert = "insert into " sql = pre_insert - t = time.time() - startTs = int(round(t * 1000)) + # t = 1678609778776 #time.time() + t = 1600000000000 + startTs = t #int(round(t * 1000)) #tdLog.debug("doing insert data into stable:%s rows:%d ..."%(stbName, allRows)) for i in range(ctbNum): sql += " %s_%d values "%(stbName,i) From 8a9746d2bf82577e478fdc7faf869c4aa7dd4ab9 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 13 Mar 2023 18:29:04 +0800 Subject: [PATCH 23/26] fix(tmq): fix the invalid read. 
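The hunks below fix an invalid read in tmqHandleAllRsp(): the reqId logging added in PATCH 21/26 dereferenced pollRspWrapper (reqId, blockNum) after taosFreeQitem() had already released it, so the free is simply moved after the last access. Reduced to its shape as a self-contained snippet, with a hypothetical minimal wrapper type rather than the real SMqPollRspWrapper:

    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { uint64_t reqId; int32_t blockNum; } PollWrapper;

    int main(void) {
      PollWrapper *pWrapper = malloc(sizeof(*pWrapper));
      pWrapper->reqId = 0x1234;
      pWrapper->blockNum = 2;

      // broken ordering (what the old code effectively did):
      //   free(pWrapper);
      //   printf("blocks:%d, reqId:0x%" PRIx64 "\n", pWrapper->blockNum, pWrapper->reqId);  // reads freed memory
      // fixed ordering (what this patch does): read every field first, free last
      printf("blocks:%d, reqId:0x%" PRIx64 "\n", pWrapper->blockNum, pWrapper->reqId);
      free(pWrapper);
      return 0;
    }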
--- source/client/src/clientTmq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 3167a4c361..95722ae3e5 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1843,11 +1843,11 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); if (pollRspWrapper->taosxRsp.blockNum == 0) { - taosFreeQitem(pollRspWrapper); rspWrapper = NULL; tscDebug("consumer:0x%" PRIx64 " taosx empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, pollRspWrapper->reqId); pVg->vgIgnoreCnt = VG_POLL_IGNORE_TICK; + taosFreeQitem(pollRspWrapper); continue; } @@ -1859,12 +1859,13 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { pRsp = tmqBuildTaosxRspFromWrapper(pollRspWrapper); } - taosFreeQitem(pollRspWrapper); char buf[80]; tFormatOffset(buf, 80, &pVg->currentOffset); tscDebug("consumer:0x%" PRIx64 " process taosx poll rsp, vgId:%d, offset:%s, blocks:%d, reqId:0x%"PRIx64, tmq->consumerId, pVg->vgId, buf, pollRspWrapper->dataRsp.blockNum, pollRspWrapper->reqId); + + taosFreeQitem(pollRspWrapper); return pRsp; } else { From 8f1f423c9053456c6b543c4a9eddbd90970fa61c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 Mar 2023 00:37:09 +0800 Subject: [PATCH 24/26] fix(tmq): fix the error in tmq. --- source/client/src/clientTmq.c | 17 ++++++++--------- utils/test/c/tmqSim.c | 2 -- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 95722ae3e5..522f5cc1b9 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -24,8 +24,6 @@ #include "tref.h" #include "ttimer.h" -#define VG_POLL_IGNORE_TICK 100 - struct SMqMgmt { int8_t inited; tmr_h timer; @@ -134,7 +132,7 @@ typedef struct { int32_t vgId; int32_t vgStatus; int32_t vgSkipCnt; - int32_t vgIgnoreCnt; // once empty block is received, idle for ignoreCnt then start to poll data + int64_t emptyBlockReceiveTs; // once empty block is received, idle for ignoreCnt then start to poll data SEpSet epSet; } SMqClientVg; @@ -1403,7 +1401,7 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic .epSet = pVgEp->epSet, .vgStatus = TMQ_VG_STATUS__IDLE, .vgSkipCnt = 0, - .vgIgnoreCnt = 0, + .emptyBlockReceiveTs = 0, }; taosArrayPush(pTopic->vgs, &clientVg); @@ -1704,10 +1702,9 @@ static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { for (int j = 0; j < numOfVg; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); - if (pVg->vgIgnoreCnt > 0) { - pVg->vgIgnoreCnt -= 1; - tscTrace("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for %d tick before poll", tmq->consumerId, tmq->epoch, - pVg->vgId, pVg->vgIgnoreCnt); + if (taosGetTimestampMs() - pVg->emptyBlockReceiveTs < 100) { // less than 100ms + tscTrace("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for 100ms before start next poll", tmq->consumerId, tmq->epoch, + pVg->vgId); continue; } @@ -1846,9 +1843,11 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { rspWrapper = NULL; tscDebug("consumer:0x%" PRIx64 " taosx empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, pollRspWrapper->reqId); - pVg->vgIgnoreCnt = VG_POLL_IGNORE_TICK; + pVg->emptyBlockReceiveTs = taosGetTimestampMs(); taosFreeQitem(pollRspWrapper); continue; + } else { + pVg->emptyBlockReceiveTs = 0; // reset the ts } // build rsp diff 
--git a/utils/test/c/tmqSim.c b/utils/test/c/tmqSim.c index ce4bafb79b..ce61b80d41 100644 --- a/utils/test/c/tmqSim.c +++ b/utils/test/c/tmqSim.c @@ -735,9 +735,7 @@ void build_consumer(SThreadInfo* pInfo) { } pInfo->tmq = tmq_consumer_new(conf, NULL, 0); - tmq_conf_destroy(conf); - return; } From 9cdd52de84b29fd91b08245137fcf9d52ea8923c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 Mar 2023 11:50:02 +0800 Subject: [PATCH 25/26] fix(tmq): fix dead lock. --- source/client/src/clientTmq.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 522f5cc1b9..1a0118c108 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -538,11 +538,6 @@ static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicN .handle = NULL, }; - SEp* pEp = GET_ACTIVE_EP(&pVg->epSet); - tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d offset:%" PRId64 " prev:%" PRId64 ", ep:%s:%d, ordinal:%d/%d", - tmq->consumerId, pOffset->subKey, pVg->vgId, pOffset->val.version, pVg->committedOffset.version, pEp->fqdn, - pEp->port, index + 1, totalVgroups); - pMsgSendInfo->requestId = generateRequestId(); pMsgSendInfo->requestObjRefId = 0; pMsgSendInfo->param = pParam; @@ -553,6 +548,12 @@ static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicN atomic_add_fetch_32(&pParamSet->waitingRspNum, 1); atomic_add_fetch_32(&pParamSet->totalRspNum, 1); + SEp* pEp = GET_ACTIVE_EP(&pVg->epSet); + tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d send offset:%" PRId64 " prev:%" PRId64 + ", ep:%s:%d, ordinal:%d/%d, req:0x%" PRIx64, + tmq->consumerId, pOffset->subKey, pVg->vgId, pOffset->val.version, pVg->committedOffset.version, pEp->fqdn, + pEp->port, index + 1, totalVgroups, pMsgSendInfo->requestId); + int64_t transporterId = 0; asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &pVg->epSet, &transporterId, pMsgSendInfo); return 0; @@ -627,11 +628,11 @@ HANDLE_RSP: } if (!async) { + taosThreadMutexUnlock(&tmq->lock); tsem_wait(&pParamSet->rspSem); code = pParamSet->rspErr; tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); - taosThreadMutexUnlock(&tmq->lock); return code; } else { code = 0; @@ -736,7 +737,7 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, static int32_t tmqCommitInner(tmq_t* tmq, const TAOS_RES* msg, int8_t automatic, int8_t async, tmq_commit_cb* userCb, void* userParam) { - if (msg) { // user invoked commit? + if (msg) { // user invoked commit return tmqCommitMsgImpl(tmq, msg, async, userCb, userParam); } else { // this for auto commit return tmqCommitConsumerImpl(tmq, automatic, async, userCb, userParam); From 15cbbd3a144fb57b657978d4ee2d0a736075e569 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 Mar 2023 21:25:52 +0800 Subject: [PATCH 26/26] fix(query): fix error in show cluster alive. 
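The one-line change below bumps the expected column count in existLeaderRole() from 13 to 14; the per-vgroup status rows this alive check inspects apparently carry one more column than before, and the stale exact-match guard made the check fail. Note that the comment above the condition still enumerates only 13 column names. As a hedged follow-up thought, not part of this patch, a guard that only demands the columns actually read would not break the next time a trailing column is appended; hasEnoughFields and requiredFields below are hypothetical names:

    #include <stdbool.h>

    // Accept the row as long as it carries at least the columns the caller
    // dereferences; an exact-count check breaks whenever a column is appended.
    static bool hasEnoughFields(int nFields) {
      const int requiredFields = 14;   // assumption: highest column index the alive check reads
      return nFields >= requiredFields;
    }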
--- source/libs/command/src/command.c | 2 +- tests/system-test/6-cluster/5dnode1mnode.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/command/src/command.c b/source/libs/command/src/command.c index c862a75ed3..58c43829cf 100644 --- a/source/libs/command/src/command.c +++ b/source/libs/command/src/command.c @@ -307,7 +307,7 @@ static void setCreateDBResultIntoDataBlock(SSDataBlock* pBlock, char* dbName, ch bool existLeaderRole(TAOS_ROW row, TAOS_FIELD* fields, int nFields) { // vgroup_id | db_name | tables | v1_dnode | v1_status | v2_dnode | v2_status | v3_dnode | v3_status | v4_dnode | // v4_status | cacheload | tsma | - if (nFields != 13) { + if (nFields != 14) { return false; } diff --git a/tests/system-test/6-cluster/5dnode1mnode.py b/tests/system-test/6-cluster/5dnode1mnode.py index f21fae0d7b..d0d9372298 100644 --- a/tests/system-test/6-cluster/5dnode1mnode.py +++ b/tests/system-test/6-cluster/5dnode1mnode.py @@ -29,7 +29,7 @@ class TDTestCase: self.master_dnode = self.TDDnodes.dnodes[0] self.host=self.master_dnode.cfgDict["fqdn"] conn1 = taos.connect(self.master_dnode.cfgDict["fqdn"] , config=self.master_dnode.cfgDir) - tdSql.init(conn1.cursor()) + tdSql.init(conn1.cursor(), True) def getBuildPath(self):