From 53e6d49e8c5762d53d5996b2d253198e9bf7b18d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Mar 2023 22:29:42 +0800 Subject: [PATCH 01/28] other: add some logs. --- source/libs/executor/src/scanoperator.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 7961d5518a..b6b9fb6b90 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -984,6 +984,7 @@ void resetTableScanInfo(STableScanInfo* pTableScanInfo, STimeWindow* pWin) { pTableScanInfo->scanTimes = 0; pTableScanInfo->currentGroupId = -1; tsdbReaderClose(pTableScanInfo->base.dataReader); + qDebug("1"); pTableScanInfo->base.dataReader = NULL; } @@ -1142,6 +1143,7 @@ static SSDataBlock* doRangeScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32 pInfo->updateWin = (STimeWindow){.skey = INT64_MIN, .ekey = INT64_MAX}; STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; tsdbReaderClose(pTableScanInfo->base.dataReader); + qDebug("2"); pTableScanInfo->base.dataReader = NULL; return NULL; } @@ -1615,6 +1617,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { if (!pTaskInfo->streamInfo.returned) { STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; tsdbReaderClose(pTSInfo->base.dataReader); + qDebug("3"); pTSInfo->base.dataReader = NULL; tqOffsetResetToLog(&pTaskInfo->streamInfo.prepareStatus, pTaskInfo->streamInfo.snapshotVer); qDebug("queue scan tsdb over, switch to wal ver %" PRId64 "", pTaskInfo->streamInfo.snapshotVer + 1); @@ -1760,6 +1763,8 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { /*resetTableScanInfo(pTSInfo, pWin);*/ tsdbReaderClose(pTSInfo->base.dataReader); + qDebug("4"); + pTSInfo->base.dataReader = NULL; pInfo->pTableScanOp->status = OP_OPENED; @@ -1805,6 +1810,8 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__NONE; STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; tsdbReaderClose(pTSInfo->base.dataReader); + qDebug("5"); + pTSInfo->base.dataReader = NULL; pTSInfo->base.cond.startVersion = -1; @@ -2601,6 +2608,8 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { return pBlock; } + qDebug("8"); + tsdbReaderClose(pInfo->base.dataReader); pInfo->base.dataReader = NULL; return NULL; From d54ae0c840991998f4fcf8846383179dd2130885 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Mar 2023 17:49:20 +0800 Subject: [PATCH 02/28] fix(tmq): free reader after check the reader status. --- source/dnode/vnode/src/inc/tq.h | 31 +++++++++---------------- source/dnode/vnode/src/tq/tq.c | 20 ++++++++++++---- source/dnode/vnode/src/tq/tqExec.c | 15 +++++++++++- source/libs/executor/src/executor.c | 2 -- source/libs/executor/src/executorimpl.c | 21 +++++++++++++++-- source/libs/executor/src/scanoperator.c | 2 +- 6 files changed, 61 insertions(+), 30 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 792fed2309..1172062e9b 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -79,8 +79,7 @@ typedef struct { } STqExecDb; typedef struct { - int8_t subType; - + int8_t subType; STqReader* pExecReader; qTaskInfo_t task; union { @@ -89,27 +88,19 @@ typedef struct { STqExecDb execDb; }; int32_t numOfCols; // number of out pout column, temporarily used + bool stop; // denote if needs to be stopped or not } STqExecHandle; typedef struct { - // info - char subKey[TSDB_SUBSCRIBE_KEY_LEN]; - int64_t consumerId; - int32_t epoch; - int8_t fetchMeta; - - int64_t snapshotVer; - - SWalReader* pWalReader; - - SWalRef* pRef; - - // push - STqPushHandle pushHandle; - - // exec - STqExecHandle execHandle; - + char subKey[TSDB_SUBSCRIBE_KEY_LEN]; + int64_t consumerId; + int32_t epoch; + int8_t fetchMeta; + int64_t snapshotVer; + SWalReader* pWalReader; + SWalRef* pRef; + STqPushHandle pushHandle; // push + STqExecHandle execHandle; // exec } STqHandle; typedef struct { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 4d21a2e7f3..026acd0e64 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -569,16 +569,19 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { SMqDataRsp dataRsp = {0}; tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); + // lock taosWLockLatch(&pTq->pushLock); if (tqScanData(pTq, pHandle, &dataRsp, &fetchOffsetNew) < 0) { return -1; } + // todo handle the case where re-balance occurs. // till now, all data has been rsp to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && - dataRsp.reqOffset.version == dataRsp.rspOffset.version) { + dataRsp.reqOffset.version == dataRsp.rspOffset.version && (pHandle->execHandle.stop != false)) { STqPushEntry* pPushEntry = taosMemoryCalloc(1, sizeof(STqPushEntry)); + if (pPushEntry != NULL) { pPushEntry->pInfo = pMsg->info; memcpy(pPushEntry->subKey, pHandle->subKey, TSDB_SUBSCRIBE_KEY_LEN); @@ -591,17 +594,21 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s offset:%" PRId64 ", vgId:%d save handle to push mgr", consumerId, pHandle->subKey, dataRsp.reqOffset.version, TD_VID(pTq->pVnode)); + // unlock taosWUnLockLatch(&pTq->pushLock); return 0; } } - taosWUnLockLatch(&pTq->pushLock); + taosWUnLockLatch(&pTq->pushLock); if (tqSendDataRsp(pTq, pMsg, &req, &dataRsp) < 0) { code = -1; } + pHandle->execHandle.stop = false; + + //NOTE: this pHandle->consumerId may have been changed already. tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 ", ts:%" PRId64 "", consumerId, pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.blockNum, dataRsp.rspOffset.type, dataRsp.rspOffset.uid, dataRsp.rspOffset.ts); @@ -610,6 +617,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return code; } + // todo handle the case where re-balance occurs. // for taosx SMqMetaRsp metaRsp = {0}; STaosxRsp taosxRsp = {0}; @@ -894,11 +902,14 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } } else { // TODO handle qmsg and exec modification - tqInfo("update the consumer info, old consumer id:0x%"PRIx64", new Id:0x%"PRIx64, pHandle->consumerId, req.newConsumerId); + tqInfo("vgId:%d switch consumer from Id:0x%"PRIx64" to Id:0x%"PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); atomic_store_32(&pHandle->epoch, -1); atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); taosMemoryFree(req.qmsg); + + pHandle->execHandle.stop = true; + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { qStreamCloseTsdbReader(pHandle->execHandle.task); } @@ -906,7 +917,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { return -1; } - // close handle + + pHandle->execHandle.stop = false; } return 0; diff --git a/source/dnode/vnode/src/tq/tqExec.c b/source/dnode/vnode/src/tq/tqExec.c index f97c5ce93c..81b68f8473 100644 --- a/source/dnode/vnode/src/tq/tqExec.c +++ b/source/dnode/vnode/src/tq/tqExec.c @@ -46,11 +46,13 @@ static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, STaosxRsp* pRsp static int32_t tqAddTbNameToRsp(const STQ* pTq, int64_t uid, STaosxRsp* pRsp, int32_t n) { SMetaReader mr = {0}; metaReaderInit(&mr, pTq->pVnode->pMeta, 0); + // TODO add reference to gurantee success if (metaGetTableEntryByUidCache(&mr, uid) < 0) { metaReaderClear(&mr); return -1; } + for (int32_t i = 0; i < n; i++) { char* tbName = taosStrdup(mr.me.name); taosArrayPush(pRsp->blockTbName, &tbName); @@ -83,13 +85,16 @@ int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffs while (1) { SSDataBlock* pDataBlock = NULL; uint64_t ts = 0; + tqDebug("vgId:%d, tmq task start to execute", pTq->pVnode->config.vgId); if (qExecTask(task, &pDataBlock, &ts) < 0) { tqError("vgId:%d, task exec error since %s", pTq->pVnode->config.vgId, terrstr()); return -1; } + tqDebug("vgId:%d, tmq task executed, get %p", pTq->pVnode->config.vgId, pDataBlock); + // current scan should be stopped asap, since the rebalance occurs. if (pDataBlock == NULL) { break; } @@ -99,7 +104,15 @@ int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffs if (pOffset->type == TMQ_OFFSET__SNAPSHOT_DATA) { rowCnt += pDataBlock->info.rows; - if (rowCnt >= 4096) break; + if (rowCnt >= 4096) { + break; + } + } + + if (pExec->stop) { + tqDebug("vgId:%d, current vgroups has been transferred to other consumer, return results asap", + TD_VID(pTq->pVnode)); + break; } } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 9fe0f4f8a7..d00f9d9843 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -709,7 +709,6 @@ void qStopTaskOperators(SExecTaskInfo* pTaskInfo) { int32_t qAsyncKillTask(qTaskInfo_t qinfo, int32_t rspCode) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)qinfo; - if (pTaskInfo == NULL) { return TSDB_CODE_QRY_INVALID_QHANDLE; } @@ -717,7 +716,6 @@ int32_t qAsyncKillTask(qTaskInfo_t qinfo, int32_t rspCode) { qDebug("%s execTask async killed", GET_TASKID(pTaskInfo)); setTaskKilled(pTaskInfo, rspCode); - qStopTaskOperators(pTaskInfo); return TSDB_CODE_SUCCESS; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 0dcefec93d..54e443d1da 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2771,11 +2771,17 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, SStreamState* pSta } void qStreamCloseTsdbReader(void* task) { - if (task == NULL) return; + if (task == NULL) { + return; + } + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)task; SOperatorInfo* pOp = pTaskInfo->pRoot; - qDebug("stream close tsdb reader, reset status uid %" PRId64 " ts %" PRId64, pTaskInfo->streamInfo.lastStatus.uid, + + qDebug("stream close tsdb reader, reset status uid:%" PRId64 " ts:%" PRId64, pTaskInfo->streamInfo.lastStatus.uid, pTaskInfo->streamInfo.lastStatus.ts); + + // todo refactor, other thread may already use this read to extract data. pTaskInfo->streamInfo.lastStatus = (STqOffsetVal){0}; while (pOp->numOfDownstream == 1 && pOp->pDownstream[0]) { SOperatorInfo* pDownstreamOp = pOp->pDownstream[0]; @@ -2783,8 +2789,19 @@ void qStreamCloseTsdbReader(void* task) { SStreamScanInfo* pInfo = pDownstreamOp->info; if (pInfo->pTableScanOp) { STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; + + setOperatorCompleted(pInfo->pTableScanOp); + while(pTaskInfo->owner != 0) { + taosMsleep(100); + qDebug("wait for the reader stopping"); + } + tsdbReaderClose(pTSInfo->base.dataReader); pTSInfo->base.dataReader = NULL; + + // restore the status, todo refactor. + pInfo->pTableScanOp->status = OP_OPENED; + pTaskInfo->status = TASK_NOT_COMPLETED; return; } } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 8ccd0c78fc..e98c27fb88 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -751,7 +751,7 @@ static SSDataBlock* doTableScan(SOperatorInfo* pOperator) { while (1) { SSDataBlock* result = doGroupedTableScan(pOperator); - if (result) { + if (result || (pOperator->status == OP_EXEC_DONE)) { return result; } From 41f26148a20f3cc900152974a91bb3fd04737369 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 8 Mar 2023 12:09:54 +0800 Subject: [PATCH 03/28] fix(mq): re-balance consumer if it is in an in-balance status. --- source/dnode/mnode/impl/inc/mndConsumer.h | 4 ++-- source/dnode/mnode/impl/src/mndConsumer.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndConsumer.h b/source/dnode/mnode/impl/inc/mndConsumer.h index 1176e1af0b..99b0d06936 100644 --- a/source/dnode/mnode/impl/inc/mndConsumer.h +++ b/source/dnode/mnode/impl/inc/mndConsumer.h @@ -24,10 +24,10 @@ extern "C" { enum { MQ_CONSUMER_STATUS__MODIFY = 1, - MQ_CONSUMER_STATUS__MODIFY_IN_REB, + MQ_CONSUMER_STATUS__MODIFY_IN_REB, // this value is not used anymore MQ_CONSUMER_STATUS__READY, MQ_CONSUMER_STATUS__LOST, - MQ_CONSUMER_STATUS__LOST_IN_REB, + MQ_CONSUMER_STATUS__LOST_IN_REB, // this value is not used anymore MQ_CONSUMER_STATUS__LOST_REBD, MQ_CONSUMER_STATUS__REMOVED, }; diff --git a/source/dnode/mnode/impl/src/mndConsumer.c b/source/dnode/mnode/impl/src/mndConsumer.c index f1ef83aca5..4b6de316a1 100644 --- a/source/dnode/mnode/impl/src/mndConsumer.c +++ b/source/dnode/mnode/impl/src/mndConsumer.c @@ -317,7 +317,7 @@ static int32_t mndProcessMqTimerMsg(SRpcMsg *pMsg) { taosArrayPush(pRebSub->removedConsumers, &pConsumer->consumerId); } taosRUnLockLatch(&pConsumer->lock); - } else if (status == MQ_CONSUMER_STATUS__MODIFY) { + } else if (status == MQ_CONSUMER_STATUS__MODIFY || status == MQ_CONSUMER_STATUS__MODIFY_IN_REB) { taosRLockLatch(&pConsumer->lock); int32_t newTopicNum = taosArrayGetSize(pConsumer->rebNewTopics); From 03aa391859686b32d00bc97a414ad7ee082472f2 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 9 Mar 2023 23:03:51 +0800 Subject: [PATCH 04/28] fix(tmq): add lock before close the tsdbreader. --- source/dnode/vnode/src/inc/tq.h | 1 + source/dnode/vnode/src/inc/vnodeInt.h | 3 + source/dnode/vnode/src/tq/tq.c | 340 +++++++++++++++----------- source/dnode/vnode/src/tq/tqMeta.c | 2 +- source/dnode/vnode/src/tq/tqPush.c | 48 ++++ 5 files changed, 246 insertions(+), 148 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 1172062e9b..62d6a40333 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -101,6 +101,7 @@ typedef struct { SWalRef* pRef; STqPushHandle pushHandle; // push STqExecHandle execHandle; // exec + int8_t execStatus; // this handle is used to handle the poll requirement } STqHandle; typedef struct { diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index c0d017e350..09ff844d95 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -192,6 +192,9 @@ void tqCleanUp(); STQ* tqOpen(const char* path, SVnode* pVnode); void tqClose(STQ*); int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); +int tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp); +int tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); + int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 026acd0e64..14bc51eb2c 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -463,152 +463,120 @@ static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, const SMqPollReq* pReq) { return 0; } -int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { - SMqPollReq req = {0}; - int32_t code = 0; - STqOffsetVal fetchOffsetNew; - SWalCkHead* pCkHead = NULL; +static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, + SRpcMsg* pMsg, bool* pBlockReturned) { + uint64_t consumerId = pRequest->consumerId; + STqOffsetVal reqOffset = pRequest->reqOffset; + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, pRequest->subKey); + *pBlockReturned = false; - if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) { - tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen); - return -1; - } + // In this vnode, data has been polled by consumer for this topic, so let's continue from the last offset value. + if (pOffset != NULL) { + *pOffsetVal = pOffset->val; - int64_t consumerId = req.consumerId; - int32_t reqEpoch = req.epoch; - STqOffsetVal reqOffset = req.reqOffset; - - // 1. find handle - STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); - if (pHandle == NULL) { - tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s not found", consumerId, TD_VID(pTq->pVnode), - req.subKey); - return -1; - } - - // 2. check rebalance - if (pHandle->consumerId != consumerId) { - tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, - consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); - terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; - return -1; - } - - // update epoch if need - int32_t savedEpoch = atomic_load_32(&pHandle->epoch); - while (savedEpoch < reqEpoch) { - tqDebug("tmq poll: consumer:0x%"PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); - savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); - } - - char buf[80]; - tFormatOffset(buf, 80, &reqOffset); - tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s", consumerId, - req.epoch, pHandle->subKey, TD_VID(pTq->pVnode), buf); - - // 2.reset offset if needed - if (reqOffset.type > 0) { - fetchOffsetNew = reqOffset; + char formatBuf[80]; + tFormatOffset(formatBuf, 80, pOffsetVal); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, prev offset found, offset reset to %s and continue.", + consumerId, pHandle->subKey, TD_VID(pTq->pVnode), formatBuf); + return 0; } else { - STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, req.subKey); - if (pOffset != NULL) { - fetchOffsetNew = pOffset->val; - char formatBuf[80]; - tFormatOffset(formatBuf, 80, &fetchOffsetNew); - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vg %d, offset reset to %s", consumerId, pHandle->subKey, - TD_VID(pTq->pVnode), formatBuf); - } else { - if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) { - if (req.useSnapshot) { - if (pHandle->fetchMeta) { - tqOffsetResetToMeta(&fetchOffsetNew, 0); - } else { - tqOffsetResetToData(&fetchOffsetNew, 0, 0); - } + // no poll occurs in this vnode for this topic, let's seek to the right offset value. + if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) { + if (pRequest->useSnapshot) { + if (pHandle->fetchMeta) { + tqOffsetResetToMeta(pOffsetVal, 0); } else { - pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); - if (pHandle->pRef == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - tqOffsetResetToLog(&fetchOffsetNew, pHandle->pRef->refVer - 1); + tqOffsetResetToData(pOffsetVal, 0, 0); + } + } else { + pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); + if (pHandle->pRef == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; } - } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); - tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, offset reset to %" PRId64, consumerId, - pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.rspOffset.version); - if (tqSendDataRsp(pTq, pMsg, &req, &dataRsp) < 0) { - code = -1; - } - tDeleteSMqDataRsp(&dataRsp); - return code; - } else { - STaosxRsp taosxRsp = {0}; - tqInitTaosxRsp(&taosxRsp, &req); - tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - if (tqSendTaosxRsp(pTq, pMsg, &req, &taosxRsp) < 0) { - code = -1; - } - tDeleteSTaosxRsp(&taosxRsp); - return code; - } - } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) { - tqError("tmq poll: subkey %s, no offset committed for consumer:0x%" PRIx64 - " in vg %d, subkey %s, reset none failed", - pHandle->subKey, consumerId, TD_VID(pTq->pVnode), req.subKey); - terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; - return -1; + tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1); } + } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + SMqDataRsp dataRsp = {0}; + tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); + + tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, offset reset to %" PRId64, consumerId, + pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.rspOffset.version); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); + tDeleteSMqDataRsp(&dataRsp); + + *pBlockReturned = true; + return code; + } else { + STaosxRsp taosxRsp = {0}; + tqInitTaosxRsp(&taosxRsp, pRequest); + tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); + int32_t code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); + tDeleteSTaosxRsp(&taosxRsp); + + *pBlockReturned = true; + return code; + } + } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) { + tqError("tmq poll: subkey %s, no offset committed for consumer:0x%" PRIx64 + " in vg %d, subkey %s, reset none failed", + pHandle->subKey, consumerId, TD_VID(pTq->pVnode), pRequest->subKey); + terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; + return -1; } } + return 0; +} + +static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) { + int32_t code = -1; + STqOffsetVal reqOffset = pRequest->reqOffset; + STqOffsetVal fetchOffsetNew; + SWalCkHead* pCkHead = NULL; + uint64_t consumerId = pRequest->consumerId; + + // 1. reset the offset if needed + if (reqOffset.type > 0) { + fetchOffsetNew = reqOffset; + } else { // handle the reset offset cases, according to the consumer's choice. + bool blockReturned = false; + code = extractResetOffsetVal(&fetchOffsetNew, pTq, pHandle, pRequest, pMsg, &blockReturned); + if (code != 0) { + return code; + } + + // empty block returned, quit + if (blockReturned) { + return 0; + } + } + + // this is a normal subscription requirement if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); + tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); // lock taosWLockLatch(&pTq->pushLock); - if (tqScanData(pTq, pHandle, &dataRsp, &fetchOffsetNew) < 0) { - return -1; - } + code = tqScanData(pTq, pHandle, &dataRsp, &fetchOffsetNew); - // todo handle the case where re-balance occurs. - // till now, all data has been rsp to consumer, new data needs to push client once arrived. + // till now, all data has been transferred to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && - dataRsp.reqOffset.version == dataRsp.rspOffset.version && (pHandle->execHandle.stop != false)) { - STqPushEntry* pPushEntry = taosMemoryCalloc(1, sizeof(STqPushEntry)); - - if (pPushEntry != NULL) { - pPushEntry->pInfo = pMsg->info; - memcpy(pPushEntry->subKey, pHandle->subKey, TSDB_SUBSCRIBE_KEY_LEN); - dataRsp.withTbName = 0; - memcpy(&pPushEntry->dataRsp, &dataRsp, sizeof(SMqDataRsp)); - pPushEntry->dataRsp.head.consumerId = consumerId; - pPushEntry->dataRsp.head.epoch = reqEpoch; - pPushEntry->dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP; - taosHashPut(pTq->pPushMgr, pHandle->subKey, strlen(pHandle->subKey) + 1, &pPushEntry, sizeof(void*)); - - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s offset:%" PRId64 ", vgId:%d save handle to push mgr", - consumerId, pHandle->subKey, dataRsp.reqOffset.version, TD_VID(pTq->pVnode)); - - // unlock - taosWUnLockLatch(&pTq->pushLock); - return 0; - } + dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { + code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp); + taosWUnLockLatch(&pTq->pushLock); + return code; } taosWUnLockLatch(&pTq->pushLock); - if (tqSendDataRsp(pTq, pMsg, &req, &dataRsp) < 0) { - code = -1; - } - + code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); pHandle->execHandle.stop = false; - //NOTE: this pHandle->consumerId may have been changed already. + // NOTE: this pHandle->consumerId may have been changed already. tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 ", ts:%" PRId64 "", consumerId, pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.blockNum, dataRsp.rspOffset.type, dataRsp.rspOffset.uid, dataRsp.rspOffset.ts); @@ -621,7 +589,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { // for taosx SMqMetaRsp metaRsp = {0}; STaosxRsp taosxRsp = {0}; - tqInitTaosxRsp(&taosxRsp, &req); + tqInitTaosxRsp(&taosxRsp, pRequest); if (fetchOffsetNew.type != TMQ_OFFSET__LOG) { if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, &fetchOffsetNew) < 0) { @@ -629,11 +597,9 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } if (metaRsp.metaRspLen > 0) { - if (tqSendMetaPollRsp(pTq, pMsg, &req, &metaRsp) < 0) { - code = -1; - } + code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send meta offset type:%d,uid:%" PRId64 - ",version:%" PRId64, + ",version:%" PRId64, consumerId, pHandle->subKey, TD_VID(pTq->pVnode), metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.version); taosMemoryFree(metaRsp.metaRsp); @@ -642,9 +608,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { } if (taosxRsp.blockNum > 0) { - if (tqSendTaosxRsp(pTq, pMsg, &req, &taosxRsp) < 0) { - code = -1; - } + code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); tDeleteSTaosxRsp(&taosxRsp); return code; } else { @@ -668,17 +632,19 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { walSetReaderCapacity(pHandle->pWalReader, 2048); while (1) { - savedEpoch = atomic_load_32(&pHandle->epoch); - if (savedEpoch > reqEpoch) { + // todo refactor: this is not correct. + int32_t savedEpoch = atomic_load_32(&pHandle->epoch); + if (savedEpoch > pRequest->epoch) { tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, vg %d offset %" PRId64 ", found new consumer epoch %d, discard req epoch %d", - consumerId, req.epoch, pHandle->subKey, TD_VID(pTq->pVnode), fetchVer, savedEpoch, reqEpoch); + consumerId, pRequest->epoch, pHandle->subKey, TD_VID(pTq->pVnode), fetchVer, savedEpoch, + pRequest->epoch); break; } if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead) < 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - if (tqSendTaosxRsp(pTq, pMsg, &req, &taosxRsp) < 0) { + if (tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp) < 0) { code = -1; } tDeleteSTaosxRsp(&taosxRsp); @@ -689,7 +655,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { SWalCont* pHead = &pCkHead->head; tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", consumerId, - req.epoch, TD_VID(pTq->pVnode), fetchVer, pHead->msgType); + pRequest->epoch, TD_VID(pTq->pVnode), fetchVer, pHead->msgType); if (pHead->msgType == TDMT_VND_SUBMIT) { SPackedData submit = { @@ -699,12 +665,12 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { }; if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp) < 0) { tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", consumerId, TD_VID(pTq->pVnode), - req.subKey); + pRequest->subKey); return -1; } if (taosxRsp.blockNum > 0 /* threshold */) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - if (tqSendTaosxRsp(pTq, pMsg, &req, &taosxRsp) < 0) { + if (tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp) < 0) { code = -1; } tDeleteSTaosxRsp(&taosxRsp); @@ -722,7 +688,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { metaRsp.resMsgType = pHead->msgType; metaRsp.metaRspLen = pHead->bodyLen; metaRsp.metaRsp = pHead->body; - if (tqSendMetaPollRsp(pTq, pMsg, &req, &metaRsp) < 0) { + if (tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp) < 0) { code = -1; taosMemoryFreeClear(pCkHead); tDeleteSTaosxRsp(&taosxRsp); @@ -741,6 +707,68 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return 0; } +int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { + SMqPollReq req = {0}; + if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) { + tqError("tDeserializeSMqPollReq %d failed", pMsg->contLen); + terrno = TSDB_CODE_INVALID_MSG; + return -1; + } + + int64_t consumerId = req.consumerId; + int32_t reqEpoch = req.epoch; + STqOffsetVal reqOffset = req.reqOffset; + int32_t vgId = TD_VID(pTq->pVnode); + + // 1. find handle + STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); + if (pHandle == NULL) { + tqError("tmq poll: consumer:0x%" PRIx64 " vgId:%d subkey %s not found", consumerId, vgId, req.subKey); + terrno = TSDB_CODE_INVALID_MSG; + return -1; + } + + int8_t oldVal = atomic_val_compare_exchange_8(&pHandle->execStatus, 0, 1); + + // other thread has started to re-balance this handle, return empty block for this poll + if (oldVal != 0) { + SMqDataRsp dataRsp = {0}; + tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); + int32_t code = tqSendDataRsp(pTq, pMsg, &req, &dataRsp); // here we return an empty block to client + tDeleteSMqDataRsp(&dataRsp); + return code; + } + + // pHandle->execStatus == 1, and we are safe now + // keep the epoch in the first plance, this value may be change by other threads. + int32_t savedEpoch = atomic_load_32(&pHandle->epoch); + + // 2. check rebalance status + if (pHandle->consumerId != consumerId) { + tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, + consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); + terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; + atomic_store_8(&pHandle->execStatus, 0); // reset the flag + return -1; + } + + // 3. update the epoch value + while (savedEpoch < reqEpoch) { + tqDebug("tmq poll: consumer:0x%"PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); + savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); + } + + char buf[80]; + tFormatOffset(buf, 80, &reqOffset); + tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s", consumerId, + req.epoch, pHandle->subKey, TD_VID(pTq->pVnode), buf); + + int32_t code = extractDataForMq(pTq, pHandle, &req, pMsg); + + atomic_store_8(&pHandle->execStatus, 0); // restore the flag + return code; +} + int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SMqVDeleteReq* pReq = (SMqVDeleteReq*)msg; @@ -813,7 +841,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tDecodeSMqRebVgReq(msg, &req); // todo lock - tqDebug("vgId:%d, tq process sub req %s", pTq->pVnode->config.vgId, req.subKey); + tqDebug("vgId:%d, tq process sub req %s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey, + req.oldConsumerId, req.newConsumerId); STqHandle* pHandle = taosHashGet(pTq->pHandle, req.subKey, strlen(req.subKey)); if (pHandle == NULL) { @@ -821,11 +850,13 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqError("vgId:%d, build new consumer handle %s for consumer:0x%" PRIx64 ", but old consumerId is %" PRId64 "", req.vgId, req.subKey, req.newConsumerId, req.oldConsumerId); } + if (req.newConsumerId == -1) { tqError("vgId:%d, tq invalid rebalance request, new consumerId %" PRId64 "", req.vgId, req.newConsumerId); taosMemoryFree(req.qmsg); return 0; } + STqHandle tqHandle = {0}; pHandle = &tqHandle; /*taosInitRWLatch(&pExec->lock);*/ @@ -853,6 +884,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg .initTqReader = true, .version = ver, }; + pHandle->snapshotVer = ver; if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { @@ -867,6 +899,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) { pHandle->pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); pHandle->execHandle.pExecReader = tqOpenReader(pTq->pVnode); + pHandle->execHandle.execDb.pFilterOutTbUid = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); buildSnapContext(handle.meta, handle.version, 0, pHandle->execHandle.subType, pHandle->fetchMeta, @@ -875,7 +908,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg pHandle->execHandle.task = qCreateQueueExecTaskInfo(NULL, &handle, NULL, NULL); } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { pHandle->pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); - pHandle->execHandle.execTb.suid = req.suid; SArray* tbUidList = taosArrayInit(0, sizeof(int64_t)); @@ -895,25 +927,39 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } taosHashPut(pTq->pHandle, req.subKey, strlen(req.subKey), pHandle, sizeof(STqHandle)); - tqDebug("try to persist handle %s consumer:0x%" PRIx64" , old consumer:0x%"PRIx64, req.subKey, pHandle->consumerId, - oldConsumerId); + tqDebug("try to persist handle %s consumer:0x%" PRIx64 " , old consumer:0x%" PRIx64, req.subKey, + pHandle->consumerId, oldConsumerId); if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { return -1; } } else { - // TODO handle qmsg and exec modification - tqInfo("vgId:%d switch consumer from Id:0x%"PRIx64" to Id:0x%"PRIx64, req.vgId, pHandle->consumerId, req.newConsumerId); +// ASSERT(pHandle->consumerId == req.oldConsumerId || req.oldConsumerId == -1 || pHandle->consumerId == -1); + if (pHandle->consumerId == req.newConsumerId) { // do nothing + tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); + atomic_store_32(&pHandle->epoch, -1); + atomic_add_fetch_32(&pHandle->epoch, 1); + return tqMetaSaveHandle(pTq, req.subKey, pHandle); + } + + tqInfo("vgId:%d switch consumer from Id:0x%" PRIx64 " to Id:0x%" PRIx64, req.vgId, pHandle->consumerId, + req.newConsumerId); + + taosWLockLatch(&pTq->pushLock); atomic_store_32(&pHandle->epoch, -1); + + // remove if it has been register in the push manager, and return one empty block to consumer + tqRemovePushEntry(pTq, req.subKey, (int32_t) strlen(req.subKey), pHandle->consumerId, true); + atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); taosMemoryFree(req.qmsg); pHandle->execHandle.stop = true; - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { qStreamCloseTsdbReader(pHandle->execHandle.task); } + taosWUnLockLatch(&pTq->pushLock); if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { return -1; } diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index a85e6e0a70..ce0aa144f9 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -283,7 +283,7 @@ int32_t tqMetaRestoreHandle(STQ* pTq) { tdbTbcMoveToFirst(pCur); while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { - STqHandle handle; + STqHandle handle = {0}; tDecoderInit(&decoder, (uint8_t*)pVal, vLen); tDecodeSTqHandle(&decoder, &handle); tDecoderClear(&decoder); diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 5a25d7e894..03bb14e447 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -344,3 +344,51 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) return 0; } + +int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, + SMqDataRsp* pDataRsp) { + uint64_t consumerId = pRequest->consumerId; + int32_t vgId = TD_VID(pTq->pVnode); + STqHandle* pTqHandle = pHandle; + + STqPushEntry* pPushEntry = taosMemoryCalloc(1, sizeof(STqPushEntry)); + if (pPushEntry == NULL) { + tqDebug("tmq poll: consumer:0x%" PRIx64 ", vgId:%d failed to malloc, size:%d", consumerId, vgId, + (int32_t)sizeof(STqPushEntry)); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pPushEntry->pInfo = pRpcMsg->info; + memcpy(pPushEntry->subKey, pTqHandle->subKey, TSDB_SUBSCRIBE_KEY_LEN); + pDataRsp->withTbName = 0; + + memcpy(&pPushEntry->dataRsp, pDataRsp, sizeof(SMqDataRsp)); + pPushEntry->dataRsp.head.consumerId = consumerId; + pPushEntry->dataRsp.head.epoch = pRequest->epoch; + pPushEntry->dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP; + taosHashPut(pTq->pPushMgr, pTqHandle->subKey, strlen(pTqHandle->subKey), &pPushEntry, sizeof(void*)); + + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s offset:%" PRId64 ", vgId:%d save handle to push mgr, total:%d", consumerId, + pTqHandle->subKey, pDataRsp->reqOffset.version, vgId, taosHashGetSize(pTq->pPushMgr)); + return 0; +} + +int32_t tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) { + int32_t vgId = TD_VID(pTq->pVnode); + STqPushEntry** pEntry = taosHashGet(pTq->pPushMgr, pKey, keyLen); + if (pEntry != NULL) { + uint64_t cId = (*pEntry)->dataRsp.head.consumerId; + ASSERT(consumerId == cId); + + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s vgId:%d remove from push mgr, remains:%d", consumerId, + (*pEntry)->subKey, vgId, taosHashGetSize(pTq->pPushMgr) - 1); + taosHashRemove(pTq->pPushMgr, pKey, keyLen); + + if (rspConsumer) { // rsp the old consumer with empty block. + tqPushDataRsp(pTq, *pEntry); + } + } + + return 0; +} From 09e84884be2a4bccd90f92915010fae883c84c39 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 9 Mar 2023 23:31:29 +0800 Subject: [PATCH 05/28] refactor: remove invalid assert --- source/dnode/vnode/src/tq/tq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 14bc51eb2c..543fa9dd76 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -933,7 +933,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg return -1; } } else { -// ASSERT(pHandle->consumerId == req.oldConsumerId || req.oldConsumerId == -1 || pHandle->consumerId == -1); if (pHandle->consumerId == req.newConsumerId) { // do nothing tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); atomic_store_32(&pHandle->epoch, -1); From 691cb08e886f63789f9715a5da1b644b6fca64aa Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 09:23:03 +0800 Subject: [PATCH 06/28] refactor: remove some unused attributes. --- source/dnode/vnode/src/inc/tq.h | 10 ++-- source/dnode/vnode/src/tq/tq.c | 90 +++++++++++++----------------- source/dnode/vnode/src/tq/tqExec.c | 6 -- 3 files changed, 43 insertions(+), 63 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 62d6a40333..6cb54c6c18 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -79,16 +79,15 @@ typedef struct { } STqExecDb; typedef struct { - int8_t subType; - STqReader* pExecReader; - qTaskInfo_t task; + int8_t subType; + STqReader* pExecReader; + qTaskInfo_t task; union { STqExecCol execCol; STqExecTb execTb; STqExecDb execDb; }; - int32_t numOfCols; // number of out pout column, temporarily used - bool stop; // denote if needs to be stopped or not + int32_t numOfCols; // number of out pout column, temporarily used } STqExecHandle; typedef struct { @@ -101,7 +100,6 @@ typedef struct { SWalRef* pRef; STqPushHandle pushHandle; // push STqExecHandle execHandle; // exec - int8_t execStatus; // this handle is used to handle the poll requirement } STqHandle; typedef struct { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 543fa9dd76..8d55a7bea1 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -534,17 +534,19 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) { int32_t code = -1; - STqOffsetVal reqOffset = pRequest->reqOffset; - STqOffsetVal fetchOffsetNew; + STqOffsetVal offset = {0}; SWalCkHead* pCkHead = NULL; + int32_t vgId = TD_VID(pTq->pVnode); + + STqOffsetVal reqOffset = pRequest->reqOffset; uint64_t consumerId = pRequest->consumerId; // 1. reset the offset if needed if (reqOffset.type > 0) { - fetchOffsetNew = reqOffset; + offset = reqOffset; } else { // handle the reset offset cases, according to the consumer's choice. bool blockReturned = false; - code = extractResetOffsetVal(&fetchOffsetNew, pTq, pHandle, pRequest, pMsg, &blockReturned); + code = extractResetOffsetVal(&offset, pTq, pHandle, pRequest, pMsg, &blockReturned); if (code != 0) { return code; } @@ -562,7 +564,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* // lock taosWLockLatch(&pTq->pushLock); - code = tqScanData(pTq, pHandle, &dataRsp, &fetchOffsetNew); + code = tqScanData(pTq, pHandle, &dataRsp, &offset); // till now, all data has been transferred to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && @@ -574,12 +576,12 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* taosWUnLockLatch(&pTq->pushLock); code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); - pHandle->execHandle.stop = false; // NOTE: this pHandle->consumerId may have been changed already. - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 ", ts:%" PRId64 "", - consumerId, pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.blockNum, dataRsp.rspOffset.type, - dataRsp.rspOffset.uid, dataRsp.rspOffset.ts); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 + ", ts:%" PRId64, + consumerId, pHandle->subKey, vgId, dataRsp.blockNum, dataRsp.rspOffset.type, dataRsp.rspOffset.uid, + dataRsp.rspOffset.ts); tDeleteSMqDataRsp(&dataRsp); return code; @@ -591,16 +593,16 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* STaosxRsp taosxRsp = {0}; tqInitTaosxRsp(&taosxRsp, pRequest); - if (fetchOffsetNew.type != TMQ_OFFSET__LOG) { - if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, &fetchOffsetNew) < 0) { + if (offset.type != TMQ_OFFSET__LOG) { + if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, &offset) < 0) { return -1; } if (metaRsp.metaRspLen > 0) { code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send meta offset type:%d,uid:%" PRId64 - ",version:%" PRId64, - consumerId, pHandle->subKey, TD_VID(pTq->pVnode), metaRsp.rspOffset.type, metaRsp.rspOffset.uid, + ",version:%" PRId64, + consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.version); taosMemoryFree(metaRsp.metaRsp); tDeleteSTaosxRsp(&taosxRsp); @@ -612,17 +614,17 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* tDeleteSTaosxRsp(&taosxRsp); return code; } else { - fetchOffsetNew = taosxRsp.rspOffset; + offset = taosxRsp.rspOffset; } tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send data blockNum:%d, offset type:%d,uid:%" PRId64 ",version:%" PRId64, - consumerId, pHandle->subKey, TD_VID(pTq->pVnode), taosxRsp.blockNum, taosxRsp.rspOffset.type, - taosxRsp.rspOffset.uid, taosxRsp.rspOffset.version); + consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid, + taosxRsp.rspOffset.version); } - if (fetchOffsetNew.type == TMQ_OFFSET__LOG) { - int64_t fetchVer = fetchOffsetNew.version + 1; + if (offset.type == TMQ_OFFSET__LOG) { + int64_t fetchVer = offset.version + 1; pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); if (pCkHead == NULL) { tDeleteSTaosxRsp(&taosxRsp); @@ -637,8 +639,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (savedEpoch > pRequest->epoch) { tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, vg %d offset %" PRId64 ", found new consumer epoch %d, discard req epoch %d", - consumerId, pRequest->epoch, pHandle->subKey, TD_VID(pTq->pVnode), fetchVer, savedEpoch, - pRequest->epoch); + consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch); break; } @@ -655,7 +656,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* SWalCont* pHead = &pCkHead->head; tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", consumerId, - pRequest->epoch, TD_VID(pTq->pVnode), fetchVer, pHead->msgType); + pRequest->epoch, vgId, fetchVer, pHead->msgType); if (pHead->msgType == TDMT_VND_SUBMIT) { SPackedData submit = { @@ -664,7 +665,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* .ver = pHead->version, }; if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp) < 0) { - tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", consumerId, TD_VID(pTq->pVnode), + tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", consumerId, vgId, pRequest->subKey); return -1; } @@ -728,45 +729,35 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { return -1; } - int8_t oldVal = atomic_val_compare_exchange_8(&pHandle->execStatus, 0, 1); - - // other thread has started to re-balance this handle, return empty block for this poll - if (oldVal != 0) { - SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, &req, pHandle->execHandle.subType); - int32_t code = tqSendDataRsp(pTq, pMsg, &req, &dataRsp); // here we return an empty block to client - tDeleteSMqDataRsp(&dataRsp); - return code; - } - - // pHandle->execStatus == 1, and we are safe now - // keep the epoch in the first plance, this value may be change by other threads. - int32_t savedEpoch = atomic_load_32(&pHandle->epoch); - - // 2. check rebalance status + // 2. check re-balance status + taosRLockLatch(&pTq->pushLock); if (pHandle->consumerId != consumerId) { tqDebug("ERROR tmq poll: consumer:0x%" PRIx64 " vgId:%d, subkey %s, mismatch for saved handle consumer:0x%" PRIx64, consumerId, TD_VID(pTq->pVnode), req.subKey, pHandle->consumerId); terrno = TSDB_CODE_TMQ_CONSUMER_MISMATCH; - atomic_store_8(&pHandle->execStatus, 0); // reset the flag + taosRUnLockLatch(&pTq->pushLock); return -1; } + taosRUnLockLatch(&pTq->pushLock); + + taosWLockLatch(&pTq->pushLock); + int32_t savedEpoch = pHandle->epoch; // 3. update the epoch value - while (savedEpoch < reqEpoch) { - tqDebug("tmq poll: consumer:0x%"PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); - savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); - } +// while (savedEpoch < reqEpoch) { + tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); + pHandle->epoch = reqEpoch; +// savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); +// } + + taosWUnLockLatch(&pTq->pushLock); char buf[80]; tFormatOffset(buf, 80, &reqOffset); tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s", consumerId, - req.epoch, pHandle->subKey, TD_VID(pTq->pVnode), buf); + req.epoch, pHandle->subKey, vgId, buf); - int32_t code = extractDataForMq(pTq, pHandle, &req, pMsg); - - atomic_store_8(&pHandle->execStatus, 0); // restore the flag - return code; + return extractDataForMq(pTq, pHandle, &req, pMsg); } int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { @@ -953,7 +944,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg atomic_add_fetch_32(&pHandle->epoch, 1); taosMemoryFree(req.qmsg); - pHandle->execHandle.stop = true; if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { qStreamCloseTsdbReader(pHandle->execHandle.task); } @@ -962,8 +952,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { return -1; } - - pHandle->execHandle.stop = false; } return 0; diff --git a/source/dnode/vnode/src/tq/tqExec.c b/source/dnode/vnode/src/tq/tqExec.c index 81b68f8473..640b2b57fe 100644 --- a/source/dnode/vnode/src/tq/tqExec.c +++ b/source/dnode/vnode/src/tq/tqExec.c @@ -108,12 +108,6 @@ int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffs break; } } - - if (pExec->stop) { - tqDebug("vgId:%d, current vgroups has been transferred to other consumer, return results asap", - TD_VID(pTq->pVnode)); - break; - } } if (qStreamExtractOffset(task, &pRsp->rspOffset) < 0) { From a4e378e138c07bd9f7bc5e45d008def2f80b9b2a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 09:30:59 +0800 Subject: [PATCH 07/28] refactor: update some log. --- source/client/test/clientTests.cpp | 4 ++-- source/dnode/mnode/impl/src/mndSubscribe.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 2f3d600019..82d29b37eb 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -925,7 +925,7 @@ TEST(clientCase, subscription_test) { // 创建订阅 topics 列表 tmq_list_t* topicList = tmq_list_new(); -// tmq_list_append(topicList, "topic_t1"); + tmq_list_append(topicList, "topic_t1"); // 启动订阅 tmq_subscribe(tmq, topicList); @@ -954,7 +954,7 @@ TEST(clientCase, subscription_test) { printf("db: %s\n", dbName); printf("vgroup id: %d\n", vgroupId); - if (count ++ > 20) { + if (count ++ > 200) { tmq_unsubscribe(tmq); break; } diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 4d19110f31..924216bcbf 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -224,7 +224,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR .pVgEp = pVgEp, }; taosHashPut(pHash, &pVgEp->vgId, sizeof(int32_t), &outputVg, sizeof(SMqRebOutputVg)); - mInfo("sub:%s mq re-balance remove vgId:%d from consumer:%" PRIx64, sub, pVgEp->vgId, consumerId); + mInfo("sub:%s mq re-balance remove vgId:%d from consumer:0x%" PRIx64, sub, pVgEp->vgId, consumerId); } taosArrayDestroy(pConsumerEp->vgs); taosHashRemove(pOutput->pSub->consumerHash, &consumerId, sizeof(int64_t)); @@ -329,7 +329,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR newConsumerEp.vgs = taosArrayInit(0, sizeof(void *)); taosHashPut(pOutput->pSub->consumerHash, &consumerId, sizeof(int64_t), &newConsumerEp, sizeof(SMqConsumerEp)); taosArrayPush(pOutput->newConsumers, &consumerId); - mInfo("sub:%s mq rebalance add new consumer:%" PRIx64, sub, consumerId); + mInfo("sub:%s mq rebalance add new consumer:0x%" PRIx64, sub, consumerId); } } @@ -357,7 +357,7 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR taosArrayPush(pConsumerEp->vgs, &pRebVg->pVgEp); pRebVg->newConsumerId = pConsumerEp->consumerId; taosArrayPush(pOutput->rebVgs, pRebVg); - mInfo("mq rebalance: add vgId:%d to consumer:%" PRIx64 " (second scan) (not enough)", pRebVg->pVgEp->vgId, + mInfo("mq rebalance: add vgId:%d to consumer:0x%" PRIx64 " (second scan) (not enough)", pRebVg->pVgEp->vgId, pConsumerEp->consumerId); } } @@ -387,12 +387,12 @@ static int32_t mndDoRebalance(SMnode *pMnode, const SMqRebInputObj *pInput, SMqR taosArrayPush(pConsumerEp->vgs, &pRebVg->pVgEp); pRebVg->newConsumerId = pConsumerEp->consumerId; if (pRebVg->newConsumerId == pRebVg->oldConsumerId) { - mInfo("mq rebalance: skip vg %d for same consumer:%" PRIx64 " (second scan)", pRebVg->pVgEp->vgId, + mInfo("mq rebalance: skip vg %d for same consumer:0x%" PRIx64 " (second scan)", pRebVg->pVgEp->vgId, pConsumerEp->consumerId); continue; } taosArrayPush(pOutput->rebVgs, pRebVg); - mInfo("mq rebalance: add vgId:%d to consumer:%" PRIx64 " (second scan) (unassigned)", pRebVg->pVgEp->vgId, + mInfo("mq rebalance: add vgId:%d to consumer:0x%" PRIx64 " (second scan) (unassigned)", pRebVg->pVgEp->vgId, pConsumerEp->consumerId); } } else { @@ -1019,7 +1019,7 @@ int32_t mndRetrieveSubscribe(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)&pConsumerEp->consumerId, false); - mDebug("mnd show subscriptions: topic %s, consumer:%" PRIx64 " cgroup %s vgid %d", varDataVal(topic), + mDebug("mnd show subscriptions: topic %s, consumer:0x%" PRIx64 " cgroup %s vgid %d", varDataVal(topic), pConsumerEp->consumerId, varDataVal(cgroup), pVgEp->vgId); // offset From b31b7c3f64a7119b1df307fad0869cf37f04dc37 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 10:27:21 +0800 Subject: [PATCH 08/28] fix(tmq): fix the invalid read. --- source/client/src/clientTmq.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 9f24deff94..6a9f88ce15 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1289,9 +1289,11 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { tDecoderClear(&decoder); memcpy(&pRspWrapper->dataRsp, pMsg->pData, sizeof(SMqRspHead)); - tscDebug("consumer:0x%" PRIx64 " recv poll rsp, vgId:%d, req offset:%" PRId64 ", rsp offset:%" PRId64 " type %d, reqId:0x%"PRIx64, - tmq->consumerId, pVg->vgId, pRspWrapper->dataRsp.reqOffset.version, pRspWrapper->dataRsp.rspOffset.version, - rspType, requestId); + tscDebug("consumer:0x%" PRIx64 " recv poll rsp, vgId:%d, req:%" PRId64 ", rsp:%" PRId64 + " type %d, reqId:0x%" PRIx64, + tmq->consumerId, pParam->vgId, pRspWrapper->dataRsp.reqOffset.version, + pRspWrapper->dataRsp.rspOffset.version, rspType, requestId); + } else if (rspType == TMQ_MSG_TYPE__POLL_META_RSP) { SDecoder decoder; tDecoderInit(&decoder, POINTER_SHIFT(pMsg->pData, sizeof(SMqRspHead)), pMsg->len - sizeof(SMqRspHead)); @@ -1378,6 +1380,7 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { int32_t vgNumGet = taosArrayGetSize(pTopicEp->vgs); topic.vgs = taosArrayInit(vgNumGet, sizeof(SMqClientVg)); + for (int32_t j = 0; j < vgNumGet; j++) { SMqSubVgEp* pVgEp = taosArrayGet(pTopicEp->vgs, j); sprintf(vgKey, "%s:%d", topic.topicName, pVgEp->vgId); From 0d1b2e4b5af07a8c1569e7cda1ee444143f1fd2e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 10:31:53 +0800 Subject: [PATCH 09/28] fix(tmq): use before remove hash item. --- source/dnode/vnode/src/tq/tqPush.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 03bb14e447..c0ed787176 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -383,11 +383,11 @@ int32_t tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t c tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s vgId:%d remove from push mgr, remains:%d", consumerId, (*pEntry)->subKey, vgId, taosHashGetSize(pTq->pPushMgr) - 1); - taosHashRemove(pTq->pPushMgr, pKey, keyLen); - if (rspConsumer) { // rsp the old consumer with empty block. tqPushDataRsp(pTq, *pEntry); } + + taosHashRemove(pTq->pPushMgr, pKey, keyLen); } return 0; From 9ca77572d98ae5fa9c65718434186d20591a6049 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:09:42 +0800 Subject: [PATCH 10/28] fix(tmq): fix race condition. --- source/client/src/clientTmq.c | 197 ++++++++++++++++++++-------------- 1 file changed, 114 insertions(+), 83 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 6a9f88ce15..3748142b0d 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -106,17 +106,13 @@ struct tmq_t { tmr_h reportTimer; tmr_h commitTimer; - // connection - STscObj* pTscObj; - - // container - SArray* clientTopics; // SArray - STaosQueue* mqueue; // queue of rsp - STaosQall* qall; - STaosQueue* delayedTask; // delayed task queue for heartbeat and auto commit - - // ctl - tsem_t rspSem; + STscObj* pTscObj; // connection + SArray* clientTopics; // SArray + STaosQueue* mqueue; // queue of rsp + STaosQall* qall; + STaosQueue* delayedTask; // delayed task queue for heartbeat and auto commit + TdThreadMutex lock; // used to protect the operation on each topic, when updating the epsets. + tsem_t rspSem; }; enum { @@ -213,9 +209,9 @@ typedef struct { typedef struct { SMqCommitCbParamSet* params; STqOffset* pOffset; - SMqClientVg* pMqVg; - /*char topicName[TSDB_TOPIC_FNAME_LEN];*/ - /*int32_t vgId;*/ + char topicName[TSDB_TOPIC_FNAME_LEN]; + int32_t vgId; + tmq_t* pTmq; } SMqCommitCbParam; static int32_t tmqAskEp(tmq_t* tmq, bool async); @@ -431,6 +427,7 @@ static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet) { int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam* pParam = (SMqCommitCbParam*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; + // push into array #if 0 if (code == 0) { @@ -440,15 +437,34 @@ int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { } #endif - // there may be race condition. fix it - if (pBuf->pEpSet != NULL && pParam->pMqVg != NULL) { - SMqClientVg* pMqVg = pParam->pMqVg; + if (pBuf->pEpSet != NULL) { + // todo extract method + taosThreadMutexLock(&pParam->pTmq->lock); - SEp* pEp = GET_ACTIVE_EP(pBuf->pEpSet); - SEp* pOld = GET_ACTIVE_EP(&(pMqVg->epSet)); - uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pMqVg->vgId, - pEp->fqdn, pEp->port, pOld->fqdn, pOld->port); - pParam->pMqVg->epSet = *pBuf->pEpSet; + int32_t numOfTopics = taosArrayGetSize(pParam->pTmq->clientTopics); + for(int32_t i = 0; i < numOfTopics; ++i) { + SMqClientTopic* pTopic = taosArrayGet(pParam->pTmq->clientTopics, i); + if (strcmp(pTopic->topicName, pParam->topicName) != 0) { + continue; + } + + int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); + for(int32_t j = 0; j < numOfVgs; ++j) { + SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); + if (pClientVg->vgId == pParam->vgId) { + SEp* pEp = GET_ACTIVE_EP(pBuf->pEpSet); + SEp* pOld = GET_ACTIVE_EP(&(pClientVg->epSet)); + uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pParam->vgId, + pEp->fqdn, pEp->port, pOld->fqdn, pOld->port); + pClientVg->epSet = *pBuf->pEpSet; + break; + } + } + + break; + } + + taosThreadMutexUnlock(&pParam->pTmq->lock); } taosMemoryFree(pParam->pOffset); @@ -508,7 +524,10 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT pParam->params = pParamSet; pParam->pOffset = pOffset; - pParam->pMqVg = pVg; // there may be an race condition + pParam->vgId = pVg->vgId; + pParam->pTmq = tmq; + + tstrncpy(pParam->topicName, pTopic->topicName, tListLen(pParam->topicName)); // build send info SMsgSendInfo* pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); @@ -659,6 +678,7 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, for (int32_t i = 0; i < numOfTopics; i++) { SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); + // todo race condition: fix it int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); for (int32_t j = 0; j < numOfVgroups; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); @@ -944,10 +964,14 @@ void tmqFreeImpl(void* handle) { tmqClearUnhandleMsg(tmq); taosCloseQueue(tmq->mqueue); } - if (tmq->delayedTask) taosCloseQueue(tmq->delayedTask); - taosFreeQall(tmq->qall); + if (tmq->delayedTask) { + taosCloseQueue(tmq->delayedTask); + } + + taosFreeQall(tmq->qall); tsem_destroy(&tmq->rspSem); + taosThreadMutexDestroy(&tmq->lock); int32_t sz = taosArrayGetSize(tmq->clientTopics); for (int32_t i = 0; i < sz; i++) { @@ -955,6 +979,7 @@ void tmqFreeImpl(void* handle) { taosMemoryFreeClear(pTopic->schema.pSchema); taosArrayDestroy(pTopic->vgs); } + taosArrayDestroy(tmq->clientTopics); taos_close_internal(tmq->pTscObj); taosMemoryFree(tmq); @@ -993,9 +1018,10 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { pTmq->clientTopics = taosArrayInit(0, sizeof(SMqClientTopic)); pTmq->mqueue = taosOpenQueue(); - pTmq->qall = taosAllocateQall(); pTmq->delayedTask = taosOpenQueue(); + pTmq->qall = taosAllocateQall(); + taosThreadMutexInit(&pTmq->lock, NULL); if (pTmq->clientTopics == NULL || pTmq->mqueue == NULL || pTmq->qall == NULL || pTmq->delayedTask == NULL || conf->groupId[0] == 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1193,7 +1219,6 @@ FAIL: } void tmq_conf_set_auto_commit_cb(tmq_conf_t* conf, tmq_commit_cb* cb, void* param) { - // conf->commitCb = cb; conf->commitCbUserParam = param; } @@ -1328,7 +1353,53 @@ CREATE_MSG_FAIL: return -1; } -bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { +static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopicEp, SHashObj* pVgOffsetHashMap, + tmq_t* tmq) { + pTopic->schema = pTopicEp->schema; + pTopicEp->schema.nCols = 0; + pTopicEp->schema.pSchema = NULL; + + char vgKey[TSDB_TOPIC_FNAME_LEN + 22]; + int32_t vgNumGet = taosArrayGetSize(pTopicEp->vgs); + + tstrncpy(pTopic->topicName, pTopicEp->topic, TSDB_TOPIC_FNAME_LEN); + tstrncpy(pTopic->db, pTopicEp->db, TSDB_DB_FNAME_LEN); + + tscDebug("consumer:0x%" PRIx64 ", update topic:%s, numOfVgs:%d", tmq->consumerId, pTopic->topicName, vgNumGet); + pTopic->vgs = taosArrayInit(vgNumGet, sizeof(SMqClientVg)); + + for (int32_t j = 0; j < vgNumGet; j++) { + SMqSubVgEp* pVgEp = taosArrayGet(pTopicEp->vgs, j); + sprintf(vgKey, "%s:%d", pTopic->topicName, pVgEp->vgId); + STqOffsetVal* pOffset = taosHashGet(pVgOffsetHashMap, vgKey, strlen(vgKey)); + STqOffsetVal offsetNew = {.type = tmq->resetOffsetCfg}; + if (pOffset != NULL) { + offsetNew = *pOffset; + } + + SMqClientVg clientVg = { + .pollCnt = 0, + .currentOffset = offsetNew, + .vgId = pVgEp->vgId, + .epSet = pVgEp->epSet, + .vgStatus = TMQ_VG_STATUS__IDLE, + .vgSkipCnt = 0, + }; + + taosArrayPush(pTopic->vgs, &clientVg); + } +} + +static void freeClientVgInfo(void* param) { + SMqClientTopic* pTopic = param; + if (pTopic->schema.nCols) { + taosMemoryFreeClear(pTopic->schema.pSchema); + } + + taosArrayDestroy(pTopic->vgs); +} + +static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { bool set = false; int32_t topicNumCur = taosArrayGetSize(tmq->clientTopics); @@ -1343,12 +1414,13 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { return false; } - SHashObj* pHash = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK); - if (pHash == NULL) { + SHashObj* pVgOffsetHashMap = taosHashInit(64, MurmurHash3_32, false, HASH_NO_LOCK); + if (pVgOffsetHashMap == NULL) { taosArrayDestroy(newTopics); return false; } + // todo extract method for (int32_t i = 0; i < topicNumCur; i++) { // find old topic SMqClientTopic* pTopicCur = taosArrayGet(tmq->clientTopics, i); @@ -1362,7 +1434,7 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { tFormatOffset(buf, 80, &pVgCur->currentOffset); tscDebug("consumer:0x%" PRIx64 ", epoch:%d vgId:%d vgKey:%s, offset:%s", tmq->consumerId, epoch, pVgCur->vgId, vgKey, buf); - taosHashPut(pHash, vgKey, strlen(vgKey), &pVgCur->currentOffset, sizeof(STqOffsetVal)); + taosHashPut(pVgOffsetHashMap, vgKey, strlen(vgKey), &pVgCur->currentOffset, sizeof(STqOffsetVal)); } } } @@ -1370,62 +1442,25 @@ bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { for (int32_t i = 0; i < topicNumGet; i++) { SMqClientTopic topic = {0}; SMqSubTopicEp* pTopicEp = taosArrayGet(pRsp->topics, i); - topic.schema = pTopicEp->schema; - pTopicEp->schema.nCols = 0; - pTopicEp->schema.pSchema = NULL; - tstrncpy(topic.topicName, pTopicEp->topic, TSDB_TOPIC_FNAME_LEN); - tstrncpy(topic.db, pTopicEp->db, TSDB_DB_FNAME_LEN); - - tscDebug("consumer:0x%" PRIx64 ", update topic: %s", tmq->consumerId, topic.topicName); - - int32_t vgNumGet = taosArrayGetSize(pTopicEp->vgs); - topic.vgs = taosArrayInit(vgNumGet, sizeof(SMqClientVg)); - - for (int32_t j = 0; j < vgNumGet; j++) { - SMqSubVgEp* pVgEp = taosArrayGet(pTopicEp->vgs, j); - sprintf(vgKey, "%s:%d", topic.topicName, pVgEp->vgId); - STqOffsetVal* pOffset = taosHashGet(pHash, vgKey, strlen(vgKey)); - STqOffsetVal offsetNew = {.type = tmq->resetOffsetCfg}; - if (pOffset != NULL) { - offsetNew = *pOffset; - } - - SMqClientVg clientVg = { - .pollCnt = 0, - .currentOffset = offsetNew, - .vgId = pVgEp->vgId, - .epSet = pVgEp->epSet, - .vgStatus = TMQ_VG_STATUS__IDLE, - .vgSkipCnt = 0, - }; - taosArrayPush(topic.vgs, &clientVg); - set = true; - } + initClientTopicFromRsp(&topic, pTopicEp, pVgOffsetHashMap, tmq); taosArrayPush(newTopics, &topic); } // destroy current buffered existed topics info if (tmq->clientTopics) { - int32_t sz = taosArrayGetSize(tmq->clientTopics); - for (int32_t i = 0; i < sz; i++) { - SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); - if (pTopic->schema.nCols) taosMemoryFreeClear(pTopic->schema.pSchema); - taosArrayDestroy(pTopic->vgs); - } - - taosArrayDestroy(tmq->clientTopics); + taosArrayDestroyEx(tmq->clientTopics, freeClientVgInfo); } - taosHashCleanup(pHash); + taosHashCleanup(pVgOffsetHashMap); + + taosThreadMutexLock(&tmq->lock); tmq->clientTopics = newTopics; + taosThreadMutexUnlock(&tmq->lock); - if (taosArrayGetSize(tmq->clientTopics) == 0) { - atomic_store_8(&tmq->status, TMQ_CONSUMER_STATUS__NO_TOPIC); - } else { - atomic_store_8(&tmq->status, TMQ_CONSUMER_STATUS__READY); - } - + int8_t flag = (topicNumGet == 0)? TMQ_CONSUMER_STATUS__NO_TOPIC:TMQ_CONSUMER_STATUS__READY; + atomic_store_8(&tmq->status, flag); atomic_store_32(&tmq->epoch, epoch); + tscDebug("consumer:0x%" PRIx64 " update topic info completed", tmq->consumerId); return set; } @@ -1448,7 +1483,7 @@ int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { } pParam->code = code; - if (code != 0) { + if (code != TSDB_CODE_SUCCESS) { tscError("consumer:0x%" PRIx64 ", get topic endpoint error, async:%d, code:%s", tmq->consumerId, pParam->async, tstrerror(code)); goto END; @@ -1471,8 +1506,6 @@ int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { if (!async) { SMqAskEpRsp rsp; tDecodeSMqAskEpRsp(POINTER_SHIFT(pMsg->pData, sizeof(SMqRspHead)), &rsp); - /*printf("rsp epoch %" PRId64 " sz %" PRId64 "\n", rsp.epoch, rsp.topics->size);*/ - /*printf("tmq epoch %" PRId64 " sz %" PRId64 "\n", tmq->epoch, tmq->clientTopics->size);*/ tmqUpdateEp(tmq, head->epoch, &rsp); tDeleteSMqAskEpRsp(&rsp); } else { @@ -1493,7 +1526,6 @@ int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { } END: - /*atomic_store_8(&tmq->epStatus, 0);*/ if (!async) { tsem_post(&pParam->rspSem); } else { @@ -1545,7 +1577,6 @@ int32_t tmqAskEp(tmq_t* tmq, bool async) { if (pParam == NULL) { tscError("consumer:0x%" PRIx64 ", failed to malloc subscribe param", tmq->consumerId); taosMemoryFree(pReq); - /*atomic_store_8(&tmq->epStatus, 0);*/ return -1; } @@ -1559,7 +1590,6 @@ int32_t tmqAskEp(tmq_t* tmq, bool async) { tsem_destroy(&pParam->rspSem); taosMemoryFree(pParam); taosMemoryFree(pReq); - /*atomic_store_8(&tmq->epStatus, 0);*/ return -1; } @@ -1995,6 +2025,7 @@ int32_t tmq_consumer_close(tmq_t* tmq) { tmq_list_destroy(lst); } + taosRemoveRef(tmqMgmt.rsetId, tmq->refId); return 0; } From 3513de5ab46a01961054b291575296337cbf33a4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:11:58 +0800 Subject: [PATCH 11/28] fix(tmq): fix race condition. --- source/client/src/clientTmq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 3748142b0d..360e1eb569 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1446,15 +1446,16 @@ static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { taosArrayPush(newTopics, &topic); } + taosHashCleanup(pVgOffsetHashMap); + + taosThreadMutexLock(&tmq->lock); // destroy current buffered existed topics info if (tmq->clientTopics) { taosArrayDestroyEx(tmq->clientTopics, freeClientVgInfo); } - taosHashCleanup(pVgOffsetHashMap); - - taosThreadMutexLock(&tmq->lock); tmq->clientTopics = newTopics; + taosThreadMutexUnlock(&tmq->lock); int8_t flag = (topicNumGet == 0)? TMQ_CONSUMER_STATUS__NO_TOPIC:TMQ_CONSUMER_STATUS__READY; From 1f71cf3a8bc7d735f68a050e15637cc3b9e15450 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:18:55 +0800 Subject: [PATCH 12/28] fix(tmq): fix invalid read. --- source/client/src/clientTmq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 360e1eb569..54cacfe156 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1316,8 +1316,7 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { tscDebug("consumer:0x%" PRIx64 " recv poll rsp, vgId:%d, req:%" PRId64 ", rsp:%" PRId64 " type %d, reqId:0x%" PRIx64, - tmq->consumerId, pParam->vgId, pRspWrapper->dataRsp.reqOffset.version, - pRspWrapper->dataRsp.rspOffset.version, rspType, requestId); + tmq->consumerId, vgId, pRspWrapper->dataRsp.reqOffset.version, pRspWrapper->dataRsp.rspOffset.version, rspType, requestId); } else if (rspType == TMQ_MSG_TYPE__POLL_META_RSP) { SDecoder decoder; From d8807d3d5a112d64549c8c2b4b17685946d503e5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:23:16 +0800 Subject: [PATCH 13/28] fix(tmq): fix memory leak. --- source/dnode/vnode/src/tq/tq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 8d55a7bea1..a8ed470f50 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -830,7 +830,6 @@ int32_t tqProcessDelCheckInfoReq(STQ* pTq, int64_t sversion, char* msg, int32_t int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { SMqRebVgReq req = {0}; tDecodeSMqRebVgReq(msg, &req); - // todo lock tqDebug("vgId:%d, tq process sub req %s, Id:0x%" PRIx64 " -> Id:0x%" PRIx64, pTq->pVnode->config.vgId, req.subKey, req.oldConsumerId, req.newConsumerId); @@ -863,8 +862,10 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg // TODO version should be assigned and refed during preprocess SWalRef* pRef = walRefCommittedVer(pTq->pVnode->pWal); if (pRef == NULL) { + taosMemoryFree(req.qmsg); return -1; } + int64_t ver = pRef->refVer; pHandle->pRef = pRef; @@ -921,6 +922,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqDebug("try to persist handle %s consumer:0x%" PRIx64 " , old consumer:0x%" PRIx64, req.subKey, pHandle->consumerId, oldConsumerId); if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { + taosMemoryFree(req.qmsg); return -1; } } else { @@ -928,6 +930,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg tqInfo("vgId:%d consumer:0x%" PRIx64 " remains, no switch occurs", req.vgId, req.newConsumerId); atomic_store_32(&pHandle->epoch, -1); atomic_add_fetch_32(&pHandle->epoch, 1); + taosMemoryFree(req.qmsg); return tqMetaSaveHandle(pTq, req.subKey, pHandle); } @@ -950,10 +953,12 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg taosWUnLockLatch(&pTq->pushLock); if (tqMetaSaveHandle(pTq, req.subKey, pHandle) < 0) { + taosMemoryFree(req.qmsg); return -1; } } + taosMemoryFree(req.qmsg); return 0; } From 05ddf68247e6fc058cc0c2a6e5a1d8907175def8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Mar 2023 14:27:34 +0800 Subject: [PATCH 14/28] fix(tmq): fix invalid free. --- source/dnode/vnode/src/tq/tq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index a8ed470f50..883816576d 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -945,7 +945,6 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); - taosMemoryFree(req.qmsg); if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { qStreamCloseTsdbReader(pHandle->execHandle.task); From 9d680a0995eecb35a39ae14a7b44ddebd057519c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 09:09:03 +0800 Subject: [PATCH 15/28] fix(tmq): fix race condition. --- source/client/src/clientTmq.c | 57 +++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 54cacfe156..825f7ef182 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -147,7 +147,6 @@ typedef struct { } SMqClientVg; typedef struct { - // subscribe info char topicName[TSDB_TOPIC_FNAME_LEN]; char db[TSDB_DB_FNAME_LEN]; SArray* vgs; // SArray @@ -381,7 +380,7 @@ char** tmq_list_to_c_array(const tmq_list_t* list) { return container->pData; } -static int32_t tmqMakeTopicVgKey(char* dst, const char* topicName, int32_t vg) { +static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg) { return sprintf(dst, "%s:%d", topicName, vg); } @@ -424,7 +423,7 @@ static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet) { } } -int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { +static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam* pParam = (SMqCommitCbParam*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; @@ -478,7 +477,7 @@ int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { return 0; } -static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pTopic, SMqCommitCbParamSet* pParamSet) { +static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet) { STqOffset* pOffset = taosMemoryCalloc(1, sizeof(STqOffset)); if (pOffset == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -490,7 +489,7 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT int32_t groupLen = strlen(tmq->groupId); memcpy(pOffset->subKey, tmq->groupId, groupLen); pOffset->subKey[groupLen] = TMQ_SEPARATOR; - strcpy(pOffset->subKey + groupLen + 1, pTopic->topicName); + strcpy(pOffset->subKey + groupLen + 1, pTopicName); int32_t len; int32_t code; @@ -527,7 +526,7 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT pParam->vgId = pVg->vgId; pParam->pTmq = tmq; - tstrncpy(pParam->topicName, pTopic->topicName, tListLen(pParam->topicName)); + tstrncpy(pParam->topicName, pTopicName, tListLen(pParam->topicName)); // build send info SMsgSendInfo* pMsgSendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); @@ -544,7 +543,7 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT .handle = NULL, }; - SEp* pEp = &pVg->epSet.eps[pVg->epSet.inUse]; + SEp* pEp = GET_ACTIVE_EP(&pVg->epSet); tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d offset:%" PRId64 " prev:%" PRId64 ", ep:%s:%d", tmq->consumerId, pOffset->subKey, pVg->vgId, pOffset->val.version, pVg->committedOffset.version, pEp->fqdn, pEp->port); @@ -567,7 +566,7 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, SMqClientTopic* pT return 0; } -int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, tmq_commit_cb* userCb, void* userParam) { +static int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, tmq_commit_cb* userCb, void* userParam) { char* topic; int32_t vgId; if (TD_RES_TMQ(msg)) { @@ -591,6 +590,7 @@ int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, tmq_comm terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } + pParamSet->refId = tmq->refId; pParamSet->epoch = tmq->epoch; pParamSet->automatic = 0; @@ -601,15 +601,18 @@ int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, tmq_comm int32_t code = -1; + taosThreadMutexLock(&tmq->lock); for (int32_t i = 0; i < taosArrayGetSize(tmq->clientTopics); i++) { SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); if (strcmp(pTopic->topicName, topic) != 0) continue; for (int32_t j = 0; j < taosArrayGetSize(pTopic->vgs); j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); - if (pVg->vgId != vgId) continue; + if (pVg->vgId != vgId) { + continue; + } if (pVg->currentOffset.type > 0 && !tOffsetEqual(&pVg->currentOffset, &pVg->committedOffset)) { - if (tmqSendCommitReq(tmq, pVg, pTopic, pParamSet) < 0) { + if (tmqSendCommitReq(tmq, pVg, pTopic->topicName, pParamSet) < 0) { tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); goto FAIL; @@ -623,6 +626,7 @@ HANDLE_RSP: if (pParamSet->totalRspNum == 0) { tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); + taosThreadMutexUnlock(&tmq->lock); return 0; } @@ -631,15 +635,18 @@ HANDLE_RSP: code = pParamSet->rspErr; tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); + taosThreadMutexUnlock(&tmq->lock); return code; } else { code = 0; } FAIL: + taosThreadMutexUnlock(&tmq->lock); if (code != 0 && async) { userCb(tmq, code, userParam); } + return 0; } @@ -672,7 +679,9 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, // init as 1 to prevent concurrency issue pParamSet->waitingRspNum = 1; + taosThreadMutexLock(&tmq->lock); int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics); + tscDebug("consumer:0x%" PRIx64 " start to commit offset for %d topics", tmq->consumerId, numOfTopics); for (int32_t i = 0; i < numOfTopics; i++) { @@ -681,18 +690,20 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, // todo race condition: fix it int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); for (int32_t j = 0; j < numOfVgroups; j++) { - SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); - if (pVg->currentOffset.type > 0 && !tOffsetEqual(&pVg->currentOffset, &pVg->committedOffset)) { - if (tmqSendCommitReq(tmq, pVg, pTopic, pParamSet) < 0) { + SMqClientVg clientVg = *(SMqClientVg*)taosArrayGet(pTopic->vgs, j); + if (clientVg.currentOffset.type > 0 && !tOffsetEqual(&clientVg.currentOffset, &clientVg.committedOffset)) { + if (tmqSendCommitReq(tmq, &clientVg, pTopic->topicName, pParamSet) < 0) { continue; } } else { tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d, not commit, current:%" PRId64 ", ordinal:%d/%d", - tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->currentOffset.version, j + 1, numOfVgroups); + tmq->consumerId, pTopic->topicName, clientVg.vgId, clientVg.currentOffset.version, j + 1, numOfVgroups); } } } + taosThreadMutexUnlock(&tmq->lock); + // no request is sent if (pParamSet->totalRspNum == 0) { tsem_destroy(&pParamSet->rspSem); @@ -1369,8 +1380,10 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic for (int32_t j = 0; j < vgNumGet; j++) { SMqSubVgEp* pVgEp = taosArrayGet(pTopicEp->vgs, j); - sprintf(vgKey, "%s:%d", pTopic->topicName, pVgEp->vgId); + + makeTopicVgroupKey(vgKey, pTopic->topicName, pVgEp->vgId); STqOffsetVal* pOffset = taosHashGet(pVgOffsetHashMap, vgKey, strlen(vgKey)); + STqOffsetVal offsetNew = {.type = tmq->resetOffsetCfg}; if (pOffset != NULL) { offsetNew = *pOffset; @@ -1428,7 +1441,8 @@ static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { tscDebug("consumer:0x%" PRIx64 ", new vg num: %d", tmq->consumerId, vgNumCur); for (int32_t j = 0; j < vgNumCur; j++) { SMqClientVg* pVgCur = taosArrayGet(pTopicCur->vgs, j); - sprintf(vgKey, "%s:%d", pTopicCur->topicName, pVgCur->vgId); + makeTopicVgroupKey(vgKey, pTopicCur->topicName, pVgCur->vgId); + char buf[80]; tFormatOffset(buf, 80, &pVgCur->currentOffset); tscDebug("consumer:0x%" PRIx64 ", epoch:%d vgId:%d vgKey:%s, offset:%s", tmq->consumerId, epoch, @@ -1454,7 +1468,6 @@ static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { } tmq->clientTopics = newTopics; - taosThreadMutexUnlock(&tmq->lock); int8_t flag = (topicNumGet == 0)? TMQ_CONSUMER_STATUS__NO_TOPIC:TMQ_CONSUMER_STATUS__READY; @@ -1465,7 +1478,7 @@ static bool tmqUpdateEp(tmq_t* tmq, int32_t epoch, const SMqAskEpRsp* pRsp) { return set; } -int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { +static int32_t tmqAskEpCb(void* param, SDataBuf* pMsg, int32_t code) { SMqAskEpCbParam* pParam = (SMqAskEpCbParam*)param; int8_t async = pParam->async; tmq_t* tmq = taosAcquireRef(tmqMgmt.rsetId, pParam->refId); @@ -1757,7 +1770,7 @@ static int32_t doTmqPollImpl(tmq_t* pTmq, SMqClientTopic* pTopic, SMqClientVg* p } // broadcast the poll request to all related vnodes -int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { +static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics); tscDebug("consumer:0x%" PRIx64 " start to poll data, numOfTopics:%d", tmq->consumerId, numOfTopics); @@ -1793,7 +1806,7 @@ int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { return 0; } -int32_t tmqHandleNoPollRsp(tmq_t* tmq, SMqRspWrapper* rspWrapper, bool* pReset) { +static int32_t tmqHandleNoPollRsp(tmq_t* tmq, SMqRspWrapper* rspWrapper, bool* pReset) { if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__EP_RSP) { /*printf("ep %d %d\n", rspMsg->head.epoch, tmq->epoch);*/ if (rspWrapper->epoch > atomic_load_32(&tmq->epoch)) { @@ -1813,7 +1826,7 @@ int32_t tmqHandleNoPollRsp(tmq_t* tmq, SMqRspWrapper* rspWrapper, bool* pReset) return 0; } -void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { +static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { tscDebug("consumer:0x%" PRIx64 " start to handle the rsp, total:%d", tmq->consumerId, tmq->qall->numOfItems); while (1) { @@ -2117,11 +2130,9 @@ const char* tmq_get_table_name(TAOS_RES* res) { } void tmq_commit_async(tmq_t* tmq, const TAOS_RES* msg, tmq_commit_cb* cb, void* param) { - // tmqCommitInner(tmq, msg, 0, 1, cb, param); } int32_t tmq_commit_sync(tmq_t* tmq, const TAOS_RES* msg) { - // return tmqCommitInner(tmq, msg, 0, 0, NULL, NULL); } From 1e01f902833515ef2f3ca3f027ee0b216774efe4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 09:13:26 +0800 Subject: [PATCH 16/28] refactor(tmq): do some internal refactor. --- source/client/src/clientTmq.c | 246 +++++++++++++++++----------------- 1 file changed, 124 insertions(+), 122 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 825f7ef182..ba223984e3 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -214,6 +214,8 @@ typedef struct { } SMqCommitCbParam; static int32_t tmqAskEp(tmq_t* tmq, bool async); +static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg); +static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet); tmq_conf_t* tmq_conf_new() { tmq_conf_t* conf = taosMemoryCalloc(1, sizeof(tmq_conf_t)); @@ -380,42 +382,6 @@ char** tmq_list_to_c_array(const tmq_list_t* list) { return container->pData; } -static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg) { - return sprintf(dst, "%s:%d", topicName, vg); -} - -int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { - tmq_t* tmq = taosAcquireRef(tmqMgmt.rsetId, pParamSet->refId); - if (tmq == NULL) { - if (!pParamSet->async) { - tsem_destroy(&pParamSet->rspSem); - } - taosMemoryFree(pParamSet); - terrno = TSDB_CODE_TMQ_CONSUMER_CLOSED; - return -1; - } - - // if no more waiting rsp - if (pParamSet->async) { - // call async cb func - if (pParamSet->automatic && tmq->commitCb) { - tmq->commitCb(tmq, pParamSet->rspErr, tmq->commitCbUserParam); - } else if (!pParamSet->automatic && pParamSet->userCb) { - // sem post - pParamSet->userCb(tmq, pParamSet->rspErr, pParamSet->userParam); - } - taosMemoryFree(pParamSet); - } else { - tsem_post(&pParamSet->rspSem); - } - -#if 0 - taosArrayDestroyP(pParamSet->successfulOffsets, taosMemoryFree); - taosArrayDestroyP(pParamSet->failedOffsets, taosMemoryFree); -#endif - return 0; -} - static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet) { int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); if (waitingRspNum == 0) { @@ -728,8 +694,8 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, return code; } -int32_t tmqCommitInner(tmq_t* tmq, const TAOS_RES* msg, int8_t automatic, int8_t async, tmq_commit_cb* userCb, - void* userParam) { +static int32_t tmqCommitInner(tmq_t* tmq, const TAOS_RES* msg, int8_t automatic, int8_t async, tmq_commit_cb* userCb, + void* userParam) { if (msg) { return tmqCommitMsgImpl(tmq, msg, async, userCb, userParam); } else { @@ -1550,90 +1516,6 @@ END: return code; } -int32_t tmqAskEp(tmq_t* tmq, bool async) { - int32_t code = TSDB_CODE_SUCCESS; -#if 0 - int8_t epStatus = atomic_val_compare_exchange_8(&tmq->epStatus, 0, 1); - if (epStatus == 1) { - int32_t epSkipCnt = atomic_add_fetch_32(&tmq->epSkipCnt, 1); - tscTrace("consumer:0x%" PRIx64 ", skip ask ep cnt %d", tmq->consumerId, epSkipCnt); - if (epSkipCnt < 5000) return 0; - } - atomic_store_32(&tmq->epSkipCnt, 0); -#endif - - SMqAskEpReq req = {0}; - req.consumerId = tmq->consumerId; - req.epoch = tmq->epoch; - strcpy(req.cgroup, tmq->groupId); - - int32_t tlen = tSerializeSMqAskEpReq(NULL, 0, &req); - if (tlen < 0) { - tscError("consumer:0x%" PRIx64 ", tSerializeSMqAskEpReq failed", tmq->consumerId); - return -1; - } - - void* pReq = taosMemoryCalloc(1, tlen); - if (pReq == NULL) { - tscError("consumer:0x%" PRIx64 ", failed to malloc askEpReq msg, size:%d", tmq->consumerId, tlen); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - if (tSerializeSMqAskEpReq(pReq, tlen, &req) < 0) { - tscError("consumer:0x%" PRIx64 ", tSerializeSMqAskEpReq %d failed", tmq->consumerId, tlen); - taosMemoryFree(pReq); - return -1; - } - - SMqAskEpCbParam* pParam = taosMemoryCalloc(1, sizeof(SMqAskEpCbParam)); - if (pParam == NULL) { - tscError("consumer:0x%" PRIx64 ", failed to malloc subscribe param", tmq->consumerId); - taosMemoryFree(pReq); - return -1; - } - - pParam->refId = tmq->refId; - pParam->epoch = tmq->epoch; - pParam->async = async; - tsem_init(&pParam->rspSem, 0, 0); - - SMsgSendInfo* sendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); - if (sendInfo == NULL) { - tsem_destroy(&pParam->rspSem); - taosMemoryFree(pParam); - taosMemoryFree(pReq); - return -1; - } - - sendInfo->msgInfo = (SDataBuf){ - .pData = pReq, - .len = tlen, - .handle = NULL, - }; - - sendInfo->requestId = generateRequestId(); - sendInfo->requestObjRefId = 0; - sendInfo->param = pParam; - sendInfo->fp = tmqAskEpCb; - sendInfo->msgType = TDMT_MND_TMQ_ASK_EP; - - SEpSet epSet = getEpSet_s(&tmq->pTscObj->pAppInfo->mgmtEp); - tscDebug("consumer:0x%" PRIx64 " ask ep from mnode, async:%d, reqId:0x%" PRIx64, tmq->consumerId, async, - sendInfo->requestId); - - int64_t transporterId = 0; - asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &epSet, &transporterId, sendInfo); - - if (!async) { - tsem_wait(&pParam->rspSem); - code = pParam->code; - taosMemoryFree(pParam); - } - - return code; -} - void tmqBuildConsumeReqImpl(SMqPollReq* pReq, tmq_t* tmq, int64_t timeout, SMqClientTopic* pTopic, SMqClientVg* pVg) { int32_t groupLen = strlen(tmq->groupId); memcpy(pReq->subKey, tmq->groupId, groupLen); @@ -2136,3 +2018,123 @@ void tmq_commit_async(tmq_t* tmq, const TAOS_RES* msg, tmq_commit_cb* cb, void* int32_t tmq_commit_sync(tmq_t* tmq, const TAOS_RES* msg) { return tmqCommitInner(tmq, msg, 0, 0, NULL, NULL); } + +int32_t tmqAskEp(tmq_t* tmq, bool async) { + int32_t code = TSDB_CODE_SUCCESS; +#if 0 + int8_t epStatus = atomic_val_compare_exchange_8(&tmq->epStatus, 0, 1); + if (epStatus == 1) { + int32_t epSkipCnt = atomic_add_fetch_32(&tmq->epSkipCnt, 1); + tscTrace("consumer:0x%" PRIx64 ", skip ask ep cnt %d", tmq->consumerId, epSkipCnt); + if (epSkipCnt < 5000) return 0; + } + atomic_store_32(&tmq->epSkipCnt, 0); +#endif + + SMqAskEpReq req = {0}; + req.consumerId = tmq->consumerId; + req.epoch = tmq->epoch; + strcpy(req.cgroup, tmq->groupId); + + int32_t tlen = tSerializeSMqAskEpReq(NULL, 0, &req); + if (tlen < 0) { + tscError("consumer:0x%" PRIx64 ", tSerializeSMqAskEpReq failed", tmq->consumerId); + return -1; + } + + void* pReq = taosMemoryCalloc(1, tlen); + if (pReq == NULL) { + tscError("consumer:0x%" PRIx64 ", failed to malloc askEpReq msg, size:%d", tmq->consumerId, tlen); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + if (tSerializeSMqAskEpReq(pReq, tlen, &req) < 0) { + tscError("consumer:0x%" PRIx64 ", tSerializeSMqAskEpReq %d failed", tmq->consumerId, tlen); + taosMemoryFree(pReq); + return -1; + } + + SMqAskEpCbParam* pParam = taosMemoryCalloc(1, sizeof(SMqAskEpCbParam)); + if (pParam == NULL) { + tscError("consumer:0x%" PRIx64 ", failed to malloc subscribe param", tmq->consumerId); + taosMemoryFree(pReq); + return -1; + } + + pParam->refId = tmq->refId; + pParam->epoch = tmq->epoch; + pParam->async = async; + tsem_init(&pParam->rspSem, 0, 0); + + SMsgSendInfo* sendInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); + if (sendInfo == NULL) { + tsem_destroy(&pParam->rspSem); + taosMemoryFree(pParam); + taosMemoryFree(pReq); + return -1; + } + + sendInfo->msgInfo = (SDataBuf){ + .pData = pReq, + .len = tlen, + .handle = NULL, + }; + + sendInfo->requestId = generateRequestId(); + sendInfo->requestObjRefId = 0; + sendInfo->param = pParam; + sendInfo->fp = tmqAskEpCb; + sendInfo->msgType = TDMT_MND_TMQ_ASK_EP; + + SEpSet epSet = getEpSet_s(&tmq->pTscObj->pAppInfo->mgmtEp); + tscDebug("consumer:0x%" PRIx64 " ask ep from mnode, async:%d, reqId:0x%" PRIx64, tmq->consumerId, async, + sendInfo->requestId); + + int64_t transporterId = 0; + asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &epSet, &transporterId, sendInfo); + + if (!async) { + tsem_wait(&pParam->rspSem); + code = pParam->code; + taosMemoryFree(pParam); + } + + return code; +} + +int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg) { + return sprintf(dst, "%s:%d", topicName, vg); +} + +int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { + tmq_t* tmq = taosAcquireRef(tmqMgmt.rsetId, pParamSet->refId); + if (tmq == NULL) { + if (!pParamSet->async) { + tsem_destroy(&pParamSet->rspSem); + } + taosMemoryFree(pParamSet); + terrno = TSDB_CODE_TMQ_CONSUMER_CLOSED; + return -1; + } + + // if no more waiting rsp + if (pParamSet->async) { + // call async cb func + if (pParamSet->automatic && tmq->commitCb) { + tmq->commitCb(tmq, pParamSet->rspErr, tmq->commitCbUserParam); + } else if (!pParamSet->automatic && pParamSet->userCb) { + // sem post + pParamSet->userCb(tmq, pParamSet->rspErr, pParamSet->userParam); + } + taosMemoryFree(pParamSet); + } else { + tsem_post(&pParamSet->rspSem); + } + +#if 0 + taosArrayDestroyP(pParamSet->successfulOffsets, taosMemoryFree); + taosArrayDestroyP(pParamSet->failedOffsets, taosMemoryFree); +#endif + return 0; +} \ No newline at end of file From 9e860f02ac942e607db2b1167b92591877fafcf8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 11:25:37 +0800 Subject: [PATCH 17/28] refactor: do some internal refactor. --- include/common/tcommon.h | 7 -- source/client/src/clientTmq.c | 205 +++++++++++++++++----------------- source/common/src/tmsg.c | 3 +- 3 files changed, 104 insertions(+), 111 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 2a40976a8b..9e928a79ac 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -25,13 +25,6 @@ extern "C" { #endif -// TODO remove it -enum { - TMQ_CONF__RESET_OFFSET__NONE = -3, - TMQ_CONF__RESET_OFFSET__EARLIEAST = -2, - TMQ_CONF__RESET_OFFSET__LATEST = -1, -}; - // clang-format off #define IS_META_MSG(x) ( \ x == TDMT_VND_CREATE_STB \ diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index ba223984e3..0d3d37dc29 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -20,18 +20,10 @@ #include "tdatablock.h" #include "tdef.h" #include "tglobal.h" -#include "tmsgtype.h" #include "tqueue.h" #include "tref.h" #include "ttimer.h" -#if 0 -#undef tsem_post -#define tsem_post(x) \ - tscInfo("call sem post at %s %d", __FUNCTION__, __LINE__); \ - sem_post(x) -#endif - struct SMqMgmt { int8_t inited; tmr_h timer; @@ -216,6 +208,7 @@ typedef struct { static int32_t tmqAskEp(tmq_t* tmq, bool async); static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg); static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet); +static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId); tmq_conf_t* tmq_conf_new() { tmq_conf_t* conf = taosMemoryCalloc(1, sizeof(tmq_conf_t)); @@ -227,7 +220,7 @@ tmq_conf_t* tmq_conf_new() { conf->withTbName = false; conf->autoCommit = true; conf->autoCommitInterval = 5000; - conf->resetOffset = TMQ_CONF__RESET_OFFSET__EARLIEAST; + conf->resetOffset = TMQ_OFFSET__RESET_EARLIEAST; conf->hbBgEnable = true; return conf; @@ -235,29 +228,35 @@ tmq_conf_t* tmq_conf_new() { void tmq_conf_destroy(tmq_conf_t* conf) { if (conf) { - if (conf->ip) taosMemoryFree(conf->ip); - if (conf->user) taosMemoryFree(conf->user); - if (conf->pass) taosMemoryFree(conf->pass); + if (conf->ip) { + taosMemoryFree(conf->ip); + } + if (conf->user) { + taosMemoryFree(conf->user); + } + if (conf->pass) { + taosMemoryFree(conf->pass); + } taosMemoryFree(conf); } } tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value) { - if (strcmp(key, "group.id") == 0) { + if (strcasecmp(key, "group.id") == 0) { tstrncpy(conf->groupId, value, TSDB_CGROUP_LEN); return TMQ_CONF_OK; } - if (strcmp(key, "client.id") == 0) { + if (strcasecmp(key, "client.id") == 0) { tstrncpy(conf->clientId, value, 256); return TMQ_CONF_OK; } - if (strcmp(key, "enable.auto.commit") == 0) { - if (strcmp(value, "true") == 0) { + if (strcasecmp(key, "enable.auto.commit") == 0) { + if (strcasecmp(value, "true") == 0) { conf->autoCommit = true; return TMQ_CONF_OK; - } else if (strcmp(value, "false") == 0) { + } else if (strcasecmp(value, "false") == 0) { conf->autoCommit = false; return TMQ_CONF_OK; } else { @@ -265,31 +264,31 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } } - if (strcmp(key, "auto.commit.interval.ms") == 0) { + if (strcasecmp(key, "auto.commit.interval.ms") == 0) { conf->autoCommitInterval = atoi(value); return TMQ_CONF_OK; } - if (strcmp(key, "auto.offset.reset") == 0) { - if (strcmp(value, "none") == 0) { - conf->resetOffset = TMQ_CONF__RESET_OFFSET__NONE; + if (strcasecmp(key, "auto.offset.reset") == 0) { + if (strcasecmp(value, "none") == 0) { + conf->resetOffset = TMQ_OFFSET__RESET_NONE; return TMQ_CONF_OK; - } else if (strcmp(value, "earliest") == 0) { - conf->resetOffset = TMQ_CONF__RESET_OFFSET__EARLIEAST; + } else if (strcasecmp(value, "earliest") == 0) { + conf->resetOffset = TMQ_OFFSET__RESET_EARLIEAST; return TMQ_CONF_OK; - } else if (strcmp(value, "latest") == 0) { - conf->resetOffset = TMQ_CONF__RESET_OFFSET__LATEST; + } else if (strcasecmp(value, "latest") == 0) { + conf->resetOffset = TMQ_OFFSET__RESET_LATEST; return TMQ_CONF_OK; } else { return TMQ_CONF_INVALID; } } - if (strcmp(key, "msg.with.table.name") == 0) { - if (strcmp(value, "true") == 0) { + if (strcasecmp(key, "msg.with.table.name") == 0) { + if (strcasecmp(value, "true") == 0) { conf->withTbName = true; return TMQ_CONF_OK; - } else if (strcmp(value, "false") == 0) { + } else if (strcasecmp(value, "false") == 0) { conf->withTbName = false; return TMQ_CONF_OK; } else { @@ -297,11 +296,11 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } } - if (strcmp(key, "experimental.snapshot.enable") == 0) { - if (strcmp(value, "true") == 0) { + if (strcasecmp(key, "experimental.snapshot.enable") == 0) { + if (strcasecmp(value, "true") == 0) { conf->snapEnable = true; return TMQ_CONF_OK; - } else if (strcmp(value, "false") == 0) { + } else if (strcasecmp(value, "false") == 0) { conf->snapEnable = false; return TMQ_CONF_OK; } else { @@ -309,42 +308,40 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } } - if (strcmp(key, "experimental.snapshot.batch.size") == 0) { + if (strcasecmp(key, "experimental.snapshot.batch.size") == 0) { conf->snapBatchSize = atoi(value); return TMQ_CONF_OK; } - if (strcmp(key, "enable.heartbeat.background") == 0) { - if (strcmp(value, "true") == 0) { + if (strcasecmp(key, "enable.heartbeat.background") == 0) { + if (strcasecmp(value, "true") == 0) { conf->hbBgEnable = true; return TMQ_CONF_OK; - } else if (strcmp(value, "false") == 0) { + } else if (strcasecmp(value, "false") == 0) { conf->hbBgEnable = false; return TMQ_CONF_OK; } else { return TMQ_CONF_INVALID; } - return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.ip") == 0) { + if (strcasecmp(key, "td.connect.ip") == 0) { conf->ip = taosStrdup(value); return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.user") == 0) { + if (strcasecmp(key, "td.connect.user") == 0) { conf->user = taosStrdup(value); return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.pass") == 0) { + if (strcasecmp(key, "td.connect.pass") == 0) { conf->pass = taosStrdup(value); return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.port") == 0) { + if (strcasecmp(key, "td.connect.port") == 0) { conf->port = atoi(value); return TMQ_CONF_OK; } - if (strcmp(key, "td.connect.db") == 0) { - /*conf->db = taosStrdup(value);*/ + if (strcasecmp(key, "td.connect.db") == 0) { return TMQ_CONF_OK; } @@ -352,7 +349,6 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value } tmq_list_t* tmq_list_new() { - // return (tmq_list_t*)taosArrayInit(0, sizeof(void*)); } @@ -382,13 +378,31 @@ char** tmq_list_to_c_array(const tmq_list_t* list) { return container->pData; } -static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet) { - int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); - if (waitingRspNum == 0) { - tmqCommitDone(pParamSet); +static void updateVgEpset(tmq_t* pTmq, SMqCommitCbParam* pParam, SEpSet* pEpSet) { + int32_t numOfTopics = taosArrayGetSize(pTmq->clientTopics); + for(int32_t i = 0; i < numOfTopics; ++i) { + SMqClientTopic* pTopic = taosArrayGet(pTmq->clientTopics, i); + if (strcmp(pTopic->topicName, pParam->topicName) != 0) { + continue; + } + + int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); + for(int32_t j = 0; j < numOfVgs; ++j) { + SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); + if (pClientVg->vgId == pParam->vgId) { + SEp* pEp = GET_ACTIVE_EP(pEpSet); + SEp* pOld = GET_ACTIVE_EP(&(pClientVg->epSet)); + uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pParam->vgId, + pEp->fqdn, pEp->port, pOld->fqdn, pOld->port); + pClientVg->epSet = *pEpSet; + break; + } + } + + break; } } - +// todo retry to send the commit if failed static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam* pParam = (SMqCommitCbParam*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; @@ -402,33 +416,10 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { } #endif + // update the epset if needed if (pBuf->pEpSet != NULL) { - // todo extract method taosThreadMutexLock(&pParam->pTmq->lock); - - int32_t numOfTopics = taosArrayGetSize(pParam->pTmq->clientTopics); - for(int32_t i = 0; i < numOfTopics; ++i) { - SMqClientTopic* pTopic = taosArrayGet(pParam->pTmq->clientTopics, i); - if (strcmp(pTopic->topicName, pParam->topicName) != 0) { - continue; - } - - int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); - for(int32_t j = 0; j < numOfVgs; ++j) { - SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); - if (pClientVg->vgId == pParam->vgId) { - SEp* pEp = GET_ACTIVE_EP(pBuf->pEpSet); - SEp* pOld = GET_ACTIVE_EP(&(pClientVg->epSet)); - uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pParam->vgId, - pEp->fqdn, pEp->port, pOld->fqdn, pOld->port); - pClientVg->epSet = *pBuf->pEpSet; - break; - } - } - - break; - } - + updateVgEpset(pParam->pTmq, pParam, pBuf->pEpSet); taosThreadMutexUnlock(&pParam->pTmq->lock); } @@ -436,10 +427,7 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { taosMemoryFree(pBuf->pData); taosMemoryFree(pBuf->pEpSet); - /*tscDebug("receive offset commit cb of %s on vgId:%d, offset is %" PRId64, pParam->pOffset->subKey, pParam->->vgId, - * pOffset->version);*/ - - tmqCommitRspCountDown(pParamSet); + tmqCommitRspCountDown(pParamSet, pParam->pTmq->consumerId, pParam->topicName, pParam->vgId); return 0; } @@ -522,7 +510,6 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, const char* pTopic pMsgSendInfo->paramFreeFp = taosMemoryFree; pMsgSendInfo->fp = tmqCommitCb; pMsgSendInfo->msgType = TDMT_VND_TMQ_COMMIT_OFFSET; - // send msg atomic_add_fetch_32(&pParamSet->waitingRspNum, 1); atomic_add_fetch_32(&pParamSet->totalRspNum, 1); @@ -652,9 +639,10 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, for (int32_t i = 0; i < numOfTopics; i++) { SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); + int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); - // todo race condition: fix it - int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); + tscDebug("consumer:0x%" PRIx64 " commit offset for topics:%s, numOfVgs:%d", tmq->consumerId, pTopic->topicName, + numOfVgroups); for (int32_t j = 0; j < numOfVgroups; j++) { SMqClientVg clientVg = *(SMqClientVg*)taosArrayGet(pTopic->vgs, j); if (clientVg.currentOffset.type > 0 && !tOffsetEqual(&clientVg.currentOffset, &clientVg.committedOffset)) { @@ -668,6 +656,8 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, } } + tscDebug("consumer:0x%" PRIx64 " total commit:%d for %d topics", tmq->consumerId, pParamSet->waitingRspNum, + numOfTopics); taosThreadMutexUnlock(&tmq->lock); // no request is sent @@ -678,7 +668,7 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, } // count down since waiting rsp num init as 1 - tmqCommitRspCountDown(pParamSet); + tmqCommitRspCountDown(pParamSet, tmq->consumerId, "", 0); if (!async) { tsem_wait(&pParamSet->rspSem); @@ -696,9 +686,9 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, static int32_t tmqCommitInner(tmq_t* tmq, const TAOS_RES* msg, int8_t automatic, int8_t async, tmq_commit_cb* userCb, void* userParam) { - if (msg) { + if (msg) { // user invoked commit? return tmqCommitMsgImpl(tmq, msg, async, userCb, userParam); - } else { + } else { // this for auto commit return tmqCommitConsumerImpl(tmq, automatic, async, userCb, userParam); } } @@ -986,7 +976,7 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { tmq_t* pTmq = taosMemoryCalloc(1, sizeof(tmq_t)); if (pTmq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - tscError("failed to create consumer, consumer group %s, code:%s", conf->groupId, terrstr()); + tscError("failed to create consumer, groupId:%s, code:%s", conf->groupId, terrstr()); return NULL; } @@ -1002,9 +992,9 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { if (pTmq->clientTopics == NULL || pTmq->mqueue == NULL || pTmq->qall == NULL || pTmq->delayedTask == NULL || conf->groupId[0] == 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; - tscError("consumer:0x%" PRIx64 " setup failed since %s, consumer group %s", pTmq->consumerId, terrstr(), + tscError("consumer:0x%" PRIx64 " setup failed since %s, groupId:%s", pTmq->consumerId, terrstr(), pTmq->groupId); - goto FAIL; + goto _failed; } // init status @@ -1034,22 +1024,20 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { if (tsem_init(&pTmq->rspSem, 0, 0) != 0) { tscError("consumer:0x %" PRIx64 " setup failed since %s, consumer group %s", pTmq->consumerId, terrstr(), pTmq->groupId); - goto FAIL; + goto _failed; } // init connection pTmq->pTscObj = taos_connect_internal(conf->ip, user, pass, NULL, NULL, conf->port, CONN_TYPE__TMQ); if (pTmq->pTscObj == NULL) { - tscError("consumer:0x %" PRIx64 " setup failed since %s, consumer group %s", pTmq->consumerId, terrstr(), - pTmq->groupId); + tscError("consumer:0x%" PRIx64 " setup failed since %s, groupId:%s", pTmq->consumerId, terrstr(), pTmq->groupId); tsem_destroy(&pTmq->rspSem); - goto FAIL; + goto _failed; } pTmq->refId = taosAddRef(tmqMgmt.rsetId, pTmq); if (pTmq->refId < 0) { - tmqFreeImpl(pTmq); - return NULL; + goto _failed; } if (pTmq->hbBgEnable) { @@ -1058,16 +1046,17 @@ tmq_t* tmq_consumer_new(tmq_conf_t* conf, char* errstr, int32_t errstrLen) { pTmq->hbLiveTimer = taosTmrStart(tmqSendHbReq, 1000, pRefId, tmqMgmt.timer); } - tscInfo("consumer:0x%" PRIx64 " is setup, groupId:%s", pTmq->consumerId, pTmq->groupId); + char buf[80] = {0}; + STqOffsetVal offset = {.type = pTmq->resetOffsetCfg}; + tFormatOffset(buf, tListLen(buf), &offset); + tscInfo("consumer:0x%" PRIx64 " is setup, groupId:%s, snapshot:%d, autoCommit:%d, commitInterval:%dms, offset:%s, backgroudHB:%d", + pTmq->consumerId, pTmq->groupId, pTmq->useSnapshot, pTmq->autoCommit, pTmq->autoCommitInterval, buf, + pTmq->hbBgEnable); + return pTmq; -FAIL: - if (pTmq->clientTopics) taosArrayDestroy(pTmq->clientTopics); - if (pTmq->mqueue) taosCloseQueue(pTmq->mqueue); - if (pTmq->delayedTask) taosCloseQueue(pTmq->delayedTask); - if (pTmq->qall) taosFreeQall(pTmq->qall); - taosMemoryFree(pTmq); - +_failed: + tmqFreeImpl(pTmq); return NULL; } @@ -2123,10 +2112,10 @@ int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { // call async cb func if (pParamSet->automatic && tmq->commitCb) { tmq->commitCb(tmq, pParamSet->rspErr, tmq->commitCbUserParam); - } else if (!pParamSet->automatic && pParamSet->userCb) { - // sem post + } else if (!pParamSet->automatic && pParamSet->userCb) { // sem post pParamSet->userCb(tmq, pParamSet->rspErr, pParamSet->userParam); } + taosMemoryFree(pParamSet); } else { tsem_post(&pParamSet->rspSem); @@ -2137,4 +2126,14 @@ int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { taosArrayDestroyP(pParamSet->failedOffsets, taosMemoryFree); #endif return 0; -} \ No newline at end of file +} + +void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId) { + int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); + tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d commit-rsp received, remain:%d", consumerId, pTopic, vgId, + waitingRspNum); + + if (waitingRspNum == 0) { + tmqCommitDone(pParamSet); + } +} diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 01a0ee7306..1f7e42e9d2 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -6586,8 +6586,9 @@ int32_t tFormatOffset(char *buf, int32_t maxLen, const STqOffsetVal *pVal) { } else if (pVal->type == TMQ_OFFSET__SNAPSHOT_DATA || pVal->type == TMQ_OFFSET__SNAPSHOT_META) { snprintf(buf, maxLen, "offset(snapshot) uid:%" PRId64 " ts:%" PRId64, pVal->uid, pVal->ts); } else { - ASSERT(0); + return TSDB_CODE_INVALID_PARA; } + return 0; } From af423685ca1c6d2f15163b9d73ff78c9e6e88446 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 17:42:01 +0800 Subject: [PATCH 18/28] fix(tmq): handle the commit failure. --- source/client/src/clientTmq.c | 137 ++++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 46 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 0d3d37dc29..14d056224a 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -208,6 +208,8 @@ typedef struct { static int32_t tmqAskEp(tmq_t* tmq, bool async); static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg); static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet); +static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet, + int32_t index, int32_t totalVgroups); static void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId); tmq_conf_t* tmq_conf_new() { @@ -378,51 +380,90 @@ char** tmq_list_to_c_array(const tmq_list_t* list) { return container->pData; } -static void updateVgEpset(tmq_t* pTmq, SMqCommitCbParam* pParam, SEpSet* pEpSet) { - int32_t numOfTopics = taosArrayGetSize(pTmq->clientTopics); +static SMqClientVg* foundClientVg(SArray* pTopicList, const char* pName, int32_t vgId, int32_t* index, int32_t* numOfVgroups) { + int32_t numOfTopics = taosArrayGetSize(pTopicList); + *index = -1; + *numOfVgroups = 0; + for(int32_t i = 0; i < numOfTopics; ++i) { - SMqClientTopic* pTopic = taosArrayGet(pTmq->clientTopics, i); - if (strcmp(pTopic->topicName, pParam->topicName) != 0) { + SMqClientTopic* pTopic = taosArrayGet(pTopicList, i); + if (strcmp(pTopic->topicName, pName) != 0) { continue; } - int32_t numOfVgs = taosArrayGetSize(pTopic->vgs); - for(int32_t j = 0; j < numOfVgs; ++j) { + *numOfVgroups = taosArrayGetSize(pTopic->vgs); + for (int32_t j = 0; j < (*numOfVgroups); ++j) { SMqClientVg* pClientVg = taosArrayGet(pTopic->vgs, j); - if (pClientVg->vgId == pParam->vgId) { - SEp* pEp = GET_ACTIVE_EP(pEpSet); - SEp* pOld = GET_ACTIVE_EP(&(pClientVg->epSet)); - uDebug("subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d", pParam->pOffset->subKey, pParam->vgId, - pEp->fqdn, pEp->port, pOld->fqdn, pOld->port); - pClientVg->epSet = *pEpSet; - break; + if (pClientVg->vgId == vgId) { + *index = j; + return pClientVg; } } - - break; } + + return NULL; } -// todo retry to send the commit if failed + static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { SMqCommitCbParam* pParam = (SMqCommitCbParam*)param; SMqCommitCbParamSet* pParamSet = (SMqCommitCbParamSet*)pParam->params; - // push into array -#if 0 - if (code == 0) { - taosArrayPush(pParamSet->failedOffsets, &pParam->pOffset); - } else { - taosArrayPush(pParamSet->successfulOffsets, &pParam->pOffset); - } -#endif - - // update the epset if needed - if (pBuf->pEpSet != NULL) { + if (code != TSDB_CODE_SUCCESS) { // if commit offset failed, let's try again taosThreadMutexLock(&pParam->pTmq->lock); - updateVgEpset(pParam->pTmq, pParam, pBuf->pEpSet); + int32_t numOfVgroups, index; + SMqClientVg* pVg = foundClientVg(pParam->pTmq->clientTopics, pParam->topicName, pParam->vgId, &index, &numOfVgroups); + + if (pVg == NULL) { + tscDebug("consumer:0x%" PRIx64 + " subKey:%s vgId:%d commit failed, code:%s has been transferred to other consumer, no need retry ordinal:%d/%d", + pParam->pTmq->consumerId, pParam->pOffset->subKey, pParam->vgId, tstrerror(code), index + 1, numOfVgroups); + } else { // let's retry the commit + int32_t code1 = doSendCommitMsg(pParam->pTmq, pVg, pParam->topicName, pParamSet, index, numOfVgroups); + if (code1 != TSDB_CODE_SUCCESS) { // retry failed. + tscError("consumer:0x%" PRIx64 " topic:%s vgId:%d offset:%" PRId64 + " retry failed, ignore this commit. code:%s ordinal:%d/%d", + pParam->pTmq->consumerId, pParam->topicName, pVg->vgId, pVg->committedOffset.version, + tstrerror(terrno), index + 1, numOfVgroups); + } + } + taosThreadMutexUnlock(&pParam->pTmq->lock); + + taosMemoryFree(pParam->pOffset); + taosMemoryFree(pBuf->pData); + taosMemoryFree(pBuf->pEpSet); + + tmqCommitRspCountDown(pParamSet, pParam->pTmq->consumerId, pParam->topicName, pParam->vgId); + return 0; } + // todo replace the pTmq with refId + taosThreadMutexLock(&pParam->pTmq->lock); + tmq_t* pTmq = pParam->pTmq; + int32_t index = 0, numOfVgroups = 0; + + SMqClientVg* pVg = foundClientVg(pTmq->clientTopics, pParam->topicName, pParam->vgId, &index, &numOfVgroups); + if (pVg == NULL) { + tscDebug("consumer:0x%" PRIx64 " subKey:%s vgId:%d has been transferred to other consumer, ordinal:%d/%d", + pParam->pTmq->consumerId, pParam->pOffset->subKey, pParam->vgId, index + 1, numOfVgroups); + } else { // update the epset if needed + if (pBuf->pEpSet != NULL) { + SEp* pEp = GET_ACTIVE_EP(pBuf->pEpSet); + SEp* pOld = GET_ACTIVE_EP(&(pVg->epSet)); + + tscDebug("consumer:0x%" PRIx64 " subKey:%s update the epset vgId:%d, ep:%s:%d, old ep:%s:%d, ordinal:%d/%d", + pTmq->consumerId, pParam->pOffset->subKey, pParam->vgId, pEp->fqdn, pEp->port, pOld->fqdn, pOld->port, + index + 1, numOfVgroups); + + pVg->epSet = *pBuf->pEpSet; + } + + // update the offset value. + pVg->committedOffset = pVg->currentOffset; + } + + taosThreadMutexUnlock(&pParam->pTmq->lock); + taosMemoryFree(pParam->pOffset); taosMemoryFree(pBuf->pData); taosMemoryFree(pBuf->pEpSet); @@ -431,7 +472,8 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { return 0; } -static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet) { +static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicName, SMqCommitCbParamSet* pParamSet, + int32_t index, int32_t totalVgroups) { STqOffset* pOffset = taosMemoryCalloc(1, sizeof(STqOffset)); if (pOffset == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -498,11 +540,9 @@ static int32_t tmqSendCommitReq(tmq_t* tmq, SMqClientVg* pVg, const char* pTopic }; SEp* pEp = GET_ACTIVE_EP(&pVg->epSet); - tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d offset:%" PRId64 " prev:%" PRId64 ", ep:%s:%d", tmq->consumerId, - pOffset->subKey, pVg->vgId, pOffset->val.version, pVg->committedOffset.version, pEp->fqdn, pEp->port); - - // TODO: put into cb, the commit offset should be move to the callback function - pVg->committedOffset = pVg->currentOffset; + tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d offset:%" PRId64 " prev:%" PRId64 ", ep:%s:%d, ordinal:%d/%d", + tmq->consumerId, pOffset->subKey, pVg->vgId, pOffset->val.version, pVg->committedOffset.version, pEp->fqdn, + pEp->port, index + 1, totalVgroups); pMsgSendInfo->requestId = generateRequestId(); pMsgSendInfo->requestObjRefId = 0; @@ -557,15 +597,19 @@ static int32_t tmqCommitMsgImpl(tmq_t* tmq, const TAOS_RES* msg, int8_t async, t taosThreadMutexLock(&tmq->lock); for (int32_t i = 0; i < taosArrayGetSize(tmq->clientTopics); i++) { SMqClientTopic* pTopic = taosArrayGet(tmq->clientTopics, i); - if (strcmp(pTopic->topicName, topic) != 0) continue; - for (int32_t j = 0; j < taosArrayGetSize(pTopic->vgs); j++) { + if (strcmp(pTopic->topicName, topic) != 0) { + continue; + } + + int32_t numOfVgroups = taosArrayGetSize(pTopic->vgs); + for (int32_t j = 0; j < numOfVgroups; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); if (pVg->vgId != vgId) { continue; } if (pVg->currentOffset.type > 0 && !tOffsetEqual(&pVg->currentOffset, &pVg->committedOffset)) { - if (tmqSendCommitReq(tmq, pVg, pTopic->topicName, pParamSet) < 0) { + if (doSendCommitMsg(tmq, pVg, pTopic->topicName, pParamSet, j, numOfVgroups) < 0) { tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); goto FAIL; @@ -634,7 +678,6 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, taosThreadMutexLock(&tmq->lock); int32_t numOfTopics = taosArrayGetSize(tmq->clientTopics); - tscDebug("consumer:0x%" PRIx64 " start to commit offset for %d topics", tmq->consumerId, numOfTopics); for (int32_t i = 0; i < numOfTopics; i++) { @@ -644,14 +687,19 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, tscDebug("consumer:0x%" PRIx64 " commit offset for topics:%s, numOfVgs:%d", tmq->consumerId, pTopic->topicName, numOfVgroups); for (int32_t j = 0; j < numOfVgroups; j++) { - SMqClientVg clientVg = *(SMqClientVg*)taosArrayGet(pTopic->vgs, j); - if (clientVg.currentOffset.type > 0 && !tOffsetEqual(&clientVg.currentOffset, &clientVg.committedOffset)) { - if (tmqSendCommitReq(tmq, &clientVg, pTopic->topicName, pParamSet) < 0) { + SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); + + if (pVg->currentOffset.type > 0 && !tOffsetEqual(&pVg->currentOffset, &pVg->committedOffset)) { + code = doSendCommitMsg(tmq, pVg, pTopic->topicName, pParamSet, j, numOfVgroups); + if (code != TSDB_CODE_SUCCESS) { + tscError("consumer:0x%" PRIx64 " topic:%s vgId:%d offset:%" PRId64 " failed, code:%s ordinal:%d/%d", + tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->committedOffset.version, tstrerror(terrno), + j + 1, numOfVgroups); continue; } } else { - tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d, not commit, current:%" PRId64 ", ordinal:%d/%d", - tmq->consumerId, pTopic->topicName, clientVg.vgId, clientVg.currentOffset.version, j + 1, numOfVgroups); + tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d, no commit, current:%" PRId64 ", ordinal:%d/%d", + tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->currentOffset.version, j + 1, numOfVgroups); } } } @@ -1804,7 +1852,6 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { taosFreeQitem(pollRspWrapper); } } else { - /*printf("handle ep rsp %d\n", rspMsg->head.mqMsgType);*/ bool reset = false; tmqHandleNoPollRsp(tmq, rspWrapper, &reset); taosFreeQitem(rspWrapper); @@ -1814,8 +1861,6 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } } } - - tscDebug("consumer:0x%" PRIx64 " handle the rsp completed", tmq->consumerId); } TAOS_RES* tmq_consumer_poll(tmq_t* tmq, int64_t timeout) { From 85ee1318d2f3138fba4ac0d9560a7d2ed97f990a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 17:44:35 +0800 Subject: [PATCH 19/28] refactor: add some logs. --- source/client/src/clientTmq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 14d056224a..a4f8b46395 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -460,6 +460,8 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { // update the offset value. pVg->committedOffset = pVg->currentOffset; + tscDebug("consumer:0x%" PRIx64 " subKey:%s vgId:%d, commit offset success. ordinal:%d/%d", pTmq->consumerId, + pParam->pOffset->subKey, pParam->vgId, index + 1, numOfVgroups); } taosThreadMutexUnlock(&pParam->pTmq->lock); From 26925a1c133c53032ca888d77050091d21b6dfc6 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Mar 2023 18:01:56 +0800 Subject: [PATCH 20/28] refactor: add some logs for tmq. --- source/client/src/clientTmq.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index a4f8b46395..e708b64a92 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -706,7 +706,7 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, } } - tscDebug("consumer:0x%" PRIx64 " total commit:%d for %d topics", tmq->consumerId, pParamSet->waitingRspNum, + tscDebug("consumer:0x%" PRIx64 " total commit:%d for %d topics", tmq->consumerId, pParamSet->waitingRspNum - 1, numOfTopics); taosThreadMutexUnlock(&tmq->lock); @@ -2177,10 +2177,12 @@ int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { void tmqCommitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, const char* pTopic, int32_t vgId) { int32_t waitingRspNum = atomic_sub_fetch_32(&pParamSet->waitingRspNum, 1); - tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d commit-rsp received, remain:%d", consumerId, pTopic, vgId, - waitingRspNum); - if (waitingRspNum == 0) { + tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d all commit-rsp received, commit completed", consumerId, pTopic, + vgId); tmqCommitDone(pParamSet); + } else { + tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d commit-rsp received, remain:%d", consumerId, pTopic, vgId, + waitingRspNum); } } From 7f2aae69d4bcea30e0e275aed1e9df64f66a524b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 13 Mar 2023 01:54:29 +0800 Subject: [PATCH 21/28] fix(tmq): fix error in taosx --- source/client/src/clientTmq.c | 47 +++++++++++++++++++----------- source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/tq/tq.c | 35 ++++++++++------------ source/dnode/vnode/src/tq/tqRead.c | 6 ++-- utils/test/c/tmqSim.c | 22 ++++++++------ 5 files changed, 63 insertions(+), 49 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index e708b64a92..55d0453fbf 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -150,6 +150,7 @@ typedef struct { int32_t epoch; SMqClientVg* vgHandle; SMqClientTopic* topicHandle; + uint64_t reqId; union { SMqDataRsp dataRsp; SMqMetaRsp metaRsp; @@ -458,8 +459,6 @@ static int32_t tmqCommitCb(void* param, SDataBuf* pBuf, int32_t code) { pVg->epSet = *pBuf->pEpSet; } - // update the offset value. - pVg->committedOffset = pVg->currentOffset; tscDebug("consumer:0x%" PRIx64 " subKey:%s vgId:%d, commit offset success. ordinal:%d/%d", pTmq->consumerId, pParam->pOffset->subKey, pParam->vgId, index + 1, numOfVgroups); } @@ -699,6 +698,9 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, j + 1, numOfVgroups); continue; } + + // update the offset value. + pVg->committedOffset = pVg->currentOffset; } else { tscDebug("consumer:0x%" PRIx64 " topic:%s vgId:%d, no commit, current:%" PRId64 ", ordinal:%d/%d", tmq->consumerId, pTopic->topicName, pVg->vgId, pVg->currentOffset.version, j + 1, numOfVgroups); @@ -1261,8 +1263,8 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { taosMemoryFree(pParam); if (code != 0) { - tscWarn("consumer:0x%"PRIx64" msg from vgId:%d discarded, epoch %d, since %s, reqId:0x%"PRIx64, tmq->consumerId, vgId, - epoch, tstrerror(code), requestId); + tscWarn("consumer:0x%" PRIx64 " msg from vgId:%d discarded, epoch %d, since %s, reqId:0x%" PRIx64, tmq->consumerId, + vgId, epoch, tstrerror(code), requestId); if (pMsg->pData) taosMemoryFree(pMsg->pData); if (pMsg->pEpSet) taosMemoryFree(pMsg->pEpSet); @@ -1281,8 +1283,6 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { } pRspWrapper->tmqRspType = TMQ_MSG_TYPE__END_RSP; - /*pRspWrapper->vgHandle = pVg;*/ - /*pRspWrapper->topicHandle = pTopic;*/ taosWriteQitem(tmq->mqueue, pRspWrapper); tsem_post(&tmq->rspSem); } @@ -1304,8 +1304,8 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { } if (msgEpoch != tmqEpoch) { - tscWarn("consumer:0x%"PRIx64" mismatch rsp from vgId:%d, epoch %d, current epoch %d, reqId:0x%"PRIx64, tmq->consumerId, vgId, - msgEpoch, tmqEpoch, requestId); + tscWarn("consumer:0x%" PRIx64 " mismatch rsp from vgId:%d, epoch %d, current epoch %d, reqId:0x%" PRIx64, + tmq->consumerId, vgId, msgEpoch, tmqEpoch, requestId); } // handle meta rsp @@ -1322,6 +1322,7 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { pRspWrapper->tmqRspType = rspType; pRspWrapper->vgHandle = pVg; pRspWrapper->topicHandle = pTopic; + pRspWrapper->reqId = requestId; if (rspType == TMQ_MSG_TYPE__POLL_RSP) { SDecoder decoder; @@ -1350,13 +1351,11 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { taosMemoryFree(pMsg->pData); taosMemoryFree(pMsg->pEpSet); + taosWriteQitem(tmq->mqueue, pRspWrapper); tscDebug("consumer:0x%" PRIx64 ", put poll res into mqueue, total in queue:%d, reqId:0x%" PRIx64, tmq->consumerId, tmq->mqueue->numOfItems, requestId); - - taosWriteQitem(tmq->mqueue, pRspWrapper); tsem_post(&tmq->rspSem); - return 0; CREATE_MSG_FAIL: @@ -1763,6 +1762,8 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } } + tscDebug("consumer:0x%"PRIx64" handle rsp, type:%d", tmq->consumerId, rspWrapper->tmqRspType); + if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__END_RSP) { taosFreeQitem(rspWrapper); terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; @@ -1779,7 +1780,8 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); if (pollRspWrapper->dataRsp.blockNum == 0) { - tscDebug("consumer:0x%" PRIx64 " empty block received, vgId:%d", tmq->consumerId, pVg->vgId); + tscDebug("consumer:0x%" PRIx64 " empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, + pollRspWrapper->reqId); taosFreeQitem(pollRspWrapper); rspWrapper = NULL; continue; @@ -1789,8 +1791,8 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { char buf[80]; tFormatOffset(buf, 80, &pVg->currentOffset); SMqRspObj* pRsp = tmqBuildRspFromWrapper(pollRspWrapper); - tscDebug("consumer:0x%" PRIx64 " process poll rsp, vgId:%d, offset:%s, blocks:%d", tmq->consumerId, - pVg->vgId, buf, pollRspWrapper->dataRsp.blockNum); + tscDebug("consumer:0x%" PRIx64 " process poll rsp, vgId:%d, offset:%s, blocks:%d, reqId:0x%"PRIx64, tmq->consumerId, + pVg->vgId, buf, pollRspWrapper->dataRsp.blockNum, pollRspWrapper->reqId); taosFreeQitem(pollRspWrapper); return pRsp; @@ -1824,17 +1826,19 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } } else if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__TAOSX_RSP) { SMqPollRspWrapper* pollRspWrapper = (SMqPollRspWrapper*)rspWrapper; - /*atomic_sub_fetch_32(&tmq->readyRequest, 1);*/ int32_t consumerEpoch = atomic_load_32(&tmq->epoch); + if (pollRspWrapper->taosxRsp.head.epoch == consumerEpoch) { SMqClientVg* pVg = pollRspWrapper->vgHandle; - /*printf("vgId:%d, offset %" PRId64 " up to %" PRId64 "\n", pVg->vgId, pVg->currentOffset, - * rspMsg->msg.rspOffset);*/ pVg->currentOffset = pollRspWrapper->taosxRsp.rspOffset; atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); + if (pollRspWrapper->taosxRsp.blockNum == 0) { taosFreeQitem(pollRspWrapper); rspWrapper = NULL; + + tscDebug("consumer:0x%" PRIx64 " taosx empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, + pollRspWrapper->reqId); continue; } @@ -1845,8 +1849,15 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { } else { pRsp = tmqBuildTaosxRspFromWrapper(pollRspWrapper); } + taosFreeQitem(pollRspWrapper); + + char buf[80]; + tFormatOffset(buf, 80, &pVg->currentOffset); + tscDebug("consumer:0x%" PRIx64 " process taosx poll rsp, vgId:%d, offset:%s, blocks:%d, reqId:0x%"PRIx64, tmq->consumerId, pVg->vgId, + buf, pollRspWrapper->dataRsp.blockNum, pollRspWrapper->reqId); return pRsp; + } else { tscDebug("consumer:0x%" PRIx64 " msg discard since epoch mismatch: msg epoch %d, consumer epoch %d", tmq->consumerId, pollRspWrapper->taosxRsp.head.epoch, consumerEpoch); @@ -1854,6 +1865,8 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { taosFreeQitem(pollRspWrapper); } } else { + tscDebug("consumer:0x%" PRIx64 " not data msg received", tmq->consumerId); + bool reset = false; tmqHandleNoPollRsp(tmq, rspWrapper, &reset); taosFreeQitem(rspWrapper); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 6cb54c6c18..a45d6d1dec 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -141,7 +141,7 @@ int32_t tDecodeSTqHandle(SDecoder* pDecoder, STqHandle* pHandle); // tqRead int32_t tqScanTaosx(STQ* pTq, const STqHandle* pHandle, STaosxRsp* pRsp, SMqMetaRsp* pMetaRsp, STqOffsetVal* offset); int32_t tqScanData(STQ* pTq, const STqHandle* pHandle, SMqDataRsp* pRsp, STqOffsetVal* pOffset); -int64_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHead** pHeadWithCkSum); +int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHead** pHeadWithCkSum); // tqExec int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxRsp* pRsp); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index a6919d97a9..76a6a8a840 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -330,15 +330,16 @@ int32_t tqSendTaosxRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, co .contLen = tlen, .code = 0, }; + tmsgSendRsp(&rsp); char buf1[80] = {0}; char buf2[80] = {0}; tFormatOffset(buf1, 80, &pRsp->reqOffset); tFormatOffset(buf2, 80, &pRsp->rspOffset); - tqDebug("taosx rsp, vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) send rsp, numOfBlks:%d, req:%s, rsp:%s", - TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); + tqDebug("taosx rsp, vgId:%d, consumer:0x%" PRIx64 " (epoch %d) send rsp, numOfBlks:%d, req:%s, rsp:%s", + TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); return 0; } @@ -600,7 +601,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (metaRsp.metaRspLen > 0) { code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); - tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send meta offset type:%d,uid:%" PRId64 + tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64 ",version:%" PRId64, consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.version); @@ -617,7 +618,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* offset = taosxRsp.rspOffset; } - tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey %s, vg %d, send data blockNum:%d, offset type:%d,uid:%" PRId64 + tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send data blockNum:%d, offset type:%d,uid:%" PRId64 ",version:%" PRId64, consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid, taosxRsp.rspOffset.version); @@ -637,7 +638,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* // todo refactor: this is not correct. int32_t savedEpoch = atomic_load_32(&pHandle->epoch); if (savedEpoch > pRequest->epoch) { - tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, vg %d offset %" PRId64 + tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey:%s vgId:%d offset %" PRId64 ", found new consumer epoch %d, discard req epoch %d", consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch); break; @@ -645,9 +646,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead) < 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - if (tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp) < 0) { - code = -1; - } + code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); tDeleteSTaosxRsp(&taosxRsp); taosMemoryFreeClear(pCkHead); return code; @@ -669,11 +668,10 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest->subKey); return -1; } - if (taosxRsp.blockNum > 0 /* threshold */) { + + if (taosxRsp.blockNum > 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - if (tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp) < 0) { - code = -1; - } + code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); tDeleteSTaosxRsp(&taosxRsp); taosMemoryFreeClear(pCkHead); return code; @@ -741,21 +739,18 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { taosRUnLockLatch(&pTq->pushLock); taosWLockLatch(&pTq->pushLock); - int32_t savedEpoch = pHandle->epoch; - // 3. update the epoch value -// while (savedEpoch < reqEpoch) { + int32_t savedEpoch = pHandle->epoch; + if (savedEpoch < reqEpoch) { tqDebug("tmq poll: consumer:0x%" PRIx64 " epoch update from %d to %d by poll req", consumerId, savedEpoch, reqEpoch); pHandle->epoch = reqEpoch; -// savedEpoch = atomic_val_compare_exchange_32(&pHandle->epoch, savedEpoch, reqEpoch); -// } - + } taosWUnLockLatch(&pTq->pushLock); char buf[80]; tFormatOffset(buf, 80, &reqOffset); - tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s", consumerId, - req.epoch, pHandle->subKey, vgId, buf); + tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64, + consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId); return extractDataForMq(pTq, pHandle, &req, pMsg); } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index bf73cca925..5bb08df2a0 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -183,14 +183,15 @@ end: return tbSuid == realTbSuid; } -int64_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHead** ppCkHead) { +int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHead** ppCkHead) { int32_t code = 0; + taosThreadMutexLock(&pHandle->pWalReader->mutex); int64_t offset = *fetchOffset; while (1) { if (walFetchHead(pHandle->pWalReader, offset, *ppCkHead) < 0) { - tqDebug("tmq poll: consumer:%" PRId64 ", (epoch %d) vgId:%d offset %" PRId64 ", no more log to return", + tqDebug("tmq poll: consumer:0x%" PRIx64 ", (epoch %d) vgId:%d offset %" PRId64 ", no more log to return", pHandle->consumerId, pHandle->epoch, TD_VID(pTq->pVnode), offset); *fetchOffset = offset - 1; code = -1; @@ -241,6 +242,7 @@ int64_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHea offset++; } } + END: taosThreadMutexUnlock(&pHandle->pWalReader->mutex); return code; diff --git a/utils/test/c/tmqSim.c b/utils/test/c/tmqSim.c index 1acf50a7d8..ce4bafb79b 100644 --- a/utils/test/c/tmqSim.c +++ b/utils/test/c/tmqSim.c @@ -386,7 +386,7 @@ void addRowsToVgroupId(SThreadInfo* pInfo, int32_t vgroupId, int32_t rows) { pInfo->rowsOfPerVgroups[pInfo->numOfVgroups][1] += rows; pInfo->numOfVgroups++; - taosFprintfFile(g_fp, "consume id %d, add one new vogroup id: %d\n", pInfo->consumerId, vgroupId); + taosFprintfFile(g_fp, "consume id %d, add new vgroupId:%d\n", pInfo->consumerId, vgroupId); if (pInfo->numOfVgroups > MAX_VGROUP_CNT) { taosFprintfFile(g_fp, "====consume id %d, vgroup num %d over than 32. new vgroupId: %d\n", pInfo->consumerId, pInfo->numOfVgroups, vgroupId); @@ -578,18 +578,25 @@ static int32_t data_msg_process(TAOS_RES* msg, SThreadInfo* pInfo, int32_t msgIn char buf[1024]; int32_t totalRows = 0; - // printf("topic: %s\n", tmq_get_topic_name(msg)); int32_t vgroupId = tmq_get_vgroup_id(msg); const char* dbName = tmq_get_db_name(msg); taosFprintfFile(g_fp, "consumerId: %d, msg index:%d\n", pInfo->consumerId, msgIndex); - taosFprintfFile(g_fp, "dbName: %s, topic: %s, vgroupId: %d\n", dbName != NULL ? dbName : "invalid table", - tmq_get_topic_name(msg), vgroupId); + int32_t index = 0; + for (index = 0; index < pInfo->numOfVgroups; index++) { + if (vgroupId == pInfo->rowsOfPerVgroups[index][0]) { + break; + } + } + + taosFprintfFile(g_fp, "dbName: %s, topic: %s, vgroupId:%d, currentRows:%d\n", dbName != NULL ? dbName : "invalid table", + tmq_get_topic_name(msg), vgroupId, pInfo->rowsOfPerVgroups[index][1]); while (1) { TAOS_ROW row = taos_fetch_row(msg); - - if (row == NULL) break; + if (row == NULL) { + break; + } TAOS_FIELD* fields = taos_fetch_fields(msg); int32_t numOfFields = taos_field_count(msg); @@ -607,7 +614,6 @@ static int32_t data_msg_process(TAOS_RES* msg, SThreadInfo* pInfo, int32_t msgIn #endif dumpToFileForCheck(pInfo->pConsumeRowsFile, row, fields, length, numOfFields, precision); - taos_print_row(buf, row, fields, numOfFields); if (0 != g_stConfInfo.showRowFlag) { @@ -621,7 +627,6 @@ static int32_t data_msg_process(TAOS_RES* msg, SThreadInfo* pInfo, int32_t msgIn } addRowsToVgroupId(pInfo, vgroupId, totalRows); - return totalRows; } @@ -817,7 +822,6 @@ void loop_consume(SThreadInfo* pInfo) { } taos_free_result(tmqMsg); - totalMsgs++; int64_t currentPrintTime = taosGetTimestampMs(); From 731a2138df3cae9b214fd954f1beb8a1e81d62f8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 13 Mar 2023 17:08:17 +0800 Subject: [PATCH 22/28] fix(tmq): fix error in tmq. --- include/common/tmsg.h | 1 + source/client/src/clientTmq.c | 35 ++- source/client/test/clientTests.cpp | 81 +++++- source/common/src/tmsg.c | 3 +- source/dnode/vnode/src/inc/tq.h | 6 +- source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/tq/tq.c | 370 ++++++++++++++---------- source/dnode/vnode/src/tq/tqPush.c | 53 ++-- source/libs/executor/src/executor.c | 2 +- tests/system-test/7-tmq/subscribeDb1.py | 5 +- 10 files changed, 371 insertions(+), 187 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index d9d3c7e297..ee9a08b504 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -3185,6 +3185,7 @@ typedef struct { SArray* blockData; SArray* blockTbName; SArray* blockSchema; + // the following attributes are extended from SMqDataRsp int32_t createTableNum; SArray* createTableLen; SArray* createTableReq; diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 55d0453fbf..3167a4c361 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -24,6 +24,8 @@ #include "tref.h" #include "ttimer.h" +#define VG_POLL_IGNORE_TICK 100 + struct SMqMgmt { int8_t inited; tmr_h timer; @@ -126,16 +128,14 @@ enum { }; typedef struct { - // statistics - int64_t pollCnt; - // offset + int64_t pollCnt; STqOffsetVal committedOffset; STqOffsetVal currentOffset; - // connection info - int32_t vgId; - int32_t vgStatus; - int32_t vgSkipCnt; - SEpSet epSet; + int32_t vgId; + int32_t vgStatus; + int32_t vgSkipCnt; + int32_t vgIgnoreCnt; // once empty block is received, idle for ignoreCnt then start to poll data + SEpSet epSet; } SMqClientVg; typedef struct { @@ -1347,14 +1347,17 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) { tDecodeSTaosxRsp(&decoder, &pRspWrapper->taosxRsp); tDecoderClear(&decoder); memcpy(&pRspWrapper->taosxRsp, pMsg->pData, sizeof(SMqRspHead)); + } else { // invalid rspType + tscError("consumer:0x%"PRIx64" invalid rsp msg received, type:%d ignored", tmq->consumerId, rspType); } taosMemoryFree(pMsg->pData); taosMemoryFree(pMsg->pEpSet); taosWriteQitem(tmq->mqueue, pRspWrapper); - tscDebug("consumer:0x%" PRIx64 ", put poll res into mqueue, total in queue:%d, reqId:0x%" PRIx64, tmq->consumerId, - tmq->mqueue->numOfItems, requestId); + tscDebug("consumer:0x%" PRIx64 " put poll res into mqueue, type:%d, vgId:%d, total in queue:%d, reqId:0x%" PRIx64, + tmq->consumerId, rspType, vgId, tmq->mqueue->numOfItems, requestId); + tsem_post(&tmq->rspSem); return 0; @@ -1400,6 +1403,7 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic .epSet = pVgEp->epSet, .vgStatus = TMQ_VG_STATUS__IDLE, .vgSkipCnt = 0, + .vgIgnoreCnt = 0, }; taosArrayPush(pTopic->vgs, &clientVg); @@ -1700,6 +1704,13 @@ static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { for (int j = 0; j < numOfVg; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); + if (pVg->vgIgnoreCnt > 0) { + pVg->vgIgnoreCnt -= 1; + tscTrace("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for %d tick before poll", tmq->consumerId, tmq->epoch, + pVg->vgId, pVg->vgIgnoreCnt); + continue; + } + int32_t vgStatus = atomic_val_compare_exchange_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE, TMQ_VG_STATUS__WAIT); if (vgStatus == TMQ_VG_STATUS__WAIT) { int32_t vgSkipCnt = atomic_add_fetch_32(&pVg->vgSkipCnt, 1); @@ -1810,8 +1821,6 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { if (pollRspWrapper->metaRsp.head.epoch == consumerEpoch) { SMqClientVg* pVg = pollRspWrapper->vgHandle; - /*printf("vgId:%d, offset %" PRId64 " up to %" PRId64 "\n", pVg->vgId, pVg->currentOffset, - * rspMsg->msg.rspOffset);*/ pVg->currentOffset = pollRspWrapper->metaRsp.rspOffset; atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); // build rsp @@ -1836,9 +1845,9 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { if (pollRspWrapper->taosxRsp.blockNum == 0) { taosFreeQitem(pollRspWrapper); rspWrapper = NULL; - tscDebug("consumer:0x%" PRIx64 " taosx empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, pollRspWrapper->reqId); + pVg->vgIgnoreCnt = VG_POLL_IGNORE_TICK; continue; } diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 82d29b37eb..0c7e95f8eb 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -898,7 +898,86 @@ TEST(clientCase, update_test) { } } -TEST(clientCase, subscription_test) { +TEST(clientCase, sub_db_test) { + TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); + ASSERT_NE(pConn, nullptr); + + // TAOS_RES* pRes = taos_query(pConn, "create topic topic_t1 as select * from t1"); + // if (taos_errno(pRes) != TSDB_CODE_SUCCESS) { + // printf("failed to create topic, code:%s", taos_errstr(pRes)); + // taos_free_result(pRes); + // return; + // } + + tmq_conf_t* conf = tmq_conf_new(); + tmq_conf_set(conf, "enable.auto.commit", "true"); + tmq_conf_set(conf, "auto.commit.interval.ms", "1000"); + tmq_conf_set(conf, "group.id", "cgrpNamedb"); + tmq_conf_set(conf, "td.connect.user", "root"); + tmq_conf_set(conf, "td.connect.pass", "taosdata"); + tmq_conf_set(conf, "auto.offset.reset", "earliest"); + tmq_conf_set(conf, "experimental.snapshot.enable", "true"); + tmq_conf_set(conf, "msg.with.table.name", "true"); + tmq_conf_set_auto_commit_cb(conf, tmq_commit_cb_print, NULL); + + tmq_t* tmq = tmq_consumer_new(conf, NULL, 0); + tmq_conf_destroy(conf); + + // 创建订阅 topics 列表 + tmq_list_t* topicList = tmq_list_new(); + tmq_list_append(topicList, "topic_db1"); + + // 启动订阅 + tmq_subscribe(tmq, topicList); + tmq_list_destroy(topicList); + + TAOS_FIELD* fields = NULL; + int32_t numOfFields = 0; + int32_t precision = 0; + int32_t totalRows = 0; + int32_t msgCnt = 0; + int32_t timeout = 5000; + + int32_t count = 0; + + while (1) { + TAOS_RES* pRes = tmq_consumer_poll(tmq, timeout); + if (pRes) { + char buf[1024]; + int32_t rows = 0; + + const char* topicName = tmq_get_topic_name(pRes); + const char* dbName = tmq_get_db_name(pRes); + int32_t vgroupId = tmq_get_vgroup_id(pRes); + + printf("topic: %s\n", topicName); + printf("db: %s\n", dbName); + printf("vgroup id: %d\n", vgroupId); + + if (count ++ > 200) { + tmq_unsubscribe(tmq); + break; + } + + while (1) { + TAOS_ROW row = taos_fetch_row(pRes); + if (row == NULL) break; + + fields = taos_fetch_fields(pRes); + numOfFields = taos_field_count(pRes); + precision = taos_result_precision(pRes); + rows++; + taos_print_row(buf, row, fields, numOfFields); + printf("precision: %d, row content: %s\n", precision, buf); + } + } +// return rows; + } + + fprintf(stderr, "%d msg consumed, include %d rows\n", msgCnt, totalRows); +} + +TEST(clientCase, sub_tb_test) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); ASSERT_NE(pConn, nullptr); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 9c7d68f256..9d3adf8692 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -6822,8 +6822,7 @@ int32_t tDecodeSMqDataRsp(SDecoder *pDecoder, SMqDataRsp *pRsp) { } void tDeleteSMqDataRsp(SMqDataRsp *pRsp) { - taosArrayDestroy(pRsp->blockDataLen); - pRsp->blockDataLen = NULL; + pRsp->blockDataLen = taosArrayDestroy(pRsp->blockDataLen);; taosArrayDestroyP(pRsp->blockData, (FDelete)taosMemoryFree); pRsp->blockData = NULL; taosArrayDestroyP(pRsp->blockSchema, (FDelete)tDeleteSSchemaWrapper); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index a45d6d1dec..ee2bd007ce 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -103,9 +103,9 @@ typedef struct { } STqHandle; typedef struct { - SMqDataRsp dataRsp; + SMqDataRsp* pDataRsp; char subKey[TSDB_SUBSCRIBE_KEY_LEN]; - SRpcHandleInfo pInfo; + SRpcHandleInfo info; } STqPushEntry; struct STQ { @@ -147,7 +147,7 @@ int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHea int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxRsp* pRsp); // int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SSubmitReq* pReq, STaosxRsp* pRsp); int32_t tqAddBlockDataToRsp(const SSDataBlock* pBlock, SMqDataRsp* pRsp, int32_t numOfCols, int8_t precision); -int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp); +int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type); int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry); // tqMeta diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 27d351f915..0fe7f9a773 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -192,7 +192,7 @@ void tqCleanUp(); STQ* tqOpen(const char* path, SVnode* pVnode); void tqClose(STQ*); int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); -int tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp); +int tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, int32_t type); int tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); int tqCommit(STQ*); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 76a6a8a840..9dc862ec63 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -68,7 +68,13 @@ static void destroySTqHandle(void* data) { static void tqPushEntryFree(void* data) { STqPushEntry* p = *(void**)data; - tDeleteSMqDataRsp(&p->dataRsp); + if (p->pDataRsp->head.mqMsgType == TMQ_MSG_TYPE__POLL_RSP) { + tDeleteSMqDataRsp(p->pDataRsp); + } else if (p->pDataRsp->head.mqMsgType == TMQ_MSG_TYPE__TAOSX_RSP) { + tDeleteSTaosxRsp((STaosxRsp*)p->pDataRsp); + } + + taosMemoryFree(p->pDataRsp); taosMemoryFree(p); } @@ -166,8 +172,57 @@ int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, return 0; } +static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, + int64_t consumerId, int32_t type) { + int32_t len = 0; + int32_t code = 0; + + if (type == TMQ_MSG_TYPE__POLL_RSP) { + tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); + } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) { + tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code); + } + + if (code < 0) { + return -1; + } + + int32_t tlen = sizeof(SMqRspHead) + len; + void* buf = rpcMallocCont(tlen); + if (buf == NULL) { + return -1; + } + + ((SMqRspHead*)buf)->mqMsgType = type; + ((SMqRspHead*)buf)->epoch = epoch; + ((SMqRspHead*)buf)->consumerId = consumerId; + + void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); + + SEncoder encoder = {0}; + tEncoderInit(&encoder, abuf, len); + + if (type == TMQ_MSG_TYPE__POLL_RSP) { + tEncodeSMqDataRsp(&encoder, pRsp); + } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) { + tEncodeSTaosxRsp(&encoder, (STaosxRsp*) pRsp); + } + + tEncoderClear(&encoder); + + SRpcMsg rsp = { + .info = *pRpcHandleInfo, + .pCont = buf, + .contLen = tlen, + .code = 0, + }; + + tmsgSendRsp(&rsp); + return 0; +} + int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry) { - SMqDataRsp* pRsp = &pPushEntry->dataRsp; + SMqDataRsp* pRsp = pPushEntry->pDataRsp; #if 0 A(taosArrayGetSize(pRsp->blockData) == pRsp->blockNum); @@ -181,37 +236,40 @@ int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry) { } #endif - int32_t len = 0; - int32_t code = 0; - tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); +// int32_t len = 0; +// int32_t code = 0; +// tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); +// if (code < 0) { +// return -1; +// } +// +// int32_t tlen = sizeof(SMqRspHead) + len; +// void* buf = rpcMallocCont(tlen); +// if (buf == NULL) { +// return -1; +// } +// +// memcpy(buf, &pPushEntry->dataRsp.head, sizeof(SMqRspHead)); +// +// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); +// +// SEncoder encoder = {0}; +// tEncoderInit(&encoder, abuf, len); +// tEncodeSMqDataRsp(&encoder, pRsp); +// tEncoderClear(&encoder); +// +// SRpcMsg rsp = { +// .info = pPushEntry->pInfo, +// .pCont = buf, +// .contLen = tlen, +// .code = 0, +// }; +// +// tmsgSendRsp(&rsp); +// - if (code < 0) { - return -1; - } - - int32_t tlen = sizeof(SMqRspHead) + len; - void* buf = rpcMallocCont(tlen); - if (buf == NULL) { - return -1; - } - - memcpy(buf, &pPushEntry->dataRsp.head, sizeof(SMqRspHead)); - - void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); - - SEncoder encoder = {0}; - tEncoderInit(&encoder, abuf, len); - tEncodeSMqDataRsp(&encoder, pRsp); - tEncoderClear(&encoder); - - SRpcMsg rsp = { - .info = pPushEntry->pInfo, - .pCont = buf, - .contLen = tlen, - .code = 0, - }; - - tmsgSendRsp(&rsp); + SMqRspHead* pHeader = &pPushEntry->pDataRsp->head; + doSendDataRsp(&pPushEntry->info, pRsp, pHeader->epoch, pHeader->consumerId, pHeader->mqMsgType); char buf1[80] = {0}; char buf2[80] = {0}; @@ -219,11 +277,10 @@ int32_t tqPushDataRsp(STQ* pTq, STqPushEntry* pPushEntry) { tFormatOffset(buf2, tListLen(buf2), &pRsp->rspOffset); tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) push rsp, block num: %d, req:%s, rsp:%s", TD_VID(pTq->pVnode), pRsp->head.consumerId, pRsp->head.epoch, pRsp->blockNum, buf1, buf2); - return 0; } -int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp) { +int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqDataRsp* pRsp, int32_t type) { #if 0 A(taosArrayGetSize(pRsp->blockData) == pRsp->blockNum); A(taosArrayGetSize(pRsp->blockDataLen) == pRsp->blockNum); @@ -240,109 +297,127 @@ int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, con } #endif - int32_t len = 0; - int32_t code = 0; - tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); - if (code < 0) { - return -1; - } - int32_t tlen = sizeof(SMqRspHead) + len; - void* buf = rpcMallocCont(tlen); - if (buf == NULL) { - return -1; - } - - ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__POLL_RSP; - ((SMqRspHead*)buf)->epoch = pReq->epoch; - ((SMqRspHead*)buf)->consumerId = pReq->consumerId; - - void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); - - SEncoder encoder = {0}; - tEncoderInit(&encoder, abuf, len); - tEncodeSMqDataRsp(&encoder, pRsp); - tEncoderClear(&encoder); - - SRpcMsg rsp = { - .info = pMsg->info, - .pCont = buf, - .contLen = tlen, - .code = 0, - }; - tmsgSendRsp(&rsp); - - char buf1[80] = {0}; - char buf2[80] = {0}; - tFormatOffset(buf1, 80, &pRsp->reqOffset); - tFormatOffset(buf2, 80, &pRsp->rspOffset); - tqDebug("vgId:%d consumer:0x%" PRIx64 " (epoch %d), block num:%d, req:%s, rsp:%s", - TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); - - return 0; -} - -int32_t tqSendTaosxRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const STaosxRsp* pRsp) { -#if 0 - A(taosArrayGetSize(pRsp->blockData) == pRsp->blockNum); - A(taosArrayGetSize(pRsp->blockDataLen) == pRsp->blockNum); - - if (pRsp->withSchema) { - A(taosArrayGetSize(pRsp->blockSchema) == pRsp->blockNum); - } else { - A(taosArrayGetSize(pRsp->blockSchema) == 0); - } - - if (pRsp->reqOffset.type == TMQ_OFFSET__LOG) { - if (pRsp->blockNum > 0) { - A(pRsp->rspOffset.version > pRsp->reqOffset.version); - } else { - A(pRsp->rspOffset.version >= pRsp->reqOffset.version); - } - } -#endif - - int32_t len = 0; - int32_t code = 0; - tEncodeSize(tEncodeSTaosxRsp, pRsp, len, code); - if (code < 0) { - return -1; - } - int32_t tlen = sizeof(SMqRspHead) + len; - void* buf = rpcMallocCont(tlen); - if (buf == NULL) { - return -1; - } - - ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__TAOSX_RSP; - ((SMqRspHead*)buf)->epoch = pReq->epoch; - ((SMqRspHead*)buf)->consumerId = pReq->consumerId; - - void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); - - SEncoder encoder = {0}; - tEncoderInit(&encoder, abuf, len); - tEncodeSTaosxRsp(&encoder, pRsp); - tEncoderClear(&encoder); - - SRpcMsg rsp = { - .info = pMsg->info, - .pCont = buf, - .contLen = tlen, - .code = 0, - }; - - tmsgSendRsp(&rsp); +// int32_t len = 0; +// int32_t code = 0; +// +// if (type == TMQ_MSG_TYPE__POLL_RSP) { +// tEncodeSize(tEncodeSMqDataRsp, pRsp, len, code); +// } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) { +// tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code); +// } +// +// if (code < 0) { +// return -1; +// } +// +// int32_t tlen = sizeof(SMqRspHead) + len; +// void* buf = rpcMallocCont(tlen); +// if (buf == NULL) { +// return -1; +// } +// +// ((SMqRspHead*)buf)->mqMsgType = type; +// ((SMqRspHead*)buf)->epoch = pReq->epoch; +// ((SMqRspHead*)buf)->consumerId = pReq->consumerId; +// +// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); +// +// SEncoder encoder = {0}; +// tEncoderInit(&encoder, abuf, len); +// +// if (type == TMQ_MSG_TYPE__POLL_RSP) { +// tEncodeSMqDataRsp(&encoder, pRsp); +// } else if (type == TMQ_MSG_TYPE__TAOSX_RSP) { +// tEncodeSTaosxRsp(&encoder, (STaosxRsp*) pRsp); +// } +// +// tEncoderClear(&encoder); +// +// SRpcMsg rsp = { +// .info = pMsg->info, +// .pCont = buf, +// .contLen = tlen, +// .code = 0, +// }; +// +// tmsgSendRsp(&rsp); + doSendDataRsp(&pMsg->info, pRsp, pReq->epoch, pReq->consumerId, type); char buf1[80] = {0}; char buf2[80] = {0}; tFormatOffset(buf1, 80, &pRsp->reqOffset); tFormatOffset(buf2, 80, &pRsp->rspOffset); - tqDebug("taosx rsp, vgId:%d, consumer:0x%" PRIx64 " (epoch %d) send rsp, numOfBlks:%d, req:%s, rsp:%s", + tqDebug("vgId:%d consumer:0x%" PRIx64 " (epoch %d) send rsp, block num:%d, req:%s, rsp:%s", TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); + return 0; } +//int32_t tqSendTaosxRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const STaosxRsp* pRsp) { +//#if 0 +// A(taosArrayGetSize(pRsp->blockData) == pRsp->blockNum); +// A(taosArrayGetSize(pRsp->blockDataLen) == pRsp->blockNum); +// +// if (pRsp->withSchema) { +// A(taosArrayGetSize(pRsp->blockSchema) == pRsp->blockNum); +// } else { +// A(taosArrayGetSize(pRsp->blockSchema) == 0); +// } +// +// if (pRsp->reqOffset.type == TMQ_OFFSET__LOG) { +// if (pRsp->blockNum > 0) { +// A(pRsp->rspOffset.version > pRsp->reqOffset.version); +// } else { +// A(pRsp->rspOffset.version >= pRsp->reqOffset.version); +// } +// } +//#endif +// +// int32_t len = 0; +// int32_t code = 0; +// tEncodeSize(tEncodeSTaosxRsp, pRsp, len, code); +// if (code < 0) { +// return -1; +// } +// +// int32_t tlen = sizeof(SMqRspHead) + len; +// void* buf = rpcMallocCont(tlen); +// if (buf == NULL) { +// terrno = TSDB_CODE_OUT_OF_MEMORY; +// return -1; +// } +// +// ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__TAOSX_RSP; +// ((SMqRspHead*)buf)->epoch = pReq->epoch; +// ((SMqRspHead*)buf)->consumerId = pReq->consumerId; +// +// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); +// +// SEncoder encoder = {0}; +// tEncoderInit(&encoder, abuf, len); +// tEncodeSTaosxRsp(&encoder, pRsp); +// tEncoderClear(&encoder); +// +// SRpcMsg rsp = { +// .info = pMsg->info, +// .pCont = buf, +// .contLen = tlen, +// .code = 0, +// }; +// +// tmsgSendRsp(&rsp); +// +// char buf1[80] = {0}; +// char buf2[80] = {0}; +// tFormatOffset(buf1, 80, &pRsp->reqOffset); +// tFormatOffset(buf2, 80, &pRsp->rspOffset); +// +// tqDebug("taosx rsp, vgId:%d, consumer:0x%" PRIx64 " (epoch %d) send rsp, numOfBlks:%d, req:%s, rsp:%s", +// TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->blockNum, buf1, buf2); +// return 0; +//} + static FORCE_INLINE bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) { return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG && pLeft->val.version <= pRight->val.version; @@ -441,9 +516,7 @@ static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t su } #endif - /*A(subType == TOPIC_SUB_TYPE__COLUMN);*/ pRsp->withSchema = false; - return 0; } @@ -506,7 +579,7 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, offset reset to %" PRId64, consumerId, pHandle->subKey, TD_VID(pTq->pVnode), dataRsp.rspOffset.version); - int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); tDeleteSMqDataRsp(&dataRsp); *pBlockReturned = true; @@ -515,7 +588,8 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand STaosxRsp taosxRsp = {0}; tqInitTaosxRsp(&taosxRsp, pRequest); tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - int32_t code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); +// int32_t code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); tDeleteSTaosxRsp(&taosxRsp); *pBlockReturned = true; @@ -570,13 +644,13 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* // till now, all data has been transferred to consumer, new data needs to push client once arrived. if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { - code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp); + code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); taosWUnLockLatch(&pTq->pushLock); return code; } taosWUnLockLatch(&pTq->pushLock); - code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP); // NOTE: this pHandle->consumerId may have been changed already. tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, offset type:%d, uid/version:%" PRId64 @@ -611,7 +685,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* } if (taosxRsp.blockNum > 0) { - code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); tDeleteSTaosxRsp(&taosxRsp); return code; } else { @@ -622,13 +696,14 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* ",version:%" PRId64, consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid, taosxRsp.rspOffset.version); - } + } else { - if (offset.type == TMQ_OFFSET__LOG) { +// if (offset.type == TMQ_OFFSET__LOG) { int64_t fetchVer = offset.version + 1; pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); if (pCkHead == NULL) { tDeleteSTaosxRsp(&taosxRsp); + terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; } @@ -646,14 +721,18 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead) < 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return code; +// if (terrno == 0) { // failed to seek to given ver, but no errors happen. +// code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, (SMqDataRsp*) &taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); +// return code; +// } else { // error happens, return to consumers + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return code; +// } } SWalCont* pHead = &pCkHead->head; - tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", consumerId, pRequest->epoch, vgId, fetchVer, pHead->msgType); @@ -663,6 +742,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* .msgLen = pHead->bodyLen - sizeof(SSubmitReq2Msg), .ver = pHead->version, }; + if (tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp) < 0) { tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", consumerId, vgId, pRequest->subKey); @@ -671,7 +751,7 @@ static int32_t extractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* if (taosxRsp.blockNum > 0) { tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - code = tqSendTaosxRsp(pTq, pMsg, pRequest, &taosxRsp); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); tDeleteSTaosxRsp(&taosxRsp); taosMemoryFreeClear(pCkHead); return code; @@ -1027,15 +1107,15 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->tbSink.vnode = pTq->pVnode; pTask->tbSink.tbSinkFunc = tqSinkToTablePipeline2; - int32_t version = 1; + int32_t ver1 = 1; SMetaInfo info = {0}; int32_t code = metaGetInfo(pTq->pVnode->pMeta, pTask->tbSink.stbUid, &info, NULL); if (code == TSDB_CODE_SUCCESS) { - version = info.skmVer; + ver1 = info.skmVer; } pTask->tbSink.pTSchema = - tBuildTSchema(pTask->tbSink.pSchemaWrapper->pSchema, pTask->tbSink.pSchemaWrapper->nCols, version); + tBuildTSchema(pTask->tbSink.pSchemaWrapper->pSchema, pTask->tbSink.pSchemaWrapper->nCols, ver1); ASSERT(pTask->tbSink.pTSchema); } diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index c0ed787176..1ba44739d6 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -209,6 +209,7 @@ int32_t tqPushMsgNew(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) { void* pReq = POINTER_SHIFT(msg, sizeof(SSubmitReq2Msg)); int32_t len = msgLen - sizeof(SSubmitReq2Msg); + int32_t vgId = TD_VID(pTq->pVnode); if (msgType == TDMT_VND_SUBMIT) { // lock push mgr to avoid potential msg lost @@ -217,7 +218,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) int32_t numOfRegisteredPush = taosHashGetSize(pTq->pPushMgr); if (numOfRegisteredPush > 0) { tqDebug("vgId:%d tq push msg version:%" PRId64 " type:%s, head:%p, body:%p len:%d, numOfPushed consumers:%d", - pTq->pVnode->config.vgId, ver, TMSG_INFO(msgType), msg, pReq, len, numOfRegisteredPush); + vgId, ver, TMSG_INFO(msgType), msg, pReq, len, numOfRegisteredPush); SArray* cachedKeys = taosArrayInit(0, sizeof(void*)); SArray* cachedKeyLens = taosArrayInit(0, sizeof(size_t)); @@ -239,7 +240,10 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) void* pIter = NULL; while (1) { pIter = taosHashIterate(pTq->pPushMgr, pIter); - if (pIter == NULL) break; + if (pIter == NULL) { + break; + } + STqPushEntry* pPushEntry = *(STqPushEntry**)pIter; STqHandle* pHandle = taosHashGet(pTq->pHandle, pPushEntry->subKey, strlen(pPushEntry->subKey)); @@ -248,23 +252,23 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) continue; } - if (pPushEntry->dataRsp.reqOffset.version >= ver) { - tqDebug("vgId:%d, push entry req version %" PRId64 ", while push version %" PRId64 ", skip", - pTq->pVnode->config.vgId, pPushEntry->dataRsp.reqOffset.version, ver); + SMqDataRsp* pRsp = pPushEntry->pDataRsp; + if (pRsp->reqOffset.version >= ver) { + tqDebug("vgId:%d, push entry req version %" PRId64 ", while push version %" PRId64 ", skip", vgId, + pRsp->reqOffset.version, ver); continue; } STqExecHandle* pExec = &pHandle->execHandle; qTaskInfo_t task = pExec->task; - SMqDataRsp* pRsp = &pPushEntry->dataRsp; - // prepare scan mem data SPackedData submit = { .msgStr = data, .msgLen = len, .ver = ver, }; + qStreamSetScanMemData(task, submit); // here start to scan submit block to extract the subscribed data @@ -272,7 +276,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) SSDataBlock* pDataBlock = NULL; uint64_t ts = 0; if (qExecTask(task, &pDataBlock, &ts) < 0) { - tqDebug("vgId:%d, tq exec error since %s", pTq->pVnode->config.vgId, terrstr()); + tqDebug("vgId:%d, tq exec error since %s", vgId, terrstr()); } if (pDataBlock == NULL) { @@ -283,11 +287,11 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) pRsp->blockNum++; } - tqDebug("vgId:%d, tq handle push, subkey:%s, block num:%d", pTq->pVnode->config.vgId, pPushEntry->subKey, - pRsp->blockNum); + tqDebug("vgId:%d, tq handle push, subkey:%s, block num:%d", vgId, pPushEntry->subKey, pRsp->blockNum); if (pRsp->blockNum > 0) { // set offset tqOffsetResetToLog(&pRsp->rspOffset, ver); + // remove from hash size_t kLen; void* key = taosHashGetKey(pIter, &kLen); @@ -309,6 +313,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) tqError("vgId:%d, tq push hash remove key error, key: %s", pTq->pVnode->config.vgId, (char*)key); } } + taosArrayDestroyP(cachedKeys, (FDelete)taosMemoryFree); taosArrayDestroy(cachedKeyLens); taosMemoryFree(data); @@ -334,9 +339,9 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) }; tqDebug("tq copy write msg %p %d %" PRId64 " from %p", data, len, ver, pReq); - tqProcessSubmitReq(pTq, submit); } + if (msgType == TDMT_VND_DELETE) { tqProcessDelReq(pTq, POINTER_SHIFT(msg, sizeof(SMsgHead)), msgLen - sizeof(SMsgHead), ver); } @@ -346,7 +351,7 @@ int tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver) } int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, - SMqDataRsp* pDataRsp) { + SMqDataRsp* pDataRsp, int32_t type) { uint64_t consumerId = pRequest->consumerId; int32_t vgId = TD_VID(pTq->pVnode); STqHandle* pTqHandle = pHandle; @@ -359,14 +364,22 @@ int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, return -1; } - pPushEntry->pInfo = pRpcMsg->info; + pPushEntry->info = pRpcMsg->info; memcpy(pPushEntry->subKey, pTqHandle->subKey, TSDB_SUBSCRIBE_KEY_LEN); - pDataRsp->withTbName = 0; - memcpy(&pPushEntry->dataRsp, pDataRsp, sizeof(SMqDataRsp)); - pPushEntry->dataRsp.head.consumerId = consumerId; - pPushEntry->dataRsp.head.epoch = pRequest->epoch; - pPushEntry->dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP; + if (type == TMQ_MSG_TYPE__TAOSX_RSP) { + pPushEntry->pDataRsp = taosMemoryCalloc(1, sizeof(STaosxRsp)); + memcpy(pPushEntry->pDataRsp, pDataRsp, sizeof(STaosxRsp)); + } else if (type == TMQ_MSG_TYPE__POLL_RSP) { + pPushEntry->pDataRsp = taosMemoryCalloc(1, sizeof(SMqDataRsp)); + memcpy(pPushEntry->pDataRsp, pDataRsp, sizeof(SMqDataRsp)); + } + + SMqRspHead* pHead = &pPushEntry->pDataRsp->head; + pHead->consumerId = consumerId; + pHead->epoch = pRequest->epoch; + pHead->mqMsgType = type; + taosHashPut(pTq->pPushMgr, pTqHandle->subKey, strlen(pTqHandle->subKey), &pPushEntry, sizeof(void*)); tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s offset:%" PRId64 ", vgId:%d save handle to push mgr, total:%d", consumerId, @@ -377,12 +390,14 @@ int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, int32_t tqRemovePushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) { int32_t vgId = TD_VID(pTq->pVnode); STqPushEntry** pEntry = taosHashGet(pTq->pPushMgr, pKey, keyLen); + if (pEntry != NULL) { - uint64_t cId = (*pEntry)->dataRsp.head.consumerId; + uint64_t cId = (*pEntry)->pDataRsp->head.consumerId; ASSERT(consumerId == cId); tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s vgId:%d remove from push mgr, remains:%d", consumerId, (*pEntry)->subKey, vgId, taosHashGetSize(pTq->pPushMgr) - 1); + if (rspConsumer) { // rsp the old consumer with empty block. tqPushDataRsp(pTq, *pEntry); } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index baf74a83e4..ffab7d92b8 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -221,12 +221,12 @@ int32_t qSetSMAInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* readers, int32_t* numOfCols, SSchemaWrapper** pSchema) { if (msg == NULL) { // create raw scan - SExecTaskInfo* pTaskInfo = taosMemoryCalloc(1, sizeof(SExecTaskInfo)); if (NULL == pTaskInfo) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } + setTaskStatus(pTaskInfo, TASK_NOT_COMPLETED); pTaskInfo->cost.created = taosGetTimestampUs(); diff --git a/tests/system-test/7-tmq/subscribeDb1.py b/tests/system-test/7-tmq/subscribeDb1.py index c5ae44214a..89bf337a20 100644 --- a/tests/system-test/7-tmq/subscribeDb1.py +++ b/tests/system-test/7-tmq/subscribeDb1.py @@ -123,8 +123,9 @@ class TDTestCase: pre_insert = "insert into " sql = pre_insert - t = time.time() - startTs = int(round(t * 1000)) + # t = 1678609778776 #time.time() + t = 1600000000000 + startTs = t #int(round(t * 1000)) #tdLog.debug("doing insert data into stable:%s rows:%d ..."%(stbName, allRows)) for i in range(ctbNum): sql += " %s_%d values "%(stbName,i) From 8a9746d2bf82577e478fdc7faf869c4aa7dd4ab9 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 13 Mar 2023 18:29:04 +0800 Subject: [PATCH 23/28] fix(tmq): fix the invalid read. --- source/client/src/clientTmq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 3167a4c361..95722ae3e5 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1843,11 +1843,11 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { atomic_store_32(&pVg->vgStatus, TMQ_VG_STATUS__IDLE); if (pollRspWrapper->taosxRsp.blockNum == 0) { - taosFreeQitem(pollRspWrapper); rspWrapper = NULL; tscDebug("consumer:0x%" PRIx64 " taosx empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, pollRspWrapper->reqId); pVg->vgIgnoreCnt = VG_POLL_IGNORE_TICK; + taosFreeQitem(pollRspWrapper); continue; } @@ -1859,12 +1859,13 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { pRsp = tmqBuildTaosxRspFromWrapper(pollRspWrapper); } - taosFreeQitem(pollRspWrapper); char buf[80]; tFormatOffset(buf, 80, &pVg->currentOffset); tscDebug("consumer:0x%" PRIx64 " process taosx poll rsp, vgId:%d, offset:%s, blocks:%d, reqId:0x%"PRIx64, tmq->consumerId, pVg->vgId, buf, pollRspWrapper->dataRsp.blockNum, pollRspWrapper->reqId); + + taosFreeQitem(pollRspWrapper); return pRsp; } else { From 8f1f423c9053456c6b543c4a9eddbd90970fa61c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 Mar 2023 00:37:09 +0800 Subject: [PATCH 24/28] fix(tmq): fix the error in tmq. --- source/client/src/clientTmq.c | 17 ++++++++--------- utils/test/c/tmqSim.c | 2 -- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 95722ae3e5..522f5cc1b9 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -24,8 +24,6 @@ #include "tref.h" #include "ttimer.h" -#define VG_POLL_IGNORE_TICK 100 - struct SMqMgmt { int8_t inited; tmr_h timer; @@ -134,7 +132,7 @@ typedef struct { int32_t vgId; int32_t vgStatus; int32_t vgSkipCnt; - int32_t vgIgnoreCnt; // once empty block is received, idle for ignoreCnt then start to poll data + int64_t emptyBlockReceiveTs; // once empty block is received, idle for ignoreCnt then start to poll data SEpSet epSet; } SMqClientVg; @@ -1403,7 +1401,7 @@ static void initClientTopicFromRsp(SMqClientTopic* pTopic, SMqSubTopicEp* pTopic .epSet = pVgEp->epSet, .vgStatus = TMQ_VG_STATUS__IDLE, .vgSkipCnt = 0, - .vgIgnoreCnt = 0, + .emptyBlockReceiveTs = 0, }; taosArrayPush(pTopic->vgs, &clientVg); @@ -1704,10 +1702,9 @@ static int32_t tmqPollImpl(tmq_t* tmq, int64_t timeout) { for (int j = 0; j < numOfVg; j++) { SMqClientVg* pVg = taosArrayGet(pTopic->vgs, j); - if (pVg->vgIgnoreCnt > 0) { - pVg->vgIgnoreCnt -= 1; - tscTrace("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for %d tick before poll", tmq->consumerId, tmq->epoch, - pVg->vgId, pVg->vgIgnoreCnt); + if (taosGetTimestampMs() - pVg->emptyBlockReceiveTs < 100) { // less than 100ms + tscTrace("consumer:0x%" PRIx64 " epoch %d, vgId:%d idle for 100ms before start next poll", tmq->consumerId, tmq->epoch, + pVg->vgId); continue; } @@ -1846,9 +1843,11 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) { rspWrapper = NULL; tscDebug("consumer:0x%" PRIx64 " taosx empty block received, vgId:%d, reqId:0x%" PRIx64, tmq->consumerId, pVg->vgId, pollRspWrapper->reqId); - pVg->vgIgnoreCnt = VG_POLL_IGNORE_TICK; + pVg->emptyBlockReceiveTs = taosGetTimestampMs(); taosFreeQitem(pollRspWrapper); continue; + } else { + pVg->emptyBlockReceiveTs = 0; // reset the ts } // build rsp diff --git a/utils/test/c/tmqSim.c b/utils/test/c/tmqSim.c index ce4bafb79b..ce61b80d41 100644 --- a/utils/test/c/tmqSim.c +++ b/utils/test/c/tmqSim.c @@ -735,9 +735,7 @@ void build_consumer(SThreadInfo* pInfo) { } pInfo->tmq = tmq_consumer_new(conf, NULL, 0); - tmq_conf_destroy(conf); - return; } From 9cdd52de84b29fd91b08245137fcf9d52ea8923c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 Mar 2023 11:50:02 +0800 Subject: [PATCH 25/28] fix(tmq): fix dead lock. --- source/client/src/clientTmq.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 522f5cc1b9..1a0118c108 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -538,11 +538,6 @@ static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicN .handle = NULL, }; - SEp* pEp = GET_ACTIVE_EP(&pVg->epSet); - tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d offset:%" PRId64 " prev:%" PRId64 ", ep:%s:%d, ordinal:%d/%d", - tmq->consumerId, pOffset->subKey, pVg->vgId, pOffset->val.version, pVg->committedOffset.version, pEp->fqdn, - pEp->port, index + 1, totalVgroups); - pMsgSendInfo->requestId = generateRequestId(); pMsgSendInfo->requestObjRefId = 0; pMsgSendInfo->param = pParam; @@ -553,6 +548,12 @@ static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicN atomic_add_fetch_32(&pParamSet->waitingRspNum, 1); atomic_add_fetch_32(&pParamSet->totalRspNum, 1); + SEp* pEp = GET_ACTIVE_EP(&pVg->epSet); + tscDebug("consumer:0x%" PRIx64 " topic:%s on vgId:%d send offset:%" PRId64 " prev:%" PRId64 + ", ep:%s:%d, ordinal:%d/%d, req:0x%" PRIx64, + tmq->consumerId, pOffset->subKey, pVg->vgId, pOffset->val.version, pVg->committedOffset.version, pEp->fqdn, + pEp->port, index + 1, totalVgroups, pMsgSendInfo->requestId); + int64_t transporterId = 0; asyncSendMsgToServer(tmq->pTscObj->pAppInfo->pTransporter, &pVg->epSet, &transporterId, pMsgSendInfo); return 0; @@ -627,11 +628,11 @@ HANDLE_RSP: } if (!async) { + taosThreadMutexUnlock(&tmq->lock); tsem_wait(&pParamSet->rspSem); code = pParamSet->rspErr; tsem_destroy(&pParamSet->rspSem); taosMemoryFree(pParamSet); - taosThreadMutexUnlock(&tmq->lock); return code; } else { code = 0; @@ -736,7 +737,7 @@ static int32_t tmqCommitConsumerImpl(tmq_t* tmq, int8_t automatic, int8_t async, static int32_t tmqCommitInner(tmq_t* tmq, const TAOS_RES* msg, int8_t automatic, int8_t async, tmq_commit_cb* userCb, void* userParam) { - if (msg) { // user invoked commit? + if (msg) { // user invoked commit return tmqCommitMsgImpl(tmq, msg, async, userCb, userParam); } else { // this for auto commit return tmqCommitConsumerImpl(tmq, automatic, async, userCb, userParam); From fb855f8aace509727a312e700c57ff51bf22c0d9 Mon Sep 17 00:00:00 2001 From: xinsheng Ren <285808407@qq.com> Date: Tue, 14 Mar 2023 13:41:08 +0800 Subject: [PATCH 26/28] Feature/xsren/win install error base main (#20450) * fix: install service path error on windows * fix: installation action sheet name --------- Co-authored-by: facetosea <25808407@qq.com> --- packaging/tools/tdengine.iss | 6 +++--- packaging/tools/windows_before_install.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packaging/tools/tdengine.iss b/packaging/tools/tdengine.iss index 02f412f5d4..8676fa2c51 100644 --- a/packaging/tools/tdengine.iss +++ b/packaging/tools/tdengine.iss @@ -21,7 +21,7 @@ [Setup] VersionInfoVersion={#MyAppVersion} AppId={{A0F7A93C-79C4-485D-B2B8-F0D03DF42FAB} -AppName={#MyAppName} +AppName={#CusName} AppVersion={#MyAppVersion} ;AppVerName={#MyAppName} {#MyAppVersion} AppPublisher={#MyAppPublisher} @@ -64,8 +64,8 @@ Source: {#MyAppSourceDir}\taosdump.exe; DestDir: "{app}"; DestName: "{#CusPrompt [run] -Filename: {sys}\sc.exe; Parameters: "create taosd start= DEMAND binPath= ""C:\\{#CusName}\\taosd.exe --win_service""" ; Flags: runhidden -Filename: {sys}\sc.exe; Parameters: "create taosadapter start= DEMAND binPath= ""C:\\{#CusName}\\taosadapter.exe""" ; Flags: runhidden +Filename: {sys}\sc.exe; Parameters: "create taosd start= DEMAND binPath= ""C:\\TDengine\\taosd.exe --win_service""" ; Flags: runhidden +Filename: {sys}\sc.exe; Parameters: "create taosadapter start= DEMAND binPath= ""C:\\TDengine\\taosadapter.exe""" ; Flags: runhidden [UninstallRun] RunOnceId: "stoptaosd"; Filename: {sys}\sc.exe; Parameters: "stop taosd" ; Flags: runhidden diff --git a/packaging/tools/windows_before_install.txt b/packaging/tools/windows_before_install.txt index ef783bf10b..91cb35d77e 100644 --- a/packaging/tools/windows_before_install.txt +++ b/packaging/tools/windows_before_install.txt @@ -1,4 +1,4 @@ -TDengine is a high-efficient, scalable, high-available distributed time-series database, which makes a lot of optimizations on inserting and querying data, which is far more efficient than normal regular databases. So TDengine can meet the high requirements of IOT and other areas on storing and querying a large amount of data. +TDengine is an open-source, cloud-native time-series database optimized for Internet of Things (IoT), Connected Cars, and Industrial IoT. With its built-in caching, stream processing, and data subscription capabilities, TDengine offers a simplified solution for time-series data processing. TDengine will be installed under C:\TDengine, users can modify configuration file C:\TDengine\cfg\taos.cfg, set the log file path or other parameters. To start/stop TDengine with administrator privileges: sc start/stop taosd From 40d99e457646d385c84e7a2b7f0cb1069e8b95dd Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Tue, 14 Mar 2023 13:47:41 +0800 Subject: [PATCH 27/28] chore: support cus name (#20441) * chore: support cus name * fix: cfg/taos.cfg * chore: update taos-tools --- cmake/taostools_CMakeLists.txt.in | 2 +- packaging/cfg/taos.cfg | 10 ++-- packaging/debRpmAutoInstall.sh | 8 +-- packaging/testpackage.sh | 74 +++++++++++------------ packaging/tools/install.sh | 98 +++++++++++++++---------------- packaging/tools/install_client.sh | 2 +- packaging/tools/make_install.sh | 20 +++---- packaging/tools/makeclient.sh | 2 +- packaging/tools/makepkg.sh | 22 +++---- packaging/tools/post.sh | 2 +- packaging/tools/release_note | 2 +- packaging/tools/repair_link.sh | 8 +-- 12 files changed, 125 insertions(+), 125 deletions(-) diff --git a/cmake/taostools_CMakeLists.txt.in b/cmake/taostools_CMakeLists.txt.in index 1d06995380..d14efcfee3 100644 --- a/cmake/taostools_CMakeLists.txt.in +++ b/cmake/taostools_CMakeLists.txt.in @@ -2,7 +2,7 @@ # taos-tools ExternalProject_Add(taos-tools GIT_REPOSITORY https://github.com/taosdata/taos-tools.git - GIT_TAG 41affde + GIT_TAG ad1a32b SOURCE_DIR "${TD_SOURCE_DIR}/tools/taos-tools" BINARY_DIR "" #BUILD_IN_SOURCE TRUE diff --git a/packaging/cfg/taos.cfg b/packaging/cfg/taos.cfg index 3d3dfc8e73..a98dc5a236 100644 --- a/packaging/cfg/taos.cfg +++ b/packaging/cfg/taos.cfg @@ -1,19 +1,19 @@ ######################################################## # # -# TDengine Configuration # +# Configuration # # Any questions, please email support@taosdata.com # # # ######################################################## ######### 0. Client only configurations ############# -# The interval for TDengine CLI to send heartbeat to mnode +# The interval for CLI to send heartbeat to mnode # shellActivityTimer 3 ############### 1. Cluster End point ############################ -# The end point of the first dnode in the cluster to be connected to when this dnode or a TDengine CLI `taos` is started +# The end point of the first dnode in the cluster to be connected to when this dnode or a CLI `taos` is started # firstEp hostname:6030 # The end point of the second dnode to be connected to if the firstEp is not available @@ -40,10 +40,10 @@ # temporary file's directory, if you are using Windows platform please change to Windows path # tempDir /tmp/ -# Switch for allowing TDengine to collect and report service usage information +# Switch for allowing to collect and report service usage information # telemetryReporting 1 -# Switch for allowing TDengine to collect and report crash information +# Switch for allowing to collect and report crash information # crashReporting 1 # The maximum number of vnodes supported by this dnode diff --git a/packaging/debRpmAutoInstall.sh b/packaging/debRpmAutoInstall.sh index 3579f813e5..2fe18fd7a9 100755 --- a/packaging/debRpmAutoInstall.sh +++ b/packaging/debRpmAutoInstall.sh @@ -1,15 +1,15 @@ #!/usr/bin/expect -set packgeName [lindex $argv 0] +set packageName [lindex $argv 0] set packageSuffix [lindex $argv 1] set timeout 3 if { ${packageSuffix} == "deb" } { - spawn dpkg -i ${packgeName} + spawn dpkg -i ${packageName} } elseif { ${packageSuffix} == "rpm"} { - spawn rpm -ivh ${packgeName} + spawn rpm -ivh ${packageName} } expect "*one:" send "\r" expect "*skip:" send "\r" -expect eof \ No newline at end of file +expect eof diff --git a/packaging/testpackage.sh b/packaging/testpackage.sh index 78d5043b0c..9959d290e7 100755 --- a/packaging/testpackage.sh +++ b/packaging/testpackage.sh @@ -25,7 +25,7 @@ sourcePath="nas" cpuType="x64" lite="true" packageType="tar" -subFile="taos.tar.gz" +subFile="package.tar.gz" while getopts "m:c:f:l:s:o:t:v:h" opt; do case $opt in m) @@ -79,9 +79,9 @@ GREEN_UNDERLINE='\033[4;32m' NC='\033[0m' if [[ ${verMode} = "enterprise" ]];then - prePackag="TDengine-enterprise-${testFile}" + prePackage="TDengine-enterprise-${testFile}" elif [ ${verMode} = "community" ];then - prePackag="TDengine-${testFile}" + prePackage="TDengine-${testFile}" fi if [ ${lite} = "true" ];then packageLite="-Lite" @@ -92,10 +92,10 @@ if [[ "$packageType" = "tar" ]] ;then packageType="tar.gz" fi -tdPath="${prePackag}-${version}" -originTdpPath="${prePackag}-${originversion}" +tdPath="${prePackage}-${version}" +originTdpPath="${prePackage}-${originversion}" -packgeName="${tdPath}-Linux-${cpuType}${packageLite}.${packageType}" +packageName="${tdPath}-Linux-${cpuType}${packageLite}.${packageType}" originPackageName="${originTdpPath}-Linux-${cpuType}${packageLite}.${packageType}" if [ "$testFile" == "server" ] ;then @@ -105,13 +105,13 @@ elif [ ${testFile} = "client" ];then elif [ ${testFile} = "tools" ];then tdPath="taosTools-${version}" originTdpPath="taosTools-${originversion}" - packgeName="${tdPath}-Linux-${cpuType}${packageLite}.${packageType}" + packageName="${tdPath}-Linux-${cpuType}${packageLite}.${packageType}" originPackageName="${originTdpPath}-Linux-${cpuType}${packageLite}.${packageType}" - installCmd="install-taostools.sh" + installCmd="install-tools.sh" fi -echo "tdPath:${tdPath},originTdpPath:${originTdpPath},packgeName:${packgeName},originPackageName:${originPackageName}" +echo "tdPath:${tdPath},originTdpPath:${originTdpPath},packageName:${packageName},originPackageName:${originPackageName}" function cmdInstall { command=$1 if command -v ${command} ;then @@ -206,7 +206,7 @@ else fi -if [[ ${packgeName} =~ "server" ]] ;then +if [[ ${packageName} =~ "server" ]] ;then echoColor BD " pkill -9 taosd " pkill -9 taosd fi @@ -232,25 +232,25 @@ if [ -d ${installPath}/${tdPath} ] ;then fi echoColor G "===== download installPackage =====" -cd ${installPath} && wgetFile ${packgeName} ${version} ${sourcePath} +cd ${installPath} && wgetFile ${packageName} ${version} ${sourcePath} cd ${oriInstallPath} && wgetFile ${originPackageName} ${originversion} ${sourcePath} cd ${installPath} cp -r ${scriptDir}/debRpmAutoInstall.sh . -packageSuffix=$(echo ${packgeName} | awk -F '.' '{print $NF}') +packageSuffix=$(echo ${packageName} | awk -F '.' '{print $NF}') if [ ! -f debRpmAutoInstall.sh ];then echo '#!/usr/bin/expect ' > debRpmAutoInstall.sh - echo 'set packgeName [lindex $argv 0]' >> debRpmAutoInstall.sh + echo 'set packageName [lindex $argv 0]' >> debRpmAutoInstall.sh echo 'set packageSuffix [lindex $argv 1]' >> debRpmAutoInstall.sh echo 'set timeout 3 ' >> debRpmAutoInstall.sh echo 'if { ${packageSuffix} == "deb" } {' >> debRpmAutoInstall.sh - echo ' spawn dpkg -i ${packgeName} ' >> debRpmAutoInstall.sh + echo ' spawn dpkg -i ${packageName} ' >> debRpmAutoInstall.sh echo '} elseif { ${packageSuffix} == "rpm"} {' >> debRpmAutoInstall.sh - echo ' spawn rpm -ivh ${packgeName}' >> debRpmAutoInstall.sh + echo ' spawn rpm -ivh ${packageName}' >> debRpmAutoInstall.sh echo '}' >> debRpmAutoInstall.sh echo 'expect "*one:"' >> debRpmAutoInstall.sh echo 'send "\r"' >> debRpmAutoInstall.sh @@ -261,25 +261,25 @@ fi echoColor G "===== instal Package =====" -if [[ ${packgeName} =~ "deb" ]];then +if [[ ${packageName} =~ "deb" ]];then cd ${installPath} dpkg -r taostools dpkg -r tdengine - if [[ ${packgeName} =~ "TDengine" ]];then - echoColor BD "./debRpmAutoInstall.sh ${packgeName} ${packageSuffix}" && chmod 755 debRpmAutoInstall.sh && ./debRpmAutoInstall.sh ${packgeName} ${packageSuffix} + if [[ ${packageName} =~ "TDengine" ]];then + echoColor BD "./debRpmAutoInstall.sh ${packageName} ${packageSuffix}" && chmod 755 debRpmAutoInstall.sh && ./debRpmAutoInstall.sh ${packageName} ${packageSuffix} else - echoColor BD "dpkg -i ${packgeName}" && dpkg -i ${packgeName} + echoColor BD "dpkg -i ${packageName}" && dpkg -i ${packageName} fi -elif [[ ${packgeName} =~ "rpm" ]];then +elif [[ ${packageName} =~ "rpm" ]];then cd ${installPath} sudo rpm -e tdengine sudo rpm -e taostools - if [[ ${packgeName} =~ "TDengine" ]];then - echoColor BD "./debRpmAutoInstall.sh ${packgeName} ${packageSuffix}" && chmod 755 debRpmAutoInstall.sh && ./debRpmAutoInstall.sh ${packgeName} ${packageSuffix} + if [[ ${packageName} =~ "TDengine" ]];then + echoColor BD "./debRpmAutoInstall.sh ${packageName} ${packageSuffix}" && chmod 755 debRpmAutoInstall.sh && ./debRpmAutoInstall.sh ${packageName} ${packageSuffix} else - echoColor BD "rpm -ivh ${packgeName}" && rpm -ivh ${packgeName} + echoColor BD "rpm -ivh ${packageName}" && rpm -ivh ${packageName} fi -elif [[ ${packgeName} =~ "tar" ]];then +elif [[ ${packageName} =~ "tar" ]];then echoColor G "===== check installPackage File of tar =====" cd ${oriInstallPath} if [ ! -f {originPackageName} ];then @@ -290,7 +290,7 @@ elif [[ ${packgeName} =~ "tar" ]];then echoColor BD "tar -xf ${originPackageName}" && tar -xf ${originPackageName} cd ${installPath} echoColor YD "unzip the new installation package" - echoColor BD "tar -xf ${packgeName}" && tar -xf ${packgeName} + echoColor BD "tar -xf ${packageName}" && tar -xf ${packageName} if [ ${testFile} != "tools" ] ;then cd ${installPath}/${tdPath} && tar xf ${subFile} @@ -326,15 +326,15 @@ fi cd ${installPath} -if [[ ${packgeName} =~ "Lite" ]] || ([[ ${packgeName} =~ "x64" ]] && [[ ${packgeName} =~ "client" ]]) || ([[ ${packgeName} =~ "deb" ]] && [[ ${packgeName} =~ "server" ]]) || ([[ ${packgeName} =~ "rpm" ]] && [[ ${packgeName} =~ "server" ]]) ;then +if [[ ${packageName} =~ "Lite" ]] || ([[ ${packageName} =~ "x64" ]] && [[ ${packageName} =~ "client" ]]) || ([[ ${packageName} =~ "deb" ]] && [[ ${packageName} =~ "server" ]]) || ([[ ${packageName} =~ "rpm" ]] && [[ ${packageName} =~ "server" ]]) ;then echoColor G "===== install taos-tools when package is lite or client =====" cd ${installPath} if [ ! -f "taosTools-2.1.3-Linux-x64.tar.gz " ];then wgetFile taosTools-2.1.3-Linux-x64.tar.gz v2.1.3 web tar xf taosTools-2.1.3-Linux-x64.tar.gz fi - cd taosTools-2.1.3 && bash install-taostools.sh -elif ([[ ${packgeName} =~ "arm64" ]] && [[ ${packgeName} =~ "client" ]]);then + cd taosTools-2.1.3 && bash install-tools.sh +elif ([[ ${packageName} =~ "arm64" ]] && [[ ${packageName} =~ "client" ]]);then echoColor G "===== install taos-tools arm when package is arm64-client =====" cd ${installPath} if [ ! -f "taosTools-2.1.3-Linux-x64.tar.gz " ];then @@ -342,37 +342,37 @@ elif ([[ ${packgeName} =~ "arm64" ]] && [[ ${packgeName} =~ "client" ]]);then tar xf taosTools-2.1.3-Linux-arm64.tar.gz fi - cd taosTools-2.1.3 && bash install-taostools.sh + cd taosTools-2.1.3 && bash install-tools.sh fi echoColor G "===== start TDengine =====" -if [[ ${packgeName} =~ "server" ]] ;then +if [[ ${packageName} =~ "server" ]] ;then echoColor BD " rm -rf /var/lib/taos/* && systemctl restart taosd " rm -rf /var/lib/taos/* systemctl restart taosd fi -rm -rf ${installPath}/${packgeName} +rm -rf ${installPath}/${packageName} rm -rf ${installPath}/${tdPath}/ -# if ([[ ${packgeName} =~ "Lite" ]] && [[ ${packgeName} =~ "tar" ]]) || [[ ${packgeName} =~ "client" ]] ;then +# if ([[ ${packageName} =~ "Lite" ]] && [[ ${packageName} =~ "tar" ]]) || [[ ${packageName} =~ "client" ]] ;then # echoColor G "===== install taos-tools when package is lite or client =====" # cd ${installPath} # wgetFile taosTools-2.1.2-Linux-x64.tar.gz . # tar xf taosTools-2.1.2-Linux-x64.tar.gz -# cd taosTools-2.1.2 && bash install-taostools.sh -# elif [[ ${packgeName} =~ "Lite" ]] && [[ ${packgeName} =~ "deb" ]] ;then +# cd taosTools-2.1.2 && bash install-tools.sh +# elif [[ ${packageName} =~ "Lite" ]] && [[ ${packageName} =~ "deb" ]] ;then # echoColor G "===== install taos-tools when package is lite or client =====" # cd ${installPath} # wgetFile taosTools-2.1.2-Linux-x64.tar.gz . # tar xf taosTools-2.1.2-Linux-x64.tar.gz -# cd taosTools-2.1.2 && bash install-taostools.sh -# elif [[ ${packgeName} =~ "Lite" ]] && [[ ${packgeName} =~ "rpm" ]] ;then +# cd taosTools-2.1.2 && bash install-tools.sh +# elif [[ ${packageName} =~ "Lite" ]] && [[ ${packageName} =~ "rpm" ]] ;then # echoColor G "===== install taos-tools when package is lite or client =====" # cd ${installPath} # wgetFile taosTools-2.1.2-Linux-x64.tar.gz . # tar xf taosTools-2.1.2-Linux-x64.tar.gz -# cd taosTools-2.1.2 && bash install-taostools.sh +# cd taosTools-2.1.2 && bash install-tools.sh # fi diff --git a/packaging/tools/install.sh b/packaging/tools/install.sh index 2495e177e1..a3f8b53d33 100755 --- a/packaging/tools/install.sh +++ b/packaging/tools/install.sh @@ -24,7 +24,7 @@ productName="TDengine" emailName="taosdata.com" uninstallScript="rmtaos" historyFile="taos_history" -tarName="taos.tar.gz" +tarName="package.tar.gz" dataDir="/var/lib/taos" logDir="/var/log/taos" configDir="/etc/taos" @@ -222,24 +222,24 @@ function install_bin() { ${csudo}cp -r ${script_dir}/bin/* ${install_main_dir}/bin && ${csudo}chmod 0555 ${install_main_dir}/bin/* #Make link - [ -x ${install_main_dir}/bin/${clientName} ] && ${csudo}ln -s ${install_main_dir}/bin/${clientName} ${bin_link_dir}/${clientName} || : - [ -x ${install_main_dir}/bin/${serverName} ] && ${csudo}ln -s ${install_main_dir}/bin/${serverName} ${bin_link_dir}/${serverName} || : - [ -x ${install_main_dir}/bin/${udfdName} ] && ${csudo}ln -s ${install_main_dir}/bin/${udfdName} ${bin_link_dir}/${udfdName} || : - [ -x ${install_main_dir}/bin/${adapterName} ] && ${csudo}ln -s ${install_main_dir}/bin/${adapterName} ${bin_link_dir}/${adapterName} || : + [ -x ${install_main_dir}/bin/${clientName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${clientName} ${bin_link_dir}/${clientName} || : + [ -x ${install_main_dir}/bin/${serverName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${serverName} ${bin_link_dir}/${serverName} || : + [ -x ${install_main_dir}/bin/${udfdName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${udfdName} ${bin_link_dir}/${udfdName} || : + [ -x ${install_main_dir}/bin/${adapterName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${adapterName} ${bin_link_dir}/${adapterName} || : [ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${demoName} || : [ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${benchmarkName} || : - [ -x ${install_main_dir}/bin/${dumpName} ] && ${csudo}ln -s ${install_main_dir}/bin/${dumpName} ${bin_link_dir}/${dumpName} || : - [ -x ${install_main_dir}/bin/${xname} ] && ${csudo}ln -s ${install_main_dir}/bin/${xname} ${bin_link_dir}/${xname} || : - [ -x ${install_main_dir}/bin/${explorerName} ] && ${csudo}ln -s ${install_main_dir}/bin/${explorerName} ${bin_link_dir}/${explorerName} || : - [ -x ${install_main_dir}/bin/TDinsight.sh ] && ${csudo}ln -s ${install_main_dir}/bin/TDinsight.sh ${bin_link_dir}/TDinsight.sh || : - [ -x ${install_main_dir}/bin/remove.sh ] && ${csudo}ln -s ${install_main_dir}/bin/remove.sh ${bin_link_dir}/${uninstallScript} || : - [ -x ${install_main_dir}/bin/set_core.sh ] && ${csudo}ln -s ${install_main_dir}/bin/set_core.sh ${bin_link_dir}/set_core || : + [ -x ${install_main_dir}/bin/${dumpName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${dumpName} ${bin_link_dir}/${dumpName} || : + [ -x ${install_main_dir}/bin/${xname} ] && ${csudo}ln -sf ${install_main_dir}/bin/${xname} ${bin_link_dir}/${xname} || : + [ -x ${install_main_dir}/bin/${explorerName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${explorerName} ${bin_link_dir}/${explorerName} || : + [ -x ${install_main_dir}/bin/TDinsight.sh ] && ${csudo}ln -sf ${install_main_dir}/bin/TDinsight.sh ${bin_link_dir}/TDinsight.sh || : + [ -x ${install_main_dir}/bin/remove.sh ] && ${csudo}ln -sf ${install_main_dir}/bin/remove.sh ${bin_link_dir}/${uninstallScript} || : + [ -x ${install_main_dir}/bin/set_core.sh ] && ${csudo}ln -sf ${install_main_dir}/bin/set_core.sh ${bin_link_dir}/set_core || : if [ "$verMode" == "cluster" ] && [ "$clientName" != "$clientName2" ]; then - [ -x ${install_main_dir}/bin/${clientName} ] && ${csudo}ln -s ${install_main_dir}/bin/${clientName} ${bin_link_dir}/${clientName2} || : - [ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -s ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${benchmarkName2} || : - [ -x ${install_main_dir}/bin/${dumpName} ] && ${csudo}ln -s ${install_main_dir}/bin/${dumpName} ${bin_link_dir}/${dumpName2} || : - [ -x ${install_main_dir}/bin/remove.sh ] && ${csudo}ln -s ${install_main_dir}/bin/remove.sh ${bin_link_dir}/${uninstallScript2} || : + [ -x ${install_main_dir}/bin/${clientName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${clientName} ${bin_link_dir}/${clientName2} || : + [ -x ${install_main_dir}/bin/${benchmarkName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${benchmarkName} ${bin_link_dir}/${benchmarkName2} || : + [ -x ${install_main_dir}/bin/${dumpName} ] && ${csudo}ln -sf ${install_main_dir}/bin/${dumpName} ${bin_link_dir}/${dumpName2} || : + [ -x ${install_main_dir}/bin/remove.sh ] && ${csudo}ln -sf ${install_main_dir}/bin/remove.sh ${bin_link_dir}/${uninstallScript2} || : fi } @@ -250,14 +250,14 @@ function install_lib() { #${csudo}rm -rf ${v15_java_app_dir} || : ${csudo}cp -rf ${script_dir}/driver/* ${install_main_dir}/driver && ${csudo}chmod 777 ${install_main_dir}/driver/* - ${csudo}ln -s ${install_main_dir}/driver/libtaos.* ${lib_link_dir}/libtaos.so.1 - ${csudo}ln -s ${lib_link_dir}/libtaos.so.1 ${lib_link_dir}/libtaos.so + ${csudo}ln -sf ${install_main_dir}/driver/libtaos.* ${lib_link_dir}/libtaos.so.1 + ${csudo}ln -sf ${lib_link_dir}/libtaos.so.1 ${lib_link_dir}/libtaos.so [ -f ${install_main_dir}/driver/libtaosws.so ] && ${csudo}ln -sf ${install_main_dir}/driver/libtaosws.so ${lib_link_dir}/libtaosws.so || : if [[ -d ${lib64_link_dir} && ! -e ${lib64_link_dir}/libtaos.so ]]; then - ${csudo}ln -s ${install_main_dir}/driver/libtaos.* ${lib64_link_dir}/libtaos.so.1 || : - ${csudo}ln -s ${lib64_link_dir}/libtaos.so.1 ${lib64_link_dir}/libtaos.so || : + ${csudo}ln -sf ${install_main_dir}/driver/libtaos.* ${lib64_link_dir}/libtaos.so.1 || : + ${csudo}ln -sf ${lib64_link_dir}/libtaos.so.1 ${lib64_link_dir}/libtaos.so || : [ -f ${install_main_dir}/libtaosws.so ] && ${csudo}ln -sf ${install_main_dir}/libtaosws.so ${lib64_link_dir}/libtaosws.so || : fi @@ -347,10 +347,10 @@ function install_header() { [ -f ${inc_link_dir}/taosws.h ] && ${csudo}rm -f ${inc_link_dir}/taosws.h || : ${csudo}cp -f ${script_dir}/inc/* ${install_main_dir}/include && ${csudo}chmod 644 ${install_main_dir}/include/* - ${csudo}ln -s ${install_main_dir}/include/taos.h ${inc_link_dir}/taos.h - ${csudo}ln -s ${install_main_dir}/include/taosdef.h ${inc_link_dir}/taosdef.h - ${csudo}ln -s ${install_main_dir}/include/taoserror.h ${inc_link_dir}/taoserror.h - ${csudo}ln -s ${install_main_dir}/include/taosudf.h ${inc_link_dir}/taosudf.h + ${csudo}ln -sf ${install_main_dir}/include/taos.h ${inc_link_dir}/taos.h + ${csudo}ln -sf ${install_main_dir}/include/taosdef.h ${inc_link_dir}/taosdef.h + ${csudo}ln -sf ${install_main_dir}/include/taoserror.h ${inc_link_dir}/taoserror.h + ${csudo}ln -sf ${install_main_dir}/include/taosudf.h ${inc_link_dir}/taosudf.h [ -f ${install_main_dir}/include/taosws.h ] && ${csudo}ln -sf ${install_main_dir}/include/taosws.h ${inc_link_dir}/taosws.h || : } @@ -511,7 +511,7 @@ function install_adapter_config() { fi [ -f ${cfg_install_dir}/${adapterName}.toml ] && - ${csudo}ln -s ${cfg_install_dir}/${adapterName}.toml ${install_main_dir}/cfg/${adapterName}.toml + ${csudo}ln -sf ${cfg_install_dir}/${adapterName}.toml ${install_main_dir}/cfg/${adapterName}.toml [ ! -z $1 ] && return 0 || : # only install client @@ -527,7 +527,7 @@ function install_config() { ${csudo}cp -f ${script_dir}/cfg/${configFile} ${cfg_install_dir}/${configFile}.new fi - ${csudo}ln -s ${cfg_install_dir}/${configFile} ${install_main_dir}/cfg + ${csudo}ln -sf ${cfg_install_dir}/${configFile} ${install_main_dir}/cfg [ ! -z $1 ] && return 0 || : # only install client @@ -573,13 +573,13 @@ function install_log() { ${csudo}rm -rf ${log_dir} || : ${csudo}mkdir -p ${log_dir} && ${csudo}chmod 777 ${log_dir} - ${csudo}ln -s ${log_dir} ${install_main_dir}/log + ${csudo}ln -sf ${log_dir} ${install_main_dir}/log } function install_data() { ${csudo}mkdir -p ${data_dir} - ${csudo}ln -s ${data_dir} ${install_main_dir}/data + ${csudo}ln -sf ${data_dir} ${install_main_dir}/data } function install_connector() { @@ -862,21 +862,21 @@ function updateProduct() { openresty_work=false echo - echo -e "${GREEN_DARK}To configure ${productName} ${NC}: edit ${cfg_install_dir}/${configFile}" + echo -e "${GREEN_DARK}To configure ${productName2} ${NC}: edit ${cfg_install_dir}/${configFile}" [ -f ${configDir}/taosadapter.toml ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To configure Adapter ${NC}: edit ${configDir}/taosadapter.toml" + echo -e "${GREEN_DARK}To configure ${clientName2} Adapter ${NC}: edit ${configDir}/taosadapter.toml" if ((${service_mod} == 0)); then - echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${csudo}systemctl start ${serverName}${NC}" + echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${csudo}systemctl start ${serverName}${NC}" [ -f ${service_config_dir}/taosadapter.service ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Adatper ${NC}: ${csudo}systemctl start taosadapter ${NC}" + echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${csudo}systemctl start taosadapter ${NC}" elif ((${service_mod} == 1)); then - echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${csudo}service ${serverName} start${NC}" + echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${csudo}service ${serverName} start${NC}" [ -f ${service_config_dir}/taosadapter.service ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Adapter ${NC}: ${csudo}service taosadapter start${NC}" + echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${csudo}service taosadapter start${NC}" else - echo -e "${GREEN_DARK}To start ${productName} ${NC}: ./${serverName}${NC}" + echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ./${serverName}${NC}" [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start ${clientName} Adapter ${NC}: taosadapter &${NC}" + echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: taosadapter &${NC}" fi if [ ${openresty_work} = 'true' ]; then @@ -887,7 +887,7 @@ function updateProduct() { if ((${prompt_force} == 1)); then echo "" - echo -e "${RED}Please run '${serverName} --force-keep-file' at first time for the exist ${productName} $exist_version!${NC}" + echo -e "${RED}Please run '${serverName} --force-keep-file' at first time for the exist ${productName2} $exist_version!${NC}" fi echo echo -e "\033[44;32;1m${productName2} is updated successfully!${NC}" @@ -944,21 +944,21 @@ function installProduct() { # Ask if to start the service echo - echo -e "${GREEN_DARK}To configure ${productName} ${NC}: edit ${cfg_install_dir}/${configFile}" + echo -e "${GREEN_DARK}To configure ${productName2} ${NC}: edit ${cfg_install_dir}/${configFile}" [ -f ${configDir}/taosadapter.toml ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To configure Taos Adapter ${NC}: edit ${configDir}/taosadapter.toml" + echo -e "${GREEN_DARK}To configure ${clientName2} Adapter ${NC}: edit ${configDir}/taosadapter.toml" if ((${service_mod} == 0)); then - echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${csudo}systemctl start ${serverName}${NC}" + echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${csudo}systemctl start ${serverName}${NC}" [ -f ${service_config_dir}/taosadapter.service ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Taos Adatper ${NC}: ${csudo}systemctl start taosadapter ${NC}" + echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${csudo}systemctl start taosadapter ${NC}" elif ((${service_mod} == 1)); then - echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${csudo}service ${serverName} start${NC}" + echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${csudo}service ${serverName} start${NC}" [ -f ${service_config_dir}/taosadapter.service ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Taos Adapter ${NC}: ${csudo}service taosadapter start${NC}" + echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: ${csudo}service taosadapter start${NC}" else - echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${serverName}${NC}" + echo -e "${GREEN_DARK}To start ${productName2} ${NC}: ${serverName}${NC}" [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Taos Adapter ${NC}: taosadapter &${NC}" + echo -e "${GREEN_DARK}To start ${clientName2} Adapter ${NC}: taosadapter &${NC}" fi if [ ! -z "$firstEp" ]; then @@ -970,24 +970,24 @@ function installProduct() { tmpPort="" fi if [[ "$tmpPort" != "" ]]; then - echo -e "${GREEN_DARK}To access ${productName} ${NC}: ${clientName} -h $tmpFqdn -P $tmpPort${GREEN_DARK} to login into cluster, then${NC}" + echo -e "${GREEN_DARK}To access ${productName2} ${NC}: ${clientName2} -h $tmpFqdn -P $tmpPort${GREEN_DARK} to login into cluster, then${NC}" else - echo -e "${GREEN_DARK}To access ${productName} ${NC}: ${clientName} -h $tmpFqdn${GREEN_DARK} to login into cluster, then${NC}" + echo -e "${GREEN_DARK}To access ${productName2} ${NC}: ${clientName2} -h $tmpFqdn${GREEN_DARK} to login into cluster, then${NC}" fi echo -e "${GREEN_DARK}execute ${NC}: create dnode 'newDnodeFQDN:port'; ${GREEN_DARK}to add this new node${NC}" echo elif [ ! -z "$serverFqdn" ]; then - echo -e "${GREEN_DARK}To access ${productName} ${NC}: ${clientName} -h $serverFqdn${GREEN_DARK} to login into ${productName} server${NC}" + echo -e "${GREEN_DARK}To access ${productName2} ${NC}: ${clientName2} -h $serverFqdn${GREEN_DARK} to login into ${productName2} server${NC}" echo fi - echo -e "\033[44;32;1m${productName} is installed successfully!${NC}" + echo -e "\033[44;32;1m${productName2} is installed successfully!${NC}" echo else # Only install client install_bin install_config echo - echo -e "\033[44;32;1m${productName} client is installed successfully!${NC}" + echo -e "\033[44;32;1m${productName2} client is installed successfully!${NC}" fi touch ~/.${historyFile} diff --git a/packaging/tools/install_client.sh b/packaging/tools/install_client.sh index 1543c59297..d941fbc0cb 100755 --- a/packaging/tools/install_client.sh +++ b/packaging/tools/install_client.sh @@ -17,7 +17,7 @@ serverName="taosd" clientName="taos" uninstallScript="rmtaos" configFile="taos.cfg" -tarName="taos.tar.gz" +tarName="package.tar.gz" osType=Linux pagMode=full diff --git a/packaging/tools/make_install.sh b/packaging/tools/make_install.sh index 5725560bd6..f6fc64d875 100755 --- a/packaging/tools/make_install.sh +++ b/packaging/tools/make_install.sh @@ -606,23 +606,23 @@ function update_TDengine() { echo -e "${GREEN_DARK}To configure ${productName} ${NC}: edit ${configDir}/${configFile}" [ -f ${configDir}/taosadapter.toml ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To configure Taos Adapter ${NC}: edit ${configDir}/taosadapter.toml" + echo -e "${GREEN_DARK}To configure Adapter ${NC}: edit ${configDir}/taosadapter.toml" if ((${service_mod} == 0)); then echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${csudo}systemctl start ${serverName}${NC}" [ -f ${service_config_dir}/taosadapter.service ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Taos Adatper ${NC}: ${csudo}systemctl start taosadapter ${NC}" + echo -e "${GREEN_DARK}To start Adapter ${NC}: ${csudo}systemctl start taosadapter ${NC}" elif ((${service_mod} == 1)); then echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${csudo}service ${serverName} start${NC}" [ -f ${service_config_dir}/taosadapter.service ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Taos Adapter ${NC}: ${csudo}service taosadapter start${NC}" + echo -e "${GREEN_DARK}To start Adapter ${NC}: ${csudo}service taosadapter start${NC}" else if [ "$osType" != "Darwin" ]; then echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${serverName}${NC}" [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Taos Adapter ${NC}: taosadapter &${NC}" + echo -e "${GREEN_DARK}To start Adapter ${NC}: taosadapter &${NC}" else echo -e "${GREEN_DARK}To start service ${NC}: launchctl start com.tdengine.taosd${NC}" - echo -e "${GREEN_DARK}To start Taos Adapter ${NC}: launchctl start com.tdengine.taosadapter${NC}" + echo -e "${GREEN_DARK}To start Adapter ${NC}: launchctl start com.tdengine.taosadapter${NC}" fi fi @@ -658,23 +658,23 @@ function install_TDengine() { echo echo -e "${GREEN_DARK}To configure ${productName} ${NC}: edit ${configDir}/${configFile}" [ -f ${configDir}/taosadapter.toml ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To configure Taos Adapter ${NC}: edit ${configDir}/taosadapter.toml" + echo -e "${GREEN_DARK}To configure Adapter ${NC}: edit ${configDir}/taosadapter.toml" if ((${service_mod} == 0)); then echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${csudo}systemctl start ${serverName}${NC}" [ -f ${service_config_dir}/taosadapter.service ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Taos Adapter ${NC}: ${csudo}systemctl start taosadapter ${NC}" + echo -e "${GREEN_DARK}To start Adapter ${NC}: ${csudo}systemctl start taosadapter ${NC}" elif ((${service_mod} == 1)); then echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${csudo}service ${serverName} start${NC}" [ -f ${service_config_dir}/taosadapter.service ] && [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Taos Adapter ${NC}: ${csudo}service taosadapter start${NC}" + echo -e "${GREEN_DARK}To start Adapter ${NC}: ${csudo}service taosadapter start${NC}" else if [ "$osType" != "Darwin" ]; then echo -e "${GREEN_DARK}To start ${productName} ${NC}: ${serverName}${NC}" [ -f ${installDir}/bin/taosadapter ] && \ - echo -e "${GREEN_DARK}To start Taos Adapter ${NC}: taosadapter &${NC}" + echo -e "${GREEN_DARK}To start Adapter ${NC}: taosadapter &${NC}" else echo -e "${GREEN_DARK}To start service ${NC}: launchctl start com.tdengine.taosd${NC}" - echo -e "${GREEN_DARK}To start Taos Adapter ${NC}: launchctl start com.tdengine.taosadapter${NC}" + echo -e "${GREEN_DARK}To start Adapter ${NC}: launchctl start com.tdengine.taosadapter${NC}" fi fi diff --git a/packaging/tools/makeclient.sh b/packaging/tools/makeclient.sh index 208bfc183c..f46de0f94b 100755 --- a/packaging/tools/makeclient.sh +++ b/packaging/tools/makeclient.sh @@ -24,7 +24,7 @@ clientName2="${12}" productName="TDengine" clientName="taos" configFile="taos.cfg" -tarName="taos.tar.gz" +tarName="package.tar.gz" if [ "$osType" != "Darwin" ]; then script_dir="$(dirname $(readlink -f $0))" diff --git a/packaging/tools/makepkg.sh b/packaging/tools/makepkg.sh index 7ad3cf7b0a..d71d5df47c 100755 --- a/packaging/tools/makepkg.sh +++ b/packaging/tools/makepkg.sh @@ -28,7 +28,7 @@ productName="TDengine" serverName="taosd" clientName="taos" configFile="taos.cfg" -tarName="taos.tar.gz" +tarName="package.tar.gz" dumpName="taosdump" benchmarkName="taosBenchmark" toolsName="taostools" @@ -171,22 +171,22 @@ if [ -n "${taostools_bin_files}" ]; then && cp ${taostools_bin_files} ${taostools_install_dir}/bin \ && chmod a+x ${taostools_install_dir}/bin/* || : - if [ -f ${top_dir}/tools/taos-tools/packaging/tools/install-taostools.sh ]; then - cp ${top_dir}/tools/taos-tools/packaging/tools/install-taostools.sh \ + if [ -f ${top_dir}/tools/taos-tools/packaging/tools/install-tools.sh ]; then + cp ${top_dir}/tools/taos-tools/packaging/tools/install-tools.sh \ ${taostools_install_dir}/ > /dev/null \ - && chmod a+x ${taostools_install_dir}/install-taostools.sh \ - || echo -e "failed to copy install-taostools.sh" + && chmod a+x ${taostools_install_dir}/install-tools.sh \ + || echo -e "failed to copy install-tools.sh" else - echo -e "install-taostools.sh not found" + echo -e "install-tools.sh not found" fi - if [ -f ${top_dir}/tools/taos-tools/packaging/tools/uninstall-taostools.sh ]; then - cp ${top_dir}/tools/taos-tools/packaging/tools/uninstall-taostools.sh \ + if [ -f ${top_dir}/tools/taos-tools/packaging/tools/uninstall-tools.sh ]; then + cp ${top_dir}/tools/taos-tools/packaging/tools/uninstall-tools.sh \ ${taostools_install_dir}/ > /dev/null \ - && chmod a+x ${taostools_install_dir}/uninstall-taostools.sh \ - || echo -e "failed to copy uninstall-taostools.sh" + && chmod a+x ${taostools_install_dir}/uninstall-tools.sh \ + || echo -e "failed to copy uninstall-tools.sh" else - echo -e "uninstall-taostools.sh not found" + echo -e "uninstall-tools.sh not found" fi if [ -f ${build_dir}/lib/libavro.so.23.0.0 ]; then diff --git a/packaging/tools/post.sh b/packaging/tools/post.sh index 78eb7f7587..9861806677 100755 --- a/packaging/tools/post.sh +++ b/packaging/tools/post.sh @@ -530,7 +530,7 @@ function install_service_on_sysvinit() { function clean_service_on_systemd() { taosd_service_config="${service_config_dir}/taosd.service" - # taosd service already is stoped before install in preinst script + # taosd service already is stopped before install in preinst script #if systemctl is-active --quiet taosd; then # echo "TDengine is running, stopping it..." # ${csudo}systemctl stop taosd &> /dev/null || echo &> /dev/null diff --git a/packaging/tools/release_note b/packaging/tools/release_note index 4578a4523c..81b09ba69c 100644 --- a/packaging/tools/release_note +++ b/packaging/tools/release_note @@ -72,7 +72,7 @@ New Features: taos-1.4.13 (Released on 2018-12-14) Bugs Fixed: - - Clients failed to connect to server due to unexpected and invalid packets recieved by the server. + - Clients failed to connect to server due to unexpected and invalid packets received by the server. Features Added: - Add support to HikariCP in TSDB JDBC driver. diff --git a/packaging/tools/repair_link.sh b/packaging/tools/repair_link.sh index 7fd503f270..d71a16023e 100755 --- a/packaging/tools/repair_link.sh +++ b/packaging/tools/repair_link.sh @@ -8,7 +8,7 @@ read -p "Please enter link directory such as /var/lib/taos/tsdb: " linkDir while true; do if [ ! -d $linkDir ]; then - read -p "Paht not exists, please enter the correct link path:" linkDir + read -p "Path not exists, please enter the correct link path:" linkDir continue fi break @@ -28,12 +28,12 @@ for linkFile in $(find -L $linkDir -xtype l); do if [ -z "${dirHash["$dirName"]}" ]; then read -p "Please enter the directory to replace ${dirName}:" newDir - read -p "Do you want to replcace all[y/N]?" replcaceAll - if [[ ( "${replcaceAll}" == "y") || ( "${replcaceAll}" == "Y") ]]; then + read -p "Do you want to replace all[y/N]?" replaceAll + if [[ ( "${replaceAll}" == "y") || ( "${replaceAll}" == "Y") ]]; then dirHash["$dirName"]="$newDir" fi fi - # Replcace the file + # Replace the file ln -sf "${newDir}/${baseName}" "${linkFile}" done From 15cbbd3a144fb57b657978d4ee2d0a736075e569 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 Mar 2023 21:25:52 +0800 Subject: [PATCH 28/28] fix(query): fix error in show cluster alive. --- source/libs/command/src/command.c | 2 +- tests/system-test/6-cluster/5dnode1mnode.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/command/src/command.c b/source/libs/command/src/command.c index c862a75ed3..58c43829cf 100644 --- a/source/libs/command/src/command.c +++ b/source/libs/command/src/command.c @@ -307,7 +307,7 @@ static void setCreateDBResultIntoDataBlock(SSDataBlock* pBlock, char* dbName, ch bool existLeaderRole(TAOS_ROW row, TAOS_FIELD* fields, int nFields) { // vgroup_id | db_name | tables | v1_dnode | v1_status | v2_dnode | v2_status | v3_dnode | v3_status | v4_dnode | // v4_status | cacheload | tsma | - if (nFields != 13) { + if (nFields != 14) { return false; } diff --git a/tests/system-test/6-cluster/5dnode1mnode.py b/tests/system-test/6-cluster/5dnode1mnode.py index f21fae0d7b..d0d9372298 100644 --- a/tests/system-test/6-cluster/5dnode1mnode.py +++ b/tests/system-test/6-cluster/5dnode1mnode.py @@ -29,7 +29,7 @@ class TDTestCase: self.master_dnode = self.TDDnodes.dnodes[0] self.host=self.master_dnode.cfgDict["fqdn"] conn1 = taos.connect(self.master_dnode.cfgDict["fqdn"] , config=self.master_dnode.cfgDir) - tdSql.init(conn1.cursor()) + tdSql.init(conn1.cursor(), True) def getBuildPath(self):