From 7f93ec2c5343efe6a9e932520698de9c603e2cfb Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 May 2024 09:37:02 +0800 Subject: [PATCH 01/67] fix(stream): update the checkpoint info for followers, and some internal refactor. --- include/common/tmsg.h | 23 +-- include/common/tmsgdef.h | 3 +- include/dnode/vnode/tqCommon.h | 1 + include/libs/stream/tstream.h | 3 + source/common/src/rsync.c | 4 +- source/dnode/mgmt/mgmt_snode/src/smHandle.c | 1 + source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 2 + source/dnode/snode/inc/sndInt.h | 17 -- source/dnode/snode/src/snode.c | 2 + source/dnode/snode/src/snodeInitApi.c | 6 +- source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/tq/tq.c | 16 +- source/dnode/vnode/src/tqCommon/tqCommon.c | 43 +++-- source/dnode/vnode/src/vnd/vnodeSvr.c | 5 + source/libs/stream/inc/streamBackendRocksdb.h | 2 +- source/libs/stream/inc/streamInt.h | 3 +- source/libs/stream/src/streamBackendRocksdb.c | 150 +++++++++--------- source/libs/stream/src/streamCheckpoint.c | 144 +++++++++-------- source/libs/stream/src/streamDispatch.c | 4 +- source/libs/stream/src/streamExec.c | 2 +- source/libs/stream/src/streamMeta.c | 9 +- source/libs/stream/src/streamTask.c | 37 ++++- 22 files changed, 272 insertions(+), 206 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index ed23290be4..790b8f883b 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -3440,23 +3440,24 @@ int32_t tSerializeSMDropStreamReq(void* buf, int32_t bufLen, const SMDropStreamR int32_t tDeserializeSMDropStreamReq(void* buf, int32_t bufLen, SMDropStreamReq* pReq); void tFreeMDropStreamReq(SMDropStreamReq* pReq); -typedef struct { - char name[TSDB_STREAM_FNAME_LEN]; - int8_t igNotExists; -} SMRecoverStreamReq; - -typedef struct { - int8_t reserved; -} SMRecoverStreamRsp; - typedef struct { int64_t recoverObjUid; int32_t taskId; int32_t hasCheckPoint; } SMVStreamGatherInfoReq; -// int32_t tSerializeSMRecoverStreamReq(void* buf, int32_t 
bufLen, const SMRecoverStreamReq* pReq); -// int32_t tDeserializeSMRecoverStreamReq(void* buf, int32_t bufLen, SMRecoverStreamReq* pReq); +typedef struct SVUpdateCheckpointInfoReq { + SMsgHead head; + int64_t streamId; + int32_t taskId; + int64_t checkpointId; + int64_t checkpointVer; + int64_t checkpointTs; + int32_t transId; + int8_t dropRelHTask; + int64_t hStreamId; + int64_t hTaskId; +} SVUpdateCheckpointInfoReq; typedef struct { int64_t leftForVer; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index a5a3bd5ee0..12f92b1242 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -310,7 +310,7 @@ TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DROP, "stream-task-drop", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RUN, "stream-task-run", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_UPDATE_CHKPT, "stream-update-chkptinfo", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) //1035 1036 TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT_READY, "stream-checkpoint-ready", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) @@ -321,6 +321,7 @@ TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_CREATE, "stream-create", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_DROP, "stream-drop", NULL, NULL) + TD_CLOSE_MSG_SEG(TDMT_END_STREAM_MSG) TD_NEW_MSG_SEG(TDMT_MON_MSG) //5 << 8 diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h index ce04ec6953..451f9a00eb 100644 --- a/include/dnode/vnode/tqCommon.h +++ b/include/dnode/vnode/tqCommon.h @@ -38,6 +38,7 @@ int32_t tqStreamTasksGetTotalNum(SStreamMeta* pMeta); int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg); int32_t 
tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* pMsg, bool fromVnode); +int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, char* msg, int32_t msgLen); int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode); void tqSetRestoreVersionInfo(SStreamTask* pTask); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 3c5d6d6e4c..9d5b7bc6f1 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -736,6 +736,9 @@ int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskI int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask); int32_t streamTaskBuildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, int32_t setCode); +int32_t streamBuildAndSendCheckpointUpdateMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId, STaskId* pHTaskId, + SCheckpointInfo* pCheckpointInfo, int8_t dropRelHTask); +int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpointInfoReq* pReq); // stream task state machine, and event handling SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask); diff --git a/source/common/src/rsync.c b/source/common/src/rsync.c index e448aec5e0..867b407296 100644 --- a/source/common/src/rsync.c +++ b/source/common/src/rsync.c @@ -208,11 +208,11 @@ int32_t downloadRsync(const char* id, const char* path) { int32_t code = execCommand(command); if (code != 0) { - uError("[rsync] get failed code:%d," ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); + uError("[rsync] download checkpoint data failed, code:%d," ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); return -1; } - uDebug("[rsync] down data:%s successful", id); + uDebug("[rsync] download checkpoint data:%s successfully", id); return 0; } diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index 
880e96adfb..b3c8ef4017 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -78,6 +78,7 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_UPDATE_CHKPT, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 73a73d19f5..826af71ace 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -967,6 +967,8 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_GET_STREAM_PROGRESS, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_UPDATE_CHKPT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIRM, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/snode/inc/sndInt.h b/source/dnode/snode/inc/sndInt.h index 8c5d056893..2ac66fa1cd 100644 --- a/source/dnode/snode/inc/sndInt.h +++ b/source/dnode/snode/inc/sndInt.h 
@@ -35,23 +35,6 @@ struct SSnode { SMsgCb msgCb; }; -#if 0 -typedef struct { - SHashObj* pHash; // taskId -> SStreamTask -} SStreamMeta; - -SStreamMeta* sndMetaNew(); -void sndMetaDelete(SStreamMeta* pMeta); - -int32_t sndMetaDeployTask(SStreamMeta* pMeta, SStreamTask* pTask); -SStreamTask* sndMetaGetTask(SStreamMeta* pMeta, int32_t taskId); -int32_t sndMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); - -int32_t sndDropTaskOfStream(SStreamMeta* pMeta, int64_t streamId); -int32_t sndStopTaskOfStream(SStreamMeta* pMeta, int64_t streamId); -int32_t sndResumeTaskOfStream(SStreamMeta* pMeta, int64_t streamId); -#endif - void initStreamStateAPI(SStorageAPI* pAPI); #ifdef __cplusplus diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 87f0681780..602264be73 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -168,6 +168,8 @@ int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { return tqStreamTaskProcessTaskPauseReq(pSnode->pMeta, pMsg->pCont); case TDMT_STREAM_TASK_RESUME: return tqStreamTaskProcessTaskResumeReq(pSnode->pMeta, pMsg->info.conn.applyIndex, pMsg->pCont, false); + case TDMT_STREAM_TASK_UPDATE_CHKPT: + return tqStreamTaskProcessUpdateCheckpointReq(pSnode->pMeta, pMsg->pCont, pMsg->contLen); default: ASSERT(0); } diff --git a/source/dnode/snode/src/snodeInitApi.c b/source/dnode/snode/src/snodeInitApi.c index 3b60ef3427..196fa56c99 100644 --- a/source/dnode/snode/src/snodeInitApi.c +++ b/source/dnode/snode/src/snodeInitApi.c @@ -75,8 +75,10 @@ void initStateStoreAPI(SStateStore* pStore) { pStore->streamStateCountGetKeyByRange = streamStateCountGetKeyByRange; pStore->streamStateSessionAllocWinBuffByNextPosition = streamStateSessionAllocWinBuffByNextPosition; - pStore->streamStateCountWinAddIfNotExist = streamStateCountWinAddIfNotExist; - pStore->streamStateCountWinAdd = streamStateCountWinAdd; +//void initStreamStateAPI(SStorageAPI* pAPI) { +// 
initStateStoreAPI(&pAPI->stateStore); +// initFunctionStateStore(&pAPI->functionStore); +//} pStore->updateInfoInit = updateInfoInit; pStore->updateInfoFillBlockData = updateInfoFillBlockData; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 419ebd1a6c..9439f7f179 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -295,6 +295,7 @@ int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg); int32_t tqStreamProgressRetrieveReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen); // sma int32_t smaInit(); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 79f53e6dec..b75d517997 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -828,7 +828,7 @@ static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask streamSetParamForStreamScannerStep2(pTask, pStep2Range, pWindow); - int64_t dstVer =pStep2Range->minVer; + int64_t dstVer = pStep2Range->minVer; pTask->chkInfo.nextProcessVer = dstVer; walReaderSetSkipToVersion(pTask->exec.pWalReader, dstVer); @@ -1009,6 +1009,20 @@ int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) { return tqStreamTaskProcessDropReq(pTq->pStreamMeta, msg, msgLen); } +int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen) { + int32_t vgId = TD_VID(pTq->pVnode); + SVUpdateCheckpointInfoReq* pReq = (SVUpdateCheckpointInfoReq*)msg; + + if (!pTq->pVnode->restored) { + tqDebug("vgId:%d update-checkpoint-info msg received during restoring, checkpointId:%" PRId64 + ", transId:%d s-task:0x%x ignore it", + vgId, pReq->checkpointId, pReq->transId, pReq->taskId); + return TSDB_CODE_SUCCESS; + } + + return tqStreamTaskProcessUpdateCheckpointReq(pTq->pStreamMeta, msg, msgLen); +} + 
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { return tqStreamTaskProcessTaskPauseReq(pTq->pStreamMeta, msg); } diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index 62c3b06b65..91de290e6a 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -65,6 +65,7 @@ int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode) .fillHistory = pTask->info.fillHistory, .winRange = pTask->dataRange.window, }; + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { handle.vnode = pVnode; handle.initTqReader = 1; @@ -577,22 +578,23 @@ int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, SMsgCb* cb, int64_t sve return code; } -void tqStreamRmTaskBackend(SStreamMeta* pMeta, STaskId* id) { +static void tqStreamRemoveTaskBackend(SStreamMeta* pMeta, const STaskId* pId) { char taskKey[128] = {0}; - sprintf(taskKey, "0x%" PRIx64 "-0x%x", id->streamId, (int32_t)id->taskId); + sprintf(taskKey, "0x%" PRIx64 "-0x%x", pId->streamId, (int32_t)pId->taskId); char* path = taosMemoryCalloc(1, strlen(pMeta->path) + 128); sprintf(path, "%s%s%s", pMeta->path, TD_DIRSEP, taskKey); taosRemoveDir(path); + + tqInfo("vgId:%d drop stream task:0x%x file:%s", pMeta->vgId, (int32_t)pId->taskId, path); taosMemoryFree(path); - // do nothing } int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; - int32_t vgId = pMeta->vgId; - STaskId hTaskId = {0}; + int32_t vgId = pMeta->vgId; + STaskId hTaskId = {0}; tqDebug("vgId:%d receive msg to drop s-task:0x%x", vgId, pReq->taskId); streamMetaWLock(pMeta); @@ -634,10 +636,32 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen streamMetaWUnLock(pMeta); - tqStreamRmTaskBackend(pMeta, &id); + tqStreamRemoveTaskBackend(pMeta, &id); return 0; } +int32_t 
tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, char* msg, int32_t msgLen) { + SVUpdateCheckpointInfoReq* pReq = (SVUpdateCheckpointInfoReq*)msg; + + int32_t vgId = pMeta->vgId; + tqDebug("vgId:%d receive msg to update-checkpoint-info for s-task:0x%x", vgId, pReq->taskId); + + streamMetaWLock(pMeta); + + STaskId id = {.streamId = pReq->streamId, .taskId = pReq->taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + + if (ppTask != NULL && (*ppTask) != NULL) { + streamTaskUpdateTaskCheckpointInfo(*ppTask, pReq); + } else { // failed to get the task. + tqError("vgId:%d failed to locate the s-task:0x%x to update the checkpoint info, it may have been dropped already", + vgId, pReq->taskId); + } + + streamMetaWUnLock(pMeta); + return TSDB_CODE_SUCCESS; +} + static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { int32_t vgId = pMeta->vgId; int32_t code = 0; @@ -927,9 +951,10 @@ static int32_t tqProcessTaskResumeImpl(void* handle, SStreamTask* pTask, int64_t int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* msg, bool fromVnode) { SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg; - SStreamMeta* pMeta = fromVnode ? ((STQ*)handle)->pStreamMeta : handle; - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); - int32_t code = tqProcessTaskResumeImpl(handle, pTask, sversion, pReq->igUntreated, fromVnode); + + SStreamMeta* pMeta = fromVnode ? 
((STQ*)handle)->pStreamMeta : handle; + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); + int32_t code = tqProcessTaskResumeImpl(handle, pTask, sversion, pReq->igUntreated, fromVnode); if (code != 0) { return code; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 6d97c1cd79..3fec3d9b8e 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -625,6 +625,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg goto _err; } } break; + case TDMT_STREAM_TASK_UPDATE_CHKPT: { + if (tqProcessTaskUpdateCheckpointReq(pVnode->pTq, pMsg->pCont, pMsg->contLen) < 0) { + goto _err; + } + } break; case TDMT_STREAM_TASK_PAUSE: { if (pVnode->restored && vnodeIsLeader(pVnode) && tqProcessTaskPauseReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index 704bc9a2f2..fbf902a237 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -141,7 +141,7 @@ SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); int32_t streamStateCvtDataFormat(char* path, char* key, void* cfInst); -STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId); +STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkptId); void taskDbDestroy(void* pBackend, bool flush); void taskDbDestroy2(void* pBackend); int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 3ccb25a62a..da8a24e6da 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -113,7 +113,7 @@ void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups); int32_t getNumOfDispatchBranch(SStreamTask* pTask); void 
clearBufferedDispatchMsg(SStreamTask* pTask); -int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); +int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamTask* pTask, int64_t resultSize, SArray* pRes); @@ -122,7 +122,6 @@ void destroyStreamDataBlock(SStreamDataBlock* pBlock); int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock* pData); int32_t streamBroadcastToUpTasks(SStreamTask* pTask, const SSDataBlock* pBlock); -int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId); int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index b5294a3fb7..d9eea23d21 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -42,7 +42,7 @@ void destroyRocksdbCfInst(RocksdbCfInst* inst); int32_t getCfIdx(const char* cfName); STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath); -int32_t backendCopyFiles(char* src, char* dst); +static int32_t backendCopyFiles(const char* src, const char* dst); void destroyCompactFilteFactory(void* arg); void destroyCompactFilte(void* arg); @@ -234,12 +234,14 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { tstrerror(TAOS_SYSTEM_ERROR(errno)), state); taosMkDir(state); } + taosMemoryFree(chkp); } - *dst = state; + *dst = state; return 0; } + int32_t remoteChkp_readMetaData(char* path, SArray* list) { char* metaPath = taosMemoryCalloc(1, strlen(path)); 
sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META"); @@ -323,7 +325,7 @@ int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { return complete == 1 ? 0 : -1; } -int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { +int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkpPath, int64_t checkpointId, char* defaultPath) { // impl later int32_t code = 0; if (taosIsDir(chkpPath)) { @@ -336,10 +338,14 @@ int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkpPath, int64_t chkpId, c code = streamTaskDownloadCheckpointData(key, chkpPath); if (code != 0) { + stError("failed to download checkpoint data:%s", key); return code; } + stDebug("download backup checkpoint data into:%s, checkpointId:%" PRId64 ", %s", chkpPath, checkpointId, key); + code = backendCopyFiles(chkpPath, defaultPath); + return code; } @@ -378,13 +384,16 @@ int32_t rebuildFromRemoteChkp_s3(char* key, char* chkpPath, int64_t chkpId, char return code; } -int32_t rebuildFromRemoteChkp(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { +int32_t rebuildFromRemoteCheckpoint(char* key, char* chkpPath, int64_t checkpointId, char* defaultPath) { ECHECKPOINT_BACKUP_TYPE type = streamGetCheckpointBackupType(); if (type == DATA_UPLOAD_S3) { - return rebuildFromRemoteChkp_s3(key, chkpPath, chkpId, defaultPath); + return rebuildFromRemoteChkp_s3(key, chkpPath, checkpointId, defaultPath); } else if (type == DATA_UPLOAD_RSYNC) { - return rebuildFromRemoteChkp_rsync(key, chkpPath, chkpId, defaultPath); + return rebuildFromRemoteChkp_rsync(key, chkpPath, checkpointId, defaultPath); + } else { + stError("%s not remote backup checkpoint data for:%"PRId64, key, checkpointId); } + return -1; } @@ -403,7 +412,7 @@ int32_t copyFiles_hardlink(char* src, char* dst, int8_t type) { return taosLinkFile(src, dst); } -int32_t backendFileCopyFilesImpl(char* src, char* dst) { +int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { const 
char* current = "CURRENT"; size_t currLen = strlen(current); @@ -412,20 +421,26 @@ int32_t backendFileCopyFilesImpl(char* src, char* dst) { int32_t dLen = strlen(dst); char* srcName = taosMemoryCalloc(1, sLen + 64); char* dstName = taosMemoryCalloc(1, dLen + 64); - // copy file to dst + // copy file to dst TdDirPtr pDir = taosOpenDir(src); if (pDir == NULL) { taosMemoryFree(srcName); taosMemoryFree(dstName); + code = TAOS_SYSTEM_ERROR(errno); + errno = 0; - return -1; + return code; } + errno = 0; TdDirEntryPtr de = NULL; + while ((de = taosReadDir(pDir)) != NULL) { char* name = taosGetDirEntryName(de); - if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + continue; + } sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); @@ -433,18 +448,21 @@ int32_t backendFileCopyFilesImpl(char* src, char* dst) { if (strncmp(name, current, strlen(name) <= currLen ? strlen(name) : currLen) == 0) { code = copyFiles_create(srcName, dstName, 0); if (code != 0) { - stError("failed to copy file, detail: %s to %s reason: %s", srcName, dstName, - tstrerror(TAOS_SYSTEM_ERROR(code))); + code = TAOS_SYSTEM_ERROR(code); + stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; } } else { code = copyFiles_hardlink(srcName, dstName, 0); if (code != 0) { - stError("failed to hard line file, detail: %s to %s, reason: %s", srcName, dstName, - tstrerror(TAOS_SYSTEM_ERROR(code))); + code = TAOS_SYSTEM_ERROR(code); + stError("failed to hardlink file, detail:%s to %s, reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; + } else { + stDebug("succ hard link file:%s to %s", srcName, dstName); } } + memset(srcName, 0, sLen + 64); memset(dstName, 0, dLen + 64); } @@ -453,76 +471,46 @@ int32_t backendFileCopyFilesImpl(char* src, char* dst) { taosMemoryFreeClear(dstName); taosCloseDir(&pDir); errno = 0; - return 0; + return 
code; + _ERROR: taosMemoryFreeClear(srcName); taosMemoryFreeClear(dstName); taosCloseDir(&pDir); errno = 0; - return -1; + return code; } -int32_t backendCopyFiles(char* src, char* dst) { + +int32_t backendCopyFiles(const char* src, const char* dst) { return backendFileCopyFilesImpl(src, dst); - // // opt later, just hard link - // int32_t sLen = strlen(src); - // int32_t dLen = strlen(dst); - // char* srcName = taosMemoryCalloc(1, sLen + 64); - // char* dstName = taosMemoryCalloc(1, dLen + 64); - - // TdDirPtr pDir = taosOpenDir(src); - // if (pDir == NULL) { - // taosMemoryFree(srcName); - // taosMemoryFree(dstName); - // return -1; - // } - - // TdDirEntryPtr de = NULL; - // while ((de = taosReadDir(pDir)) != NULL) { - // char* name = taosGetDirEntryName(de); - // if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; - - // sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); - // sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); - // // if (!taosDirEntryIsDir(de)) { - // // // code = taosCopyFile(srcName, dstName); - // // if (code == -1) { - // // goto _err; - // // } - // // } - // return backendFileCopyFilesImpl(src, dst); - - // memset(srcName, 0, sLen + 64); - // memset(dstName, 0, dLen + 64); - // } - - // _err: - // taosMemoryFreeClear(srcName); - // taosMemoryFreeClear(dstName); - // taosCloseDir(&pDir); - // return code >= 0 ? 
0 : -1; - - // return 0; } -int32_t rebuildFromLocalChkp(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + +static int32_t rebuildFromLocalCheckpoint(char* pTaskIdStr, const char* checkpointPath, int64_t chkpId, const char* defaultPath) { int32_t code = 0; + if (taosIsDir(defaultPath)) { taosRemoveDir(defaultPath); taosMkDir(defaultPath); - stInfo("succ to clear stream backend %s", defaultPath); + stInfo("clear task backend path:%s, done", defaultPath); } - if (taosIsDir(chkpPath) && isValidCheckpoint(chkpPath)) { - code = backendCopyFiles(chkpPath, defaultPath); - if (code != 0) { + + if (taosIsDir(checkpointPath) && isValidCheckpoint(checkpointPath)) { + code = backendCopyFiles(checkpointPath, defaultPath); + + if (code != TSDB_CODE_SUCCESS) { taosRemoveDir(defaultPath); taosMkDir(defaultPath); - stError("failed to restart stream backend from %s, reason: %s, start to restart from empty path: %s", chkpPath, - tstrerror(TAOS_SYSTEM_ERROR(errno)), defaultPath); - code = 0; + stError("%s failed to restart stream backend from %s, reason: %s, start to restart from empty path: %s", + pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(errno)), defaultPath); + code = TSDB_CODE_SUCCESS; } else { - stInfo("start to restart stream backend at checkpoint path: %s", chkpPath); + stInfo("%s start to restart stream backend at checkpoint path: %s", pTaskIdStr, checkpointPath); } + } else { + code = TSDB_CODE_FAILED; + stError("%s not valid checkpoint path/data in:%s", pTaskIdStr, checkpointPath); } return code; @@ -533,7 +521,7 @@ int32_t rebuildFromlocalDefault(char* key, char* chkpPath, int64_t chkpId, char* return code; } -int32_t rebuildDirFromChkp2(const char* path, char* key, int64_t chkpId, char** dbPrefixPath, char** dbPath) { +int32_t rebuildDirFormCheckpoint(const char* path, char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath) { // impl later int32_t code = 0; @@ -551,29 +539,33 @@ int32_t rebuildDirFromChkp2(const char* path, char* key, 
int64_t chkpId, char** taosMulMkDir(defaultPath); } - char* chkpPath = taosMemoryCalloc(1, strlen(path) + 256); - if (chkpId != 0) { - sprintf(chkpPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); - code = rebuildFromLocalChkp(key, chkpPath, chkpId, defaultPath); + stDebug("prepare local dir:%s, checkpointId:%d, key:%s succ", defaultPath, chkptId, key); + + char* chkptPath = taosMemoryCalloc(1, strlen(path) + 256); + if (chkptId != 0) { + sprintf(chkptPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkptId); + + code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath); if (code != 0) { - code = rebuildFromRemoteChkp(key, chkpPath, chkpId, defaultPath); + code = rebuildFromRemoteCheckpoint(key, chkptPath, chkptId, defaultPath); } if (code != 0) { - stInfo("failed to start stream backend at %s, reason: %s, restart from default defaultPath dir:%s", chkpPath, - tstrerror(TAOS_SYSTEM_ERROR(errno)), defaultPath); + stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s", chkptPath, + tstrerror(code), defaultPath); code = taosMkDir(defaultPath); } } else { - sprintf(chkpPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", + sprintf(chkptPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", (int64_t)-1); - code = rebuildFromLocalChkp(key, chkpPath, -1, defaultPath); + code = rebuildFromLocalCheckpoint(key, chkptPath, -1, defaultPath); if (code != 0) { code = taosMkDir(defaultPath); } } - taosMemoryFree(chkpPath); + + taosMemoryFree(chkptPath); *dbPath = defaultPath; *dbPrefixPath = prefixPath; @@ -1055,6 +1047,7 @@ _ERROR: rocksdb_checkpoint_object_destroy(cp); return code; } + int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32_t nCf) { if (nCf == 0) return 0; int code = 0; @@ -1098,6 +1091,7 @@ int32_t 
chkpPreBuildDir(char* path, int64_t chkpId, char** chkpDir, char** chkpI return 0; } + int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { SStreamMeta* pMeta = arg; @@ -1997,11 +1991,11 @@ _EXIT: return NULL; } -STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId) { +STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkptId) { char* statePath = NULL; char* dbPath = NULL; - if (rebuildDirFromChkp2(path, key, chkpId, &statePath, &dbPath) != 0) { + if (rebuildDirFormCheckpoint(path, key, chkptId, &statePath, &dbPath) != 0) { return NULL; } diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 5a4e3a5439..853c6881ba 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -33,8 +33,11 @@ static int32_t deleteCheckpointFile(const char* id, const char* name); static int32_t streamTaskBackupCheckpoint(const char* id, const char* path); static int32_t deleteCheckpoint(const char* id); static int32_t downloadCheckpointByNameS3(const char* id, const char* fname, const char* dstName); +static int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask); +static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType); +static int32_t streamAlignCheckpoint(SStreamTask* pTask); -static int32_t streamAlignCheckpoint(SStreamTask* pTask) { +int32_t streamAlignCheckpoint(SStreamTask* pTask) { int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); int64_t old = atomic_val_compare_exchange_32(&pTask->chkInfo.downstreamAlignNum, 0, num); if (old == 0) { @@ -44,7 +47,7 @@ static int32_t streamAlignCheckpoint(SStreamTask* pTask) { return atomic_sub_fetch_32(&pTask->chkInfo.downstreamAlignNum, 1); } -static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { +int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { SStreamDataBlock* pChkpoint = 
taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); if (pChkpoint == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -94,7 +97,7 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo return appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); } -static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStreamTask* pTask) { +int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask) { pBlock->srcTaskId = pTask->id.taskId; pBlock->srcVgId = pTask->pMeta->vgId; @@ -110,7 +113,7 @@ static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStream return code; } -int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { +int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { SSDataBlock* pDataBlock = taosArrayGet(pBlock->blocks, 0); int64_t checkpointId = pDataBlock->info.version; int32_t transId = pDataBlock->info.window.skey; @@ -118,7 +121,7 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc int32_t code = TSDB_CODE_SUCCESS; int32_t vgId = pTask->pMeta->vgId; - stDebug("s-task:%s vgId:%d start to handle the checkpoint block, checkpointId:%" PRId64 " ver:%" PRId64 + stDebug("s-task:%s vgId:%d start to handle the checkpoint-trigger block, checkpointId:%" PRId64 " ver:%" PRId64 ", transId:%d current checkpointingId:%" PRId64, id, vgId, pTask->chkInfo.checkpointId, pTask->chkInfo.checkpointVer, transId, checkpointId); @@ -141,7 +144,7 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc int8_t type = pTask->outputInfo.type; if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) { stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); - continueDispatchCheckpointBlock(pBlock, pTask); + continueDispatchCheckpointTriggerBlock(pBlock, 
pTask); } else { // only one task exists, no need to dispatch downstream info atomic_add_fetch_32(&pTask->chkInfo.numOfNotReady, 1); streamProcessCheckpointReadyMsg(pTask); @@ -182,7 +185,7 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc // Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task // already. And then, dispatch check point msg to all downstream tasks - code = continueDispatchCheckpointBlock(pBlock, pTask); + code = continueDispatchCheckpointTriggerBlock(pBlock, pTask); } } @@ -227,75 +230,83 @@ void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { } } -int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) { - SStreamMeta* pMeta = p->pMeta; +int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpointInfoReq* pReq) { + SStreamMeta* pMeta = pTask->pMeta; int32_t vgId = pMeta->vgId; - const char* id = p->id.idStr; int32_t code = 0; - SCheckpointInfo* pCKInfo = &p->chkInfo; + const char* id = pTask->id.idStr; + SCheckpointInfo* pInfo = &pTask->chkInfo; - // fill-history task, rsma task, and sink task will not generate the checkpoint - if ((p->info.fillHistory == 1) || (p->info.taskLevel > TASK_LEVEL__SINK)) { - return code; - } + taosThreadMutexLock(&pTask->lock); - taosThreadMutexLock(&p->lock); + SStreamTaskState* pStatus = streamTaskGetStatus(pTask); - SStreamTaskState* pStatus = streamTaskGetStatus(p); - ETaskStatus prevStatus = pStatus->state; + stDebug("s-task:%s vgId:%d status:%s start to update the checkpoint info, checkpointId:%" PRId64 "->%" PRId64 + " checkpointVer:%" PRId64 "->%" PRId64 " checkpointTs:%" PRId64 "->%" PRId64, + id, vgId, pStatus->name, pInfo->checkpointId, pReq->checkpointId, pInfo->checkpointVer, pReq->checkpointVer, + pInfo->checkpointTime, pReq->checkpointTs); - if (pStatus->state == TASK_STATUS__CK) { - ASSERT(pCKInfo->checkpointId <= pCKInfo->checkpointingId && 
pCKInfo->checkpointingId == checkpointId && - pCKInfo->checkpointVer <= pCKInfo->processedVer); + // in the + if (pStatus->state != TASK_STATUS__DROPPING) { + ASSERT(pInfo->checkpointId <= pReq->checkpointId && pInfo->checkpointVer <= pReq->checkpointVer); - pCKInfo->checkpointId = pCKInfo->checkpointingId; - pCKInfo->checkpointVer = pCKInfo->processedVer; - pCKInfo->checkpointTime = pCKInfo->startTs; + pInfo->checkpointId = pReq->checkpointId; + pInfo->checkpointVer = pReq->checkpointVer; + pInfo->checkpointTime = pReq->checkpointTs; - streamTaskClearCheckInfo(p, false); - taosThreadMutexUnlock(&p->lock); + streamTaskClearCheckInfo(pTask, false); - code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + // todo handle error + if (pStatus->state == TASK_STATUS__CK) { + code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + } else { + stDebug("s-task:0x%x vgId:%d not handle checkpoint-done event, status:%s", pReq->taskId, vgId, pStatus->name); + } } else { - stDebug("s-task:%s vgId:%d status:%s not keep the checkpoint metaInfo, checkpoint:%" PRId64 " failed", id, vgId, - pStatus->name, pCKInfo->checkpointingId); - taosThreadMutexUnlock(&p->lock); + stDebug("s-task:0x%x vgId:%d status:%s not update checkpoint info, checkpointId:%" PRId64 "->%" PRId64 " failed", + pReq->taskId, vgId, pStatus->name, pInfo->checkpointId, pReq->checkpointId); + taosThreadMutexUnlock(&pTask->lock); return TSDB_CODE_STREAM_TASK_IVLD_STATUS; } + if (pReq->dropRelHTask) { + stDebug("s-task:0x%x vgId:%d drop the related fill-history task:0x%" PRIx64 " after update checkpoint", + pReq->taskId, vgId, pReq->hTaskId); + CLEAR_RELATED_FILLHISTORY_TASK(pTask); + } + + stDebug("s-task:0x%x set the persistent status attr to be ready, prev:%s, status in sm:%s", pReq->taskId, + streamTaskGetStatusStr(pTask->status.taskStatus), streamTaskGetStatus(pTask)->name); + + pTask->status.taskStatus = TASK_STATUS__READY; + + code = streamMetaSaveTask(pMeta, pTask); if 
(code != TSDB_CODE_SUCCESS) { - stDebug("s-task:%s vgId:%d handle event:checkpoint-done failed", id, vgId); + stError("s-task:%s vgId:%d failed to save task info after do checkpoint, checkpointId:%" PRId64 ", since %s", id, + vgId, pReq->checkpointId, terrstr()); return code; } - stDebug("vgId:%d s-task:%s level:%d open upstream inputQ, save status after checkpoint, checkpointId:%" PRId64 - ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status: ready, prev:%s", - vgId, id, p->info.taskLevel, checkpointId, pCKInfo->checkpointVer, pCKInfo->nextProcessVer, - streamTaskGetStatusStr(prevStatus)); + taosThreadMutexUnlock(&pTask->lock); + streamMetaWUnLock(pMeta); - // save the task if not sink task - if (p->info.taskLevel <= TASK_LEVEL__SINK) { - streamMetaWLock(pMeta); + // drop task should not in the meta-lock, and drop the related fill-history task now + if (pReq->dropRelHTask) { + streamMetaUnregisterTask(pMeta, pReq->hStreamId, pReq->hTaskId); - code = streamMetaSaveTask(pMeta, p); - if (code != TSDB_CODE_SUCCESS) { - streamMetaWUnLock(pMeta); - stError("s-task:%s vgId:%d failed to save task info after do checkpoint, checkpointId:%" PRId64 ", since %s", id, - vgId, checkpointId, terrstr()); - return code; - } - - code = streamMetaCommit(pMeta); - if (code != TSDB_CODE_SUCCESS) { - stError("s-task:%s vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", - id, vgId, checkpointId, terrstr()); - } - - streamMetaWUnLock(pMeta); + // commit the update + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + stDebug("s-task:%s vgId:%d related fill-history task:0x%x dropped, remain tasks:%d", id, vgId, pReq->taskId, numOfTasks); } - return code; + streamMetaWLock(pMeta); + + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + + return TSDB_CODE_SUCCESS; } void streamTaskSetFailedCheckpointId(SStreamTask* pTask) { @@ -334,6 +345,7 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l } } } + 
taosCloseFile(&pFile); taosRemoveFile(file); taosMemoryFree(file); @@ -448,7 +460,9 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { // clear the checkpoint info, and commit the newest checkpoint info if all works are done successfully if (code == TSDB_CODE_SUCCESS) { - code = streamSaveTaskCheckpointInfo(pTask, ckId); + STaskId* pHTaskId = &pTask->hTaskInfo.id; + code = streamBuildAndSendCheckpointUpdateMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id, pHTaskId, &pTask->chkInfo, + dropRelHTask); if (code == TSDB_CODE_SUCCESS) { code = streamTaskRemoteBackupCheckpoint(pTask, ckId, (char*)id); if (code != TSDB_CODE_SUCCESS) { @@ -459,22 +473,6 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { } } - if ((code == TSDB_CODE_SUCCESS) && dropRelHTask) { - // transferred from the halt status, it is done the fill-history procedure and finish with the checkpoint - // free it and remove fill-history task from disk meta-store - taosThreadMutexLock(&pTask->lock); - if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { - SStreamTaskId hTaskId = {.streamId = pTask->hTaskInfo.id.streamId, .taskId = pTask->hTaskInfo.id.taskId}; - - stDebug("s-task:%s fill-history finish checkpoint done, drop related fill-history task:0x%x", id, hTaskId.taskId); - streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &hTaskId, 1); - } else { - stWarn("s-task:%s related fill-history task:0x%x is erased", id, (int32_t)pTask->hTaskInfo.id.taskId); - } - - taosThreadMutexUnlock(&pTask->lock); - } - // clear the checkpoint info if failed if (code != TSDB_CODE_SUCCESS) { taosThreadMutexLock(&pTask->lock); @@ -488,7 +486,7 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { double el = (taosGetTimestampMs() - startTs) / 1000.0; stInfo("s-task:%s vgId:%d level:%d, checkpointId:%" PRId64 " ver:%" PRId64 " elapsed time:%.2f Sec, %s ", id, - pTask->pMeta->vgId, pTask->info.taskLevel, ckId, pTask->chkInfo.checkpointVer, el, + pMeta->vgId, pTask->info.taskLevel, ckId, pTask->chkInfo.checkpointVer, 
el, (code == TSDB_CODE_SUCCESS) ? "succ" : "failed"); return code; @@ -586,7 +584,7 @@ int32_t downloadCheckpointDataByName(const char* id, const char* fname, const ch int32_t streamTaskDownloadCheckpointData(char* id, char* path) { if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) { - stError("streamTaskDownloadCheckpointData parameters invalid"); + stError("down checkpoint data parameters invalid"); return -1; } diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 58c6e19581..f3569d8973 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -341,7 +341,7 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch SEpSet* pEpSet = &pTask->outputInfo.fixedDispatcher.epSet; int32_t downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; - stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), id:%d", id, + stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), msgId:%d", id, pTask->info.selfChildId, 1, downstreamTaskId, vgId, msgId); code = doSendDispatchMsg(pTask, pDispatchMsg, vgId, pEpSet); @@ -432,7 +432,7 @@ static void doRetryDispatchData(void* param, void* tmrId) { SEpSet* pEpSet = &pTask->outputInfo.fixedDispatcher.epSet; int32_t downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; - stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), id:%d", id, + stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), msgId:%d", id, pTask->info.selfChildId, 1, downstreamTaskId, vgId, msgId); code = doSendDispatchMsg(pTask, pReq, vgId, pEpSet); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 047b169ec9..9747ebd2ff 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -589,7 +589,7 
@@ static int32_t doStreamExecTask(SStreamTask* pTask) { // dispatch checkpoint msg to all downstream tasks int32_t type = pInput->type; if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { - streamProcessCheckpointBlock(pTask, (SStreamDataBlock*)pInput); + streamProcessCheckpointTriggerBlock(pTask, (SStreamDataBlock*)pInput); continue; } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 4fa9b2c66f..95fd057929 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -556,19 +556,22 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { if (pTask->ver < SSTREAM_TASK_SUBTABLE_CHANGED_VER){ pTask->ver = SSTREAM_TASK_VER; } + SEncoder encoder = {0}; tEncoderInit(&encoder, buf, len); tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); int64_t id[2] = {pTask->id.streamId, pTask->id.taskId}; - if (tdbTbUpsert(pMeta->pTaskDb, id, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn) < 0) { + code = tdbTbUpsert(pMeta->pTaskDb, id, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn); + if (code < 0) { stError("s-task:%s save to disk failed, code:%s", pTask->id.idStr, tstrerror(terrno)); - return -1; + } else { + stDebug("s-task:%s vgId:%d stream task write to meta file", pTask->id.idStr, pTask->pMeta->vgId); } taosMemoryFree(buf); - return 0; + return code; } int32_t streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pTaskId) { diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 1e622f615d..72302f981d 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -686,16 +686,47 @@ int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskI pReq->head.vgId = vgId; pReq->taskId = pTaskId->taskId; pReq->streamId = pTaskId->streamId; - pReq->resetRelHalt = resetRelHalt; + pReq->resetRelHalt = resetRelHalt; // todo: remove this attribute SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_DROP, .pCont = pReq, .contLen 
= sizeof(SVDropStreamTaskReq)}; int32_t code = tmsgPutToQueue(pMsgCb, WRITE_QUEUE, &msg); if (code != TSDB_CODE_SUCCESS) { stError("vgId:%d failed to send drop task:0x%x msg, code:%s", vgId, pTaskId->taskId, tstrerror(code)); - return code; + } else { + stDebug("vgId:%d build and send drop task:0x%x msg", vgId, pTaskId->taskId); } - stDebug("vgId:%d build and send drop task:0x%x msg", vgId, pTaskId->taskId); + return code; +} + +int32_t streamBuildAndSendCheckpointUpdateMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId, STaskId* pHTaskId, + SCheckpointInfo* pCheckpointInfo, int8_t dropRelHTask) { + SVUpdateCheckpointInfoReq* pReq = rpcMallocCont(sizeof(SVUpdateCheckpointInfoReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pReq->head.vgId = vgId; + pReq->taskId = pTaskId->taskId; + pReq->streamId = pTaskId->streamId; + pReq->dropRelHTask = dropRelHTask; + pReq->hStreamId = pHTaskId->streamId; + pReq->hTaskId = pHTaskId->taskId; + pReq->transId = pCheckpointInfo->transId; + + pReq->checkpointId = pCheckpointInfo->checkpointingId; + pReq->checkpointVer = pCheckpointInfo->processedVer; + pReq->checkpointTs = pCheckpointInfo->startTs; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_UPDATE_CHKPT, .pCont = pReq, .contLen = sizeof(SVUpdateCheckpointInfoReq)}; + int32_t code = tmsgPutToQueue(pMsgCb, WRITE_QUEUE, &msg); + + if (code != TSDB_CODE_SUCCESS) { + stError("vgId:%d task:0x%x failed to send update checkpoint info msg, code:%s", vgId, pTaskId->taskId, tstrerror(code)); + } else { + stDebug("vgId:%d task:0x%x build and send update checkpoint info msg msg", vgId, pTaskId->taskId); + } return code; } From 11ed1f54b3f37b1d74705699ad1e47ff5f3543d0 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 May 2024 10:09:23 +0800 Subject: [PATCH 02/67] refactor: do some internal refactor. 
--- source/libs/stream/src/streamBackendRocksdb.c | 1 + source/libs/stream/src/streamCheckpoint.c | 57 ++++++++++++------- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index d9eea23d21..56ea1611f4 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1209,6 +1209,7 @@ _EXIT: taosMemoryFree(ppCf); return code; } + int32_t streamBackendDoCheckpoint(void* arg, int64_t chkpId) { return taskDbDoCheckpoint(arg, chkpId); } SListNode* streamBackendAddCompare(void* backend, void* arg) { diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 853c6881ba..a1e6838ac3 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -353,42 +353,51 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l } int32_t uploadCheckpointData(void* param) { - SAsyncUploadArg* arg = param; + SAsyncUploadArg* pParam = param; char* path = NULL; int32_t code = 0; SArray* toDelFiles = taosArrayInit(4, sizeof(void*)); - char* taskStr = arg->taskId ? arg->taskId : "NULL"; + char* taskStr = pParam->taskId ? 
pParam->taskId : "NULL"; - void* pBackend = taskAcquireDb(arg->dbRefId); + void* pBackend = taskAcquireDb(pParam->dbRefId); if (pBackend == NULL) { stError("s-task:%s failed to acquire db", taskStr); - taosMemoryFree(arg->taskId); - taosMemoryFree(arg); + taosMemoryFree(pParam->taskId); + taosMemoryFree(pParam); return -1; } - if ((code = taskDbGenChkpUploadData(arg->pTask->pBackend, ((SStreamMeta*)arg->pMeta)->bkdChkptMgt, arg->chkpId, - (int8_t)(arg->type), &path, toDelFiles)) != 0) { - stError("s-task:%s failed to gen upload checkpoint:%" PRId64 "", taskStr, arg->chkpId); + if ((code = taskDbGenChkpUploadData(pParam->pTask->pBackend, ((SStreamMeta*)pParam->pMeta)->bkdChkptMgt, + pParam->chkpId, + (int8_t)(pParam->type), &path, toDelFiles)) != 0) { + stError("s-task:%s failed to gen upload checkpoint:%" PRId64, taskStr, pParam->chkpId); } - if (arg->type == DATA_UPLOAD_S3) { - if (code == 0 && (code = getCheckpointDataMeta(arg->taskId, path, toDelFiles)) != 0) { - stError("s-task:%s failed to get checkpointId:%" PRId64 " meta", taskStr, arg->chkpId); + if (pParam->type == DATA_UPLOAD_S3) { + if (code == 0 && (code = getCheckpointDataMeta(pParam->taskId, path, toDelFiles)) != 0) { + stError("s-task:%s failed to get checkpointData for checkpointId:%" PRId64 " meta", taskStr, pParam->chkpId); } } - if (code == 0 && (code = streamTaskBackupCheckpoint(arg->taskId, path)) != 0) { - stError("s-task:%s failed to upload checkpointId:%" PRId64, taskStr, arg->chkpId); + if (code == TSDB_CODE_SUCCESS) { + code = streamTaskBackupCheckpoint(pParam->taskId, path); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:%s failed to upload checkpoint data:%s, checkpointId:%" PRId64, taskStr, path, pParam->chkpId); + } else { + stDebug("s-task:%s backup checkpointId:%"PRId64" to remote succ", taskStr, pParam->chkpId); + } } - taskReleaseDb(arg->dbRefId); + taskReleaseDb(pParam->dbRefId); if (code == 0) { - for (int i = 0; i < taosArrayGetSize(toDelFiles); i++) { - char* p = 
taosArrayGetP(toDelFiles, i); - code = deleteCheckpointFile(arg->taskId, p); - stDebug("s-task:%s try to del file: %s", taskStr, p); + int32_t size = taosArrayGetSize(toDelFiles); + stDebug("s-task:%s remove redundant %d files", taskStr, size); + + for (int i = 0; i < size; i++) { + char* pName = taosArrayGetP(toDelFiles, i); + code = deleteCheckpointFile(pParam->taskId, pName); + stDebug("s-task:%s try to del file: %s", taskStr, pName); if (code != 0) { break; } @@ -396,15 +405,18 @@ int32_t uploadCheckpointData(void* param) { } taosArrayDestroyP(toDelFiles, taosMemoryFree); + + stDebug("s-task:%s remove local checkpoint dir:%s", taskStr, path); taosRemoveDir(path); taosMemoryFree(path); - taosMemoryFree(arg->taskId); - taosMemoryFree(arg); + + taosMemoryFree(pParam->taskId); + taosMemoryFree(pParam); return code; } -int32_t streamTaskRemoteBackupCheckpoint(SStreamTask* pTask, int64_t chkpId, char* taskId) { +int32_t streamTaskRemoteBackupCheckpoint(SStreamTask* pTask, int64_t checkpointId, char* taskId) { ECHECKPOINT_BACKUP_TYPE type = streamGetCheckpointBackupType(); if (type == DATA_UPLOAD_DISABLE) { return 0; @@ -417,7 +429,7 @@ int32_t streamTaskRemoteBackupCheckpoint(SStreamTask* pTask, int64_t chkpId, cha SAsyncUploadArg* arg = taosMemoryCalloc(1, sizeof(SAsyncUploadArg)); arg->type = type; arg->taskId = taosStrdup(taskId); - arg->chkpId = chkpId; + arg->chkpId = checkpointId; arg->pTask = pTask; arg->dbRefId = taskGetDBRef(pTask->pBackend); arg->pMeta = pTask->pMeta; @@ -613,6 +625,7 @@ int32_t deleteCheckpoint(const char* id) { int32_t deleteCheckpointFile(const char* id, const char* name) { char object[128] = {0}; snprintf(object, sizeof(object), "%s/%s", id, name); + char* tmp = object; s3DeleteObjects((const char**)&tmp, 1); return 0; From 370c25512822ad679cd9ce9bc91b1a4845d49e63 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 May 2024 10:27:55 +0800 Subject: [PATCH 03/67] refactor: do some internal refactor. 
--- source/common/src/rsync.c | 22 +++++++++++++--------- source/libs/stream/src/streamCheckpoint.c | 3 +-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/source/common/src/rsync.c b/source/common/src/rsync.c index 867b407296..2324829dbb 100644 --- a/source/common/src/rsync.c +++ b/source/common/src/rsync.c @@ -153,16 +153,16 @@ void startRsync() { } int32_t uploadRsync(const char* id, const char* path) { + int64_t st = taosGetTimestampMs(); + char command[PATH_MAX] = {0}; + #ifdef WINDOWS char pathTransform[PATH_MAX] = {0}; changeDirFromWindowsToLinux(path, pathTransform); -#endif - char command[PATH_MAX] = {0}; -#ifdef WINDOWS - if(pathTransform[strlen(pathTransform) - 1] != '/'){ + if(pathTransform[strlen(pathTransform) - 1] != '/') { #else - if(path[strlen(path) - 1] != '/'){ + if (path[strlen(path) - 1] != '/') { #endif snprintf(command, PATH_MAX, "rsync -av --delete --timeout=10 --bwlimit=100000 %s/ rsync://%s/checkpoint/%s/", #ifdef WINDOWS @@ -178,16 +178,20 @@ int32_t uploadRsync(const char* id, const char* path) { #else path #endif - , tsSnodeAddress, id); + , + tsSnodeAddress, id); } int32_t code = execCommand(command); - if(code != 0){ - uError("[rsync] send failed code:%d," ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); + if (code != 0) { + uError("[rsync] s-task:%s upload checkpoint data in:%s to %s failed, code:%d", id, path, tsSnodeAddress, + ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); return -1; } - uDebug("[rsync] upload data:%s successful", id); + int64_t el = (taosGetTimestampMs() - st); + uDebug("[rsync] s-task:%s upload checkpoint data in:%s to %s successfully, elapsed time:%" PRId64 "ms", id, path, + tsSnodeAddress, el); return 0; } diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index a1e6838ac3..2f1771279f 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -368,8 +368,7 @@ int32_t uploadCheckpointData(void* param) { } if ((code 
= taskDbGenChkpUploadData(pParam->pTask->pBackend, ((SStreamMeta*)pParam->pMeta)->bkdChkptMgt, - pParam->chkpId, - (int8_t)(pParam->type), &path, toDelFiles)) != 0) { + pParam->chkpId, (int8_t)(pParam->type), &path, toDelFiles)) != 0) { stError("s-task:%s failed to gen upload checkpoint:%" PRId64, taskStr, pParam->chkpId); } From f8d80612606b1db876c090dfe06b2782afe0c3eb Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 May 2024 11:08:51 +0800 Subject: [PATCH 04/67] refactor: do some internal refactor. --- source/common/src/rsync.c | 4 ++-- source/common/src/tdataformat.c | 1 + source/libs/executor/src/timesliceoperator.c | 1 + source/libs/stream/src/streamBackendRocksdb.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/source/common/src/rsync.c b/source/common/src/rsync.c index 2324829dbb..35ae9450f1 100644 --- a/source/common/src/rsync.c +++ b/source/common/src/rsync.c @@ -184,8 +184,8 @@ int32_t uploadRsync(const char* id, const char* path) { int32_t code = execCommand(command); if (code != 0) { - uError("[rsync] s-task:%s upload checkpoint data in:%s to %s failed, code:%d", id, path, tsSnodeAddress, - ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); + uError("[rsync] s-task:%s upload checkpoint data in %s to %s failed, code:%d," ERRNO_ERR_FORMAT, id, path, + tsSnodeAddress, code, ERRNO_ERR_DATA); return -1; } diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index 04ad00e1dc..f2d37bede6 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -1320,6 +1320,7 @@ int32_t tRowKeyAssign(SRowKey *pDst, SRowKey *pSrc) { pVal->val = pSrc->pks[i].val; } else { pVal->nData = pSrc->pks[i].nData; + ASSERT(pSrc->pks[i].pData != NULL); memcpy(pVal->pData, pSrc->pks[i].pData, pVal->nData); } } diff --git a/source/libs/executor/src/timesliceoperator.c b/source/libs/executor/src/timesliceoperator.c index 080fd6b914..ec133a1022 100644 --- a/source/libs/executor/src/timesliceoperator.c +++ 
b/source/libs/executor/src/timesliceoperator.c @@ -194,6 +194,7 @@ static void tRowGetKeyFromColData(int64_t ts, SColumnInfoData* pPkCol, int32_t r } } +// only the timestamp is needed to complete the duplicated timestamp check. static bool checkDuplicateTimestamps(STimeSliceOperatorInfo* pSliceInfo, SColumnInfoData* pTsCol, SColumnInfoData* pPkCol, int32_t curIndex, int32_t rows) { int64_t currentTs = *(int64_t*)colDataGetData(pTsCol, curIndex); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 60384c9ca1..455466f64c 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -539,7 +539,7 @@ int32_t rebuildDirFormCheckpoint(const char* path, char* key, int64_t chkptId, c taosMulMkDir(defaultPath); } - stDebug("prepare local dir:%s, checkpointId:%d, key:%s succ", defaultPath, chkptId, key); + stDebug("prepare local dir:%s, checkpointId:%" PRId64 ", key:%s succ", defaultPath, chkptId, key); char* chkptPath = taosMemoryCalloc(1, strlen(path) + 256); if (chkptId != 0) { From d214dd5cdd7103a3a31a8f4a09c9998a556e43b4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 May 2024 16:19:11 +0800 Subject: [PATCH 05/67] fix(stream): fix error in downloading remote backup checkpoint data. 
--- source/common/src/rsync.c | 14 +++++--- source/libs/stream/src/streamBackendRocksdb.c | 35 +++++++++---------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/source/common/src/rsync.c b/source/common/src/rsync.c index 35ae9450f1..302f17942f 100644 --- a/source/common/src/rsync.c +++ b/source/common/src/rsync.c @@ -196,10 +196,14 @@ int32_t uploadRsync(const char* id, const char* path) { } int32_t downloadRsync(const char* id, const char* path) { + int64_t st = taosGetTimestampMs(); + uDebug("[rsync] %s start to sync data from remote to local:%s", id, path); + #ifdef WINDOWS char pathTransform[PATH_MAX] = {0}; changeDirFromWindowsToLinux(path, pathTransform); #endif + char command[PATH_MAX] = {0}; snprintf(command, PATH_MAX, "rsync -av --timeout=10 --bwlimit=100000 rsync://%s/checkpoint/%s/ %s", tsSnodeAddress, id, @@ -211,13 +215,15 @@ int32_t downloadRsync(const char* id, const char* path) { ); int32_t code = execCommand(command); + + int32_t el = taosGetTimestampMs() - st; if (code != 0) { - uError("[rsync] download checkpoint data failed, code:%d," ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); - return -1; + uError("[rsync] %s download checkpoint data:%s failed, code:%d," ERRNO_ERR_FORMAT, id, path, code, ERRNO_ERR_DATA); + } else { + uDebug("[rsync] %s download checkpoint data:%s successfully, elapsed time:%dms", id, path, el); } - uDebug("[rsync] download checkpoint data:%s successfully", id); - return 0; + return code; } int32_t deleteRsync(const char* id) { diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 455466f64c..1a05acf3eb 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -325,28 +325,27 @@ int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { return complete == 1 ? 
0 : -1; } -int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkpPath, int64_t checkpointId, char* defaultPath) { - // impl later +int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { int32_t code = 0; - if (taosIsDir(chkpPath)) { - taosRemoveDir(chkpPath); + if (taosIsDir(chkptPath)) { + taosRemoveDir(chkptPath); + stDebug("remove local checkpoint data dir:%s succ", chkptPath); } if (taosIsDir(defaultPath)) { taosRemoveDir(defaultPath); + taosMulMkDir(defaultPath); + stDebug("clear local backend dir:%s succ", defaultPath); } - code = streamTaskDownloadCheckpointData(key, chkpPath); + code = streamTaskDownloadCheckpointData(key, chkptPath); if (code != 0) { stError("failed to download checkpoint data:%s", key); return code; } - stDebug("download backup checkpoint data into:%s, checkpointId:%" PRId64 ", %s", chkpPath, checkpointId, key); - - code = backendCopyFiles(chkpPath, defaultPath); - - return code; + stDebug("download remote checkpoint data for checkpointId:%" PRId64 ", %s", checkpointId, key); + return backendCopyFiles(chkptPath, defaultPath); } int32_t rebuildFromRemoteChkp_s3(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { @@ -456,7 +455,7 @@ int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { code = copyFiles_hardlink(srcName, dstName, 0); if (code != 0) { code = TAOS_SYSTEM_ERROR(code); - stError("failed to hardlink file, detail:%s to %s, reason:%s", srcName, dstName, tstrerror(code)); + stError("failed to hard link file, detail:%s to %s, reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; } else { stDebug("succ hard link file:%s to %s", srcName, dstName); @@ -485,14 +484,13 @@ int32_t backendCopyFiles(const char* src, const char* dst) { return backendFileCopyFilesImpl(src, dst); } -static int32_t rebuildFromLocalCheckpoint(char* pTaskIdStr, const char* checkpointPath, int64_t chkpId, const char* defaultPath) { +static int32_t 
rebuildFromLocalCheckpoint(char* pTaskIdStr, const char* checkpointPath, int64_t chkptId, const char* defaultPath) { int32_t code = 0; if (taosIsDir(defaultPath)) { taosRemoveDir(defaultPath); taosMkDir(defaultPath); - - stInfo("clear task backend path:%s, done", defaultPath); + stInfo("clear task backend dir:%s, done", defaultPath); } if (taosIsDir(checkpointPath) && isValidCheckpoint(checkpointPath)) { @@ -506,11 +504,12 @@ static int32_t rebuildFromLocalCheckpoint(char* pTaskIdStr, const char* checkpoi pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(errno)), defaultPath); code = TSDB_CODE_SUCCESS; } else { - stInfo("%s start to restart stream backend at checkpoint path: %s", pTaskIdStr, checkpointPath); + stInfo("%s copy checkpoint data from:%s to:%s succ, try to start stream backend", pTaskIdStr, checkpointPath, + defaultPath); } } else { code = TSDB_CODE_FAILED; - stError("%s not valid checkpoint path/data in:%s", pTaskIdStr, checkpointPath); + stError("%s no valid checkpoint data for checkpointId:%" PRId64 " in %s", pTaskIdStr, chkptId, checkpointPath); } return code; @@ -522,7 +521,6 @@ int32_t rebuildFromlocalDefault(char* key, char* chkpPath, int64_t chkpId, char* } int32_t rebuildDirFormCheckpoint(const char* path, char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath) { - // impl later int32_t code = 0; char* prefixPath = taosMemoryCalloc(1, strlen(path) + 128); @@ -538,8 +536,7 @@ int32_t rebuildDirFormCheckpoint(const char* path, char* key, int64_t chkptId, c if (!taosIsDir(defaultPath)) { taosMulMkDir(defaultPath); } - - stDebug("prepare local dir:%s, checkpointId:%" PRId64 ", key:%s succ", defaultPath, chkptId, key); + stDebug("local default dir:%s, checkpointId:%" PRId64 ", key:%s succ", defaultPath, chkptId, key); char* chkptPath = taosMemoryCalloc(1, strlen(path) + 256); if (chkptId != 0) { From 3dfffe91130dbc57893bb2fd0f472817c3621a18 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 13 May 2024 15:31:26 +0800 Subject: [PATCH 
06/67] fix(stream): async load and start stream tasks. --- include/libs/stream/tstream.h | 4 +++ source/common/src/rsync.c | 36 ++++++++++++------- source/dnode/snode/src/snode.c | 11 +++--- source/dnode/vnode/src/tq/tq.c | 7 ++-- source/dnode/vnode/src/tq/tqMeta.c | 30 +--------------- source/dnode/vnode/src/tqCommon/tqCommon.c | 10 ++++-- source/dnode/vnode/src/vnd/vnodeSync.c | 26 +++++++------- source/libs/stream/src/streamBackendRocksdb.c | 2 +- source/libs/stream/src/streamCheckpoint.c | 12 +++---- source/libs/stream/src/streamSched.c | 6 +++- 10 files changed, 69 insertions(+), 75 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 9d5b7bc6f1..bddc76cfb5 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -58,6 +58,10 @@ extern "C" { #define STREAM_EXEC_T_STOP_ALL_TASKS (-5) #define STREAM_EXEC_T_RESUME_TASK (-6) #define STREAM_EXEC_T_ADD_FAILED_TASK (-7) +// the load and start stream task should be executed after snode has started successfully, since the load of stream +// tasks may incur the download of checkpoint data from remote, which may consume significant network and CPU resources. 
+#define STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS (-8) +#define STREAM_EXEC_T_LOAD_ALL_TASKS (-9) typedef struct SStreamTask SStreamTask; typedef struct SStreamQueue SStreamQueue; diff --git a/source/common/src/rsync.c b/source/common/src/rsync.c index 302f17942f..149c36cec7 100644 --- a/source/common/src/rsync.c +++ b/source/common/src/rsync.c @@ -130,6 +130,7 @@ void startRsync() { uError("[rsync] build checkpoint backup dir failed, dir:%s,"ERRNO_ERR_FORMAT, tsCheckpointBackupDir, ERRNO_ERR_DATA); return; } + removeEmptyDir(); char confDir[PATH_MAX] = {0}; @@ -186,18 +187,20 @@ int32_t uploadRsync(const char* id, const char* path) { if (code != 0) { uError("[rsync] s-task:%s upload checkpoint data in %s to %s failed, code:%d," ERRNO_ERR_FORMAT, id, path, tsSnodeAddress, code, ERRNO_ERR_DATA); - return -1; + } else { + int64_t el = (taosGetTimestampMs() - st); + uDebug("[rsync] s-task:%s upload checkpoint data in:%s to %s successfully, elapsed time:%" PRId64 "ms", id, path, + tsSnodeAddress, el); } - int64_t el = (taosGetTimestampMs() - st); - uDebug("[rsync] s-task:%s upload checkpoint data in:%s to %s successfully, elapsed time:%" PRId64 "ms", id, path, - tsSnodeAddress, el); - return 0; + return code; } int32_t downloadRsync(const char* id, const char* path) { int64_t st = taosGetTimestampMs(); - uDebug("[rsync] %s start to sync data from remote to local:%s", id, path); + int32_t MAX_RETRY = 60; + int32_t times = 0; + int32_t code = 0; #ifdef WINDOWS char pathTransform[PATH_MAX] = {0}; @@ -205,7 +208,7 @@ int32_t downloadRsync(const char* id, const char* path) { #endif char command[PATH_MAX] = {0}; - snprintf(command, PATH_MAX, "rsync -av --timeout=10 --bwlimit=100000 rsync://%s/checkpoint/%s/ %s", + snprintf(command, PATH_MAX, "rsync -av --debug=all --timeout=10 --bwlimit=100000 rsync://%s/checkpoint/%s/ %s", tsSnodeAddress, id, #ifdef WINDOWS pathTransform @@ -214,13 +217,20 @@ int32_t downloadRsync(const char* id, const char* path) { #endif ); - int32_t code = 
execCommand(command); + uDebug("[rsync] %s start to sync data from remote to local:%s, %s", id, path, command); - int32_t el = taosGetTimestampMs() - st; - if (code != 0) { - uError("[rsync] %s download checkpoint data:%s failed, code:%d," ERRNO_ERR_FORMAT, id, path, code, ERRNO_ERR_DATA); - } else { - uDebug("[rsync] %s download checkpoint data:%s successfully, elapsed time:%dms", id, path, el); + while(times++ < MAX_RETRY) { + + code = execCommand(command); + if (code != TSDB_CODE_SUCCESS) { + uError("[rsync] %s download checkpoint data:%s failed, retry after 1sec, code:%d," ERRNO_ERR_FORMAT, id, path, code, + ERRNO_ERR_DATA); + taosSsleep(1); + } else { + int32_t el = taosGetTimestampMs() - st; + uDebug("[rsync] %s download checkpoint data:%s successfully, elapsed time:%dms", id, path, el); + break; + } } return code; diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 602264be73..e1b51e3c1a 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -86,6 +86,9 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { return NULL; } + stopRsync(); + startRsync(); + pSnode->msgCb = pOption->msgCb; pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, taosGetTimestampMs(), tqStartTaskCompleteCallback); if (pSnode->pMeta == NULL) { @@ -93,11 +96,6 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { goto FAIL; } - streamMetaLoadAllTasks(pSnode->pMeta); - - stopRsync(); - startRsync(); - return pSnode; FAIL: @@ -106,8 +104,7 @@ FAIL: } int32_t sndInit(SSnode *pSnode) { - streamMetaResetTaskStatus(pSnode->pMeta); - streamMetaStartAllTasks(pSnode->pMeta); + streamTaskSchedTask(&pSnode->msgCb, pSnode->pMeta->vgId, 0, 0, STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS); return 0; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index b75d517997..19c0f60063 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ 
-92,8 +92,6 @@ int32_t tqInitialize(STQ* pTq) { return -1; } - streamMetaLoadAllTasks(pTq->pStreamMeta); - if (tqMetaTransform(pTq) < 0) { return -1; } @@ -800,6 +798,11 @@ int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { + if (!pTq->pVnode->restored) { + tqDebug("vgId:%d not restored, ignore the stream task deploy msg", TD_VID(pTq->pVnode)); + return TSDB_CODE_SUCCESS; + } + return tqStreamTaskProcessDeployReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, sversion, msg, msgLen, vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored); } diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index 76322c527f..404cbf26dd 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -178,6 +178,7 @@ int32_t tqMetaRestoreCheckInfo(STQ* pTq) { goto END; } } + END: tdbFree(pKey); tdbFree(pVal); @@ -514,35 +515,6 @@ int32_t tqMetaTransform(STQ* pTq) { return code; } -//int32_t tqMetaRestoreHandle(STQ* pTq) { -// int code = 0; -// TBC* pCur = NULL; -// if (tdbTbcOpen(pTq->pExecStore, &pCur, NULL) < 0) { -// return -1; -// } -// -// void* pKey = NULL; -// int kLen = 0; -// void* pVal = NULL; -// int vLen = 0; -// -// tdbTbcMoveToFirst(pCur); -// -// while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { -// STqHandle handle = {0}; -// code = restoreHandle(pTq, pVal, vLen, &handle); -// if (code < 0) { -// tqDestroyTqHandle(&handle); -// break; -// } -// } -// -// tdbFree(pKey); -// tdbFree(pVal); -// tdbTbcClose(pCur); -// return code; -//} - int32_t tqMetaGetHandle(STQ* pTq, const char* key) { void* pVal = NULL; int vLen = 0; diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index 91de290e6a..cbd047cf88 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -158,10 +158,9 @@ int32_t 
tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask == NULL || *ppTask == NULL) { - tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped", vgId, req.taskId); + tqError("vgId:%d failed to acquire task:0x%x when handling update task epset, it may have been dropped", vgId, req.taskId); rsp.code = TSDB_CODE_SUCCESS; streamMetaWUnLock(pMeta); - taosArrayDestroy(req.pNodeList); return rsp.code; } @@ -739,6 +738,13 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead } else if (type == STREAM_EXEC_T_ADD_FAILED_TASK) { int32_t code = streamMetaAddFailedTask(pMeta, pReq->streamId, pReq->taskId); return code; + } else if (type == STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS) { + streamMetaLoadAllTasks(pMeta); + int32_t code = streamMetaStartAllTasks(pMeta); + return code; + } else if (type == STREAM_EXEC_T_LOAD_ALL_TASKS) { + streamMetaLoadAllTasks(pMeta); + return 0; } else if (type == STREAM_EXEC_T_RESUME_TASK) { // task resume to run after idle for a while SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 166a230c76..d2c20500be 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -576,24 +576,22 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) if (tsDisableStream) { vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId); } else { - vInfo("vgId:%d sync restore finished, start to launch stream task(s)", pVnode->config.vgId); - int32_t numOfTasks = tqStreamTasksGetTotalNum(pMeta); - if (numOfTasks > 0) { - if (pMeta->startInfo.startAllTasks == 1) { - pMeta->startInfo.restartCount 
+= 1; - tqDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId, - pMeta->startInfo.restartCount); - } else { - pMeta->startInfo.startAllTasks = 1; + vInfo("vgId:%d sync restore finished, start to load and launch stream task(s)", pVnode->config.vgId); + if (pMeta->startInfo.startAllTasks == 1) { + pMeta->startInfo.restartCount += 1; + tqDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId, + pMeta->startInfo.restartCount); + } else { + pMeta->startInfo.startAllTasks = 1; + streamMetaWUnLock(pMeta); - streamMetaWUnLock(pMeta); - tqStreamTaskStartAsync(pMeta, &pVnode->msgCb, false); - return; - } + streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS); + return; } } } else { - vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId); + vInfo("vgId:%d, sync restore finished, load stream tasks, not start tasks since not leader", vgId); + streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_LOAD_ALL_TASKS); } streamMetaWUnLock(pMeta); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 1a05acf3eb..dd897dc431 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -490,7 +490,7 @@ static int32_t rebuildFromLocalCheckpoint(char* pTaskIdStr, const char* checkpoi if (taosIsDir(defaultPath)) { taosRemoveDir(defaultPath); taosMkDir(defaultPath); - stInfo("clear task backend dir:%s, done", defaultPath); + stInfo("clear local backend dir:%s, done", defaultPath); } if (taosIsDir(checkpointPath) && isValidCheckpoint(checkpointPath)) { diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 2f1771279f..1b6c9cc791 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -30,7 +30,7 @@ typedef 
struct { static int32_t downloadCheckpointDataByName(const char* id, const char* fname, const char* dstName); static int32_t deleteCheckpointFile(const char* id, const char* name); -static int32_t streamTaskBackupCheckpoint(const char* id, const char* path); +static int32_t streamTaskUploadCheckpoint(const char* id, const char* path); static int32_t deleteCheckpoint(const char* id); static int32_t downloadCheckpointByNameS3(const char* id, const char* fname, const char* dstName); static int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask); @@ -321,7 +321,7 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l int32_t code = downloadCheckpointDataByName(id, "META", file); if (code != 0) { - stDebug("chkp failed to download meta file:%s", file); + stDebug("%s chkp failed to download meta file:%s", id, file); taosMemoryFree(file); return code; } @@ -379,7 +379,7 @@ int32_t uploadCheckpointData(void* param) { } if (code == TSDB_CODE_SUCCESS) { - code = streamTaskBackupCheckpoint(pParam->taskId, path); + code = streamTaskUploadCheckpoint(pParam->taskId, path); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s failed to upload checkpoint data:%s, checkpointId:%" PRId64, taskStr, path, pParam->chkpId); } else { @@ -562,9 +562,9 @@ ECHECKPOINT_BACKUP_TYPE streamGetCheckpointBackupType() { } } -int32_t streamTaskBackupCheckpoint(const char* id, const char* path) { +int32_t streamTaskUploadCheckpoint(const char* id, const char* path) { if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) { - stError("streamTaskBackupCheckpoint parameters invalid"); + stError("invalid parameters in upload checkpoint, %s", id); return -1; } @@ -580,7 +580,7 @@ int32_t streamTaskBackupCheckpoint(const char* id, const char* path) { // fileName: CURRENT int32_t downloadCheckpointDataByName(const char* id, const char* fname, const char* dstName) { if (id == NULL || fname == NULL 
|| strlen(id) == 0 || strlen(fname) == 0 || strlen(fname) >= PATH_MAX) { - stError("uploadCheckpointByName parameters invalid"); + stError("down load checkpoint data parameters invalid"); return -1; } diff --git a/source/libs/stream/src/streamSched.c b/source/libs/stream/src/streamSched.c index 52e7431e70..9bd12a4fd8 100644 --- a/source/libs/stream/src/streamSched.c +++ b/source/libs/stream/src/streamSched.c @@ -52,7 +52,11 @@ int32_t streamTaskSchedTask(SMsgCb* pMsgCb, int32_t vgId, int64_t streamId, int3 return -1; } - stDebug("vgId:%d create msg to start stream task:0x%x, exec type:%d", vgId, taskId, execType); + if (streamId != 0) { + stDebug("vgId:%d create msg to start stream task:0x%x, exec type:%d", vgId, taskId, execType); + } else { + stDebug("vgId:%d create msg to exec, type:%d", vgId, execType); + } pRunReq->head.vgId = vgId; pRunReq->streamId = streamId; From b7e7de0354709de9f422d071464085e935b3227a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 13 May 2024 16:45:38 +0800 Subject: [PATCH 07/67] fix(stream): set the stream task load flag. --- include/libs/stream/tstream.h | 1 + source/dnode/vnode/src/vnd/vnodeSync.c | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index bddc76cfb5..9e376b9792 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -491,6 +491,7 @@ typedef struct SStreamMeta { int32_t vgId; int64_t stage; int32_t role; + bool taskLoadFlag; bool closeFlag; bool sendMsgBeforeClosing; // send hb to mnode before close all tasks when switch to follower. 
STaskStartInfo startInfo; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index d2c20500be..00203c7bb1 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -576,22 +576,37 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) if (tsDisableStream) { vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId); } else { - vInfo("vgId:%d sync restore finished, start to load and launch stream task(s)", pVnode->config.vgId); + vInfo("vgId:%d sync restore finished, start to load and launch stream task(s)", vgId); if (pMeta->startInfo.startAllTasks == 1) { pMeta->startInfo.restartCount += 1; - tqDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId, + vDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId, pMeta->startInfo.restartCount); } else { pMeta->startInfo.startAllTasks = 1; + + bool loadTaskInfo = pMeta->taskLoadFlag; + pMeta->taskLoadFlag = true; streamMetaWUnLock(pMeta); - streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS); + if (loadTaskInfo) { + tqInfo("vgId:%d stream task already loaded, start them", vgId); + streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_START_ALL_TASKS); + } else { + tqInfo("vgId:%d start load and launch stream task(s)", vgId); + streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS); + } + return; } } } else { - vInfo("vgId:%d, sync restore finished, load stream tasks, not start tasks since not leader", vgId); - streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_LOAD_ALL_TASKS); + if (!pMeta->taskLoadFlag) { + pMeta->taskLoadFlag = true; + vInfo("vgId:%d, sync restore finished, load stream tasks, not start tasks since not leader", vgId); + 
streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_LOAD_ALL_TASKS); + } else { + vInfo("vgId:%d, sync restore finished, not load stream tasks since already loaded for follower"); + } } streamMetaWUnLock(pMeta); From fb248b2682257911ea6def4d942dcdb1da4eecb7 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 13 May 2024 17:26:39 +0800 Subject: [PATCH 08/67] fix(stream): not save the stream task for rsma. --- source/libs/stream/src/streamCheckpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 1b6c9cc791..389eccd66d 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -469,8 +469,8 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { } } - // clear the checkpoint info, and commit the newest checkpoint info if all works are done successfully - if (code == TSDB_CODE_SUCCESS) { + // update the latest checkpoint info if all works are done successfully, for rsma, the pMsgCb is null. + if (code == TSDB_CODE_SUCCESS && (pTask->pMsgCb != NULL)) { STaskId* pHTaskId = &pTask->hTaskInfo.id; code = streamBuildAndSendCheckpointUpdateMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id, pHTaskId, &pTask->chkInfo, dropRelHTask); From 6898eba6edf9cb493413e1542c06239739ca96f8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 May 2024 14:34:31 +0800 Subject: [PATCH 09/67] fix(stream): split stream task expansion into two phases: first build the stream task, then expand it before starting the stream tasks.
--- include/dnode/vnode/tqCommon.h | 2 +- include/libs/executor/storageapi.h | 2 +- include/libs/function/function.h | 1 + include/libs/stream/streamState.h | 2 +- include/libs/stream/tstream.h | 13 ++-- source/common/src/rsync.c | 2 +- source/dnode/snode/src/snode.c | 10 +-- source/dnode/vnode/src/tq/tq.c | 37 +++++----- source/dnode/vnode/src/tq/tqRead.c | 2 +- source/dnode/vnode/src/tqCommon/tqCommon.c | 26 ++++--- source/dnode/vnode/src/vnd/vnodeSync.c | 23 +----- source/libs/stream/inc/streamBackendRocksdb.h | 2 +- source/libs/stream/inc/streamInt.h | 2 +- source/libs/stream/src/streamBackendRocksdb.c | 73 ++++++++++--------- source/libs/stream/src/streamCheckpoint.c | 11 ++- source/libs/stream/src/streamMeta.c | 25 +++++-- source/libs/stream/src/streamState.c | 4 +- 17 files changed, 122 insertions(+), 115 deletions(-) diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h index 451f9a00eb..0cde499a6b 100644 --- a/include/dnode/vnode/tqCommon.h +++ b/include/dnode/vnode/tqCommon.h @@ -40,7 +40,7 @@ int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg); int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* pMsg, bool fromVnode); int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, char* msg, int32_t msgLen); -int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode); +int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta); void tqSetRestoreVersionInfo(SStreamTask* pTask); #endif // TDENGINE_TQ_COMMON_H diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index ec92bd56dd..7042ec2d15 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -410,7 +410,7 @@ typedef struct SStateStore { void (*streamFileStateClear)(struct SStreamFileState* pFileState); bool (*needClearDiskBuff)(struct SStreamFileState* pFileState); - SStreamState* (*streamStateOpen)(char* path, void* pTask, bool 
specPath, int32_t szPage, int32_t pages); + SStreamState* (*streamStateOpen)(const char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages); void (*streamStateClose)(SStreamState* pState, bool remove); int32_t (*streamStateBegin)(SStreamState* pState); int32_t (*streamStateCommit)(SStreamState* pState); diff --git a/include/libs/function/function.h b/include/libs/function/function.h index 0afda2e160..87bbe21133 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -150,6 +150,7 @@ typedef struct SBackendCfWrapper { int64_t backendId; char idstr[64]; } SBackendCfWrapper; + typedef struct STdbState { SBackendCfWrapper *pBackendCfWrapper; int64_t backendCfWrapperId; diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index 7813b2cc9a..ae5a733ae9 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -29,7 +29,7 @@ extern "C" { #include "storageapi.h" -SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages); +SStreamState* streamStateOpen(const char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages); void streamStateClose(SStreamState* pState, bool remove); int32_t streamStateBegin(SStreamState* pState); int32_t streamStateCommit(SStreamState* pState); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 9e376b9792..1f5aa46f49 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -61,7 +61,6 @@ extern "C" { // the load and start stream task should be executed after snode has started successfully, since the load of stream // tasks may incur the download of checkpoint data from remote, which may consume significant network and CPU resources. 
#define STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS (-8) -#define STREAM_EXEC_T_LOAD_ALL_TASKS (-9) typedef struct SStreamTask SStreamTask; typedef struct SStreamQueue SStreamQueue; @@ -156,8 +155,6 @@ typedef enum EStreamTaskEvent { TASK_EVENT_DROPPING = 0xA, } EStreamTaskEvent; -typedef int32_t (*__state_trans_user_fn)(SStreamTask*, void* param); - typedef void FTbSink(SStreamTask* pTask, void* vnode, void* data); typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data); typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); @@ -491,7 +488,6 @@ typedef struct SStreamMeta { int32_t vgId; int64_t stage; int32_t role; - bool taskLoadFlag; bool closeFlag; bool sendMsgBeforeClosing; // send hb to mnode before close all tasks when switch to follower. STaskStartInfo startInfo; @@ -522,6 +518,9 @@ typedef struct STaskUpdateEntry { int32_t transId; } STaskUpdateEntry; +typedef int32_t (*__state_trans_user_fn)(SStreamTask*, void* param); +typedef int32_t (*__stream_task_expand_fn)(struct SStreamTask* pTask); + SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, SEpSet* pEpset, bool fillHistory, int64_t triggerParam, SArray* pTaskList, bool hasFillhistory, int8_t subtableWithoutMd5); int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); @@ -675,7 +674,7 @@ int32_t streamTaskReleaseState(SStreamTask* pTask); int32_t streamTaskReloadState(SStreamTask* pTask); void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); -int32_t streamTaskSetDb(SStreamMeta* pMeta, void* pTask, char* key); +int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key); bool streamTaskIsSinkTask(const SStreamTask* pTask); void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask); @@ -723,9 +722,9 @@ void streamMetaResetStartInfo(STaskStartInfo* pMeta); SArray* streamMetaSendMsgBeforeCloseTasks(SStreamMeta* pMeta); void 
streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool isLeader); void streamMetaLoadAllTasks(SStreamMeta* pMeta); -int32_t streamMetaStartAllTasks(SStreamMeta* pMeta); +int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn fn); int32_t streamMetaStopAllTasks(SStreamMeta* pMeta); -int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); +int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId, __stream_task_expand_fn fn); bool streamMetaAllTasksReady(const SStreamMeta* pMeta); // timer diff --git a/source/common/src/rsync.c b/source/common/src/rsync.c index 149c36cec7..c4d14a6c2c 100644 --- a/source/common/src/rsync.c +++ b/source/common/src/rsync.c @@ -217,7 +217,7 @@ int32_t downloadRsync(const char* id, const char* path) { #endif ); - uDebug("[rsync] %s start to sync data from remote to local:%s, %s", id, path, command); + uDebug("[rsync] %s start to sync data from remote to:%s, %s", id, path, command); while(times++ < MAX_RETRY) { diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index e1b51e3c1a..ac10aa83a4 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -45,15 +45,10 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer if (code != TSDB_CODE_SUCCESS) { return code; } + pTask->pBackend = NULL; - streamTaskOpenAllUpstreamInput(pTask); - code = tqExpandStreamTask(pTask, pSnode->pMeta, NULL); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); @@ -96,6 +91,7 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { goto FAIL; } + streamMetaLoadAllTasks(pSnode->pMeta); return pSnode; FAIL: @@ -104,7 +100,7 @@ FAIL: } int32_t sndInit(SSnode *pSnode) { - streamTaskSchedTask(&pSnode->msgCb, pSnode->pMeta->vgId, 0, 0, STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS); + streamTaskSchedTask(&pSnode->msgCb, 
pSnode->pMeta->vgId, 0, 0, STREAM_EXEC_T_START_ALL_TASKS); return 0; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 19c0f60063..a59a235c50 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -92,6 +92,8 @@ int32_t tqInitialize(STQ* pTq) { return -1; } + streamMetaLoadAllTasks(pTq->pStreamMeta); + if (tqMetaTransform(pTq) < 0) { return -1; } @@ -715,17 +717,18 @@ static void freePtr(void* ptr) { taosMemoryFree(*(void**)ptr); } int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { int32_t vgId = TD_VID(pTq->pVnode); - tqDebug("s-task:0x%x start to expand task", pTask->id.taskId); + tqDebug("s-task:0x%x start to build task", pTask->id.taskId); int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, nextProcessVer); if (code != TSDB_CODE_SUCCESS) { return code; } - code = tqExpandStreamTask(pTask, pTq->pStreamMeta, pTq->pVnode); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + pTask->pBackend = NULL; +// code = tqExpandStreamTask(pTask, pTq->pStreamMeta); +// if (code != TSDB_CODE_SUCCESS) { +// return code; +// } // sink STaskOutputInfo* pOutputInfo = &pTask->outputInfo; @@ -768,7 +771,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { const char* pNext = streamTaskGetStatusStr(pTask->status.taskStatus); if (pTask->info.fillHistory) { - tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + tqInfo("vgId:%d build stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, cur-status:%s, next-status:%s fill-history:%d, related stream task:0x%x " "trigger:%" PRId64 " ms, inputVer:%" PRId64, @@ -777,7 +780,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { (int32_t)pTask->streamTaskId.taskId, pTask->info.triggerParam, nextProcessVer); } else { tqInfo( - "vgId:%d expand stream task, 
s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + "vgId:%d build stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, cur-status:%s next-status:%s fill-history:%d, related fill-task:0x%x trigger:%" PRId64 " ms, inputVer:%" PRId64, @@ -798,10 +801,10 @@ int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { - if (!pTq->pVnode->restored) { - tqDebug("vgId:%d not restored, ignore the stream task deploy msg", TD_VID(pTq->pVnode)); - return TSDB_CODE_SUCCESS; - } +// if (!pTq->pVnode->restored) { +// tqDebug("vgId:%d not restored, ignore the stream task deploy msg", TD_VID(pTq->pVnode)); +// return TSDB_CODE_SUCCESS; +// } return tqStreamTaskProcessDeployReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, sversion, msg, msgLen, vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored); @@ -1016,12 +1019,12 @@ int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen) { int32_t vgId = TD_VID(pTq->pVnode); SVUpdateCheckpointInfoReq* pReq = (SVUpdateCheckpointInfoReq*)msg; - if (!pTq->pVnode->restored) { - tqDebug("vgId:%d update-checkpoint-info msg received during restoring, checkpointId:%" PRId64 - ", transId:%d s-task:0x%x ignore it", - vgId, pReq->checkpointId, pReq->transId, pReq->taskId); - return TSDB_CODE_SUCCESS; - } +// if (!pTq->pVnode->restored) { +// tqDebug("vgId:%d update-checkpoint-info msg received during restoring, checkpointId:%" PRId64 +// ", transId:%d s-task:0x%x ignore it", +// vgId, pReq->checkpointId, pReq->transId, pReq->taskId); +// return TSDB_CODE_SUCCESS; +// } return tqStreamTaskProcessUpdateCheckpointReq(pTq->pStreamMeta, msg, msgLen); } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 516a47606b..7224657b73 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -1089,7 +1089,7 @@ 
int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { } SStreamTask* pTask = *(SStreamTask**)pIter; - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && (pTask->exec.pExecutor != NULL)) { int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd); if (code != 0) { tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr); diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index cbd047cf88..dbda3a4541 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -39,9 +39,13 @@ static void restoreStreamTaskId(SStreamTask* pTask, STaskId* pId) { pTask->id.streamId = pId->streamId; } -int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode) { - int32_t vgId = pMeta->vgId; - STaskId taskId = {0}; +int32_t tqExpandStreamTask(SStreamTask* pTask) { + SStreamMeta* pMeta = pTask->pMeta; + int32_t vgId = pMeta->vgId; + STaskId taskId = {0}; + int64_t st = taosGetTimestampMs(); + + tqDebug("s-task:%s vgId:%d start to expand stream task", pTask->id.idStr, vgId); if (pTask->info.fillHistory) { taskId = replaceStreamTaskId(pTask); @@ -67,7 +71,7 @@ int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode) }; if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - handle.vnode = pVnode; + handle.vnode = ((STQ*)pMeta->ahandle)->pVnode; handle.initTqReader = 1; } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { handle.numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList); @@ -84,6 +88,9 @@ int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode) qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); } + double el = (taosGetTimestampMs() - st) / 1000.0; + tqDebug("s-task:%s vgId:%d expand stream task completed, elapsed time:%.2fsec", 
pTask->id.idStr, vgId, el); + return TSDB_CODE_SUCCESS; } @@ -706,7 +713,7 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { streamMetaResetTaskStatus(pMeta); streamMetaWUnLock(pMeta); - streamMetaStartAllTasks(pMeta); + streamMetaStartAllTasks(pMeta, tqExpandStreamTask); } else { streamMetaResetStartInfo(&pMeta->startInfo); streamMetaWUnLock(pMeta); @@ -724,10 +731,10 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead int32_t vgId = pMeta->vgId; if (type == STREAM_EXEC_T_START_ONE_TASK) { - streamMetaStartOneTask(pMeta, pReq->streamId, pReq->taskId); + streamMetaStartOneTask(pMeta, pReq->streamId, pReq->taskId, tqExpandStreamTask); return 0; } else if (type == STREAM_EXEC_T_START_ALL_TASKS) { - streamMetaStartAllTasks(pMeta); + streamMetaStartAllTasks(pMeta, tqExpandStreamTask); return 0; } else if (type == STREAM_EXEC_T_RESTART_ALL_TASKS) { restartStreamTasks(pMeta, isLeader); @@ -740,11 +747,8 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead return code; } else if (type == STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS) { streamMetaLoadAllTasks(pMeta); - int32_t code = streamMetaStartAllTasks(pMeta); + int32_t code = streamMetaStartAllTasks(pMeta, tqExpandStreamTask); return code; - } else if (type == STREAM_EXEC_T_LOAD_ALL_TASKS) { - streamMetaLoadAllTasks(pMeta); - return 0; } else if (type == STREAM_EXEC_T_RESUME_TASK) { // task resume to run after idle for a while SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 00203c7bb1..8f28871e3b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -576,37 +576,22 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) if (tsDisableStream) { vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are 
disabled", vgId); } else { - vInfo("vgId:%d sync restore finished, start to load and launch stream task(s)", vgId); + vInfo("vgId:%d sync restore finished, start to launch stream task(s)", vgId); if (pMeta->startInfo.startAllTasks == 1) { pMeta->startInfo.restartCount += 1; vDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId, pMeta->startInfo.restartCount); } else { pMeta->startInfo.startAllTasks = 1; - - bool loadTaskInfo = pMeta->taskLoadFlag; - pMeta->taskLoadFlag = true; streamMetaWUnLock(pMeta); - if (loadTaskInfo) { - tqInfo("vgId:%d stream task already loaded, start them", vgId); - streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_START_ALL_TASKS); - } else { - tqInfo("vgId:%d start load and launch stream task(s)", vgId); - streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS); - } - + tqInfo("vgId:%d stream task already loaded, start them", vgId); + streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_START_ALL_TASKS); return; } } } else { - if (!pMeta->taskLoadFlag) { - pMeta->taskLoadFlag = true; - vInfo("vgId:%d, sync restore finished, load stream tasks, not start tasks since not leader", vgId); - streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_LOAD_ALL_TASKS); - } else { - vInfo("vgId:%d, sync restore finished, not load stream tasks since already loaded for follower"); - } + vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId); } streamMetaWUnLock(pMeta); diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index fbf902a237..48a9d07a5d 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -141,7 +141,7 @@ SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); int32_t 
streamStateCvtDataFormat(char* path, char* key, void* cfInst); -STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkptId); +STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId); void taskDbDestroy(void* pBackend, bool flush); void taskDbDestroy2(void* pBackend); int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index da8a24e6da..0ac10fe9fe 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -164,7 +164,7 @@ typedef enum ECHECKPOINT_BACKUP_TYPE { ECHECKPOINT_BACKUP_TYPE streamGetCheckpointBackupType(); -int32_t streamTaskDownloadCheckpointData(char* id, char* path); +int32_t streamTaskDownloadCheckpointData(const char* id, char* path); int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask); int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index dd897dc431..ad5d759b0b 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -40,7 +40,7 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t void destroyRocksdbCfInst(RocksdbCfInst* inst); int32_t getCfIdx(const char* cfName); -STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath); +STaskDbWrapper* taskDbOpenImpl(const char* key, char* statePath, char* dbPath); static int32_t backendCopyFiles(const char* src, const char* dst); @@ -325,7 +325,7 @@ int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { return complete == 1 ? 
0 : -1; } -int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { +int32_t rebuildFromRemoteChkp_rsync(const char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { int32_t code = 0; if (taosIsDir(chkptPath)) { taosRemoveDir(chkptPath); @@ -335,7 +335,7 @@ int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkptPath, int64_t checkpoi if (taosIsDir(defaultPath)) { taosRemoveDir(defaultPath); taosMulMkDir(defaultPath); - stDebug("clear local backend dir:%s succ", defaultPath); + stDebug("clear local default dir before download checkpoint data:%s succ", defaultPath); } code = streamTaskDownloadCheckpointData(key, chkptPath); @@ -348,7 +348,7 @@ int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkptPath, int64_t checkpoi return backendCopyFiles(chkptPath, defaultPath); } -int32_t rebuildFromRemoteChkp_s3(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { +int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { int32_t code = streamTaskDownloadCheckpointData(key, chkpPath); if (code != 0) { return code; @@ -383,14 +383,14 @@ int32_t rebuildFromRemoteChkp_s3(char* key, char* chkpPath, int64_t chkpId, char return code; } -int32_t rebuildFromRemoteCheckpoint(char* key, char* chkpPath, int64_t checkpointId, char* defaultPath) { +int32_t rebuildFromRemoteCheckpoint(const char* key, char* chkpPath, int64_t checkpointId, char* defaultPath) { ECHECKPOINT_BACKUP_TYPE type = streamGetCheckpointBackupType(); if (type == DATA_UPLOAD_S3) { return rebuildFromRemoteChkp_s3(key, chkpPath, checkpointId, defaultPath); } else if (type == DATA_UPLOAD_RSYNC) { return rebuildFromRemoteChkp_rsync(key, chkpPath, checkpointId, defaultPath); } else { - stError("%s not remote backup checkpoint data for:%"PRId64, key, checkpointId); + stError("%s not remote backup checkpoint data for:%" PRId64" restore ", key, checkpointId); } return -1; @@ -484,24 +484,26 @@ int32_t 
backendCopyFiles(const char* src, const char* dst) { return backendFileCopyFilesImpl(src, dst); } -static int32_t rebuildFromLocalCheckpoint(char* pTaskIdStr, const char* checkpointPath, int64_t chkptId, const char* defaultPath) { +static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* checkpointPath, int64_t checkpointId, + const char* defaultPath) { int32_t code = 0; if (taosIsDir(defaultPath)) { taosRemoveDir(defaultPath); taosMkDir(defaultPath); - stInfo("clear local backend dir:%s, done", defaultPath); + stInfo("%s clear local backend dir:%s, succ", pTaskIdStr, defaultPath); } if (taosIsDir(checkpointPath) && isValidCheckpoint(checkpointPath)) { - code = backendCopyFiles(checkpointPath, defaultPath); + stDebug("%s local checkpoint data existed, checkpointId:%d copy to backend dir", pTaskIdStr, checkpointId); + code = backendCopyFiles(checkpointPath, defaultPath); if (code != TSDB_CODE_SUCCESS) { taosRemoveDir(defaultPath); taosMkDir(defaultPath); - stError("%s failed to restart stream backend from %s, reason: %s, start to restart from empty path: %s", - pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(errno)), defaultPath); + stError("%s failed to start stream backend from local %s, reason:%s, try download checkpoint from remote", + pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(errno))); code = TSDB_CODE_SUCCESS; } else { stInfo("%s copy checkpoint data from:%s to:%s succ, try to start stream backend", pTaskIdStr, checkpointPath, @@ -509,18 +511,13 @@ static int32_t rebuildFromLocalCheckpoint(char* pTaskIdStr, const char* checkpoi } } else { code = TSDB_CODE_FAILED; - stError("%s no valid checkpoint data for checkpointId:%" PRId64 " in %s", pTaskIdStr, chkptId, checkpointPath); + stError("%s no valid data for checkpointId:%" PRId64 " in %s", pTaskIdStr, checkpointId, checkpointPath); } return code; } -int32_t rebuildFromlocalDefault(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { - int32_t code = 0; - 
return code; -} - -int32_t rebuildDirFormCheckpoint(const char* path, char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath) { +int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath) { int32_t code = 0; char* prefixPath = taosMemoryCalloc(1, strlen(path) + 128); @@ -533,13 +530,23 @@ int32_t rebuildDirFormCheckpoint(const char* path, char* key, int64_t chkptId, c char* defaultPath = taosMemoryCalloc(1, strlen(path) + 256); sprintf(defaultPath, "%s%s%s", prefixPath, TD_DIRSEP, "state"); + if (!taosIsDir(defaultPath)) { taosMulMkDir(defaultPath); } - stDebug("local default dir:%s, checkpointId:%" PRId64 ", key:%s succ", defaultPath, chkptId, key); + + char* checkpointRoot = taosMemoryCalloc(1, strlen(path) + 256); + sprintf(checkpointRoot, "%s%s%s", prefixPath, TD_DIRSEP, "checkpoints"); + + if (!taosIsDir(checkpointRoot)) { + taosMulMkDir(checkpointRoot); + } + taosMemoryFree(checkpointRoot); + + stDebug("%s check local default:%s, checkpointId:%" PRId64 " succ", key, defaultPath, chkptId); char* chkptPath = taosMemoryCalloc(1, strlen(path) + 256); - if (chkptId != 0) { + if (chkptId > 0) { sprintf(chkptPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkptId); code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath); @@ -550,16 +557,9 @@ int32_t rebuildDirFormCheckpoint(const char* path, char* key, int64_t chkptId, c if (code != 0) { stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s", chkptPath, tstrerror(code), defaultPath); - code = taosMkDir(defaultPath); - } - } else { - sprintf(chkptPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", - (int64_t)-1); - - code = rebuildFromLocalCheckpoint(key, chkptPath, -1, defaultPath); - if (code != 0) { - code = taosMkDir(defaultPath); } + } else { // no valid checkpoint id + stInfo("%s no valid checkpoint 
ever generated, no need to copy checkpoint data", key); } taosMemoryFree(chkptPath); @@ -646,7 +646,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { if (cfs != NULL) { rocksdb_list_column_families_destroy(cfs, nCf); } - stDebug("succ to init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId); + stDebug("%s init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId); taosMemoryFreeClear(backendPath); return (void*)pHandle; @@ -1933,6 +1933,7 @@ int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** sta *stateFullPath = statePath; return 0; } + void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId) { STaskDbWrapper* p = pTaskDb; taosThreadMutexLock(&p->mutex); @@ -1940,7 +1941,7 @@ void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId) { taosThreadMutexUnlock(&p->mutex); } -STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) { +STaskDbWrapper* taskDbOpenImpl(const char* key, char* statePath, char* dbPath) { char* err = NULL; char** cfNames = NULL; size_t nCf = 0; @@ -1955,7 +1956,7 @@ STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) { cfNames = rocksdb_list_column_families(pTaskDb->dbOpt, dbPath, &nCf, &err); if (nCf == 0) { - stInfo("newly create db, need to restart"); + stInfo("%s newly create db, need to restart", key); // pre create db pTaskDb->db = rocksdb_open(pTaskDb->pCfOpts[0], dbPath, &err); if (pTaskDb->db == NULL) goto _EXIT; @@ -1980,21 +1981,21 @@ STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) { cfNames = NULL; } - stDebug("succ to init stream backend at %s, backend:%p", dbPath, pTaskDb); + stDebug("init s-task backend in:%s, backend:%p, %s", dbPath, pTaskDb, key); return pTaskDb; -_EXIT: +_EXIT: taskDbDestroy(pTaskDb, false); if (err) taosMemoryFree(err); if (cfNames) rocksdb_list_column_families_destroy(cfNames, nCf); return NULL; } -STaskDbWrapper* taskDbOpen(char* path, char* key, 
int64_t chkptId) { +STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId) { char* statePath = NULL; char* dbPath = NULL; - if (rebuildDirFormCheckpoint(path, key, chkptId, &statePath, &dbPath) != 0) { + if (restoreCheckpointData(path, key, chkptId, &statePath, &dbPath) != 0) { return NULL; } diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 389eccd66d..d09e5bf477 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -239,6 +239,14 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpoin taosThreadMutexLock(&pTask->lock); + if (pReq->checkpointId <= pInfo->checkpointId) { + stDebug("s-task:%s vgId:%d latest checkpointId:%" PRId64 " checkpointVer:%" PRId64 + " no need to update the checkpoint info, updated checkpointId:%" PRId64 " checkpointVer:%" PRId64 " ignored", + id, vgId, pInfo->checkpointId, pInfo->checkpointVer, pReq->checkpointId, pReq->checkpointVer); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } + SStreamTaskState* pStatus = streamTaskGetStatus(pTask); stDebug("s-task:%s vgId:%d status:%s start to update the checkpoint info, checkpointId:%" PRId64 "->%" PRId64 @@ -246,7 +254,6 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpoin id, vgId, pStatus->name, pInfo->checkpointId, pReq->checkpointId, pInfo->checkpointVer, pReq->checkpointVer, pInfo->checkpointTime, pReq->checkpointTs); - // in the if (pStatus->state != TASK_STATUS__DROPPING) { ASSERT(pInfo->checkpointId <= pReq->checkpointId && pInfo->checkpointVer <= pReq->checkpointVer); @@ -593,7 +600,7 @@ int32_t downloadCheckpointDataByName(const char* id, const char* fname, const ch return 0; } -int32_t streamTaskDownloadCheckpointData(char* id, char* path) { +int32_t streamTaskDownloadCheckpointData(const char* id, char* path) { if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) 
== 0 || strlen(path) >= PATH_MAX) { stError("down checkpoint data parameters invalid"); return -1; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 95fd057929..288d2eeaba 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -239,9 +239,7 @@ int32_t streamMetaMayCvtDbFormat(SStreamMeta* pMeta) { return 0; } -int32_t streamTaskSetDb(SStreamMeta* pMeta, void* arg, char* key) { - SStreamTask* pTask = arg; - +int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) { int64_t chkpId = pTask->chkInfo.checkpointId; taosThreadMutexLock(&pMeta->backendMutex); @@ -1358,7 +1356,7 @@ static int32_t prepareBeforeStartTasks(SStreamMeta* pMeta, SArray** pList, int64 return TSDB_CODE_SUCCESS; } -int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { +int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expandFn) { int32_t code = TSDB_CODE_SUCCESS; int32_t vgId = pMeta->vgId; int64_t now = taosGetTimestampMs(); @@ -1392,8 +1390,17 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { continue; } - // fill-history task can only be launched by related stream tasks. STaskExecStatisInfo* pInfo = &pTask->execInfo; + + code = expandFn(pTask); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:0x%x vgId:%d failed to build stream backend", pTaskId->taskId, vgId); + streamMetaAddFailedTaskSelf(pTask, pInfo->readyTs); + streamMetaReleaseTask(pMeta, pTask); + continue; + } + + // fill-history task can only be launched by related stream tasks. 
if (pTask->info.fillHistory == 1) { stDebug("s-task:%s fill-history task wait related stream task start", pTask->id.idStr); streamMetaReleaseTask(pMeta, pTask); @@ -1481,9 +1488,9 @@ bool streamMetaAllTasksReady(const SStreamMeta* pMeta) { return true; } -int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { +int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId, __stream_task_expand_fn expandFn) { int32_t vgId = pMeta->vgId; - stInfo("vgId:%d start to task:0x%x by checking downstream status", vgId, taskId); + stInfo("vgId:%d start task:0x%x by checking it's downstream status", vgId, taskId); SStreamTask* pTask = streamMetaAcquireTask(pMeta, streamId, taskId); if (pTask == NULL) { @@ -1501,6 +1508,10 @@ int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t tas ASSERT(pTask->status.downstreamReady == 0); + if (pTask->pBackend == NULL) { // todo handle the error code + int32_t code = expandFn(pTask); + } + int32_t ret = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_INIT); if (ret != TSDB_CODE_SUCCESS) { stError("s-task:%s vgId:%d failed to handle event:%d", pTask->id.idStr, pMeta->vgId, TASK_EVENT_INIT); diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 52002b7ea8..47324bd8c9 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -98,7 +98,7 @@ int stateKeyCmpr(const void* pKey1, int kLen1, const void* pKey2, int kLen2) { return winKeyCmprImpl(&pWin1->key, &pWin2->key); } -SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages) { +SStreamState* streamStateOpen(const char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages) { SStreamState* pState = taosMemoryCalloc(1, sizeof(SStreamState)); stDebug("open stream state %p, %s", pState, path); if (pState == NULL) { @@ -127,7 +127,7 @@ SStreamState* streamStateOpen(char* path, void* pTask, 
bool specPath, int32_t sz _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT); pState->parNameMap = tSimpleHashInit(1024, hashFn); - stInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId, + stInfo("open state %p on backend %p 0x%" PRIx64 "-%d succ", pState, pMeta->streamBackend, pState->streamId, pState->taskId); return pState; From 4fa8cfbffe9a22a7a1a76d8aeab53ae1caa5f743 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 May 2024 15:42:37 +0800 Subject: [PATCH 10/67] fix(stream): fix syntax error. --- source/libs/stream/src/streamBackendRocksdb.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index ad5d759b0b..2638e8db2f 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -580,11 +580,12 @@ bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId) { taosMemoryFree(state); return exist; } + void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { char* backendPath = NULL; int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); - stDebug("start to init stream backend at %s, checkpointid: %" PRId64 " vgId:%d", backendPath, chkpId, vgId); + stDebug("start to init stream backend:%s, checkpointId:%" PRId64 " vgId:%d", backendPath, chkpId, vgId); uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper)); @@ -643,10 +644,12 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { goto _EXIT; } } + if (cfs != NULL) { rocksdb_list_column_families_destroy(cfs, nCf); } - stDebug("%s init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId); + + stDebug("init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId); 
taosMemoryFreeClear(backendPath); return (void*)pHandle; From 1ccf743215b5e033175847c7919bcb11679828f8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 May 2024 16:19:21 +0800 Subject: [PATCH 11/67] fix(stream): fix syntax error. --- source/libs/stream/src/streamBackendRocksdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 2638e8db2f..c6e580dce9 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -495,7 +495,7 @@ static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* ch } if (taosIsDir(checkpointPath) && isValidCheckpoint(checkpointPath)) { - stDebug("%s local checkpoint data existed, checkpointId:%d copy to backend dir", pTaskIdStr, checkpointId); + stDebug("%s local checkpoint data existed, checkpointId:%" PRId64 " copy to backend dir", pTaskIdStr, checkpointId); code = backendCopyFiles(checkpointPath, defaultPath); if (code != TSDB_CODE_SUCCESS) { From 4eacf86843f06055ac47a87a12a73768396e7195 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 14 May 2024 18:05:13 +0800 Subject: [PATCH 12/67] fix(tsdb): fix deadlock when stopping reader. 
--- source/libs/executor/src/executor.c | 17 ++++++++++------- source/libs/stream/src/streamMeta.c | 2 ++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 29f667cb66..2f360044c9 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -824,14 +824,17 @@ int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode) { qDebug("%s sync killed execTask", GET_TASKID(pTaskInfo)); setTaskKilled(pTaskInfo, TSDB_CODE_TSC_QUERY_KILLED); - taosWLockLatch(&pTaskInfo->lock); - while (qTaskIsExecuting(pTaskInfo)) { - taosMsleep(10); + while(1) { + taosWLockLatch(&pTaskInfo->lock); + if (qTaskIsExecuting(pTaskInfo)) { // let's wait for 100 ms and try again + taosWUnLockLatch(&pTaskInfo->lock); + taosMsleep(100); + } else { // not running now + pTaskInfo->code = rspCode; + taosWUnLockLatch(&pTaskInfo->lock); + return TSDB_CODE_SUCCESS; + } } - pTaskInfo->code = rspCode; - taosWUnLockLatch(&pTaskInfo->lock); - - return TSDB_CODE_SUCCESS; } bool qTaskIsExecuting(qTaskInfo_t qinfo) { diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 288d2eeaba..ec5cf2f4f6 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1645,6 +1645,8 @@ int32_t streamMetaAddFailedTask(SStreamMeta* pMeta, int64_t streamId, int32_t ta streamMetaAddTaskLaunchResult(pMeta, hId.streamId, hId.taskId, startTs, now, false); } } else { + streamMetaRUnLock(pMeta); + stError("failed to locate the stream task:0x%" PRIx64 "-0x%x (vgId:%d), it may have been destroyed or stopped", streamId, taskId, pMeta->vgId); code = TSDB_CODE_STREAM_TASK_NOT_EXIST; From 610aa80e654cfd597f094840df9c60d38d05ed73 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 15 May 2024 00:08:38 +0800 Subject: [PATCH 13/67] fix(stream): init backend for fill-history task. 
--- include/libs/stream/tstream.h | 1 - source/dnode/vnode/src/tqCommon/tqCommon.c | 4 -- source/libs/stream/inc/streamInt.h | 2 +- source/libs/stream/src/streamMeta.c | 48 +++++++++++++++------ source/libs/stream/src/streamStartHistory.c | 2 +- source/libs/stream/src/streamTaskSm.c | 2 +- 6 files changed, 37 insertions(+), 22 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 1f5aa46f49..7cd8391c80 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -60,7 +60,6 @@ extern "C" { #define STREAM_EXEC_T_ADD_FAILED_TASK (-7) // the load and start stream task should be executed after snode has started successfully, since the load of stream // tasks may incur the download of checkpoint data from remote, which may consume significant network and CPU resources. -#define STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS (-8) typedef struct SStreamTask SStreamTask; typedef struct SStreamQueue SStreamQueue; diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index dbda3a4541..544e820695 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -745,10 +745,6 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead } else if (type == STREAM_EXEC_T_ADD_FAILED_TASK) { int32_t code = streamMetaAddFailedTask(pMeta, pReq->streamId, pReq->taskId); return code; - } else if (type == STREAM_EXEC_T_LOAD_AND_START_ALL_TASKS) { - streamMetaLoadAllTasks(pMeta); - int32_t code = streamMetaStartAllTasks(pMeta, tqExpandStreamTask); - return code; } else if (type == STREAM_EXEC_T_RESUME_TASK) { // task resume to run after idle for a while SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 0ac10fe9fe..10bdccdb29 100644 --- a/source/libs/stream/inc/streamInt.h +++ 
b/source/libs/stream/inc/streamInt.h @@ -166,7 +166,7 @@ ECHECKPOINT_BACKUP_TYPE streamGetCheckpointBackupType(); int32_t streamTaskDownloadCheckpointData(const char* id, char* path); int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask); -int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask); +int32_t streamTaskOnScanHistoryTaskReady(SStreamTask* pTask); typedef int32_t (*__stream_async_exec_fn_t)(void* param); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index ec5cf2f4f6..5402c066a2 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1376,13 +1376,11 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expa return TSDB_CODE_SUCCESS; } - numOfTasks = taosArrayGetSize(pTaskList); - // broadcast the check downstream tasks msg + numOfTasks = taosArrayGetSize(pTaskList); for (int32_t i = 0; i < numOfTasks; ++i) { SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); - // todo: use hashTable instead SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); if (pTask == NULL) { stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, pTaskId->taskId); @@ -1391,13 +1389,14 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expa } STaskExecStatisInfo* pInfo = &pTask->execInfo; - - code = expandFn(pTask); - if (code != TSDB_CODE_SUCCESS) { - stError("s-task:0x%x vgId:%d failed to build stream backend", pTaskId->taskId, vgId); - streamMetaAddFailedTaskSelf(pTask, pInfo->readyTs); - streamMetaReleaseTask(pMeta, pTask); - continue; + if (pTask->pBackend == NULL) { + code = expandFn(pTask); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:0x%x vgId:%d failed to expand stream backend", pTaskId->taskId, vgId); + streamMetaAddFailedTaskSelf(pTask, pInfo->readyTs); + streamMetaReleaseTask(pMeta, pTask); + continue; + } } // fill-history task can only be launched by related stream 
tasks. @@ -1407,6 +1406,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expa continue; } + // ready now, start the related fill-history task if (pTask->status.downstreamReady == 1) { if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { stDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task", @@ -1429,7 +1429,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expa streamMetaReleaseTask(pMeta, pTask); } - stInfo("vgId:%d start tasks completed", pMeta->vgId); + stInfo("vgId:%d start all task(s) completed", pMeta->vgId); taosArrayDestroy(pTaskList); return code; } @@ -1494,7 +1494,7 @@ int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t tas SStreamTask* pTask = streamMetaAcquireTask(pMeta, streamId, taskId); if (pTask == NULL) { - stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, taskId); + stError("vgId:%d failed to acquire task:0x%x when starting task", pMeta->vgId, taskId); streamMetaAddFailedTask(pMeta, streamId, taskId); return TSDB_CODE_STREAM_TASK_IVLD_STATUS; } @@ -1507,9 +1507,29 @@ int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t tas } ASSERT(pTask->status.downstreamReady == 0); - - if (pTask->pBackend == NULL) { // todo handle the error code + if (pTask->pBackend == NULL) { int32_t code = expandFn(pTask); + if (code != TSDB_CODE_SUCCESS) { + streamMetaAddFailedTaskSelf(pTask, pInfo->readyTs); + streamMetaReleaseTask(pMeta, pTask); + return code; + } + + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + SStreamTask* pHTask = streamMetaAcquireTask(pMeta, pTask->hTaskInfo.id.streamId, pTask->hTaskInfo.id.taskId); + if (pHTask != NULL) { + if (pHTask->pBackend == NULL) { + code = expandFn(pHTask); + if (code != TSDB_CODE_SUCCESS) { + streamMetaAddFailedTaskSelf(pHTask, pInfo->readyTs); + streamMetaReleaseTask(pMeta, pHTask); + return code; + } + } + + streamMetaReleaseTask(pMeta, 
pHTask); + } + } } int32_t ret = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_INIT); diff --git a/source/libs/stream/src/streamStartHistory.c b/source/libs/stream/src/streamStartHistory.c index 6882f6617d..7a864a60d2 100644 --- a/source/libs/stream/src/streamStartHistory.c +++ b/source/libs/stream/src/streamStartHistory.c @@ -155,7 +155,7 @@ int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } -int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask) { +int32_t streamTaskOnScanHistoryTaskReady(SStreamTask* pTask) { // set the state to be ready streamTaskSetReady(pTask); streamTaskSetRangeStreamCalc(pTask); diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index cced6a6b84..82ea2f88ef 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -584,7 +584,7 @@ void doInitStateTransferTable(void) { // initialization event handle STaskStateTrans trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, streamTaskInitStatus, streamTaskOnNormalTaskReady, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, streamTaskOnScanhistoryTaskReady, NULL); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, streamTaskOnScanHistoryTaskReady, NULL); taosArrayPush(streamTaskSMTrans, &trans); // scan-history related event From 042ed3caff99aaeba336273840aba2370c0bcf0d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 15 May 2024 10:08:56 +0800 Subject: [PATCH 14/67] fix(stream): fix invalid unlock. 
--- source/libs/stream/src/streamMeta.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 5402c066a2..39ed23730e 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -257,19 +257,18 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) return 0; } - STaskDbWrapper* pBackend = taskDbOpen(pMeta->path, key, chkpId); + STaskDbWrapper* pBackend = NULL; while (1) { - if (pBackend == NULL) { - taosThreadMutexUnlock(&pMeta->backendMutex); - taosMsleep(1000); - stDebug("backend held by other task, restart later, path:%s, key:%s", pMeta->path, key); - } else { - taosThreadMutexUnlock(&pMeta->backendMutex); + pBackend = taskDbOpen(pMeta->path, key, chkpId); + if (pBackend != NULL) { break; } + taosThreadMutexUnlock(&pMeta->backendMutex); + taosMsleep(1000); + + stDebug("backend held by other task, restart later, path:%s, key:%s", pMeta->path, key); taosThreadMutexLock(&pMeta->backendMutex); - pBackend = taskDbOpen(pMeta->path, key, chkpId); } int64_t tref = taosAddRef(taskDbWrapperId, pBackend); From abd90c733a526904982fd95dfc72f631b39481e0 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 15 May 2024 14:24:35 +0800 Subject: [PATCH 15/67] fix(stream): remove backend files only after the in-memory task is dropped. 
--- include/libs/stream/tstream.h | 2 ++ source/dnode/vnode/src/tqCommon/tqCommon.c | 15 ++-------- source/libs/stream/src/streamBackendRocksdb.c | 6 +++- source/libs/stream/src/streamMeta.c | 2 ++ source/libs/stream/src/streamTask.c | 28 ++++++++++++++----- tests/system-test/1-insert/drop.py | 1 + 6 files changed, 33 insertions(+), 21 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 7cd8391c80..0840694964 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -290,6 +290,7 @@ typedef struct SStreamStatus { int64_t lastExecTs; // last exec time stamp int32_t inScanHistorySentinel; bool appendTranstateBlock; // has append the transfer state data block already + bool removeBackendFiles; // remove backend files on disk when free stream tasks } SStreamStatus; typedef struct SDataRange { @@ -675,6 +676,7 @@ void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key); bool streamTaskIsSinkTask(const SStreamTask* pTask); +void streamTaskSetRemoveBackendFiles(SStreamTask* pTask); void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask); void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc); diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index 544e820695..b5b5ef8755 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -584,18 +584,6 @@ int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, SMsgCb* cb, int64_t sve return code; } -static void tqStreamRemoveTaskBackend(SStreamMeta* pMeta, const STaskId* pId) { - char taskKey[128] = {0}; - sprintf(taskKey, "0x%" PRIx64 "-0x%x", pId->streamId, (int32_t)pId->taskId); - - char* path = taosMemoryCalloc(1, strlen(pMeta->path) + 128); - sprintf(path, "%s%s%s", 
pMeta->path, TD_DIRSEP, taskKey); - taosRemoveDir(path); - - tqInfo("vgId:%d drop stream task:0x%x file:%s", pMeta->vgId, (int32_t)pId->taskId, path); - taosMemoryFree(path); -} - int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; @@ -616,6 +604,7 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen hTaskId.taskId = pTask->hTaskInfo.id.taskId; } + streamTaskSetRemoveBackendFiles(pTask); streamTaskClearHTaskAttr(pTask, pReq->resetRelHalt); streamMetaReleaseTask(pMeta, pTask); } @@ -642,7 +631,7 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen streamMetaWUnLock(pMeta); - tqStreamRemoveTaskBackend(pMeta, &id); +// tqStreamRemoveTaskBackend(pMeta, &id); return 0; } diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index c6e580dce9..b6bbe2e383 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1968,11 +1968,15 @@ STaskDbWrapper* taskDbOpenImpl(const char* key, char* statePath, char* dbPath) { if (cfNames != NULL) { rocksdb_list_column_families_destroy(cfNames, nCf); } + taosMemoryFree(err); err = NULL; cfNames = rocksdb_list_column_families(pTaskDb->dbOpt, dbPath, &nCf, &err); - ASSERT(err == NULL); + if (err != NULL) { + stError("%s failed to create column-family, %s, %d, reason:%s", key, dbPath, nCf, err); + goto _EXIT; + } } if (taskDbOpenCfs(pTaskDb, dbPath, cfNames, nCf) != 0) { diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 39ed23730e..27bd001588 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1521,7 +1521,9 @@ int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t tas code = expandFn(pHTask); if (code != TSDB_CODE_SUCCESS) { streamMetaAddFailedTaskSelf(pHTask, pInfo->readyTs); + 
streamMetaReleaseTask(pMeta, pHTask); + streamMetaReleaseTask(pMeta, pTask); return code; } } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 72302f981d..2cb388954d 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -187,8 +187,9 @@ int32_t tDecodeStreamTaskId(SDecoder* pDecoder, STaskId* pTaskId) { } void tFreeStreamTask(SStreamTask* pTask) { - char* p = NULL; - int32_t taskId = pTask->id.taskId; + char* p = NULL; + int32_t taskId = pTask->id.taskId; + STaskExecStatisInfo* pStatis = &pTask->execInfo; ETaskStatus status1 = TASK_STATUS__UNINIT; @@ -200,7 +201,7 @@ void tFreeStreamTask(SStreamTask* pTask) { } taosThreadMutexUnlock(&pTask->lock); - stDebug("start to free s-task:0x%x, %p, state:%s", taskId, pTask, p); + stDebug("start to free s-task:0x%x %p, state:%s", taskId, pTask, p); SCheckpointInfo* pCkInfo = &pTask->chkInfo; stDebug("s-task:0x%x task exec summary: create:%" PRId64 ", init:%" PRId64 ", start:%" PRId64 @@ -275,10 +276,6 @@ void tFreeStreamTask(SStreamTask* pTask) { taskDbRemoveRef(pTask->pBackend); } - if (pTask->id.idStr != NULL) { - taosMemoryFree((void*)pTask->id.idStr); - } - if (pTask->pNameMap) { tSimpleHashCleanup(pTask->pNameMap); } @@ -292,6 +289,19 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->outputInfo.pNodeEpsetUpdateList = taosArrayDestroy(pTask->outputInfo.pNodeEpsetUpdateList); + if ((pTask->status.removeBackendFiles) && (pTask->pMeta != NULL)) { + char* path = taosMemoryCalloc(1, strlen(pTask->pMeta->path) + 128); + sprintf(path, "%s%s%s", pTask->pMeta->path, TD_DIRSEP, pTask->id.idStr); + taosRemoveDir(path); + + stInfo("s-task:0x%x vgId:%d remove all backend files:%s", taskId, pTask->pMeta->vgId, path); + taosMemoryFree(path); + } + + if (pTask->id.idStr != NULL) { + taosMemoryFree((void*)pTask->id.idStr); + } + taosMemoryFree(pTask); stDebug("s-task:0x%x free task completed", taskId); } @@ -896,4 +906,8 @@ int32_t 
streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq) { return code; } return streamTrySchedExec(pTask); +} + +void streamTaskSetRemoveBackendFiles(SStreamTask* pTask) { + pTask->status.removeBackendFiles = true; } \ No newline at end of file diff --git a/tests/system-test/1-insert/drop.py b/tests/system-test/1-insert/drop.py index 21817ef20d..493e1491b8 100644 --- a/tests/system-test/1-insert/drop.py +++ b/tests/system-test/1-insert/drop.py @@ -20,6 +20,7 @@ from util.common import * from util.sqlset import * class TDTestCase: + updatecfgDict = {'stdebugflag':143} def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) From 2b442119288c263bef2d5299512448e088d4e3c4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 15 May 2024 14:52:59 +0800 Subject: [PATCH 16/67] fix(test): update test case and fix a typo. --- source/libs/stream/src/streamBackendRocksdb.c | 2 +- source/util/test/cfgTest.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index b6bbe2e383..398980dfba 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1974,7 +1974,7 @@ STaskDbWrapper* taskDbOpenImpl(const char* key, char* statePath, char* dbPath) { cfNames = rocksdb_list_column_families(pTaskDb->dbOpt, dbPath, &nCf, &err); if (err != NULL) { - stError("%s failed to create column-family, %s, %d, reason:%s", key, dbPath, nCf, err); + stError("%s failed to create column-family, %s, %" PRIzu ", reason:%s", key, dbPath, nCf, err); goto _EXIT; } } diff --git a/source/util/test/cfgTest.cpp b/source/util/test/cfgTest.cpp index 92422b6a80..9f8645b14c 100644 --- a/source/util/test/cfgTest.cpp +++ b/source/util/test/cfgTest.cpp @@ -67,7 +67,7 @@ TEST_F(CfgTest, 02_Basic) { SConfigItem* pItem = NULL; SConfigIter* pIter = cfgCreateIter(pConfig); - 
while((pItem == cfgNextIter(pIter)) != NULL) { + while((pItem = cfgNextIter(pIter)) != NULL) { switch (pItem->dtype) { case CFG_DTYPE_BOOL: printf("index:%d, cfg:%s value:%d\n", size, pItem->name, pItem->bval); From 130f7a292d718aba1c62db4f98cb81ca338f39bf Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 19 May 2024 22:36:09 +0800 Subject: [PATCH 17/67] fix(stream): reset the error code when trying to restore checkpoint. --- source/libs/stream/src/streamBackendRocksdb.c | 5 +++-- source/libs/stream/src/streamMeta.c | 10 +++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 398980dfba..5b03876f02 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -390,7 +390,7 @@ int32_t rebuildFromRemoteCheckpoint(const char* key, char* chkpPath, int64_t che } else if (type == DATA_UPLOAD_RSYNC) { return rebuildFromRemoteChkp_rsync(key, chkpPath, checkpointId, defaultPath); } else { - stError("%s not remote backup checkpoint data for:%" PRId64" restore ", key, checkpointId); + stError("%s no remote backup checkpoint data for:%" PRId64, key, checkpointId); } return -1; @@ -543,7 +543,7 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId } taosMemoryFree(checkpointRoot); - stDebug("%s check local default:%s, checkpointId:%" PRId64 " succ", key, defaultPath, chkptId); + stDebug("%s check local backend dir:%s, checkpointId:%" PRId64 " succ", key, defaultPath, chkptId); char* chkptPath = taosMemoryCalloc(1, strlen(path) + 256); if (chkptId > 0) { @@ -557,6 +557,7 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId if (code != 0) { stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s", chkptPath, tstrerror(code), defaultPath); + code = 0; // reset the error code } } else { // no valid checkpoint id 
stInfo("%s no valid checkpoint ever generated, no need to copy checkpoint data", key); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 27bd001588..a6453871c8 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -791,11 +791,11 @@ int32_t streamMetaCommit(SStreamMeta* pMeta) { } int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) { - int64_t chkpId = 0; + int64_t checkpointId = 0; TBC* pCur = NULL; if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { - return chkpId; + return checkpointId; } void* pKey = NULL; @@ -816,16 +816,16 @@ int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) { } tDecoderClear(&decoder); - chkpId = TMAX(chkpId, info.checkpointId); + checkpointId = TMAX(checkpointId, info.checkpointId); } - stDebug("get max chkp id: %" PRId64 "", chkpId); + stDebug("vgId:%d get max checkpointId:%" PRId64, pMeta->vgId, checkpointId); tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); - return chkpId; + return checkpointId; } // not allowed to return error code From f6af165d9ac40a6142ca6c740fb3e6ae20124928 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 21 May 2024 10:09:55 +0800 Subject: [PATCH 18/67] refactor(stream): add more info in info-meta --- source/common/src/systable.c | 3 +++ source/dnode/mnode/impl/src/mndStream.c | 12 ++++++++++++ source/dnode/vnode/src/tq/tqSink.c | 4 ++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 9de682dd3a..d5dc1581a4 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -187,6 +187,8 @@ static const SSysDbTableSchema streamTaskSchema[] = { {.name = "status", .bytes = 12 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "stage", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "in_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = 
"input_idle", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "dispatch_data", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "info", .bytes = 35, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "start_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, @@ -195,6 +197,7 @@ static const SSysDbTableSchema streamTaskSchema[] = { {.name = "checkpoint_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, {.name = "checkpoint_id", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "checkpoint_version", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, + {.name = "checkpoint_size", .bytes = 15, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "checkpoint_backup", .bytes = 15, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "ds_err_info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "history_task_id", .bytes = 16 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index fb15e4b857..03385ea226 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -1629,6 +1629,14 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + // input idle + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetNULL(pColInfo, numOfRows); + + // dispatch data + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetNULL(pColInfo, numOfRows); + // output queue // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); // STR_TO_VARSTR(vbuf, buf); @@ -1679,6 +1687,10 @@ static int32_t 
setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char*)&pe->checkpointInfo.latestVer, false); + // checkpoint size + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetNULL(pColInfo, numOfRows); + // checkpoint backup status pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, 0, true); diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index c0f58fc3ec..ce6db55cef 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -744,7 +744,7 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat tqTrace("s-task:%s set the dstTable uid from cache:%" PRId64, id, pTableData->uid); } } else { - // The auto-create option will always set to be open for those submit messages, which arrive during the period + // The auto-create option will always set to be open for those submit messages, which arrives during the period // the creating of the destination table, due to the absence of the user-specified table in TSDB. When scanning // data from WAL, those submit messages, with auto-created table option, will be discarded expect the first, for // those mismatched table uids. 
Only the FIRST table has the correct table uid, and those remain all have @@ -752,7 +752,7 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat SMetaReader mr = {0}; metaReaderDoInit(&mr, pVnode->pMeta, META_READER_LOCK); - // table not in cache, let's try the extract it from tsdb meta + // table not in cache, let's try to extract it from tsdb meta if (metaGetTableEntryByName(&mr, dstTableName) < 0) { metaReaderClear(&mr); From be39044b669dd928730f677923ea6ae19a4781d6 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 May 2024 11:04:54 +0800 Subject: [PATCH 19/67] fix(stream): add more info --- include/libs/stream/tstream.h | 18 ++++-- source/common/src/systable.c | 12 ++-- source/dnode/mnode/impl/src/mndStream.c | 75 +++++++++++++++++++++---- source/libs/stream/src/streamExec.c | 31 ++++++++-- source/libs/stream/src/streamMeta.c | 7 ++- source/libs/stream/src/streamTask.c | 4 ++ source/libs/stream/src/streammsg.c | 12 ++++ 7 files changed, 136 insertions(+), 23 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 0840694964..ea0f1824b3 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -357,8 +357,12 @@ typedef struct STaskExecStatisInfo { double step2El; int32_t updateCount; int64_t latestUpdateTs; - int32_t processDataBlocks; - int64_t processDataSize; + int32_t inputDataBlocks; + int64_t inputDataSize; + double procsThroughput; + int64_t outputDataBlocks; + int64_t outputDataSize; + double outputThroughput; int32_t dispatch; int64_t dispatchDataSize; int32_t checkpoint; @@ -566,6 +570,8 @@ typedef struct STaskCkptInfo { int64_t latestId; // saved checkpoint id int64_t latestVer; // saved checkpoint ver int64_t latestTime; // latest checkpoint time + int64_t latestSize; // latest checkpoint size + int8_t remoteBackup; // latest checkpoint backup done int64_t activeId; // current active checkpoint id int32_t activeTransId; // checkpoint trans id int8_t 
failed; // denote if the checkpoint is failed or not @@ -583,8 +589,12 @@ typedef struct STaskStatusEntry { int64_t inputQUnchangeCounter; double inputQUsed; // in MiB double inputRate; - double sinkQuota; // existed quota size for sink task - double sinkDataSize; // sink to dst data size + double procsThroughput; // duration between one element put into input queue and being processed. + double procsTotal; // duration between one element put into input queue and being processed. + double outputThroughput; // the size of dispatched result blocks in bytes + double outputTotal; // the size of dispatched result blocks in bytes + double sinkQuota; // existed quota size for sink task + double sinkDataSize; // sink to dst data size int64_t startTime; int64_t startCheckpointId; int64_t startCheckpointVer; diff --git a/source/common/src/systable.c b/source/common/src/systable.c index d5dc1581a4..297f43399f 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -187,8 +187,12 @@ static const SSysDbTableSchema streamTaskSchema[] = { {.name = "status", .bytes = 12 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "stage", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "in_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "input_idle", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "dispatch_data", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "process_total", .bytes = 14, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "process_throughput", .bytes = 14, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "out_total", .bytes = 14, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "out_throughput", .bytes = 14, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, +// {.name = "dispatch_throughput", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, +// {.name = 
"dispatch_total", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "info", .bytes = 35, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "start_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, @@ -196,10 +200,10 @@ static const SSysDbTableSchema streamTaskSchema[] = { {.name = "start_ver", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "checkpoint_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, {.name = "checkpoint_id", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, - {.name = "checkpoint_version", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, + {.name = "checkpoint_ver", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "checkpoint_size", .bytes = 15, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "checkpoint_backup", .bytes = 15, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "ds_err_info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "extra_info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "history_task_id", .bytes = 16 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "history_task_status", .bytes = 12 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, }; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 03385ea226..105565067c 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -1620,22 +1620,75 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); // input queue - char vbuf[30] = {0}; - char buf[25] = {0}; - const char *queueInfoStr = "%4.2fMiB (%5.2f%)"; + char vbuf[40] = {0}; + 
char buf[40] = {0}; + const char *queueInfoStr = "%4.2f MiB (%6.2f%)"; sprintf(buf, queueInfoStr, pe->inputQUsed, pe->inputRate); STR_TO_VARSTR(vbuf, buf); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); - // input idle - pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetNULL(pColInfo, numOfRows); + // input total + const char* formatTotalMb = "%7.2f MiB"; + const char* formatTotalGb = "%7.2f GiB"; + if (pe->procsTotal < 1024) { + sprintf(buf, formatTotalMb, pe->procsTotal); + } else { + sprintf(buf, formatTotalGb, pe->procsTotal / 1024); + } + + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); - // dispatch data pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetNULL(pColInfo, numOfRows); + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + + // process throughput + const char* formatKb = "%7.2f KiB/s"; + const char* formatMb = "%7.2f MiB/s"; + if (pe->procsThroughput < 1024) { + sprintf(buf, formatKb, pe->procsThroughput); + } else { + sprintf(buf, formatMb, pe->procsThroughput / 1024); + } + + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + + // output total + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + colDataSetNULL(pColInfo, numOfRows); + } else { + sprintf(buf, formatTotalMb, pe->outputTotal); + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + } + + // output throughput + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + colDataSetNULL(pColInfo, numOfRows); + } else { + if (pe->outputThroughput < 1024) { + sprintf(buf, formatKb, pe->outputThroughput); + } else { + sprintf(buf, formatMb, 
pe->outputThroughput / 1024); + } + + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + } // output queue // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); @@ -1646,12 +1699,14 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS // info if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - const char *sinkStr = "%.2fMiB"; + const char *sinkStr = "%.2f MiB"; sprintf(buf, sinkStr, pe->sinkDataSize); } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { // offset info const char *offsetStr = "%" PRId64 " [%" PRId64 ", %" PRId64 "]"; - sprintf(buf, offsetStr, pe->processedVer, pe->verRange.minVer, pe->verRange.maxVer); + snprintf(buf, tListLen(buf), offsetStr, pe->processedVer, pe->verRange.minVer, pe->verRange.maxVer); + } else { + memset(buf, 0, tListLen(buf)); } STR_TO_VARSTR(vbuf, buf); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 9747ebd2ff..9daca9a99c 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -96,7 +96,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i *totalSize = 0; int32_t size = 0; - int32_t numOfBlocks = 0; + int32_t numOfBlocks= 0; SArray* pRes = NULL; while (1) { @@ -586,6 +586,9 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { } } + pTask->execInfo.inputDataBlocks += numOfBlocks; + pTask->execInfo.inputDataSize += blockSize; + // dispatch checkpoint msg to all downstream tasks int32_t type = pInput->type; if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { @@ -601,11 +604,21 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { if (pTask->info.taskLevel == TASK_LEVEL__SINK) { ASSERT(type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__CHECKPOINT); + int64_t st = taosGetTimestampMs(); + // here only handle the data block sink operation if (type == STREAM_INPUT__DATA_BLOCK) { 
pTask->execInfo.sink.dataSize += blockSize; stDebug("s-task:%s sink task start to sink %d blocks, size:%.2fKiB", id, numOfBlocks, SIZE_IN_KiB(blockSize)); doOutputResultBlockImpl(pTask, (SStreamDataBlock*)pInput); + + double el = (taosGetTimestampMs() - st) / 1000.0; + if (fabs(el - 0.0) <= DBL_EPSILON) { + pTask->execInfo.procsThroughput = 0; + } else { + pTask->execInfo.procsThroughput = (blockSize / el); + } + continue; } } @@ -639,13 +652,23 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { int64_t ver = pTask->chkInfo.processedVer; doSetStreamInputBlock(pTask, pInput, &ver, id); - int64_t resSize = 0; + int64_t totalSize = 0; int32_t totalBlocks = 0; - streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks); + streamTaskExecImpl(pTask, pInput, &totalSize, &totalBlocks); double el = (taosGetTimestampMs() - st) / 1000.0; stDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el, - SIZE_IN_MiB(resSize), totalBlocks); + SIZE_IN_MiB(totalSize), totalBlocks); + + pTask->execInfo.outputDataBlocks += totalBlocks; + pTask->execInfo.outputDataSize += totalSize; + if (fabs(el - 0.0) <= DBL_EPSILON) { + pTask->execInfo.procsThroughput = 0; + pTask->execInfo.outputThroughput = 0; + } else { + pTask->execInfo.outputThroughput = (totalSize / el); + pTask->execInfo.procsThroughput = (blockSize / el); + } SCheckpointInfo* pInfo = &pTask->chkInfo; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index a6453871c8..7f1128b929 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1017,8 +1017,13 @@ static int32_t metaHeartbeatToMnodeImpl(SStreamMeta* pMeta) { .checkpointInfo.latestId = (*pTask)->chkInfo.checkpointId, .checkpointInfo.latestVer = (*pTask)->chkInfo.checkpointVer, .checkpointInfo.latestTime = (*pTask)->chkInfo.checkpointTime, + .checkpointInfo.latestSize = 0, + .checkpointInfo.remoteBackup = 0, .hTaskId = 
(*pTask)->hTaskInfo.id.taskId, - + .procsTotal = SIZE_IN_MiB((*pTask)->execInfo.inputDataSize), + .outputTotal = SIZE_IN_MiB((*pTask)->execInfo.outputDataSize), + .procsThroughput = SIZE_IN_KiB((*pTask)->execInfo.procsThroughput), + .outputThroughput = SIZE_IN_KiB((*pTask)->execInfo.outputThroughput), .startCheckpointId = (*pTask)->execInfo.startCheckpointId, .startCheckpointVer = (*pTask)->execInfo.startCheckpointVer, }; diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 2cb388954d..c056e2a4b6 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -771,6 +771,10 @@ void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc) pDst->stage = pSrc->stage; pDst->inputQUsed = pSrc->inputQUsed; pDst->inputRate = pSrc->inputRate; + pDst->procsTotal = pSrc->procsTotal; + pDst->procsThroughput = pSrc->procsThroughput; + pDst->outputTotal = pSrc->outputTotal; + pDst->outputThroughput = pSrc->outputThroughput; pDst->processedVer = pSrc->processedVer; pDst->verRange = pSrc->verRange; pDst->sinkQuota = pSrc->sinkQuota; diff --git a/source/libs/stream/src/streammsg.c b/source/libs/stream/src/streammsg.c index 5e52b927c6..9b69833234 100644 --- a/source/libs/stream/src/streammsg.c +++ b/source/libs/stream/src/streammsg.c @@ -335,6 +335,10 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { if (tEncodeI32(pEncoder, ps->nodeId) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputQUsed) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputRate) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->procsTotal) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->procsThroughput) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->outputTotal) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->outputThroughput) < 0) return -1; if (tEncodeDouble(pEncoder, ps->sinkQuota) < 0) return -1; if (tEncodeDouble(pEncoder, ps->sinkDataSize) < 0) return -1; if (tEncodeI64(pEncoder, 
ps->processedVer) < 0) return -1; @@ -346,6 +350,8 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { if (tEncodeI64(pEncoder, ps->checkpointInfo.latestId) < 0) return -1; if (tEncodeI64(pEncoder, ps->checkpointInfo.latestVer) < 0) return -1; if (tEncodeI64(pEncoder, ps->checkpointInfo.latestTime) < 0) return -1; + if (tEncodeI64(pEncoder, ps->checkpointInfo.latestSize) < 0) return -1; + if (tEncodeI8(pEncoder, ps->checkpointInfo.remoteBackup) < 0) return -1; if (tEncodeI64(pEncoder, ps->startTime) < 0) return -1; if (tEncodeI64(pEncoder, ps->startCheckpointId) < 0) return -1; if (tEncodeI64(pEncoder, ps->startCheckpointVer) < 0) return -1; @@ -381,6 +387,10 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { if (tDecodeI32(pDecoder, &entry.nodeId) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputQUsed) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputRate) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.procsTotal) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.procsThroughput) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.outputTotal) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.outputThroughput) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.sinkQuota) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.sinkDataSize) < 0) return -1; if (tDecodeI64(pDecoder, &entry.processedVer) < 0) return -1; @@ -393,6 +403,8 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestId) < 0) return -1; if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestVer) < 0) return -1; if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestTime) < 0) return -1; + if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestSize) < 0) return -1; + if (tDecodeI8(pDecoder, &entry.checkpointInfo.remoteBackup) < 0) return -1; if (tDecodeI64(pDecoder, &entry.startTime) < 0) return -1; if (tDecodeI64(pDecoder, &entry.startCheckpointId) < 0) return -1; 
if (tDecodeI64(pDecoder, &entry.startCheckpointVer) < 0) return -1; From 77852fa70249ccec4dc10ebee573e8bc9f191bf5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 May 2024 13:20:45 +0800 Subject: [PATCH 20/67] fix(stream): avoid data overflow and fix test cases. --- source/dnode/mnode/impl/src/mndStream.c | 2 +- tests/system-test/0-others/information_schema.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 105565067c..d65d708bb8 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -1700,7 +1700,7 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS // info if (pTask->info.taskLevel == TASK_LEVEL__SINK) { const char *sinkStr = "%.2f MiB"; - sprintf(buf, sinkStr, pe->sinkDataSize); + snprintf(buf, tListLen(buf), sinkStr, pe->sinkDataSize); } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { // offset info const char *offsetStr = "%" PRId64 " [%" PRId64 ", %" PRId64 "]"; diff --git a/tests/system-test/0-others/information_schema.py b/tests/system-test/0-others/information_schema.py index 944b2fbb1e..965cdee9ce 100644 --- a/tests/system-test/0-others/information_schema.py +++ b/tests/system-test/0-others/information_schema.py @@ -222,7 +222,7 @@ class TDTestCase: tdSql.query("select * from information_schema.ins_columns where db_name ='information_schema'") tdLog.info(len(tdSql.queryResult)) - tdSql.checkEqual(True, len(tdSql.queryResult) in range(254, 255)) + tdSql.checkEqual(True, len(tdSql.queryResult) in range(258, 259)) tdSql.query("select * from information_schema.ins_columns where db_name ='performance_schema'") tdSql.checkEqual(54, len(tdSql.queryResult)) From 565d1f408cdd8f0bd1ed2e7937aad0f4fda5d431 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 May 2024 13:26:49 +0800 Subject: [PATCH 21/67] fix(stream): adjust buf size, and avoid overflow --- 
source/dnode/mnode/impl/src/mndStream.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index d65d708bb8..98f438bf88 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -1620,10 +1620,10 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); // input queue - char vbuf[40] = {0}; - char buf[40] = {0}; + char vbuf[37] = {0}; + char buf[35] = {0}; const char *queueInfoStr = "%4.2f MiB (%6.2f%)"; - sprintf(buf, queueInfoStr, pe->inputQUsed, pe->inputRate); + snprintf(buf, tListLen(buf), queueInfoStr, pe->inputQUsed, pe->inputRate); STR_TO_VARSTR(vbuf, buf); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); @@ -1633,9 +1633,9 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS const char* formatTotalMb = "%7.2f MiB"; const char* formatTotalGb = "%7.2f GiB"; if (pe->procsTotal < 1024) { - sprintf(buf, formatTotalMb, pe->procsTotal); + snprintf(buf, tListLen(buf), formatTotalMb, pe->procsTotal); } else { - sprintf(buf, formatTotalGb, pe->procsTotal / 1024); + snprintf(buf, tListLen(buf), formatTotalGb, pe->procsTotal / 1024); } memset(vbuf, 0, tListLen(vbuf)); @@ -1648,9 +1648,9 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS const char* formatKb = "%7.2f KiB/s"; const char* formatMb = "%7.2f MiB/s"; if (pe->procsThroughput < 1024) { - sprintf(buf, formatKb, pe->procsThroughput); + snprintf(buf, tListLen(buf), formatKb, pe->procsThroughput); } else { - sprintf(buf, formatMb, pe->procsThroughput / 1024); + snprintf(buf, tListLen(buf), formatMb, pe->procsThroughput / 1024); } memset(vbuf, 0, tListLen(vbuf)); @@ -1679,9 +1679,9 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS colDataSetNULL(pColInfo, 
numOfRows); } else { if (pe->outputThroughput < 1024) { - sprintf(buf, formatKb, pe->outputThroughput); + snprintf(buf, tListLen(buf), formatKb, pe->outputThroughput); } else { - sprintf(buf, formatMb, pe->outputThroughput / 1024); + snprintf(buf, tListLen(buf), formatMb, pe->outputThroughput / 1024); } memset(vbuf, 0, tListLen(vbuf)); From 317742acc1913e67c6e8f66a39bf398536ee231c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 May 2024 13:29:38 +0800 Subject: [PATCH 22/67] fix(stream): adjust buf size. --- source/common/src/systable.c | 2 +- source/dnode/mnode/impl/src/mndStream.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 297f43399f..4fdbe1b9f0 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -194,7 +194,7 @@ static const SSysDbTableSchema streamTaskSchema[] = { // {.name = "dispatch_throughput", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "dispatch_total", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "info", .bytes = 35, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "info", .bytes = 40, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "start_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, {.name = "start_id", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "start_ver", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 98f438bf88..cb0a6c5d99 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -1620,8 +1620,8 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, 
false); // input queue - char vbuf[37] = {0}; - char buf[35] = {0}; + char vbuf[40] = {0}; + char buf[38] = {0}; const char *queueInfoStr = "%4.2f MiB (%6.2f%)"; snprintf(buf, tListLen(buf), queueInfoStr, pe->inputQUsed, pe->inputRate); STR_TO_VARSTR(vbuf, buf); From d13aff3b7045f7d874fbe2ba4a4777576b019e79 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 May 2024 13:33:17 +0800 Subject: [PATCH 23/67] fix(stream): adjust buf size, and avoid overflow --- source/common/src/systable.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/common/src/systable.c b/source/common/src/systable.c index 4fdbe1b9f0..980b590748 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -194,16 +194,16 @@ static const SSysDbTableSchema streamTaskSchema[] = { // {.name = "dispatch_throughput", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "dispatch_total", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "info", .bytes = 40, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "info", .bytes = 40+ VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "start_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, {.name = "start_id", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "start_ver", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "checkpoint_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, {.name = "checkpoint_id", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "checkpoint_ver", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, - {.name = "checkpoint_size", .bytes = 15, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "checkpoint_backup", .bytes = 15, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - 
{.name = "extra_info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "checkpoint_size", .bytes = 14 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "checkpoint_backup", .bytes = 14 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "extra_info", .bytes = 25 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "history_task_id", .bytes = 16 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "history_task_status", .bytes = 12 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, }; From 5aae69338c397e8457499f80cb56b088e3ab6f84 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 May 2024 16:13:13 +0800 Subject: [PATCH 24/67] fix(test): update test cases. --- tests/system-test/0-others/information_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system-test/0-others/information_schema.py b/tests/system-test/0-others/information_schema.py index 965cdee9ce..bf2f1c01f6 100644 --- a/tests/system-test/0-others/information_schema.py +++ b/tests/system-test/0-others/information_schema.py @@ -222,7 +222,7 @@ class TDTestCase: tdSql.query("select * from information_schema.ins_columns where db_name ='information_schema'") tdLog.info(len(tdSql.queryResult)) - tdSql.checkEqual(True, len(tdSql.queryResult) in range(258, 259)) + tdSql.checkEqual(True, len(tdSql.queryResult) in range(259, 260)) tdSql.query("select * from information_schema.ins_columns where db_name ='performance_schema'") tdSql.checkEqual(54, len(tdSql.queryResult)) From 96e148778705a826cd3922dfc091f6fcb2968417 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 May 2024 16:57:09 +0800 Subject: [PATCH 25/67] refactor: do some internal refactor. 
--- source/libs/stream/src/streamMeta.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 7f1128b929..519800be28 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -151,10 +151,10 @@ int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { int8_t ret = STREAM_STATA_COMPATIBLE; TBC* pCur = NULL; - if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { - // no task info, no stream + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { // no task info, no stream return ret; } + void* pKey = NULL; int32_t kLen = 0; void* pVal = NULL; @@ -165,20 +165,24 @@ int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { if (pVal == NULL || vLen == 0) { break; } + SDecoder decoder; SCheckpointInfo info; tDecoderInit(&decoder, (uint8_t*)pVal, vLen); if (tDecodeStreamTaskChkInfo(&decoder, &info) < 0) { continue; } + if (info.msgVer <= SSTREAM_TASK_INCOMPATIBLE_VER) { ret = STREAM_STATA_NO_COMPATIBLE; } else if (info.msgVer >= SSTREAM_TASK_NEED_CONVERT_VER) { ret = STREAM_STATA_NEED_CONVERT; } + tDecoderClear(&decoder); break; } + tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); @@ -193,6 +197,7 @@ int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { if (exist == false) { return code; } + SBackendWrapper* pBackend = streamBackendInit(pMeta->path, chkpId, pMeta->vgId); void* pIter = taosHashIterate(pBackend->cfInst, NULL); @@ -219,6 +224,7 @@ _EXIT: return code; } + int32_t streamMetaMayCvtDbFormat(SStreamMeta* pMeta) { int8_t compatible = streamMetaCheckBackendCompatible(pMeta); if (compatible == STREAM_STATA_COMPATIBLE) { @@ -854,6 +860,10 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { tdbTbcMoveToFirst(pCur); while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { + if (pVal == NULL || vLen == 0) { + break; + } + SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { terrno = 
TSDB_CODE_OUT_OF_MEMORY; From f146ae4198287a7a305604794b4df8b473910c13 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 May 2024 19:46:24 +0800 Subject: [PATCH 26/67] fix(stream): --- source/libs/stream/src/streamDispatch.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index f3569d8973..7c80d15307 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -1023,7 +1023,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i // trans-state msg has been sent to downstream successfully. let's transfer the fill-history task state if (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__TRANS_STATE) { - stDebug("s-task:%s dispatch transtate msgId:%d to downstream successfully, start to prepare transfer state", id, msgId); + stDebug("s-task:%s dispatch trans-state msgId:%d to downstream successfully, start to prepare transfer state", id, msgId); ASSERT(pTask->info.fillHistory == 1); code = streamTransferStatePrepare(pTask); @@ -1121,21 +1121,7 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); stDebug("s-task:%s close inputQ for upstream:0x%x, msgId:%d", id, pReq->upstreamTaskId, pReq->msgId); } else if (pReq->type == STREAM_INPUT__TRANS_STATE) { - atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); - streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); - - // disable the related stream task here to avoid it to receive the newly arrived data after the transfer-state - STaskId* pRelTaskId = &pTask->streamTaskId; - SStreamTask* pStreamTask = streamMetaAcquireTask(pMeta, pRelTaskId->streamId, pRelTaskId->taskId); - if (pStreamTask != NULL) { - atomic_add_fetch_32(&pStreamTask->upstreamInfo.numOfClosed, 1); - streamTaskCloseUpstreamInput(pStreamTask, pReq->upstreamRelTaskId); - 
streamMetaReleaseTask(pMeta, pStreamTask); - } - - stDebug("s-task:%s close inputQ for upstream:0x%x since trans-state msgId:%d recv, rel stream-task:0x%" PRIx64 - " close inputQ for upstream:0x%x", - id, pReq->upstreamTaskId, pReq->msgId, pTask->streamTaskId.taskId, pReq->upstreamRelTaskId); + stDebug("s-task:%s recv trans-state msgId:%d from upstream:0x%x", id, pReq->msgId, pReq->upstreamTaskId); } status = streamTaskAppendInputBlocks(pTask, pReq); From 61b08259ba2672c186dd760785660ffb4dd014bc Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 23 May 2024 15:38:16 +0800 Subject: [PATCH 27/67] fix(stream): initialization fill-history tasks before start all other stream tasks. --- source/libs/stream/src/streamMeta.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 519800be28..190c60289f 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1392,6 +1392,33 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expa // broadcast the check downstream tasks msg numOfTasks = taosArrayGetSize(pTaskList); + + // prepare the fill-history task before starting all stream tasks, to avoid fill-history tasks are started without + // initialization , when the operation of check downstream tasks status is executed far quickly. 
+ for(int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); + if (pTask == NULL) { + stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, pTaskId->taskId); + streamMetaAddFailedTask(pMeta, pTaskId->streamId, pTaskId->taskId); + continue; + } + + if (pTask->info.fillHistory == 1) { + if (pTask->pBackend == NULL) { // TODO: add test cases for this + code = expandFn(pTask); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:0x%x vgId:%d failed to expand stream backend", pTaskId->taskId, vgId); + streamMetaAddFailedTaskSelf(pTask, pTask->execInfo.readyTs); + } + } else { + stDebug("s-task:0x%x vgId:%d fill-history task backend has initializied already", pTaskId->taskId, vgId); + } + } + + streamMetaReleaseTask(pMeta, pTask); + } + for (int32_t i = 0; i < numOfTasks; ++i) { SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); From 69567799ebae4487bbde103474d323fff9c9253a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 23 May 2024 15:45:59 +0800 Subject: [PATCH 28/67] fix(stream): initialization tasks before start all tasks. --- source/libs/stream/src/streamMeta.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 190c60289f..00fe1207dd 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1395,25 +1395,23 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expa // prepare the fill-history task before starting all stream tasks, to avoid fill-history tasks are started without // initialization , when the operation of check downstream tasks status is executed far quickly. 
- for(int32_t i = 0; i < numOfTasks; ++i) { + for (int32_t i = 0; i < numOfTasks; ++i) { SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); if (pTask == NULL) { stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, pTaskId->taskId); streamMetaAddFailedTask(pMeta, pTaskId->streamId, pTaskId->taskId); continue; } - if (pTask->info.fillHistory == 1) { - if (pTask->pBackend == NULL) { // TODO: add test cases for this - code = expandFn(pTask); - if (code != TSDB_CODE_SUCCESS) { - stError("s-task:0x%x vgId:%d failed to expand stream backend", pTaskId->taskId, vgId); - streamMetaAddFailedTaskSelf(pTask, pTask->execInfo.readyTs); - } - } else { - stDebug("s-task:0x%x vgId:%d fill-history task backend has initializied already", pTaskId->taskId, vgId); + if (pTask->pBackend == NULL) { // TODO: add test cases for this + code = expandFn(pTask); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:0x%x vgId:%d failed to expand stream backend", pTaskId->taskId, vgId); + streamMetaAddFailedTaskSelf(pTask, pTask->execInfo.readyTs); } + } else { + stDebug("s-task:0x%x vgId:%d fill-history task backend has initialized already", pTaskId->taskId, vgId); } streamMetaReleaseTask(pMeta, pTask); @@ -1430,15 +1428,6 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expa } STaskExecStatisInfo* pInfo = &pTask->execInfo; - if (pTask->pBackend == NULL) { - code = expandFn(pTask); - if (code != TSDB_CODE_SUCCESS) { - stError("s-task:0x%x vgId:%d failed to expand stream backend", pTaskId->taskId, vgId); - streamMetaAddFailedTaskSelf(pTask, pInfo->readyTs); - streamMetaReleaseTask(pMeta, pTask); - continue; - } - } // fill-history task can only be launched by related stream tasks. 
if (pTask->info.fillHistory == 1) { From 8ccfa728f89d43f1ab07e8935f824a2c745e0650 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao> Date: Thu, 23 May 2024 16:17:15 +0800 Subject: [PATCH 29/67] set pk column --- source/libs/executor/src/streamtimewindowoperator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 2da9ed0353..8b24c1f591 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -855,7 +855,7 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDat int32_t pkLen = 0; SColumnInfoData* pPkColDataInfo = NULL; if (hasSrcPrimaryKeyCol(&pInfo->basic)) { - pPkColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); + pPkColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->basic.primaryPkIndex); } if (pSDataBlock->info.window.skey != tsCols[0] || pSDataBlock->info.window.ekey != tsCols[endRowId]) { @@ -2142,7 +2142,7 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData int32_t pkLen = 0; SColumnInfoData* pPkColDataInfo = NULL; if (hasSrcPrimaryKeyCol(&pInfo->basic)) { - pPkColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); + pPkColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->basic.primaryPkIndex); } for (int32_t i = 0; i < rows;) { From be107b204c2ce9bcd37454caed05da919dd351d6 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 May 2024 09:54:22 +0800 Subject: [PATCH 30/67] enh(stream): support retrieve checkpoint data remotely when start stream tasks in vnodes. 
--- include/common/rsync.h | 2 +- include/common/tmsgdef.h | 1 + include/dnode/vnode/tqCommon.h | 1 + include/libs/stream/streammsg.h | 18 +- include/libs/stream/tstream.h | 32 +- source/common/src/rsync.c | 8 +- source/dnode/mgmt/mgmt_snode/src/smHandle.c | 2 + source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 2 + source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/sma/smaRollup.c | 3 +- source/dnode/vnode/src/tq/tq.c | 22 +- source/dnode/vnode/src/tqCommon/tqCommon.c | 63 +++- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 2 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 4 + source/libs/stream/inc/streamInt.h | 35 ++ source/libs/stream/src/streamBackendRocksdb.c | 14 +- source/libs/stream/src/streamCheckStatus.c | 2 +- source/libs/stream/src/streamCheckpoint.c | 341 +++++++++++++++--- source/libs/stream/src/streamDispatch.c | 182 +++++++--- source/libs/stream/src/streamExec.c | 2 +- source/libs/stream/src/streamMeta.c | 10 +- source/libs/stream/src/streamTask.c | 123 ++++++- source/libs/stream/src/streammsg.c | 8 +- 23 files changed, 716 insertions(+), 162 deletions(-) diff --git a/include/common/rsync.h b/include/common/rsync.h index f613a35f48..0840b51793 100644 --- a/include/common/rsync.h +++ b/include/common/rsync.h @@ -13,7 +13,7 @@ extern "C" { void stopRsync(); void startRsync(); -int32_t uploadRsync(const char* id, const char* path); +int32_t uploadByRsync(const char* id, const char* path); int32_t downloadRsync(const char* id, const char* path); int32_t deleteRsync(const char* id); diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 2d07b56e4c..d1ac2c79c3 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -321,6 +321,7 @@ TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_CREATE, "stream-create", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_DROP, "stream-drop", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE_TRIGGER, "stream-retri-trigger", NULL, NULL) 
TD_CLOSE_MSG_SEG(TDMT_END_STREAM_MSG) diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h index 0cde499a6b..6d52b10182 100644 --- a/include/dnode/vnode/tqCommon.h +++ b/include/dnode/vnode/tqCommon.h @@ -36,6 +36,7 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta); int32_t tqStreamTasksGetTotalNum(SStreamMeta* pMeta); int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg); int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* pMsg, bool fromVnode); int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, char* msg, int32_t msgLen); diff --git a/include/libs/stream/streammsg.h b/include/libs/stream/streammsg.h index 5436442284..87c756b10c 100644 --- a/include/libs/stream/streammsg.h +++ b/include/libs/stream/streammsg.h @@ -22,17 +22,17 @@ extern "C" { #endif -typedef struct SStreamChildEpInfo { +typedef struct SStreamUpstreamEpInfo { int32_t nodeId; int32_t childId; int32_t taskId; SEpSet epSet; bool dataAllowed; // denote if the data from this upstream task is allowed to put into inputQ, not serialize it int64_t stage; // upstream task stage value, to denote if the upstream node has restart/replica changed/transfer -} SStreamChildEpInfo; +} SStreamUpstreamEpInfo; -int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); -int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo); +int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamUpstreamEpInfo* pInfo); +int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamUpstreamEpInfo* pInfo); // mndTrigger: denote if this checkpoint is triggered by mnode or as requested from tasks when transfer-state finished typedef struct { @@ -171,6 +171,16 @@ int32_t 
tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pRsp); int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pRsp); void tCleanupStreamHbMsg(SStreamHbMsg* pMsg); +typedef struct SRetrieveChkptTriggerReq { + SMsgHead head; + int64_t streamId; + int64_t checkpointId; + int32_t upstreamNodeId; + int32_t upstreamTaskId; + int32_t downstreamNodeId; + int64_t downstreamTaskId; +} SRetrieveChkptTriggerReq; + typedef struct { SMsgHead head; int64_t streamId; diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index ea0f1824b3..185ab7ad51 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -61,10 +61,11 @@ extern "C" { // the load and start stream task should be executed after snode has started successfully, since the load of stream // tasks may incur the download of checkpoint data from remote, which may consume significant network and CPU resources. -typedef struct SStreamTask SStreamTask; -typedef struct SStreamQueue SStreamQueue; -typedef struct SStreamTaskSM SStreamTaskSM; -typedef struct SStreamQueueItem SStreamQueueItem; +typedef struct SStreamTask SStreamTask; +typedef struct SStreamQueue SStreamQueue; +typedef struct SStreamTaskSM SStreamTaskSM; +typedef struct SStreamQueueItem SStreamQueueItem; +typedef struct SActiveCheckpointInfo SActiveCheckpointInfo; #define SSTREAM_TASK_VER 4 #define SSTREAM_TASK_INCOMPATIBLE_VER 1 @@ -270,13 +271,10 @@ typedef struct SCheckpointInfo { int64_t checkpointTime; // latest checkpoint time int64_t processedVer; int64_t nextProcessVer; // current offset in WAL, not serialize it - int64_t failedId; // record the latest failed checkpoint id - int64_t checkpointingId; - int32_t downstreamAlignNum; int32_t numOfNotReady; - bool dispatchCheckpointTrigger; + + SActiveCheckpointInfo* pActiveInfo; int64_t msgVer; - int32_t transId; } SCheckpointInfo; typedef struct SStreamStatus { @@ -436,7 +434,6 @@ struct SStreamTask { SHistoryTaskInfo hTaskInfo; STaskId streamTaskId; 
STaskExecStatisInfo execInfo; - SArray* pReadyMsgList; // SArray TdThreadMutex lock; // secure the operation of set task status and puting data into inputQ SMsgCb* pMsgCb; // msg handle SStreamState* pState; // state backend @@ -619,7 +616,8 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask); int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg); int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code); -SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); +SStreamUpstreamEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); +SEpSet* streamTaskGetDownstreamEpInfo(SStreamTask* pTask, int32_t taskId); void streamTaskInputFail(SStreamTask* pTask); @@ -672,6 +670,17 @@ int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated); int32_t streamExecScanHistoryInFuture(SStreamTask* pTask, int32_t idleDuration); bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer); +// checkpoint related +int32_t streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTransId, int64_t* pCheckpointId); +int32_t streamTaskSetActiveCheckpointInfo(SStreamTask* pTask, int64_t activeCheckpointId); +int32_t streamTaskSetFailedChkptInfo(SStreamTask* pTask, int32_t transId, int64_t checkpointId); +bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId); +void streamTaskGetTriggerRecvStatus(SStreamTask* pTask, int32_t* pRecved, int32_t* pTotal); +void streamTaskInitTriggerDispatchInfo(SStreamTask* pTask); +void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId); +int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t checkpointType, int32_t dstTaskId, int32_t vgId, + SEpSet* pEpset); + int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); // common @@ -682,6 +691,7 @@ int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const 
SStreamTask* pUpstre void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask); int32_t streamTaskReleaseState(SStreamTask* pTask); int32_t streamTaskReloadState(SStreamTask* pTask); +void streamTaskOpenUpstreamInput(SStreamTask* pTask, int32_t taskId); void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key); diff --git a/source/common/src/rsync.c b/source/common/src/rsync.c index c4d14a6c2c..1b03b142e0 100644 --- a/source/common/src/rsync.c +++ b/source/common/src/rsync.c @@ -153,7 +153,7 @@ void startRsync() { uDebug("[rsync] start server successful"); } -int32_t uploadRsync(const char* id, const char* path) { +int32_t uploadByRsync(const char* id, const char* path) { int64_t st = taosGetTimestampMs(); char command[PATH_MAX] = {0}; @@ -196,6 +196,7 @@ int32_t uploadRsync(const char* id, const char* path) { return code; } +// abort from retry if quit int32_t downloadRsync(const char* id, const char* path) { int64_t st = taosGetTimestampMs(); int32_t MAX_RETRY = 60; @@ -220,11 +221,10 @@ int32_t downloadRsync(const char* id, const char* path) { uDebug("[rsync] %s start to sync data from remote to:%s, %s", id, path, command); while(times++ < MAX_RETRY) { - code = execCommand(command); if (code != TSDB_CODE_SUCCESS) { - uError("[rsync] %s download checkpoint data:%s failed, retry after 1sec, code:%d," ERRNO_ERR_FORMAT, id, path, code, - ERRNO_ERR_DATA); + uError("[rsync] %s download checkpoint data:%s failed, retry after 1sec, times:%d, code:%d," ERRNO_ERR_FORMAT, id, + path, times, code, ERRNO_ERR_DATA); taosSsleep(1); } else { int32_t el = taosGetTimestampMs() - st; diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index b3c8ef4017..9b07b6a3d8 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ 
b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -89,6 +89,8 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index d9175bf5fe..bfc9e92293 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -961,6 +961,8 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, 
vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 9439f7f179..f3bdc98994 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -257,6 +257,7 @@ int tqScanWalAsync(STQ* pTq, bool ckPause); int32_t tqStopStreamTasksAsync(STQ* pTq); int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp); int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskRetrieveTriggerMsg(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 5441d0c4c1..b8d0e30d30 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1285,7 +1285,8 @@ _checkpoint: if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; // atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); - pTask->chkInfo.checkpointingId = checkpointId; + streamTaskSetActiveCheckpointInfo(pTask, checkpointId); + pTask->chkInfo.checkpointId = checkpointId; // 1pTask->checkpointingId; pTask->chkInfo.checkpointVer = pItem->submitReqVer; pTask->info.triggerParam = pItem->fetchResultVer; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index a59a235c50..037c2a7b7a 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -725,10 +725,6 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { } pTask->pBackend = NULL; -// code = tqExpandStreamTask(pTask, pTq->pStreamMeta); -// if (code != TSDB_CODE_SUCCESS) { -// return code; -// } // sink STaskOutputInfo* pOutputInfo = 
&pTask->outputInfo; @@ -1092,6 +1088,10 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) int32_t len = pMsg->contLen - sizeof(SMsgHead); int32_t code = 0; +// if (pTq->pStreamMeta->vgId == 2) { +// ASSERT(0); +// } + // disable auto rsp to mnode pRsp->info.handle = NULL; @@ -1140,10 +1140,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) } if (pTask->status.downstreamReady != 1) { - pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id - pTask->chkInfo.checkpointingId = req.checkpointId; - pTask->chkInfo.transId = req.transId; - + streamTaskSetFailedChkptInfo(pTask, req.transId, req.checkpointId); // record the latest failed checkpoint id tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpointId:%" PRId64 ", transId:%d set it failed", pTask->id.idStr, req.checkpointId, req.transId); @@ -1182,9 +1179,12 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) // check if the checkpoint msg already sent or not. if (status == TASK_STATUS__CK) { + int64_t checkpointId = 0; + streamTaskGetActiveCheckpointInfo(pTask, NULL, &checkpointId); + tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64 " transId:%d already handled, ignore msg and continue process checkpoint", - pTask->id.idStr, pTask->chkInfo.checkpointingId, req.transId); + pTask->id.idStr, checkpointId, req.transId); taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); @@ -1244,6 +1244,10 @@ int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { return tqStreamTaskProcessTaskResetReq(pTq->pStreamMeta, pMsg); } +int32_t tqProcessTaskRetrieveTriggerMsg(STQ* pTq, SRpcMsg* pMsg) { + return tqStreamTaskProcessRetrieveTriggerReq(pTq->pStreamMeta, pMsg); +} + // this function is needed, do not try to remove it. 
int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg) { return tqStreamProcessStreamHbRsp(pTq->pStreamMeta, pMsg); diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index b5b5ef8755..b9c0589dc5 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -699,9 +699,7 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { } if (isLeader && !tsDisableStream) { - streamMetaResetTaskStatus(pMeta); streamMetaWUnLock(pMeta); - streamMetaStartAllTasks(pMeta, tqExpandStreamTask); } else { streamMetaResetStartInfo(&pMeta->startInfo); @@ -839,13 +837,19 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { // clear flag set during do checkpoint, and open inputQ for all upstream tasks SStreamTaskState *pState = streamTaskGetStatus(pTask); if (pState->state == TASK_STATUS__CK) { + int32_t tranId = 0; + int64_t activeChkId = 0; + streamTaskGetActiveCheckpointInfo(pTask, &tranId, &activeChkId); + tqDebug("s-task:%s reset task status from checkpoint, current checkpointingId:%" PRId64 ", transId:%d", - pTask->id.idStr, pTask->chkInfo.checkpointingId, pTask->chkInfo.transId); + pTask->id.idStr, activeChkId, tranId); + streamTaskSetStatusReady(pTask); } else if (pState->state == TASK_STATUS__UNINIT) { tqDebug("s-task:%s start task by checking downstream tasks", pTask->id.idStr); ASSERT(pTask->status.downstreamReady == 0); - /*int32_t ret = */ streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_INIT); +// /*int32_t ret = */ streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_INIT); + tqStreamStartOneTaskAsync(pMeta, pTask->pMsgCb, pTask->id.streamId, pTask->id.taskId); } else { tqDebug("s-task:%s status:%s do nothing after receiving reset-task from mnode", pTask->id.idStr, pState->name); } @@ -856,6 +860,57 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { return TSDB_CODE_SUCCESS; } +int32_t 
tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { + SRetrieveChkptTriggerReq* pReq = (SRetrieveChkptTriggerReq*) pMsg; + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->upstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d process retrieve checkpoint trigger, checkpointId:%" PRId64 + " from s-task:0x%x, failed to acquire task:0x%x, it may have been dropped already", + pMeta->vgId, pReq->checkpointId, (int32_t)pReq->downstreamTaskId, pReq->upstreamTaskId); + return TSDB_CODE_SUCCESS; + } + + tqDebug("s-task:0x%x recv retrieve checkpoint-trigger msg from downstream s-task:0x%x, checkpointId:%" PRId64, + pReq->upstreamTaskId, (int32_t)pReq->downstreamTaskId, pReq->checkpointId); + + SStreamTaskState* pState = streamTaskGetStatus(pTask); + if (pState->state == TASK_STATUS__CK) { // recv the checkpoint-source/trigger already + int32_t transId = 0; + int64_t checkpointId = 0; + + streamTaskGetActiveCheckpointInfo(pTask, &transId, &checkpointId); + ASSERT (checkpointId == pReq->checkpointId); + + if (streamTaskAlreadySendTrigger(pTask, pReq->downstreamNodeId)) { + // re-send the lost checkpoint-trigger msg to downstream task + SEpSet* pEpset = streamTaskGetDownstreamEpInfo(pTask, pReq->downstreamTaskId); + streamTaskSendCheckpointTriggerMsg(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER, pReq->downstreamTaskId, + pReq->downstreamNodeId, pEpset); + } else { // not send checkpoint-trigger yet, wait + int32_t recv = 0, total = 0; + streamTaskGetTriggerRecvStatus(pTask, &recv, &total); + + if (recv == total) { // add the ts info + tqWarn("s-task:%s all upstream send checkpoint-source/trigger, but not processed yet, wait", pTask->id.idStr); + } else { + tqWarn( + "s-task:%s not all upstream send checkpoint-source/trigger, total recv:%d/%d, wait for all upstream " + "sending checkpoint-source/trigger", + pTask->id.idStr, recv, total); + } + } + } else { // upstream not recv the checkpoint-source/trigger till now + 
ASSERT(pState->state == TASK_STATUS__READY || pState->state == TASK_STATUS__HALT); + tqWarn( + "s-task:%s not recv checkpoint-source from mnode or checkpoint-trigger from upstream yet, wait for all " + "upstream sending checkpoint-source/trigger", + pTask->id.idStr); + } + + return TSDB_CODE_SUCCESS; +} + int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg) { SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg; diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index ea742108aa..fd1bb391b2 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -422,7 +422,7 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) } // STREAM ============ - vInfo("vgId:%d stream task start", vgId); + vInfo("vgId:%d stream task start to take snapshot", vgId); if (!pReader->streamTaskDone) { if (pReader->pStreamTaskReader == NULL) { code = streamTaskSnapReaderOpen(pReader->pVnode->pTq, pReader->sver, pReader->sver, &pReader->pStreamTaskReader); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index f6b32c5543..02343206ad 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -842,12 +842,16 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) return tqProcessTaskScanHistory(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_CHECKPOINT_READY: return tqProcessTaskCheckpointReadyMsg(pVnode->pTq, pMsg); + case TDMT_STREAM_RETRIEVE_TRIGGER: + return tqProcessTaskRetrieveTriggerMsg(pVnode->pTq, pMsg); case TDMT_MND_STREAM_HEARTBEAT_RSP: return tqProcessStreamHbRsp(pVnode->pTq, pMsg); case TDMT_MND_STREAM_REQ_CHKPT_RSP: return tqProcessStreamReqCheckpointRsp(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_CHECKPOINT_READY_RSP: return tqProcessTaskCheckpointReadyRsp(pVnode->pTq, pMsg); + case TDMT_STREAM_RETRIEVE_TRIGGER_RSP: + return 
tqProcessTaskCheckpointReadyMsg(pVnode->pTq, pMsg); case TDMT_VND_GET_STREAM_PROGRESS: return tqStreamProgressRetrieveReq(pVnode->pTq, pMsg); default: diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 10bdccdb29..68c3ab1a6b 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -52,6 +52,18 @@ extern "C" { #define stTrace(...) do { if (stDebugFlag & DEBUG_TRACE) { taosPrintLog("STM ", DEBUG_TRACE, stDebugFlag, __VA_ARGS__); }} while(0) // clang-format on +struct SActiveCheckpointInfo { + TdThreadMutex lock; + int32_t transId; + int64_t firstRecvTs; // first time to recv checkpoint trigger info + int64_t activeId; // current active checkpoint id + int64_t failedId; + bool dispatchTrigger; + SArray* pDispatchTriggerList; // SArray + SArray* pReadyMsgList; // SArray + tmr_h pCheckTmr; +}; + typedef struct { int8_t type; SSDataBlock* pBlock; @@ -81,6 +93,24 @@ struct STokenBucket { int64_t quotaFillTimestamp; // fill timestamp }; +typedef struct { + int32_t upStreamTaskId; + SEpSet upstreamNodeEpset; + int32_t nodeId; + SRpcMsg msg; + int64_t recvTs; + int32_t transId; + int64_t checkpointId; +} SStreamChkptReadyInfo; + +typedef struct { + int64_t sendTs; + int64_t recvTs; + bool recved; + int32_t nodeId; + int32_t taskId; +} STaskTriggerSendInfo; + struct SStreamQueue { STaosQueue* pQueue; STaosQall* qall; @@ -113,6 +143,9 @@ void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups); int32_t getNumOfDispatchBranch(SStreamTask* pTask); void clearBufferedDispatchMsg(SStreamTask* pTask); +int32_t streamTaskBuildAndSendTriggerMsg(SStreamTask* pTask, const SStreamDataBlock* pData, int32_t dstTaskId, + int32_t vgId, SEpSet* pEpset); + int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); SStreamDataBlock* 
createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamTask* pTask, int64_t resultSize, @@ -131,11 +164,13 @@ int32_t streamTaskSendCheckpointReq(SStreamTask* pTask); void streamTaskSetFailedCheckpointId(SStreamTask* pTask); int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask); +int32_t streamTaskGetNumOfUpstream(const SStreamTask* pTask); int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const char*); STaskId streamTaskGetTaskId(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); int32_t streamTaskResetTimewindowFilter(SStreamTask* pTask); +void streamTaskClearActiveInfo(SActiveCheckpointInfo* pInfo); void streamClearChkptReadyMsg(SStreamTask* pTask); EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 28db0a76c6..87fb615d5f 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -386,12 +386,12 @@ int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId return code; } -int32_t rebuildFromRemoteCheckpoint(const char* key, char* chkpPath, int64_t checkpointId, char* defaultPath) { +int32_t rebuildFromRemoteCheckpoint(const char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { ECHECKPOINT_BACKUP_TYPE type = streamGetCheckpointBackupType(); if (type == DATA_UPLOAD_S3) { - return rebuildFromRemoteChkp_s3(key, chkpPath, checkpointId, defaultPath); + return rebuildFromRemoteChkp_s3(key, chkptPath, checkpointId, defaultPath); } else if (type == DATA_UPLOAD_RSYNC) { - return rebuildFromRemoteChkp_rsync(key, chkpPath, checkpointId, defaultPath); + return rebuildFromRemoteChkp_rsync(key, chkptPath, checkpointId, defaultPath); } else { 
stError("%s no remote backup checkpoint data for:%" PRId64, key, checkpointId); } @@ -538,7 +538,9 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId taosMulMkDir(defaultPath); } - char* checkpointRoot = taosMemoryCalloc(1, strlen(path) + 256); + int32_t pathLen = strlen(path) + 256; + + char* checkpointRoot = taosMemoryCalloc(1, pathLen); sprintf(checkpointRoot, "%s%s%s", prefixPath, TD_DIRSEP, "checkpoints"); if (!taosIsDir(checkpointRoot)) { @@ -548,9 +550,9 @@ int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId stDebug("%s check local backend dir:%s, checkpointId:%" PRId64 " succ", key, defaultPath, chkptId); - char* chkptPath = taosMemoryCalloc(1, strlen(path) + 256); + char* chkptPath = taosMemoryCalloc(1, pathLen); if (chkptId > 0) { - sprintf(chkptPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkptId); + snprintf(chkptPath, pathLen, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkptId); code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath); if (code != 0) { diff --git a/source/libs/stream/src/streamCheckStatus.c b/source/libs/stream/src/streamCheckStatus.c index 4a8ca69ba5..f083ff8a61 100644 --- a/source/libs/stream/src/streamCheckStatus.c +++ b/source/libs/stream/src/streamCheckStatus.c @@ -40,7 +40,7 @@ static SDownstreamStatusInfo* findCheckRspStatus(STaskCheckInfo* pInfo, int32_t int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage, int64_t* oldStage) { - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); ASSERT(pInfo != NULL); *oldStage = pInfo->stage; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index d09e5bf477..43b39b8574 100644 --- 
a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -20,9 +20,9 @@ typedef struct { ECHECKPOINT_BACKUP_TYPE type; - char* taskId; - int64_t chkpId; + char* taskId; + int64_t chkpId; SStreamTask* pTask; int64_t dbRefId; void* pMeta; @@ -35,22 +35,33 @@ static int32_t deleteCheckpoint(const char* id); static int32_t downloadCheckpointByNameS3(const char* id, const char* fname, const char* dstName); static int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask); static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType); -static int32_t streamAlignCheckpoint(SStreamTask* pTask); +static int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList); +static SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType); +static void checkpointTriggerMonitorFn(void* param, void* tmrId); -int32_t streamAlignCheckpoint(SStreamTask* pTask) { - int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); - int64_t old = atomic_val_compare_exchange_32(&pTask->chkInfo.downstreamAlignNum, 0, num); - if (old == 0) { - stDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num); +bool streamTaskIsAllUpstreamSendTrigger(SStreamTask* pTask) { + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + int32_t numOfUpstreams = taosArrayGetSize(pTask->upstreamInfo.pList); + bool allSend = true; + + taosThreadMutexLock(&pActiveInfo->lock); + int32_t numOfRecv = taosArrayGetSize(pActiveInfo->pReadyMsgList); + + if (numOfRecv < numOfUpstreams) { + stDebug("s-task:%s received checkpoint-trigger block, idx:%d, %d upstream tasks not send yet, total:%d", + pTask->id.idStr, pTask->info.selfChildId, (numOfUpstreams - numOfRecv), numOfUpstreams); + allSend = false; } - return atomic_sub_fetch_32(&pTask->chkInfo.downstreamAlignNum, 1); + taosThreadMutexUnlock(&pActiveInfo->lock); + return allSend; } -int32_t 
appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { +SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType) { SStreamDataBlock* pChkpoint = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); if (pChkpoint == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; } pChkpoint->type = checkpointType; @@ -58,12 +69,13 @@ int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); if (pBlock == NULL) { taosFreeQitem(pChkpoint); - return TSDB_CODE_OUT_OF_MEMORY; + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; } pBlock->info.type = STREAM_CHECKPOINT; - pBlock->info.version = pTask->chkInfo.checkpointingId; - pBlock->info.window.ekey = pBlock->info.window.skey = pTask->chkInfo.transId; // NOTE: set the transId + pBlock->info.version = pTask->chkInfo.pActiveInfo->activeId; + pBlock->info.window.ekey = pBlock->info.window.skey = pTask->chkInfo.pActiveInfo->transId; // NOTE: set the transId pBlock->info.rows = 1; pBlock->info.childId = pTask->info.selfChildId; @@ -71,6 +83,14 @@ int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { taosArrayPush(pChkpoint->blocks, pBlock); taosMemoryFree(pBlock); + terrno = 0; + + return pChkpoint; +} + +int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { + SStreamDataBlock* pChkpoint = createChkptTriggerBlock(pTask, checkpointType); + if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pChkpoint) < 0) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -87,23 +107,43 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo int32_t code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); ASSERT(code == TSDB_CODE_SUCCESS); - pTask->chkInfo.transId = pReq->transId; - pTask->chkInfo.checkpointingId = pReq->checkpointId; + 
pTask->chkInfo.pActiveInfo->transId = pReq->transId; + pTask->chkInfo.pActiveInfo->activeId = pReq->checkpointId; pTask->chkInfo.numOfNotReady = streamTaskGetNumOfDownstream(pTask); pTask->chkInfo.startTs = taosGetTimestampMs(); pTask->execInfo.checkpoint += 1; // 2. Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task + // and this is the last item in the inputQ. return appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); } +int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t checkpointType, int32_t dstTaskId, int32_t vgId, + SEpSet* pEpset) { + SStreamDataBlock* pChkpoint = createChkptTriggerBlock(pTask, checkpointType); + + pChkpoint->srcTaskId = pTask->id.taskId; + pChkpoint->srcVgId = pTask->pMeta->vgId; + + int32_t code = streamTaskBuildAndSendTriggerMsg(pTask, pChkpoint, dstTaskId, vgId, pEpset); + if (code == TSDB_CODE_SUCCESS) { + stDebug("s-task:%s build and send checkpoint-trigger dispatch msg succ, stage:%" PRId64, pTask->id.idStr, + pTask->pMeta->stage); + } else { + // todo handle send data failure + stError("s-task:%s failed to build and send trigger msg", pTask->id.idStr); + } + + return code; +} + int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask) { pBlock->srcTaskId = pTask->id.taskId; pBlock->srcVgId = pTask->pMeta->vgId; int32_t code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock); if (code == 0) { - ASSERT(pTask->chkInfo.dispatchCheckpointTrigger == false); + ASSERT(pTask->chkInfo.pActiveInfo->dispatchTrigger == false); streamDispatchStreamBlock(pTask); } else { stError("s-task:%s failed to put checkpoint into outputQ, code:%s", pTask->id.idStr, tstrerror(code)); @@ -127,8 +167,8 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock // set task status if (streamTaskGetStatus(pTask)->state != TASK_STATUS__CK) { - pTask->chkInfo.checkpointingId = checkpointId; - 
pTask->chkInfo.transId = transId; + pTask->chkInfo.pActiveInfo->activeId = checkpointId; + pTask->chkInfo.pActiveInfo->transId = transId; code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); if (code != TSDB_CODE_SUCCESS) { @@ -136,12 +176,20 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock streamFreeQitem((SStreamQueueItem*)pBlock); return code; } + + SActiveCheckpointInfo* pActive = pTask->chkInfo.pActiveInfo; + if (pActive->pCheckTmr == NULL) { + pActive->pCheckTmr = taosTmrStart(checkpointTriggerMonitorFn, 10000, pTask, streamTimer); + } else { + taosTmrReset(checkpointTriggerMonitorFn, 10000, pTask, streamTimer, &pActive->pCheckTmr); + } } // todo fix race condition: set the status and append checkpoint block int32_t taskLevel = pTask->info.taskLevel; if (taskLevel == TASK_LEVEL__SOURCE) { int8_t type = pTask->outputInfo.type; + if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) { stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); continueDispatchCheckpointTriggerBlock(pBlock, pTask); @@ -161,23 +209,19 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock streamAddCheckpointReadyMsg(pTask, pBlock->srcTaskId, pTask->info.selfChildId, checkpointId); // there are still some upstream tasks not send checkpoint request, do nothing and wait for then - int32_t notReady = streamAlignCheckpoint(pTask); - int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); - if (notReady > 0) { - stDebug("s-task:%s received checkpoint block, idx:%d, %d upstream tasks not send checkpoint info yet, total:%d", - id, pTask->info.selfChildId, notReady, num); + bool allSend = streamTaskIsAllUpstreamSendTrigger(pTask); + if (!allSend) { streamFreeQitem((SStreamQueueItem*)pBlock); return code; } + int32_t num = streamTaskGetNumOfUpstream(pTask); if (taskLevel == TASK_LEVEL__SINK) { - stDebug("s-task:%s 
process checkpoint block, all %d upstreams sent checkpoint msgs, send ready msg to upstream", - id, num); + stDebug("s-task:%s process checkpoint-trigger block, all %d upstreams sent, send ready msg to upstream", id, num); streamFreeQitem((SStreamQueueItem*)pBlock); streamTaskBuildCheckpoint(pTask); } else { // source & agg tasks need to forward the checkpoint msg downwards - stDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, continue forwards msg", id, - num); + stDebug("s-task:%s process checkpoint-trigger block, all %d upstreams sent, forwards to downstream", id, num); // set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task // can start local checkpoint procedure @@ -216,14 +260,10 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { } void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { - pTask->chkInfo.checkpointingId = 0; // clear the checkpoint id - pTask->chkInfo.failedId = 0; pTask->chkInfo.startTs = 0; // clear the recorded start time pTask->chkInfo.numOfNotReady = 0; - pTask->chkInfo.transId = 0; - pTask->chkInfo.dispatchCheckpointTrigger = false; - pTask->chkInfo.downstreamAlignNum = 0; + streamTaskClearActiveInfo(pTask->chkInfo.pActiveInfo); streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks if (clearChkpReadyMsg) { streamClearChkptReadyMsg(pTask); @@ -317,9 +357,9 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpoin } void streamTaskSetFailedCheckpointId(SStreamTask* pTask) { - pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId; + pTask->chkInfo.pActiveInfo->failedId = pTask->chkInfo.pActiveInfo->activeId; stDebug("s-task:%s mark the checkpointId:%" PRId64 " (transId:%d) failed", pTask->id.idStr, - pTask->chkInfo.checkpointingId, pTask->chkInfo.transId); + pTask->chkInfo.pActiveInfo->activeId, pTask->chkInfo.pActiveInfo->transId); } static int32_t 
getCheckpointDataMeta(const char* id, const char* path, SArray* list) { @@ -363,7 +403,7 @@ int32_t uploadCheckpointData(void* param) { SAsyncUploadArg* pParam = param; char* path = NULL; int32_t code = 0; - SArray* toDelFiles = taosArrayInit(4, sizeof(void*)); + SArray* toDelFiles = taosArrayInit(4, POINTER_BYTES); char* taskStr = pParam->taskId ? pParam->taskId : "NULL"; void* pBackend = taskAcquireDb(pParam->dbRefId); @@ -387,10 +427,10 @@ int32_t uploadCheckpointData(void* param) { if (code == TSDB_CODE_SUCCESS) { code = streamTaskUploadCheckpoint(pParam->taskId, path); - if (code != TSDB_CODE_SUCCESS) { - stError("s-task:%s failed to upload checkpoint data:%s, checkpointId:%" PRId64, taskStr, path, pParam->chkpId); + if (code == TSDB_CODE_SUCCESS) { + stDebug("s-task:%s upload checkpointId:%" PRId64 " to remote succ", taskStr, pParam->chkpId); } else { - stDebug("s-task:%s backup checkpointId:%"PRId64" to remote succ", taskStr, pParam->chkpId); + stError("s-task:%s failed to upload checkpointId:%" PRId64 " data:%s", taskStr, pParam->chkpId, path); } } @@ -403,19 +443,25 @@ int32_t uploadCheckpointData(void* param) { for (int i = 0; i < size; i++) { char* pName = taosArrayGetP(toDelFiles, i); code = deleteCheckpointFile(pParam->taskId, pName); - stDebug("s-task:%s try to del file: %s", taskStr, pName); if (code != 0) { + stDebug("s-task:%s failed to del file: %s", taskStr, pName); break; } } + + stDebug("s-task:%s remove redundant files done", taskStr); } taosArrayDestroyP(toDelFiles, taosMemoryFree); - stDebug("s-task:%s remove local checkpoint dir:%s", taskStr, path); - taosRemoveDir(path); - taosMemoryFree(path); + if (code == TSDB_CODE_SUCCESS) { + stDebug("s-task:%s remove local checkpointId:%" PRId64 " data %s", taskStr, pParam->chkpId, path); + taosRemoveDir(path); + } else { + stDebug("s-task:%s update checkpointId:%" PRId64 " keep local checkpoint data", taskStr, pParam->chkpId); + } + taosMemoryFree(path); taosMemoryFree(pParam->taskId); 
taosMemoryFree(pParam); @@ -446,7 +492,7 @@ int32_t streamTaskRemoteBackupCheckpoint(SStreamTask* pTask, int64_t checkpointI int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { int32_t code = TSDB_CODE_SUCCESS; int64_t startTs = pTask->chkInfo.startTs; - int64_t ckId = pTask->chkInfo.checkpointingId; + int64_t ckId = pTask->chkInfo.pActiveInfo->activeId; const char* id = pTask->id.idStr; bool dropRelHTask = (streamTaskGetPrevStatus(pTask) == TASK_STATUS__HALT); SStreamMeta* pMeta = pTask->pMeta; @@ -510,6 +556,211 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { return code; } +void checkpointTriggerMonitorFn(void* param, void* tmrId) { + SStreamTask* pTask = param; + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + + int32_t vgId = pTask->pMeta->vgId; + int64_t now = taosGetTimestampMs(); + stDebug("s-task:%s vgId:%d checkpoint-trigger monit start, ts:%" PRId64, pTask->id.idStr, vgId, now); + + taosThreadMutexLock(&pTask->lock); + SStreamTaskState* pState = streamTaskGetStatus(pTask); + if (pState->state == TASK_STATUS__CK) { + stDebug("s-task:%s vgId:%d not in checkpoint status, quit from monitor checkpoint-trigger", pTask->id.idStr, vgId); + taosThreadMutexUnlock(&pTask->lock); + return; + } + taosThreadMutexUnlock(&pTask->lock); + + taosThreadMutexLock(&pActiveInfo->lock); + + // send msg to retrieve checkpoint trigger msg + SArray* pList = pTask->upstreamInfo.pList; + ASSERT(pTask->info.taskLevel > TASK_LEVEL__SOURCE); + SArray* pNotSendList = taosArrayInit(4, sizeof(SStreamUpstreamEpInfo)); + + for(int32_t i = 0; i < taosArrayGetSize(pList); ++i) { + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pList, i); + + bool recved = false; + for(int32_t j = 0; j < taosArrayGetSize(pActiveInfo->pReadyMsgList); ++j) { + SStreamChkptReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, j); + if (pInfo->nodeId == pReady->nodeId) { + recved = true; + break; + } + } + + if (!recved) { // make sure the inputQ is opened for not recv 
upstream checkpoint-trigger message + streamTaskOpenUpstreamInput(pTask, pInfo->taskId); + taosArrayPush(pNotSendList, pInfo); + } + } + + // do send retrieve checkpoint trigger msg to upstream + doSendRetrieveTriggerMsg(pTask, pNotSendList); + taosThreadMutexUnlock(&pActiveInfo->lock); + + // check every 100ms + if (taosArrayGetSize(pNotSendList) > 0) { + taosTmrReset(checkpointTriggerMonitorFn, 10000, pTask, streamTimer, &pActiveInfo->pCheckTmr); + } + + taosArrayDestroy(pNotSendList); +} + +int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList) { + int32_t code = 0; + int32_t vgId = pTask->pMeta->vgId; + const char* pId = pTask->id.idStr; + + for (int32_t i = 0; i < taosArrayGetSize(pNotSendList); i++) { + SStreamUpstreamEpInfo* pUpstreamTask = taosArrayGet(pNotSendList, i); + + SRetrieveChkptTriggerReq* pReq = rpcMallocCont(sizeof(SRetrieveChkptTriggerReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + stError("vgId:%d failed to create msg to retrieve trigger msg for task:%s exec, code:out of memory", vgId, pId); + continue; + } + + pReq->head.vgId = htonl(pUpstreamTask->nodeId); + pReq->streamId = pTask->id.streamId; + pReq->downstreamTaskId = pTask->id.taskId; + pReq->downstreamNodeId = vgId; + pReq->upstreamTaskId = pUpstreamTask->taskId; + pReq->upstreamNodeId = pUpstreamTask->nodeId; + pReq->checkpointId = pTask->chkInfo.pActiveInfo->activeId; + + SRpcMsg rpcMsg = {0}; + initRpcMsg(&rpcMsg, TDMT_STREAM_RETRIEVE, pReq, sizeof(SRetrieveChkptTriggerReq)); + + code = tmsgSendReq(&pUpstreamTask->epSet, &rpcMsg); + } + + return TSDB_CODE_SUCCESS; +} + +bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId) { + SStreamTaskState* pStatus = streamTaskGetStatus(pTask); + if (pStatus->state != TASK_STATUS__CK) { + return false; + } + + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + + taosThreadMutexLock(&pInfo->lock); + if (!pInfo->dispatchTrigger) { + taosThreadMutexUnlock(&pInfo->lock); + 
return false; + } + + for(int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { + STaskTriggerSendInfo* pSendInfo = taosArrayGet(pInfo->pDispatchTriggerList, i); + if (pSendInfo->nodeId != downstreamNodeId) { + continue; + } + + // has send trigger msg to downstream node, + if (pSendInfo->recved) { + stWarn("s-task:%s checkpoint-trigger msg send at:%"PRId64" and recv confirmed, checkpointId:%"PRId64 ", transId:%d", + pTask->id.idStr, pSendInfo->sendTs, pInfo->activeId, pInfo->transId); + } else { + stWarn("s-task:%s checkpoint-trigger send at:%"PRId64", checkpointId:%"PRId64", transId:%d", pTask->id.idStr, + pSendInfo->sendTs, pInfo->activeId, pInfo->transId); + } + + taosThreadMutexUnlock(&pInfo->lock); + return true; + } + + ASSERT(0); + return false; +} + +void streamTaskGetTriggerRecvStatus(SStreamTask* pTask, int32_t* pRecved, int32_t* pTotal) { + *pRecved = taosArrayGetSize(pTask->chkInfo.pActiveInfo->pReadyMsgList); + + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + *pTotal = 1; + } else { + *pTotal = streamTaskGetNumOfUpstream(pTask); + } +} + +// record the dispatch checkpoint trigger info in the list +void streamTaskInitTriggerDispatchInfo(SStreamTask* pTask) { + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + + int64_t now = taosGetTimestampMs(); + taosThreadMutexLock(&pInfo->lock); + + // outputQ should be empty here + ASSERT(streamQueueGetNumOfItems(pTask->outputq.queue) == 1); + + pInfo->dispatchTrigger = true; + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + STaskDispatcherFixed* pDispatch = &pTask->outputInfo.fixedDispatcher; + + STaskTriggerSendInfo p = {.sendTs = now, .recved = false, .nodeId = pDispatch->nodeId, .taskId = pDispatch->taskId}; + taosArrayPush(pInfo->pDispatchTriggerList, &p); + } else { + for (int32_t i = 0; i < streamTaskGetNumOfDownstream(pTask); ++i) { + SVgroupInfo* pVgInfo = taosArrayGet(pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos, i); + + STaskTriggerSendInfo p 
= {.sendTs = now, .recved = false, .nodeId = pVgInfo->vgId, .taskId = pVgInfo->taskId}; + taosArrayPush(pInfo->pDispatchTriggerList, &p); + } + } + + taosThreadMutexUnlock(&pInfo->lock); +} + +int32_t streamTaskGetNumOfConfirmed(SStreamTask* pTask) { + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + + int32_t num = 0; + taosThreadMutexLock(&pInfo->lock); + for(int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { + STaskTriggerSendInfo* p = taosArrayGet(pInfo->pDispatchTriggerList, i); + if (p->recved) { + num ++; + } + } + taosThreadMutexUnlock(&pInfo->lock); + return num; +} + +void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId) { + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + + int32_t taskId = 0; + + taosThreadMutexLock(&pInfo->lock); + + for (int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { + STaskTriggerSendInfo* p = taosArrayGet(pInfo->pDispatchTriggerList, i); + if (p->nodeId == vgId) { + ASSERT(p->recved == false); + + p->recved = true; + p->recvTs = taosGetTimestampMs(); + taskId = p->taskId; + break; + } + } + + taosThreadMutexUnlock(&pInfo->lock); + + int32_t numOfConfirmed = streamTaskGetNumOfConfirmed(pTask); + int32_t total = streamTaskGetNumOfDownstream(pTask); + stDebug("s-task:%s set downstream:0x%x(vgId:%d) checkpoint-trigger dispatch confirmed, total confirmed:%d/%d", + pTask->id.idStr, taskId, vgId, numOfConfirmed, total); + + ASSERT(taskId != 0); +} + static int32_t uploadCheckpointToS3(const char* id, const char* path) { TdDirPtr pDir = taosOpenDir(path); if (pDir == NULL) return -1; @@ -576,7 +827,7 @@ int32_t streamTaskUploadCheckpoint(const char* id, const char* path) { } if (strlen(tsSnodeAddress) != 0) { - return uploadRsync(id, path); + return uploadByRsync(id, path); } else if (tsS3StreamEnabled) { return uploadCheckpointToS3(id, path); } diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 
3115c2cb43..c8a626a739 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -23,12 +23,6 @@ typedef struct SBlockName { char parTbName[TSDB_TABLE_NAME_LEN]; } SBlockName; -typedef struct { - int32_t upStreamTaskId; - SEpSet upstreamNodeEpset; - SRpcMsg msg; -} SStreamChkptReadyInfo; - static void doRetryDispatchData(void* param, void* tmrId); static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet); static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq); @@ -85,12 +79,14 @@ int32_t streamTaskBroadcastRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* r void* buf = NULL; int32_t sz = taosArrayGetSize(pTask->upstreamInfo.pList); ASSERT(sz > 0); + for (int32_t i = 0; i < sz; i++) { req->reqId = tGenIdPI64(); - SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pEpInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); req->dstNodeId = pEpInfo->nodeId; req->dstTaskId = pEpInfo->taskId; int32_t len; + tEncodeSize(tEncodeStreamRetrieveReq, req, len, code); if (code != 0) { ASSERT(0); @@ -115,7 +111,6 @@ int32_t streamTaskBroadcastRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* r code = tmsgSendReq(&pEpInfo->epSet, &rpcMsg); if (code != 0) { - ASSERT(0); rpcFreeCont(buf); return code; } @@ -124,15 +119,16 @@ int32_t streamTaskBroadcastRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* r stDebug("s-task:%s (child %d) send retrieve req to task:0x%x (vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req->reqId); } + return code; } static int32_t buildStreamRetrieveReq(SStreamTask* pTask, const SSDataBlock* pBlock, SStreamRetrieveReq* req){ - SRetrieveTableRsp* pRetrieve = NULL; int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock); - - pRetrieve = taosMemoryCalloc(1, dataStrLen); - if (pRetrieve == 
NULL) return TSDB_CODE_OUT_OF_MEMORY; + SRetrieveTableRsp* pRetrieve = taosMemoryCalloc(1, dataStrLen); + if (pRetrieve == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); pRetrieve->useconds = 0; @@ -231,6 +227,28 @@ void clearBufferedDispatchMsg(SStreamTask* pTask) { pMsgInfo->dispatchMsgType = 0; } +int32_t streamTaskBuildAndSendTriggerMsg(SStreamTask* pTask, const SStreamDataBlock* pData, int32_t dstTaskId, + int32_t vgId, SEpSet* pEpset) { + SStreamDispatchReq* pReq = taosMemoryCalloc(1, sizeof(SStreamDispatchReq)); + + int32_t numOfBlocks = taosArrayGetSize(pData->blocks); + int32_t code = tInitStreamDispatchReq(pReq, pTask, pData->srcVgId, numOfBlocks, dstTaskId, pData->type); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + for (int32_t i = 0; i < numOfBlocks; i++) { + SSDataBlock* pDataBlock = taosArrayGet(pData->blocks, i); + code = streamAddBlockIntoDispatchMsg(pDataBlock, pReq); + if (code != TSDB_CODE_SUCCESS) { + destroyDispatchMsg(pReq, 1); + return code; + } + } + + return doSendDispatchMsg(pTask, pReq, vgId, pEpset); +} + static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pData) { int32_t code = 0; int32_t numOfBlocks = taosArrayGetSize(pData->blocks); @@ -357,8 +375,8 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch for (int32_t i = 0; i < numOfVgroups; i++) { if (pDispatchMsg[i].blockNum > 0) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - stDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, - pTask->info.selfChildId, pDispatchMsg[i].blockNum, pVgInfo->vgId); + stDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", id, pTask->info.selfChildId, + pDispatchMsg[i].blockNum, pVgInfo->vgId); code = doSendDispatchMsg(pTask, &pDispatchMsg[i], pVgInfo->vgId, &pVgInfo->epSet); if (code < 0) { @@ -372,8 +390,7 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, 
SStreamDispatchReq* pDispatch } } - stDebug("s-task:%s complete shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, numOfVgroups, - msgId); + stDebug("s-task:%s complete shuffle-dispatch blocks to all %d vnodes, msgId:%d", id, numOfVgroups, msgId); } return code; @@ -562,7 +579,7 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return 0; } - if (pTask->chkInfo.dispatchCheckpointTrigger) { + if (pTask->chkInfo.pActiveInfo->dispatchTrigger) { stDebug("s-task:%s already send checkpoint trigger, not dispatch anymore", id); atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); return 0; @@ -590,6 +607,10 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { } else { // todo handle build dispatch msg failed } + if (pBlock->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + streamTaskInitTriggerDispatchInfo(pTask); + } + int32_t retryCount = 0; while (1) { code = sendDispatchMsg(pTask, pTask->msgInfo.pData); @@ -624,18 +645,24 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { // this function is usually invoked by sink/agg task int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { - int32_t num = taosArrayGetSize(pTask->pReadyMsgList); + SArray* pList = pTask->chkInfo.pActiveInfo->pReadyMsgList; + + taosThreadMutexLock(&pTask->chkInfo.pActiveInfo->lock); + + int32_t num = taosArrayGetSize(pList); ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) == num); for (int32_t i = 0; i < num; ++i) { - SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, i); + SStreamChkptReadyInfo* pInfo = taosArrayGet(pList, i); tmsgSendReq(&pInfo->upstreamNodeEpset, &pInfo->msg); stDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", pTask->id.idStr, pTask->info.taskLevel, pInfo->upStreamTaskId); } - taosArrayClear(pTask->pReadyMsgList); + taosArrayClear(pList); + + taosThreadMutexUnlock(&pTask->chkInfo.pActiveInfo->lock); stDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", 
pTask->id.idStr, pTask->info.taskLevel, num); @@ -644,21 +671,22 @@ int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { // this function is only invoked by source task, and send rsp to mnode int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) { - taosThreadMutexLock(&pTask->lock); + SArray* pList = pTask->chkInfo.pActiveInfo->pReadyMsgList; + taosThreadMutexLock(&pTask->chkInfo.pActiveInfo->lock); ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); - if (taosArrayGetSize(pTask->pReadyMsgList) == 1) { - SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, 0); + if (taosArrayGetSize(pList) == 1) { + SStreamChkptReadyInfo* pInfo = taosArrayGet(pList, 0); tmsgSendRsp(&pInfo->msg); - taosArrayClear(pTask->pReadyMsgList); + taosArrayClear(pList); stDebug("s-task:%s level:%d source checkpoint completed msg sent to mnode", pTask->id.idStr, pTask->info.taskLevel); } else { stDebug("s-task:%s level:%d already send rsp checkpoint success to mnode", pTask->id.idStr, pTask->info.taskLevel); } - taosThreadMutexUnlock(&pTask->lock); + taosThreadMutexUnlock(&pTask->chkInfo.pActiveInfo->lock); return TSDB_CODE_SUCCESS; } @@ -777,17 +805,34 @@ int32_t streamTaskBuildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRp } int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask) { - SStreamChkptReadyInfo info = {0}; + SStreamChkptReadyInfo info = { + .recvTs = taosGetTimestampMs(), .transId = pReq->transId, .checkpointId = pReq->checkpointId}; + streamTaskBuildCheckpointSourceRsp(pReq, pRpcInfo, &info.msg, TSDB_CODE_SUCCESS); - if (pTask->pReadyMsgList == NULL) { - pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + taosThreadMutexLock(&pActiveInfo->lock); + + int32_t size = taosArrayGetSize(pActiveInfo->pReadyMsgList); + if (size > 0) { + ASSERT(size == 1); + + SStreamChkptReadyInfo* pReady = 
taosArrayGet(pActiveInfo->pReadyMsgList, 0); + if (pReady->transId == pReq->transId) { + stWarn("s-task:%s repeatly recv checkpoint source msg from mnode, checkpointId:%" PRId64 ", ignore", + pTask->id.idStr, pReq->checkpointId); + } else { + stError("s-task:%s checkpointId:%" PRId64 " transId:%d not completed, new transId:%d checkpointId:%" PRId64 + " recv from mnode", + pTask->id.idStr, pReady->checkpointId, pReady->transId, pReq->transId, pReq->checkpointId); + ASSERT(0); // failed to handle it + } + } else { + taosArrayPush(pActiveInfo->pReadyMsgList, &info); + stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, size); } - taosArrayPush(pTask->pReadyMsgList, &info); - - int32_t size = taosArrayGetSize(pTask->pReadyMsgList); - stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, size); + taosThreadMutexUnlock(&pActiveInfo->lock); return TSDB_CODE_SUCCESS; } @@ -799,7 +844,7 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, return TSDB_CODE_SUCCESS; } - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); SStreamCheckpointReadyMsg req = {0}; req.downstreamNodeId = pTask->pMeta->vgId; @@ -833,7 +878,14 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, ASSERT(req.upstreamTaskId != 0); - SStreamChkptReadyInfo info = {.upStreamTaskId = pInfo->taskId, .upstreamNodeEpset = pInfo->epSet}; + SStreamChkptReadyInfo info = { + .upStreamTaskId = pInfo->taskId, + .upstreamNodeEpset = pInfo->epSet, + .nodeId = req.upstreamNodeId, + .recvTs = taosGetTimestampMs(), + .checkpointId = req.checkpointId, + }; + initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 @@ -841,39 +893,65 @@ int32_t 
streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index, req.upstreamNodeId); - if (pTask->pReadyMsgList == NULL) { - pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + taosThreadMutexLock(&pActiveInfo->lock); + + bool recved = false; + int32_t size = taosArrayGetSize(pActiveInfo->pReadyMsgList); + for (int32_t i = 0; i < size; ++i) { + SStreamChkptReadyInfo* p = taosArrayGet(pActiveInfo->pReadyMsgList, i); + if (p->nodeId == req.upstreamNodeId) { + if (p->checkpointId == req.checkpointId) { + stWarn("s-task:%s repeatly recv checkpoint-source msg from task:0x%x vgId:%d, checkpointId:%" PRId64 ", ignore", + pTask->id.idStr, p->upStreamTaskId, p->nodeId, p->checkpointId); + } else { + stError("s-task:%s checkpointId:%" PRId64 " not completed, new checkpointId:%" PRId64 " recv", + pTask->id.idStr, p->checkpointId, checkpointId); + ASSERT(0); // failed to handle it + } + + recved = true; + break; + } } - taosArrayPush(pTask->pReadyMsgList, &info); + if (!recved) { + taosArrayPush(pActiveInfo->pReadyMsgList, &info); + } + + taosThreadMutexUnlock(&pActiveInfo->lock); return 0; } void streamClearChkptReadyMsg(SStreamTask* pTask) { - if (pTask->pReadyMsgList == NULL) { + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + if (pActiveInfo == NULL) { return; } - for (int i = 0; i < taosArrayGetSize(pTask->pReadyMsgList); i++) { - SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, i); + for (int i = 0; i < taosArrayGetSize(pActiveInfo->pReadyMsgList); i++) { + SStreamChkptReadyInfo* pInfo = taosArrayGet(pActiveInfo->pReadyMsgList, i); rpcFreeCont(pInfo->msg.pCont); } - taosArrayClear(pTask->pReadyMsgList); + + taosArrayClear(pActiveInfo->pReadyMsgList); } // this message has been sent successfully, let's try next one. 
-static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId) { +static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId, int32_t downstreamNodeId) { stDebug("s-task:%s destroy dispatch msg:%p", pTask->id.idStr, pTask->msgInfo.pData); bool delayDispatch = (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER); if (delayDispatch) { taosThreadMutexLock(&pTask->lock); // we only set the dispatch msg info for current checkpoint trans - if (streamTaskGetStatus(pTask)->state == TASK_STATUS__CK && pTask->chkInfo.checkpointingId == pTask->msgInfo.checkpointId) { - ASSERT(pTask->chkInfo.transId == pTask->msgInfo.transId); - pTask->chkInfo.dispatchCheckpointTrigger = true; - stDebug("s-task:%s checkpoint-trigger msg rsp for checkpointId:%" PRId64 " transId:%d confirmed", - pTask->id.idStr, pTask->msgInfo.checkpointId, pTask->msgInfo.transId); + if (streamTaskGetStatus(pTask)->state == TASK_STATUS__CK && + pTask->chkInfo.pActiveInfo->activeId == pTask->msgInfo.checkpointId) { + ASSERT(pTask->chkInfo.pActiveInfo->transId == pTask->msgInfo.transId); + stDebug("s-task:%s checkpoint-trigger msg to 0x%x rsp for checkpointId:%" PRId64 " transId:%d confirmed", + pTask->id.idStr, downstreamId, pTask->msgInfo.checkpointId, pTask->msgInfo.transId); + + streamTaskSetTriggerDispatchConfirmed(pTask, downstreamNodeId); } else { stWarn("s-task:%s checkpoint-trigger msg rsp for checkpointId:%" PRId64 " transId:%d discard, since expired", pTask->id.idStr, pTask->msgInfo.checkpointId, pTask->msgInfo.transId); @@ -966,10 +1044,10 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i pTask->info.taskLevel == TASK_LEVEL__SOURCE) { stError("s-task:%s failed to dispatch checkpoint-trigger msg, checkpointId:%" PRId64 ", set the current checkpoint failed, and send rsp to mnode", - id, pTask->chkInfo.checkpointingId); + id, pTask->chkInfo.pActiveInfo->activeId); { // send checkpoint failure msg to mnode directly 
- pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId; // record the latest failed checkpoint id - pTask->chkInfo.checkpointingId = pTask->chkInfo.checkpointingId; + pTask->chkInfo.pActiveInfo->failedId = pTask->chkInfo.pActiveInfo->activeId; // record the latest failed checkpoint id + pTask->chkInfo.pActiveInfo->activeId = pTask->chkInfo.pActiveInfo->activeId; streamTaskSendCheckpointSourceRsp(pTask); } } else { @@ -1035,7 +1113,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i // now ready for next data output atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); } else { - handleDispatchSuccessRsp(pTask, pRsp->downstreamTaskId); + handleDispatchSuccessRsp(pTask, pRsp->downstreamTaskId, pRsp->downstreamNodeId); } } } @@ -1096,7 +1174,7 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S stDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64 ", msgId:%d", id, pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen, pReq->msgId); - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); ASSERT(pInfo != NULL); if (pMeta->role == NODE_ROLE_FOLLOWER) { diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index b60164fca9..87f239f31c 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -426,7 +426,7 @@ int32_t streamTransferStatePrepare(SStreamTask* pTask) { streamMetaReleaseTask(pMeta, pStreamTask); return code; } else { - stDebug("s-task:%s halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr); + stDebug("s-task:%s sink task halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr); } streamMetaReleaseTask(pMeta, pStreamTask); } diff --git a/source/libs/stream/src/streamMeta.c 
b/source/libs/stream/src/streamMeta.c index 00fe1207dd..7356dc01ce 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1044,13 +1044,13 @@ static int32_t metaHeartbeatToMnodeImpl(SStreamMeta* pMeta) { entry.sinkDataSize = SIZE_IN_MiB((*pTask)->execInfo.sink.dataSize); } - if ((*pTask)->chkInfo.checkpointingId != 0) { - entry.checkpointInfo.failed = ((*pTask)->chkInfo.failedId >= (*pTask)->chkInfo.checkpointingId) ? 1 : 0; - entry.checkpointInfo.activeId = (*pTask)->chkInfo.checkpointingId; - entry.checkpointInfo.activeTransId = (*pTask)->chkInfo.transId; + if ((*pTask)->chkInfo.pActiveInfo->activeId != 0) { + entry.checkpointInfo.failed = ((*pTask)->chkInfo.pActiveInfo->failedId >= (*pTask)->chkInfo.pActiveInfo->activeId) ? 1 : 0; + entry.checkpointInfo.activeId = (*pTask)->chkInfo.pActiveInfo->activeId; + entry.checkpointInfo.activeTransId = (*pTask)->chkInfo.pActiveInfo->transId; if (entry.checkpointInfo.failed) { - stInfo("s-task:%s set kill checkpoint trans in hb, transId:%d", (*pTask)->id.idStr, (*pTask)->chkInfo.transId); + stInfo("s-task:%s set kill checkpoint trans in hb, transId:%d", (*pTask)->id.idStr, (*pTask)->chkInfo.pActiveInfo->transId); } } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index c056e2a4b6..e0415c8467 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -24,6 +24,8 @@ static void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo); static void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated); static void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdate); +static void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo); +static SActiveCheckpointInfo* streamTaskCreateActiveChkptInfo(); static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) { int32_t childId = 
taosArrayGetSize(pArray); @@ -70,12 +72,12 @@ static void freeItem(void* p) { } static void freeUpstreamItem(void* p) { - SStreamChildEpInfo** pInfo = p; + SStreamUpstreamEpInfo** pInfo = p; taosMemoryFree(*pInfo); } -static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) { - SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo)); +static SStreamUpstreamEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) { + SStreamUpstreamEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamUpstreamEpInfo)); if (pEpInfo == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; @@ -254,7 +256,6 @@ void tFreeStreamTask(SStreamTask* pTask) { } streamClearChkptReadyMsg(pTask); - pTask->pReadyMsgList = taosArrayDestroy(pTask->pReadyMsgList); if (pTask->msgInfo.pData != NULL) { clearBufferedDispatchMsg(pTask); @@ -302,6 +303,9 @@ void tFreeStreamTask(SStreamTask* pTask) { taosMemoryFree((void*)pTask->id.idStr); } + streamTaskDestroyActiveChkptInfo(pTask->chkInfo.pActiveInfo); + pTask->chkInfo.pActiveInfo = NULL; + taosMemoryFree(pTask); stDebug("s-task:0x%x free task completed", taskId); } @@ -414,6 +418,10 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i return TSDB_CODE_OUT_OF_MEMORY; } + if (pTask->chkInfo.pActiveInfo == NULL) { + pTask->chkInfo.pActiveInfo = streamTaskCreateActiveChkptInfo(); + } + return TSDB_CODE_SUCCESS; } @@ -433,8 +441,12 @@ int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask) { } } +int32_t streamTaskGetNumOfUpstream(const SStreamTask* pTask) { + return taosArrayGetSize(pTask->upstreamInfo.pList); +} + int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask) { - SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pUpstreamTask); + SStreamUpstreamEpInfo* pEpInfo = createStreamTaskEpInfo(pUpstreamTask); if (pEpInfo == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -453,7 +465,7 @@ void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t 
nodeId, const SEpS int32_t numOfUpstream = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < numOfUpstream; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (pInfo->nodeId == nodeId) { bool equal = isEpsetEqual(&pInfo->epSet, pEpSet); if (!equal) { @@ -589,7 +601,7 @@ void streamTaskResetUpstreamStageInfo(SStreamTask* pTask) { int32_t size = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < size; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); pInfo->stage = -1; } @@ -603,7 +615,7 @@ void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { } for (int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); pInfo->dataAllowed = true; } @@ -612,12 +624,19 @@ void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { } void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); if (pInfo != NULL) { pInfo->dataAllowed = false; } } +void streamTaskOpenUpstreamInput(SStreamTask* pTask, int32_t taskId) { + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); + if (pInfo != NULL) { + pInfo->dataAllowed = true; + } +} + bool streamTaskIsAllUpstreamClosed(SStreamTask* pTask) { return pTask->upstreamInfo.numOfClosed == taosArrayGetSize(pTask->upstreamInfo.pList); } @@ -723,9 +742,9 @@ int32_t streamBuildAndSendCheckpointUpdateMsg(SMsgCb* pMsgCb, int32_t vgId, SStr pReq->dropRelHTask = dropRelHTask; pReq->hStreamId = pHTaskId->streamId; pReq->hTaskId = pHTaskId->taskId; - pReq->transId = 
pCheckpointInfo->transId; + pReq->transId = pCheckpointInfo->pActiveInfo->transId; - pReq->checkpointId = pCheckpointInfo->checkpointingId; + pReq->checkpointId = pCheckpointInfo->pActiveInfo->activeId; pReq->checkpointVer = pCheckpointInfo->processedVer; pReq->checkpointTs = pCheckpointInfo->startTs; @@ -860,10 +879,10 @@ int32_t streamTaskSendCheckpointReq(SStreamTask* pTask) { return 0; } -SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { +SStreamUpstreamEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (pInfo->taskId == taskId) { return pInfo; } @@ -873,6 +892,24 @@ SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t return NULL; } +SEpSet* streamTaskGetDownstreamEpInfo(SStreamTask* pTask, int32_t taskId) { + if (pTask->info.taskLevel == TASK_OUTPUT__FIXED_DISPATCH) { + if (pTask->outputInfo.fixedDispatcher.taskId == taskId) { + return &pTask->outputInfo.fixedDispatcher.epSet; + } + } else if (pTask->info.taskLevel == TASK_OUTPUT__SHUFFLE_DISPATCH) { + SArray* pList = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; + for(int32_t i = 0; i < taosArrayGetSize(pList); ++i) { + SVgroupInfo* pVgInfo = taosArrayGet(pList, i); + if (pVgInfo->taskId == taskId) { + return &pVgInfo->epSet; + } + } + } + + return NULL; +} + char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) { char buf[128] = {0}; sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId); @@ -914,4 +951,64 @@ int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq) { void streamTaskSetRemoveBackendFiles(SStreamTask* pTask) { pTask->status.removeBackendFiles = true; +} + +int32_t streamTaskGetActiveCheckpointInfo(const SStreamTask* 
pTask, int32_t* pTransId, int64_t* pCheckpointId) { + if (pTransId != NULL) { + *pTransId = pTask->chkInfo.pActiveInfo->transId; + } + + if (pCheckpointId != NULL) { + *pCheckpointId = pTask->chkInfo.pActiveInfo->activeId; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskSetActiveCheckpointInfo(SStreamTask* pTask, int64_t activeCheckpointId) { + pTask->chkInfo.pActiveInfo->activeId = activeCheckpointId; + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskSetFailedChkptInfo(SStreamTask* pTask, int32_t transId, int64_t checkpointId) { + pTask->chkInfo.pActiveInfo->transId = transId; + pTask->chkInfo.pActiveInfo->activeId = checkpointId; + pTask->chkInfo.pActiveInfo->failedId = checkpointId; + return TSDB_CODE_SUCCESS; +} + +SActiveCheckpointInfo* streamTaskCreateActiveChkptInfo() { + SActiveCheckpointInfo* pInfo = taosMemoryCalloc(1, sizeof(SActiveCheckpointInfo)); + taosThreadMutexInit(&pInfo->lock, NULL); + + pInfo->pDispatchTriggerList = taosArrayInit(4, sizeof(STaskTriggerSendInfo)); + pInfo->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); + return pInfo; +} + +void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) { + if (pInfo == NULL) { + return; + } + + taosThreadMutexDestroy(&pInfo->lock); + pInfo->pDispatchTriggerList = taosArrayDestroy(pInfo->pDispatchTriggerList); + pInfo->pReadyMsgList = taosArrayDestroy(pInfo->pReadyMsgList); + + if (pInfo->pCheckTmr != NULL) { + taosTmrStop(pInfo->pCheckTmr); + pInfo->pCheckTmr = NULL; + } + + taosMemoryFree(pInfo); +} + +void streamTaskClearActiveInfo(SActiveCheckpointInfo* pInfo) { + pInfo->activeId = 0; // clear the checkpoint id + pInfo->failedId = 0; + pInfo->transId = 0; + pInfo->dispatchTrigger = false; + + taosArrayClear(pInfo->pReadyMsgList); + taosArrayClear(pInfo->pDispatchTriggerList); } \ No newline at end of file diff --git a/source/libs/stream/src/streammsg.c b/source/libs/stream/src/streammsg.c index 9b69833234..f8228a8f5f 100644 --- 
a/source/libs/stream/src/streammsg.c +++ b/source/libs/stream/src/streammsg.c @@ -17,7 +17,7 @@ #include "streammsg.h" #include "tstream.h" -int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo) { +int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamUpstreamEpInfo* pInfo) { if (tEncodeI32(pEncoder, pInfo->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pInfo->nodeId) < 0) return -1; if (tEncodeI32(pEncoder, pInfo->childId) < 0) return -1; @@ -26,7 +26,7 @@ int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo) return 0; } -int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo) { +int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamUpstreamEpInfo* pInfo) { if (tDecodeI32(pDecoder, &pInfo->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pInfo->nodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pInfo->childId) < 0) return -1; @@ -481,7 +481,7 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { int32_t epSz = taosArrayGetSize(pTask->upstreamInfo.pList); if (tEncodeI32(pEncoder, epSz) < 0) return -1; for (int32_t i = 0; i < epSz; i++) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1; } @@ -557,7 +557,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { pTask->upstreamInfo.pList = taosArrayInit(epSz, POINTER_BYTES); for (int32_t i = 0; i < epSz; i++) { - SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo)); + SStreamUpstreamEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamUpstreamEpInfo)); if (pInfo == NULL) return -1; if (tDecodeStreamEpInfo(pDecoder, pInfo) < 0) { taosMemoryFreeClear(pInfo); From cc318d7e2d7da8491558a64e7f7569cfb30d2898 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 May 2024 11:34:04 +0800 Subject: [PATCH 31/67] enh(stream): sink 
task does not initialize the backend. --- source/dnode/vnode/src/tqCommon/tqCommon.c | 15 +++++++++------ source/libs/stream/src/streamBackendRocksdb.c | 2 +- source/libs/stream/src/streamMeta.c | 3 +-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index b9c0589dc5..a137086d21 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -51,12 +51,15 @@ int32_t tqExpandStreamTask(SStreamTask* pTask) { taskId = replaceStreamTaskId(pTask); } - pTask->pState = streamStateOpen(pMeta->path, pTask, false, -1, -1); - if (pTask->pState == NULL) { - tqError("s-task:%s (vgId:%d) failed to open state for task, expand task failed", pTask->id.idStr, vgId); - return -1; - } else { - tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); + // sink task does not need the pState + if (pTask->info.taskLevel != TASK_LEVEL__SINK) { + pTask->pState = streamStateOpen(pMeta->path, pTask, false, -1, -1); + if (pTask->pState == NULL) { + tqError("s-task:%s (vgId:%d) failed to open state for task, expand task failed", pTask->id.idStr, vgId); + return -1; + } else { + tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); + } } if (pTask->info.fillHistory) { diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 87fb615d5f..d42a3b545a 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -338,7 +338,7 @@ int32_t rebuildFromRemoteChkp_rsync(const char* key, char* chkptPath, int64_t ch if (taosIsDir(defaultPath)) { taosRemoveDir(defaultPath); taosMulMkDir(defaultPath); - stDebug("clear local default dir before download checkpoint data:%s succ", defaultPath); + stDebug("clear local default dir before downloading checkpoint data:%s succ", defaultPath); } code = streamTaskDownloadCheckpointData(key, chkptPath); diff 
--git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 51a6542256..e0822f60e7 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -250,7 +250,7 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) taosThreadMutexLock(&pMeta->backendMutex); void** ppBackend = taosHashGet(pMeta->pTaskDbUnique, key, strlen(key)); - if (ppBackend != NULL && *ppBackend != NULL) { + if ((ppBackend != NULL) && (*ppBackend != NULL)) { taskDbAddRef(*ppBackend); STaskDbWrapper* pBackend = *ppBackend; @@ -258,7 +258,6 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) pTask->pBackend = pBackend; taosThreadMutexUnlock(&pMeta->backendMutex); - stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pBackend); return 0; } From 8d54d45054f9e2f85e7a045e6a45a56750efc689 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 May 2024 17:30:14 +0800 Subject: [PATCH 32/67] fix(stream): fix error found by CI. 
--- source/dnode/vnode/src/tq/tq.c | 4 ---- source/dnode/vnode/src/tqCommon/tqCommon.c | 2 +- source/libs/stream/src/streamCheckpoint.c | 18 +++++++++++++----- source/libs/stream/src/streamDispatch.c | 9 +++++---- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 037c2a7b7a..b54429f6b6 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1088,10 +1088,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) int32_t len = pMsg->contLen - sizeof(SMsgHead); int32_t code = 0; -// if (pTq->pStreamMeta->vgId == 2) { -// ASSERT(0); -// } - // disable auto rsp to mnode pRsp->info.handle = NULL; diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index a137086d21..f87556e24e 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -864,7 +864,7 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { } int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { - SRetrieveChkptTriggerReq* pReq = (SRetrieveChkptTriggerReq*) pMsg; + SRetrieveChkptTriggerReq* pReq = (SRetrieveChkptTriggerReq*) pMsg->pCont; SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->upstreamTaskId); if (pTask == NULL) { diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 43b39b8574..968493b595 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -562,11 +562,11 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { int32_t vgId = pTask->pMeta->vgId; int64_t now = taosGetTimestampMs(); - stDebug("s-task:%s vgId:%d checkpoint-trigger monit start, ts:%" PRId64, pTask->id.idStr, vgId, now); + stDebug("s-task:%s vgId:%d checkpoint-trigger monitor start, ts:%" PRId64, pTask->id.idStr, vgId, 
now); taosThreadMutexLock(&pTask->lock); SStreamTaskState* pState = streamTaskGetStatus(pTask); - if (pState->state == TASK_STATUS__CK) { + if (pState->state != TASK_STATUS__CK) { stDebug("s-task:%s vgId:%d not in checkpoint status, quit from monitor checkpoint-trigger", pTask->id.idStr, vgId); taosThreadMutexUnlock(&pTask->lock); return; @@ -599,12 +599,14 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { } // do send retrieve checkpoint trigger msg to upstream + int32_t size = taosArrayGetSize(pNotSendList); doSendRetrieveTriggerMsg(pTask, pNotSendList); taosThreadMutexUnlock(&pActiveInfo->lock); // check every 100ms - if (taosArrayGetSize(pNotSendList) > 0) { + if (size > 0) { taosTmrReset(checkpointTriggerMonitorFn, 10000, pTask, streamTimer, &pActiveInfo->pCheckTmr); + stDebug("s-task:%s start monitor trigger in 10sec", pTask->id.idStr); } taosArrayDestroy(pNotSendList); @@ -614,8 +616,11 @@ int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList) { int32_t code = 0; int32_t vgId = pTask->pMeta->vgId; const char* pId = pTask->id.idStr; + int32_t size = taosArrayGetSize(pNotSendList); - for (int32_t i = 0; i < taosArrayGetSize(pNotSendList); i++) { + stDebug("s-task:%s start to send trigger-retrieve msg to %d upstream(s)", pId, size); + + for (int32_t i = 0; i < size; i++) { SStreamUpstreamEpInfo* pUpstreamTask = taosArrayGet(pNotSendList, i); SRetrieveChkptTriggerReq* pReq = rpcMallocCont(sizeof(SRetrieveChkptTriggerReq)); @@ -633,10 +638,13 @@ int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList) { pReq->upstreamNodeId = pUpstreamTask->nodeId; pReq->checkpointId = pTask->chkInfo.pActiveInfo->activeId; + SRpcMsg rpcMsg = {0}; - initRpcMsg(&rpcMsg, TDMT_STREAM_RETRIEVE, pReq, sizeof(SRetrieveChkptTriggerReq)); + initRpcMsg(&rpcMsg, TDMT_STREAM_RETRIEVE_TRIGGER, pReq, sizeof(SRetrieveChkptTriggerReq)); code = tmsgSendReq(&pUpstreamTask->epSet, &rpcMsg); + stDebug("s-task:%s vgId:%d send retrieve msg to 0x%x 
checkpointId:%" PRId64, pId, vgId, pUpstreamTask->taskId, + pReq->checkpointId); } return TSDB_CODE_SUCCESS; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index c8a626a739..821ae68497 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -595,8 +595,9 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return 0; } - ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK || pBlock->type == STREAM_INPUT__CHECKPOINT_TRIGGER || - pBlock->type == STREAM_INPUT__TRANS_STATE); + int32_t type = pBlock->type; + ASSERT(type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__CHECKPOINT_TRIGGER || + type == STREAM_INPUT__TRANS_STATE); pTask->execInfo.dispatch += 1; pTask->msgInfo.startTs = taosGetTimestampMs(); @@ -607,7 +608,7 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { } else { // todo handle build dispatch msg failed } - if (pBlock->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { streamTaskInitTriggerDispatchInfo(pTask); } @@ -829,7 +830,7 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa } } else { taosArrayPush(pActiveInfo->pReadyMsgList, &info); - stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, size); + stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, size + 1); } taosThreadMutexUnlock(&pActiveInfo->lock); From d1d868f23941d669bfd5b9563709c77f4a97c3d2 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 29 May 2024 00:08:29 +0800 Subject: [PATCH 33/67] refactor: do some internal refactor and set the pActiveInfo for rsma. 
--- include/dnode/vnode/tqCommon.h | 1 + include/libs/stream/streammsg.h | 8 +++ include/libs/stream/tstream.h | 5 +- source/dnode/snode/src/snode.c | 4 ++ source/dnode/vnode/src/inc/vnodeInt.h | 3 +- source/dnode/vnode/src/sma/smaRollup.c | 2 +- source/dnode/vnode/src/tq/tq.c | 6 +- source/dnode/vnode/src/tq/tqStreamTask.c | 3 +- source/dnode/vnode/src/tqCommon/tqCommon.c | 25 ++++++- source/dnode/vnode/src/vnd/vnodeSvr.c | 4 +- source/libs/stream/src/streamCheckStatus.c | 2 +- source/libs/stream/src/streamCheckpoint.c | 77 +++++++++++++++------- source/libs/stream/src/streamDispatch.c | 2 +- source/libs/stream/src/streamTask.c | 1 - 14 files changed, 104 insertions(+), 39 deletions(-) diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h index 6d52b10182..0076d79312 100644 --- a/include/dnode/vnode/tqCommon.h +++ b/include/dnode/vnode/tqCommon.h @@ -37,6 +37,7 @@ int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta); int32_t tqStreamTasksGetTotalNum(SStreamMeta* pMeta); int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg); int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* pMsg, bool fromVnode); int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, char* msg, int32_t msgLen); diff --git a/include/libs/stream/streammsg.h b/include/libs/stream/streammsg.h index 87c756b10c..96701fe21d 100644 --- a/include/libs/stream/streammsg.h +++ b/include/libs/stream/streammsg.h @@ -181,6 +181,14 @@ typedef struct SRetrieveChkptTriggerReq { int64_t downstreamTaskId; } SRetrieveChkptTriggerReq; +typedef struct SCheckpointTriggerRsp { + int64_t streamId; + int64_t checkpointId; + int32_t upstreamTaskId; + int32_t taskId; + int32_t transId; +} 
SCheckpointTriggerRsp; + typedef struct { SMsgHead head; int64_t streamId; diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 185ab7ad51..0d01913235 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -678,8 +678,7 @@ bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeI void streamTaskGetTriggerRecvStatus(SStreamTask* pTask, int32_t* pRecved, int32_t* pTotal); void streamTaskInitTriggerDispatchInfo(SStreamTask* pTask); void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId); -int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t checkpointType, int32_t dstTaskId, int32_t vgId, - SEpSet* pEpset); +int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, SRpcHandleInfo* pRpcInfo); int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); @@ -753,6 +752,7 @@ tmr_h streamTimerGetInstance(); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); +int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTriggerRsp* pRsp); int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg); @@ -764,6 +764,7 @@ int32_t streamTaskBuildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRp int32_t streamBuildAndSendCheckpointUpdateMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId, STaskId* pHTaskId, SCheckpointInfo* pCheckpointInfo, int8_t dropRelHTask); int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpointInfoReq* pReq); +SActiveCheckpointInfo* streamTaskCreateActiveChkptInfo(); // stream task state machine, and event handling SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask); diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index ac10aa83a4..89ab6d52c3 
100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -136,6 +136,10 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { return tqStreamProcessReqCheckpointRsp(pSnode->pMeta, pMsg); case TDMT_STREAM_TASK_CHECKPOINT_READY_RSP: return tqStreamProcessCheckpointReadyRsp(pSnode->pMeta, pMsg); + case TDMT_STREAM_RETRIEVE_TRIGGER: + return tqStreamTaskProcessRetrieveTriggerReq(pSnode->pMeta, pMsg); + case TDMT_STREAM_RETRIEVE_TRIGGER_RSP: + return tqStreamTaskProcessRetrieveTriggerRsp(pSnode->pMeta, pMsg); default: sndError("invalid snode msg:%d", pMsg->msgType); ASSERT(0); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 59599fdae6..b369bd6039 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -254,7 +254,8 @@ int tqScanWalAsync(STQ* pTq, bool ckPause); int32_t tqStopStreamTasksAsync(STQ* pTq); int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp); int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessTaskRetrieveTriggerMsg(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskRetrieveTriggerRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 6babcf3c80..7138ecbeaa 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -298,7 +298,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id); pStreamTask->status.pSM = streamCreateStateMachine(pStreamTask); - + 
pStreamTask->chkInfo.pActiveInfo = streamTaskCreateActiveChkptInfo(); pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index b54429f6b6..d8460d2cb5 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1240,10 +1240,14 @@ int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { return tqStreamTaskProcessTaskResetReq(pTq->pStreamMeta, pMsg); } -int32_t tqProcessTaskRetrieveTriggerMsg(STQ* pTq, SRpcMsg* pMsg) { +int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg) { return tqStreamTaskProcessRetrieveTriggerReq(pTq->pStreamMeta, pMsg); } +int32_t tqProcessTaskRetrieveTriggerRsp(STQ* pTq, SRpcMsg* pMsg) { + return tqStreamTaskProcessRetrieveTriggerRsp(pTq->pStreamMeta, pMsg); +} + // this function is needed, do not try to remove it. int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg) { return tqStreamProcessStreamHbRsp(pTq->pStreamMeta, pMsg); diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 5a29f67ae3..bcf17bf1e1 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -291,8 +291,7 @@ bool doPutDataIntoInputQ(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems } } else { walReaderSeekVer(pTask->exec.pWalReader, pTask->chkInfo.nextProcessVer); - tqError("s-task:%s append input queue failed, code:too many items, ver:%" PRId64, id, - pTask->chkInfo.nextProcessVer); + tqTrace("s-task:%s append input queue failed, code:too many items, ver:%" PRId64, id, pTask->chkInfo.nextProcessVer); break; } } diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index f87556e24e..ee3f1a3760 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -887,9 +887,9 @@ int32_t 
tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) if (streamTaskAlreadySendTrigger(pTask, pReq->downstreamNodeId)) { // re-send the lost checkpoint-trigger msg to downstream task - SEpSet* pEpset = streamTaskGetDownstreamEpInfo(pTask, pReq->downstreamTaskId); - streamTaskSendCheckpointTriggerMsg(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER, pReq->downstreamTaskId, - pReq->downstreamNodeId, pEpset); + tqDebug("s-task:%s re-send checkpoint-trigger to:0x%x, checkpointId:%" PRId64 ", transId:%d", pTask->id.idStr, + (int32_t)pReq->downstreamTaskId, checkpointId, transId); + streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info); } else { // not send checkpoint-trigger yet, wait int32_t recv = 0, total = 0; streamTaskGetTriggerRecvStatus(pTask, &recv, &total); @@ -914,6 +914,25 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) return TSDB_CODE_SUCCESS; } +int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { + SCheckpointTriggerRsp* pRsp = pMsg->pCont; + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pRsp->streamId, pRsp->taskId); + if (pTask == NULL) { + tqError( + "vgId:%d process retrieve checkpoint-trigger, failed to acquire task:0x%x, it may have been dropped already", + pMeta->vgId, pRsp->taskId); + return TSDB_CODE_STREAM_TASK_NOT_EXIST; + } + + tqDebug("s-task:%s recv re-send checkpoint-trigger msg from upstream:0x%x, checkpointId:%"PRId64", transId:%d", + pTask->id.idStr, pRsp->upstreamTaskId, pRsp->checkpointId, pRsp->transId); + + streamTaskProcessCheckpointTriggerRsp(pTask, pRsp); + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; +} + int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg) { SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 490c3f08ce..426f85fa5e 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c 
+++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -843,7 +843,7 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) case TDMT_STREAM_TASK_CHECKPOINT_READY: return tqProcessTaskCheckpointReadyMsg(pVnode->pTq, pMsg); case TDMT_STREAM_RETRIEVE_TRIGGER: - return tqProcessTaskRetrieveTriggerMsg(pVnode->pTq, pMsg); + return tqProcessTaskRetrieveTriggerReq(pVnode->pTq, pMsg); case TDMT_MND_STREAM_HEARTBEAT_RSP: return tqProcessStreamHbRsp(pVnode->pTq, pMsg); case TDMT_MND_STREAM_REQ_CHKPT_RSP: @@ -851,7 +851,7 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) case TDMT_STREAM_TASK_CHECKPOINT_READY_RSP: return tqProcessTaskCheckpointReadyRsp(pVnode->pTq, pMsg); case TDMT_STREAM_RETRIEVE_TRIGGER_RSP: - return tqProcessTaskCheckpointReadyMsg(pVnode->pTq, pMsg); + return tqProcessTaskRetrieveTriggerRsp(pVnode->pTq, pMsg); case TDMT_VND_GET_STREAM_PROGRESS: return tqStreamProgressRetrieveReq(pVnode->pTq, pMsg); default: diff --git a/source/libs/stream/src/streamCheckStatus.c b/source/libs/stream/src/streamCheckStatus.c index f083ff8a61..d5c676433f 100644 --- a/source/libs/stream/src/streamCheckStatus.c +++ b/source/libs/stream/src/streamCheckStatus.c @@ -246,7 +246,7 @@ int32_t streamTaskProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* } int32_t streamTaskSendCheckRsp(const SStreamMeta* pMeta, int32_t vgId, SStreamTaskCheckRsp* pRsp, - SRpcHandleInfo* pRpcInfo, int32_t taskId) { + SRpcHandleInfo* pRpcInfo, int32_t taskId) { SEncoder encoder; int32_t code; int32_t len; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 968493b595..fb3c705a4d 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -118,23 +118,40 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo return appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); } -int32_t 
streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t checkpointType, int32_t dstTaskId, int32_t vgId, - SEpSet* pEpset) { - SStreamDataBlock* pChkpoint = createChkptTriggerBlock(pTask, checkpointType); +int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTriggerRsp* pRsp) { + ASSERT(pTask->info.taskLevel != TASK_LEVEL__SOURCE); - pChkpoint->srcTaskId = pTask->id.taskId; - pChkpoint->srcVgId = pTask->pMeta->vgId; - - int32_t code = streamTaskBuildAndSendTriggerMsg(pTask, pChkpoint, dstTaskId, vgId, pEpset); - if (code == TSDB_CODE_SUCCESS) { - stDebug("s-task:%s build and send checkpoint-trigger dispatch msg succ, stage:%" PRId64, pTask->id.idStr, - pTask->pMeta->stage); - } else { - // todo handle send data failure - stError("s-task:%s failed to build and send trigger msg", pTask->id.idStr); + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + if (pInfo->transId != pRsp->transId || pInfo->activeId != pRsp->checkpointId) { + // todo handle error + return -1; } - return code; + taosThreadMutexLock(&pTask->lock); + SStreamTaskState* pState = streamTaskGetStatus(pTask); + if (pState->state != TASK_STATUS__CK) { + // todo handle error + taosThreadMutexUnlock(&pTask->lock); + return -1; + } + + taosThreadMutexUnlock(&pTask->lock); + + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, SRpcHandleInfo* pRpcInfo) { + SCheckpointTriggerRsp* pRsp = rpcMallocCont(sizeof(SCheckpointTriggerRsp)); + pRsp->streamId = pTask->id.streamId; + pRsp->upstreamTaskId = pTask->id.taskId; + pRsp->taskId = dstTaskId; + pRsp->checkpointId = pTask->chkInfo.pActiveInfo->activeId; + pRsp->transId = pTask->chkInfo.pActiveInfo->transId; + + SRpcMsg rspMsg = {.code = 0, .pCont = pRsp, .contLen = sizeof(SCheckpointTriggerRsp), .info = *pRpcInfo}; + tmsgSendRsp(&rspMsg); + return 0; } int32_t 
continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask) { @@ -617,8 +634,15 @@ int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList) { int32_t vgId = pTask->pMeta->vgId; const char* pId = pTask->id.idStr; int32_t size = taosArrayGetSize(pNotSendList); + int32_t numOfUpstream = streamTaskGetNumOfUpstream(pTask); - stDebug("s-task:%s start to send trigger-retrieve msg to %d upstream(s)", pId, size); + if (size <= 0) { + stDebug("s-task:%s all upstream checkpoint trigger recved, no need to send retrieve", pId); + return code; + } + + stDebug("s-task:%s %d/%d not recv checkpoint-trigger from upstream(s), start to send trigger-retrieve", pId, size, + numOfUpstream); for (int32_t i = 0; i < size; i++) { SStreamUpstreamEpInfo* pUpstreamTask = taosArrayGet(pNotSendList, i); @@ -643,21 +667,23 @@ int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList) { initRpcMsg(&rpcMsg, TDMT_STREAM_RETRIEVE_TRIGGER, pReq, sizeof(SRetrieveChkptTriggerReq)); code = tmsgSendReq(&pUpstreamTask->epSet, &rpcMsg); - stDebug("s-task:%s vgId:%d send retrieve msg to 0x%x checkpointId:%" PRId64, pId, vgId, pUpstreamTask->taskId, - pReq->checkpointId); + stDebug("s-task:%s vgId:%d send checkpoint-trigger retrieve msg to 0x%x(vgId:%d) checkpointId:%" PRId64, pId, vgId, + pUpstreamTask->taskId, pUpstreamTask->nodeId, pReq->checkpointId); } return TSDB_CODE_SUCCESS; } bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId) { - SStreamTaskState* pStatus = streamTaskGetStatus(pTask); + int64_t now = taosGetTimestampMs(); + const char* id = pTask->id.idStr; + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + SStreamTaskState* pStatus = streamTaskGetStatus(pTask); + if (pStatus->state != TASK_STATUS__CK) { return false; } - SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; - taosThreadMutexLock(&pInfo->lock); if (!pInfo->dispatchTrigger) { taosThreadMutexUnlock(&pInfo->lock); @@ -671,12 +697,15 
@@ bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId) } // has send trigger msg to downstream node, + double before = (now - pSendInfo->sendTs) / 1000.0; if (pSendInfo->recved) { - stWarn("s-task:%s checkpoint-trigger msg send at:%"PRId64" and recv confirmed, checkpointId:%"PRId64 ", transId:%d", - pTask->id.idStr, pSendInfo->sendTs, pInfo->activeId, pInfo->transId); + stWarn("s-task:%s checkpoint-trigger msg already send at:%" PRId64 + "(%.2fs before) and recv confirmed by downstream:0x%x, checkpointId:%" PRId64 ", transId:%d", + id, pSendInfo->sendTs, before, pSendInfo->taskId, pInfo->activeId, pInfo->transId); } else { - stWarn("s-task:%s checkpoint-trigger send at:%"PRId64", checkpointId:%"PRId64", transId:%d", pTask->id.idStr, - pSendInfo->sendTs, pInfo->activeId, pInfo->transId); + stWarn("s-task:%s checkpoint-trigger already send at:%" PRId64 "(%.2fs before), checkpointId:%" PRId64 + ", transId:%d", + id, pSendInfo->sendTs, before, pInfo->activeId, pInfo->transId); } taosThreadMutexUnlock(&pInfo->lock); diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 821ae68497..ecea494be2 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -480,7 +480,7 @@ static void doRetryDispatchData(void* param, void* tmrId) { void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration) { pTask->msgInfo.retryCount++; - stWarn("s-task:%s retry send dispatch data in %" PRId64 "ms, in timer msgId:%d, retryTimes:%d", pTask->id.idStr, + stTrace("s-task:%s retry send dispatch data in %" PRId64 "ms, in timer msgId:%d, retryTimes:%d", pTask->id.idStr, waitDuration, pTask->execInfo.dispatch, pTask->msgInfo.retryCount); if (pTask->msgInfo.pTimer != NULL) { diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index e0415c8467..6d30cc6759 100644 --- a/source/libs/stream/src/streamTask.c +++ 
b/source/libs/stream/src/streamTask.c @@ -25,7 +25,6 @@ static void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo); static void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated); static void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdate); static void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo); -static SActiveCheckpointInfo* streamTaskCreateActiveChkptInfo(); static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) { int32_t childId = taosArrayGetSize(pArray); From 225dfb1a9335d2d6cc3ed03d03b87985aede07a1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 29 May 2024 00:30:42 +0800 Subject: [PATCH 34/67] enh(stream): check checkpoint-trigger msg every 100ms. --- source/libs/stream/inc/streamInt.h | 4 ++- source/libs/stream/src/streamCheckStatus.c | 2 +- source/libs/stream/src/streamCheckpoint.c | 39 +++++++++++++++++----- source/libs/stream/src/streamExec.c | 4 +++ 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 68c3ab1a6b..10db53ea38 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -61,7 +61,9 @@ struct SActiveCheckpointInfo { bool dispatchTrigger; SArray* pDispatchTriggerList; // SArray SArray* pReadyMsgList; // SArray - tmr_h pCheckTmr; + + int32_t checkCounter; + tmr_h pCheckTmr; }; typedef struct { diff --git a/source/libs/stream/src/streamCheckStatus.c b/source/libs/stream/src/streamCheckStatus.c index d5c676433f..11fecf7683 100644 --- a/source/libs/stream/src/streamCheckStatus.c +++ b/source/libs/stream/src/streamCheckStatus.c @@ -285,7 +285,7 @@ int32_t streamTaskStartMonitorCheckRsp(SStreamTask* pTask) { streamTaskInitTaskCheckInfo(pInfo, &pTask->outputInfo, taosGetTimestampMs()); int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s start 
check-rsp monit, ref:%d ", pTask->id.idStr, ref); + stDebug("s-task:%s start check-rsp monitor, ref:%d ", pTask->id.idStr, ref); if (pInfo->checkRspTmr == NULL) { pInfo->checkRspTmr = taosTmrStart(rspMonitorFn, CHECK_RSP_CHECK_INTERVAL, pTask, streamTimer); diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index fb3c705a4d..414c4e2a76 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -36,8 +36,9 @@ static int32_t downloadCheckpointByNameS3(const char* id, const char* fname, con static int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask); static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType); static int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList); +static void checkpointTriggerMonitorFn(void* param, void* tmrId); + static SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType); -static void checkpointTriggerMonitorFn(void* param, void* tmrId); bool streamTaskIsAllUpstreamSendTrigger(SStreamTask* pTask) { SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; @@ -194,11 +195,13 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock return code; } + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s start check-rsp monitor in 10s, ref:%d ", pTask->id.idStr, ref); SActiveCheckpointInfo* pActive = pTask->chkInfo.pActiveInfo; if (pActive->pCheckTmr == NULL) { - pActive->pCheckTmr = taosTmrStart(checkpointTriggerMonitorFn, 10000, pTask, streamTimer); + pActive->pCheckTmr = taosTmrStart(checkpointTriggerMonitorFn, 100, pTask, streamTimer); } else { - taosTmrReset(checkpointTriggerMonitorFn, 10000, pTask, streamTimer, &pActive->pCheckTmr); + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActive->pCheckTmr); } } @@ -575,17 +578,33 @@ int32_t 
streamTaskBuildCheckpoint(SStreamTask* pTask) { void checkpointTriggerMonitorFn(void* param, void* tmrId) { SStreamTask* pTask = param; + int32_t vgId = pTask->pMeta->vgId; + int64_t now = taosGetTimestampMs(); + const char* id = pTask->id.idStr; + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; - int32_t vgId = pTask->pMeta->vgId; - int64_t now = taosGetTimestampMs(); - stDebug("s-task:%s vgId:%d checkpoint-trigger monitor start, ts:%" PRId64, pTask->id.idStr, vgId, now); + // check the status every 100ms + if (streamTaskShouldStop(pTask)) { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s vgId:%d quit from monitor checkpoint-trigger, ref:%d", id, vgId, ref); + streamMetaReleaseTask(pTask->pMeta, pTask); + return; + } + + if (++pActiveInfo->checkCounter < 100) { + return; + } + + pActiveInfo->checkCounter = 0; + stDebug("s-task:%s vgId:%d checkpoint-trigger monitor in tmr, ts:%" PRId64, pTask->id.idStr, vgId, now); taosThreadMutexLock(&pTask->lock); SStreamTaskState* pState = streamTaskGetStatus(pTask); if (pState->state != TASK_STATUS__CK) { stDebug("s-task:%s vgId:%d not in checkpoint status, quit from monitor checkpoint-trigger", pTask->id.idStr, vgId); taosThreadMutexUnlock(&pTask->lock); + streamMetaReleaseTask(pTask->pMeta, pTask); return; } taosThreadMutexUnlock(&pTask->lock); @@ -622,8 +641,12 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { // check every 100ms if (size > 0) { - taosTmrReset(checkpointTriggerMonitorFn, 10000, pTask, streamTimer, &pActiveInfo->pCheckTmr); - stDebug("s-task:%s start monitor trigger in 10sec", pTask->id.idStr); + stDebug("s-task:%s start to monitor checkpoint-trigger in 10s", id); + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pCheckTmr); + } else { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s all checkpoint-trigger recved, quit from monitor checkpoint-trigger tmr, ref:%d", id, 
ref); + streamMetaReleaseTask(pTask->pMeta, pTask); } taosArrayDestroy(pNotSendList); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 87f239f31c..1e85b959a8 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -597,6 +597,10 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { // dispatch checkpoint msg to all downstream tasks int32_t type = pInput->type; if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + if (pTask->pMeta->vgId == 2) { + taosSsleep(20); + } + streamProcessCheckpointTriggerBlock(pTask, (SStreamDataBlock*)pInput); continue; } From a9abe78eab4fc622d34aa005913043fade07258e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 29 May 2024 09:16:37 +0800 Subject: [PATCH 35/67] other: comment invalid sleep for test purpose --- source/libs/stream/src/streamCheckpoint.c | 3 ++- source/libs/stream/src/streamExec.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 414c4e2a76..756183a624 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -196,9 +196,10 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock } int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s start check-rsp monitor in 10s, ref:%d ", pTask->id.idStr, ref); + stDebug("s-task:%s start checkpoint-trigger monitor in 10s, ref:%d ", pTask->id.idStr, ref); SActiveCheckpointInfo* pActive = pTask->chkInfo.pActiveInfo; if (pActive->pCheckTmr == NULL) { + streamMetaAcquireOneTask(pTask); pActive->pCheckTmr = taosTmrStart(checkpointTriggerMonitorFn, 100, pTask, streamTimer); } else { taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActive->pCheckTmr); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 1e85b959a8..2b92696b88 100644 
--- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -598,7 +598,7 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { int32_t type = pInput->type; if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { if (pTask->pMeta->vgId == 2) { - taosSsleep(20); +// taosSsleep(20); } streamProcessCheckpointTriggerBlock(pTask, (SStreamDataBlock*)pInput); From 09efbad11741d8f5385602b6540df1c17d3278fd Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 29 May 2024 11:16:26 +0800 Subject: [PATCH 36/67] fix(stream): drop related fill history task. --- source/libs/stream/src/streamCheckpoint.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 756183a624..a177c56f77 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -305,6 +305,17 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpoin " no need to update the checkpoint info, updated checkpointId:%" PRId64 " checkpointVer:%" PRId64 " ignored", id, vgId, pInfo->checkpointId, pInfo->checkpointVer, pReq->checkpointId, pReq->checkpointVer); taosThreadMutexUnlock(&pTask->lock); + + { // destroy the related fill-history tasks + // drop task should not in the meta-lock, and drop the related fill-history task now + if (pReq->dropRelHTask) { + streamMetaUnregisterTask(pMeta, pReq->hStreamId, pReq->hTaskId); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + stDebug("s-task:%s vgId:%d related fill-history task:0x%x dropped in update checkpointInfo, remain tasks:%d", + id, vgId, pReq->taskId, numOfTasks); + } + } + return TSDB_CODE_SUCCESS; } From 27ac6ca16ed78ad5c0cb232e3e156098ebcc8717 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 29 May 2024 14:12:14 +0800 Subject: [PATCH 37/67] fix(stream): desc the ref for checkpoint-trigger timer. 
--- include/libs/stream/tstream.h | 1 + source/libs/stream/src/streamCheckpoint.c | 11 ++++++++--- source/libs/stream/src/streamDispatch.c | 9 +++++---- source/libs/stream/src/streamQueue.c | 12 ++++++++++-- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 0d01913235..7e1c80c842 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -681,6 +681,7 @@ void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId); int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, SRpcHandleInfo* pRpcInfo); int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); +int32_t streamQueueGetNumOfUnAccessedItems(const SStreamQueue* pQueue); // common void streamTaskPause(SStreamTask* pTask); diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index a177c56f77..3865d030f5 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -198,8 +198,9 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s start checkpoint-trigger monitor in 10s, ref:%d ", pTask->id.idStr, ref); SActiveCheckpointInfo* pActive = pTask->chkInfo.pActiveInfo; + streamMetaAcquireOneTask(pTask); + if (pActive->pCheckTmr == NULL) { - streamMetaAcquireOneTask(pTask); pActive->pCheckTmr = taosTmrStart(checkpointTriggerMonitorFn, 100, pTask, streamTimer); } else { taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActive->pCheckTmr); @@ -605,6 +606,7 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { } if (++pActiveInfo->checkCounter < 100) { + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pCheckTmr); return; } @@ -614,7 +616,10 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { 
taosThreadMutexLock(&pTask->lock); SStreamTaskState* pState = streamTaskGetStatus(pTask); if (pState->state != TASK_STATUS__CK) { - stDebug("s-task:%s vgId:%d not in checkpoint status, quit from monitor checkpoint-trigger", pTask->id.idStr, vgId); + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s vgId:%d not in checkpoint status, quit from monitor checkpoint-trigger, ref:%d", pTask->id.idStr, + vgId, ref); + taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pTask->pMeta, pTask); return; @@ -769,7 +774,7 @@ void streamTaskInitTriggerDispatchInfo(SStreamTask* pTask) { taosThreadMutexLock(&pInfo->lock); // outputQ should be empty here - ASSERT(streamQueueGetNumOfItems(pTask->outputq.queue) == 1); + ASSERT(streamQueueGetNumOfUnAccessedItems(pTask->outputq.queue) == 0); pInfo->dispatchTrigger = true; if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index ecea494be2..0a8a65544c 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -566,9 +566,10 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { const char* id = pTask->id.idStr; int32_t numOfElems = streamQueueGetNumOfItems(pTask->outputq.queue); if (numOfElems > 0) { - double size = SIZE_IN_MiB(taosQueueMemorySize(pTask->outputq.queue->pQueue)); - stDebug("s-task:%s start to dispatch intermediate block to downstream, elem in outputQ:%d, size:%.2fMiB", id, - numOfElems, size); + double size = SIZE_IN_MiB(taosQueueMemorySize(pTask->outputq.queue->pQueue)); + int32_t numOfUnAccessed = streamQueueGetNumOfUnAccessedItems(pTask->outputq.queue); + stDebug("s-task:%s start to dispatch intermediate block to downstream, elem in outputQ:%d/%d, size:%.2fMiB", id, + numOfUnAccessed, numOfElems, size); } // to make sure only one dispatch is running @@ -889,7 +890,7 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, 
int32_t upstreamTaskId, initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); - stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 + stDebug("s-task:%s (level:%d) prepare checkpoint-ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d, vgId:%d", pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index, req.upstreamNodeId); diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 247baea16f..461d53d5a9 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -117,6 +117,13 @@ int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue) { return numOfItems1 + numOfItems2; } +int32_t streamQueueGetNumOfUnAccessedItems(const SStreamQueue* pQueue) { + int32_t numOfItems1 = taosQueueItemSize(pQueue->pQueue); + int32_t numOfItems2 = taosQallUnAccessedItemSize(pQueue->qall); + + return numOfItems1 + numOfItems2; +} + int32_t streamQueueGetItemSize(const SStreamQueue* pQueue) { return taosQueueMemorySize(pQueue->pQueue) + taosQallUnAccessedMemSize(pQueue->qall); } @@ -322,9 +329,10 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) ASSERT(0); } - if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->info.triggerParam != 0) { + if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && type != STREAM_INPUT__CHECKPOINT_TRIGGER && + (pTask->info.triggerParam != 0)) { atomic_val_compare_exchange_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); - stDebug("s-task:%s new data arrived, active the trigger, triggerStatus:%d", pTask->id.idStr, pTask->schedInfo.status); + stDebug("s-task:%s new data arrived, active the sched-trigger, triggerStatus:%d", pTask->id.idStr, pTask->schedInfo.status); } return 0; From 46ca55ac749402879aac775904757866d50a7c88 Mon Sep 17 00:00:00 
2001 From: Haojun Liao Date: Wed, 29 May 2024 15:20:35 +0800 Subject: [PATCH 38/67] fix(stream): avoid deadlock. --- source/libs/stream/src/streamCheckpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 3865d030f5..76bf139c3f 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -309,12 +309,15 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpoin { // destroy the related fill-history tasks // drop task should not in the meta-lock, and drop the related fill-history task now + streamMetaWUnLock(pMeta); if (pReq->dropRelHTask) { streamMetaUnregisterTask(pMeta, pReq->hStreamId, pReq->hTaskId); int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); stDebug("s-task:%s vgId:%d related fill-history task:0x%x dropped in update checkpointInfo, remain tasks:%d", id, vgId, pReq->taskId, numOfTasks); } + + streamMetaWLock(pMeta); } return TSDB_CODE_SUCCESS; From becc4cb368cd29831fe36816ecc5161c4111d03e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 29 May 2024 19:29:26 +0800 Subject: [PATCH 39/67] enh(stream): limit the concurrent started checkpoint tasks. 
--- include/common/tglobal.h | 1 + source/common/src/tglobal.c | 230 ++++++------------- source/dnode/mnode/impl/inc/mndStream.h | 1 + source/dnode/mnode/impl/src/mndMain.c | 52 +---- source/dnode/mnode/impl/src/mndStream.c | 150 ++++++------ source/dnode/mnode/impl/src/mndStreamTrans.c | 41 ++-- source/libs/stream/src/streamCheckpoint.c | 2 - 7 files changed, 195 insertions(+), 282 deletions(-) diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 95b7591263..90ee6f7cc0 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -235,6 +235,7 @@ extern int32_t tsMqRebalanceInterval; extern int32_t tsStreamCheckpointInterval; extern float tsSinkDataRate; extern int32_t tsStreamNodeCheckInterval; +extern int32_t tsMaxConcurrentCheckpoint; extern int32_t tsTtlUnit; extern int32_t tsTtlPushIntervalSec; extern int32_t tsTtlBatchDropNum; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 691eccd174..2fefeb4cf2 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -274,6 +274,7 @@ int32_t tsMqRebalanceInterval = 2; int32_t tsStreamCheckpointInterval = 60; float tsSinkDataRate = 2.0; int32_t tsStreamNodeCheckInterval = 16; +int32_t tsMaxConcurrentCheckpoint = 1; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups @@ -621,114 +622,80 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) { } static int32_t taosAddServerCfg(SConfig *pCfg) { - if (cfgAddDir(pCfg, "dataDir", tsDataDir, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddFloat(pCfg, "minimalDataDirGB", 2.0f, 0.001f, 10000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + tsNumOfCommitThreads = tsNumOfCores / 2; + tsNumOfCommitThreads = TRANGE(tsNumOfCommitThreads, 2, 4); tsNumOfSupportVnodes = tsNumOfCores * 2 + 5; tsNumOfSupportVnodes = TMAX(tsNumOfSupportVnodes, 2); - if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 
4096, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - - if (cfgAddString(pCfg, "encryptAlgorithm", tsEncryptAlgorithm, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddString(pCfg, "encryptScope", tsEncryptScope, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - // if (cfgAddString(pCfg, "authCode", tsAuthCode, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - - if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) - return -1; - if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) - return -1; - - if (cfgAddInt32(pCfg, "queryBufferSize", tsQueryBufferSize, -1, 500000000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; - - tsNumOfCommitThreads = tsNumOfCores / 2; - tsNumOfCommitThreads = TRANGE(tsNumOfCommitThreads, 2, 4); - if (cfgAddInt32(pCfg, "numOfCommitThreads", tsNumOfCommitThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; tsNumOfMnodeReadThreads = tsNumOfCores / 8; tsNumOfMnodeReadThreads = TRANGE(tsNumOfMnodeReadThreads, 1, 4); - if (cfgAddInt32(pCfg, "numOfMnodeReadThreads", tsNumOfMnodeReadThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; tsNumOfVnodeQueryThreads = tsNumOfCores * 2; tsNumOfVnodeQueryThreads = TMAX(tsNumOfVnodeQueryThreads, 16); - if (cfgAddInt32(pCfg, "numOfVnodeQueryThreads", tsNumOfVnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; - - if (cfgAddFloat(pCfg, "ratioOfVnodeStreamThreads", tsRatioOfVnodeStreamThreads, 0.01, 4, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; tsNumOfVnodeFetchThreads = tsNumOfCores / 4; tsNumOfVnodeFetchThreads = TMAX(tsNumOfVnodeFetchThreads, 4); - if 
(cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; tsNumOfVnodeRsmaThreads = tsNumOfCores / 4; tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); - if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; tsNumOfQnodeQueryThreads = tsNumOfCores * 2; tsNumOfQnodeQueryThreads = TMAX(tsNumOfQnodeQueryThreads, 16); - if (cfgAddInt32(pCfg, "numOfQnodeQueryThreads", tsNumOfQnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; + + tsNumOfSnodeStreamThreads = tsNumOfCores / 4; + tsNumOfSnodeStreamThreads = TRANGE(tsNumOfSnodeStreamThreads, 2, 4); + + tsNumOfSnodeWriteThreads = tsNumOfCores / 4; + tsNumOfSnodeWriteThreads = TRANGE(tsNumOfSnodeWriteThreads, 2, 4); + + tsRpcQueueMemoryAllowed = tsTotalMemoryKB * 1024 * 0.1; + tsRpcQueueMemoryAllowed = TRANGE(tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10LL, TSDB_MAX_MSG_SIZE * 10000LL); + + // clang-format off + if (cfgAddDir(pCfg, "dataDir", tsDataDir, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddFloat(pCfg, "minimalDataDirGB", 2.0f, 0.001f, 10000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 4096, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + + if (cfgAddString(pCfg, "encryptAlgorithm", tsEncryptAlgorithm, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddString(pCfg, "encryptScope", tsEncryptScope, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + + if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; + + if 
(cfgAddInt32(pCfg, "queryBufferSize", tsQueryBufferSize, -1, 500000000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfCommitThreads", tsNumOfCommitThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + + if (cfgAddInt32(pCfg, "numOfMnodeReadThreads", tsNumOfMnodeReadThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfVnodeQueryThreads", tsNumOfVnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddFloat(pCfg, "ratioOfVnodeStreamThreads", tsRatioOfVnodeStreamThreads, 0.01, 4, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + + if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfQnodeQueryThreads", tsNumOfQnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; // tsNumOfQnodeFetchThreads = tsNumOfCores / 2; // tsNumOfQnodeFetchThreads = TMAX(tsNumOfQnodeFetchThreads, 4); // if (cfgAddInt32(pCfg, "numOfQnodeFetchThreads", tsNumOfQnodeFetchThreads, 1, 1024, 0) != 0) return -1; - tsNumOfSnodeStreamThreads = tsNumOfCores / 4; - tsNumOfSnodeStreamThreads = TRANGE(tsNumOfSnodeStreamThreads, 2, 4); - if (cfgAddInt32(pCfg, "numOfSnodeSharedThreads", tsNumOfSnodeStreamThreads, 2, 1024, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "numOfSnodeSharedThreads", tsNumOfSnodeStreamThreads, 2, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfSnodeUniqueThreads", tsNumOfSnodeWriteThreads, 2, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - tsNumOfSnodeWriteThreads = tsNumOfCores / 4; - 
tsNumOfSnodeWriteThreads = TRANGE(tsNumOfSnodeWriteThreads, 2, 4); - if (cfgAddInt32(pCfg, "numOfSnodeUniqueThreads", tsNumOfSnodeWriteThreads, 2, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; + if (cfgAddInt64(pCfg, "rpcQueueMemoryAllowed", tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10L, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - tsRpcQueueMemoryAllowed = tsTotalMemoryKB * 1024 * 0.1; - tsRpcQueueMemoryAllowed = TRANGE(tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10LL, TSDB_MAX_MSG_SIZE * 10000LL); - if (cfgAddInt64(pCfg, "rpcQueueMemoryAllowed", tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10L, INT64_MAX, - CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "syncElectInterval", tsElectInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "syncHeartbeatInterval", tsHeartbeatInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "syncSnapReplMaxWaitN", tsSnapReplMaxWaitN, 16, (TSDB_SYNC_SNAP_BUFFER_SIZE >> 2), CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "syncElectInterval", tsElectInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; - if (cfgAddInt32(pCfg, "syncHeartbeatInterval", tsHeartbeatInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "syncSnapReplMaxWaitN", tsSnapReplMaxWaitN, 16, (TSDB_SYNC_SNAP_BUFFER_SIZE >> 2), - CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "arbHeartBeatIntervalSec", tsArbHeartBeatIntervalSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) 
return -1; + if (cfgAddInt32(pCfg, "arbCheckSyncIntervalSec", tsArbCheckSyncIntervalSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "arbSetAssignedTimeoutSec", tsArbSetAssignedTimeoutSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "arbHeartBeatIntervalSec", tsArbHeartBeatIntervalSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "arbCheckSyncIntervalSec", tsArbCheckSyncIntervalSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "arbSetAssignedTimeoutSec", tsArbSetAssignedTimeoutSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - - if (cfgAddInt64(pCfg, "mndSdbWriteDelta", tsMndSdbWriteDelta, 20, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt64(pCfg, "mndLogRetention", tsMndLogRetention, 500, 10000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt64(pCfg, "mndSdbWriteDelta", tsMndSdbWriteDelta, 20, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt64(pCfg, "mndLogRetention", tsMndLogRetention, 500, 10000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "skipGrant", tsMndSkipGrant, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "monitorFqdn", tsMonitorFqdn, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; @@ -736,9 +703,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "monitorMaxLogs", tsMonitorMaxLogs, 1, 1000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "monitorComp", tsMonitorComp, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "monitorLogProtocol", tsMonitorLogProtocol, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "monitorIntervalForBasic", tsMonitorIntervalForBasic, 1, 200000, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return 
-1; + if (cfgAddInt32(pCfg, "monitorIntervalForBasic", tsMonitorIntervalForBasic, 1, 200000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "monitorForceV2", tsMonitorForceV2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "audit", tsEnableAudit, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; @@ -752,68 +717,43 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "rsyncPort", tsRsyncPort, 1, 65535, CFG_SCOPE_BOTH, CFG_DYN_SERVER) != 0) return -1; if (cfgAddString(pCfg, "snodeAddress", tsSnodeAddress, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) - return -1; + if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "tmqRowSize", tmqRowSize, 1, 1000000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "maxTsmaNum", tsMaxTsmaNum, 0, 3, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "transPullupInterval", tsTransPullupInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; - if (cfgAddInt32(pCfg, "compactPullupInterval", tsCompactPullupInterval, 1, 10000, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt32(pCfg, "mqRebalanceInterval", tsMqRebalanceInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; + if (cfgAddInt32(pCfg, "transPullupInterval", tsTransPullupInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "compactPullupInterval", tsCompactPullupInterval, 1, 10000, 
CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "mqRebalanceInterval", tsMqRebalanceInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "ttlUnit", tsTtlUnit, 1, 86400 * 365, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "ttlPushInterval", tsTtlPushIntervalSec, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt32(pCfg, "ttlBatchDropNum", tsTtlBatchDropNum, 0, INT32_MAX, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddBool(pCfg, "ttlChangeOnWrite", tsTtlChangeOnWrite, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "ttlFlushThreshold", tsTtlFlushThreshold, -1, 1000000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; - if (cfgAddInt32(pCfg, "trimVDbIntervalSec", tsTrimVDbIntervalSec, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; - if (cfgAddInt32(pCfg, "s3MigrateIntervalSec", tsS3MigrateIntervalSec, 600, 100000, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt32(pCfg, "ttlPushInterval", tsTtlPushIntervalSec, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "ttlBatchDropNum", tsTtlBatchDropNum, 0, INT32_MAX, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddBool(pCfg, "ttlChangeOnWrite", tsTtlChangeOnWrite, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt32(pCfg, "ttlFlushThreshold", tsTtlFlushThreshold, -1, 1000000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "trimVDbIntervalSec", tsTrimVDbIntervalSec, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "s3MigrateIntervalSec", tsS3MigrateIntervalSec, 600, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddBool(pCfg, "s3MigrateEnabled", tsS3MigrateEnabled, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if 
(cfgAddInt32(pCfg, "uptimeInterval", tsUptimeInterval, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; + if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; - if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX, - CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "udf", tsStartUdfd, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "udfdResFuncs", tsUdfdResFuncs, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "udfdLdLibPath", tsUdfdLdLibPath, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "disableStream", tsDisableStream, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; - if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt64(pCfg, "streamAggCnt", tsStreamAggCnt, 2, INT32_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "checkpointInterval", tsStreamCheckpointInterval, 60, 1200, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt32(pCfg, "checkpointInterval", tsStreamCheckpointInterval, 60, 1200, CFG_SCOPE_SERVER, 
CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddFloat(pCfg, "streamSinkDataRate", tsSinkDataRate, 0.1, 5, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "concurrentCheckpoint", tsMaxConcurrentCheckpoint, 1, 10, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddFloat(pCfg, "fPrecision", tsFPrecision, 0.0f, 100000.0f, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddFloat(pCfg, "dPrecision", tsDPrecision, 0.0f, 1000000.0f, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; @@ -823,41 +763,23 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddString(pCfg, "compressor", tsCompressor, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "filterScalarMode", tsFilterScalarMode, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "maxStreamBackendCache", tsMaxStreamBackendCache, 16, 1024, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt32(pCfg, "pqSortMemThreshold", tsPQSortMemThreshold, 1, 10240, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "resolveFQDNRetryTime", tsResolveFQDNRetryTime, 1, 10240, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "maxStreamBackendCache", tsMaxStreamBackendCache, 16, 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "pqSortMemThreshold", tsPQSortMemThreshold, 1, 10240, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "resolveFQDNRetryTime", tsResolveFQDNRetryTime, 1, 10240, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if 
(cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - /* - if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -1, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - if (tsS3BlockSize > -1 && tsS3BlockSize < 1024) { - uError("failed to config s3blocksize since value:%d. Valid range: -1 or [1024, 1024 * 1024]", tsS3BlockSize); - return -1; - } - if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; - */ - if (cfgAddInt32(pCfg, "s3PageCacheSize", tsS3PageCacheSize, 4, 1024 * 1024 * 1024, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt32(pCfg, "s3UploadDelaySec", tsS3UploadDelaySec, 1, 60 * 60 * 24 * 30, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; + + if (cfgAddInt32(pCfg, "s3PageCacheSize", tsS3PageCacheSize, 4, 1024 * 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "s3UploadDelaySec", tsS3UploadDelaySec, 1, 60 * 60 * 24 * 30, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; // min free disk space used to check if the disk is full [50MB, 1GB] - if (cfgAddInt64(pCfg, "minDiskFreeSize", tsMinDiskFreeSize, TFS_MIN_DISK_FREE_SIZE, 1024 * 1024 * 1024, - CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt64(pCfg, "minDiskFreeSize", tsMinDiskFreeSize, TFS_MIN_DISK_FREE_SIZE, 1024 * 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddBool(pCfg, "enableWhiteList", tsEnableWhiteList, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + // clang-format on + // GRANT_CFG_ADD; return 0; } diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 5a4caf3348..bb10a9b9ad 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ 
b/source/dnode/mnode/impl/inc/mndStream.h @@ -97,6 +97,7 @@ void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); int32_t mndPersistStream(STrans *pTrans, SStreamObj *pStream); int32_t mndStreamRegisterTrans(STrans *pTrans, const char *pTransName, int64_t streamId); +int32_t mndStreamClearFinishedTrans(SMnode *pMnode, int32_t *pNumOfActiveChkpt); int32_t mndAddtoCheckpointWaitingList(SStreamObj *pStream, int64_t checkpointId); bool mndStreamTransConflictCheck(SMnode *pMnode, int64_t streamId, const char *pTransName, bool lock); int32_t mndStreamGetRelTrans(SMnode *pMnode, int64_t streamId); diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index a78edcb05e..a228166124 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -90,21 +90,6 @@ static void *mndBuildTimerMsg(int32_t *pContLen) { return pReq; } -static void *mndBuildCheckpointTickMsg(int32_t *pContLen, int64_t sec) { - SMStreamTickReq timerReq = { - .tick = sec, - }; - - int32_t contLen = tSerializeSMStreamTickMsg(NULL, 0, &timerReq); - if (contLen <= 0) return NULL; - void *pReq = rpcMallocCont(contLen); - if (pReq == NULL) return NULL; - - tSerializeSMStreamTickMsg(pReq, contLen, &timerReq); - *pContLen = contLen; - return pReq; -} - static void mndPullupTrans(SMnode *pMnode) { mTrace("pullup trans msg"); int32_t contLen = 0; @@ -174,21 +159,12 @@ static void mndCalMqRebalance(SMnode *pMnode) { } } -static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) { - int32_t contLen = 0; - void *pReq = mndBuildCheckpointTickMsg(&contLen, sec); - if (pReq != NULL) { - SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER, .pCont = pReq, .contLen = contLen}; - tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); - } -} - -static void mndStreamCheckpointRemain(SMnode *pMnode) { - int32_t contLen = 0; - void *pReq = 
mndBuildCheckpointTickMsg(&contLen, 0); - if (pReq != NULL) { - SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, .pCont = pReq, .contLen = contLen}; - tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); +static void mndStreamCheckpointTimer(SMnode *pMnode) { + SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); + if (pMsg != NULL) { + int32_t size = sizeof(SMStreamDoCheckpointMsg); + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); } } @@ -367,12 +343,8 @@ void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) { mndCalMqRebalance(pMnode); } - if (sec % tsStreamCheckpointInterval == 0) { - mndStreamCheckpointTick(pMnode, sec); - } - - if (sec % 5 == 0) { - mndStreamCheckpointRemain(pMnode); + if (sec % 30 == 0) { // send the checkpoint info every 10 sec + mndStreamCheckpointTimer(pMnode); } if (sec % tsStreamNodeCheckInterval == 0) { @@ -399,6 +371,7 @@ void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) { mndSyncCheckTimeout(pMnode); } } + static void *mndThreadFp(void *param) { SMnode *pMnode = param; int64_t lastTime = 0; @@ -832,10 +805,9 @@ _OVER: pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER || pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER || pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER || - pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE || - pMsg->msgType == TDMT_MND_STREAM_CHECKPOINT_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT || - pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER || - pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER || pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER) { + pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT || + pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER || pMsg->msgType == 
TDMT_MND_ARB_HEARTBEAT_TIMER || + pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER) { mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored, pMnode->stopped, state.restored, syncStr(state.state)); return -1; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 989d9970cd..ef29547e20 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -45,9 +45,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessCreateStreamReqFromMNode(SRpcMsg *pReq); static int32_t mndProcessDropStreamReqFromMNode(SRpcMsg *pReq); -static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); -static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq); static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); static void mndCancelGetNextStream(SMnode *pMnode, void *pIter); static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); @@ -114,10 +112,8 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_STREAM_DROP_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); - mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_REQ_CHKPT, mndProcessStreamReqCheckpoint); - mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, mndProcessStreamCheckpointInCandid); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_NODECHANGE_CHECK, mndProcessNodeCheckReq); @@ -886,26 +882,10 @@ int64_t 
mndStreamGenChkptId(SMnode *pMnode, bool lock) { } } - mDebug("generated checkpoint %" PRId64 "", maxChkptId + 1); + mDebug("generate new checkpointId:%" PRId64, maxChkptId + 1); return maxChkptId + 1; } -static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; - if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { - return 0; - } - - SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); - pMsg->checkpointId = mndStreamGenChkptId(pMnode, true); - - int32_t size = sizeof(SMStreamDoCheckpointMsg); - SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size}; - tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); - return 0; -} - static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, int64_t streamId, int32_t taskId, int32_t transId, int8_t mndTrigger) { SStreamCheckpointSourceReq req = {0}; @@ -1136,73 +1116,101 @@ static int32_t mndCheckNodeStatus(SMnode *pMnode) { return ready ? 0 : -1; } +typedef struct { + int64_t streamId; + int64_t duration; +} SCheckpointInterval; + +static int32_t streamWaitComparFn(const void* p1, const void* p2) { + const SCheckpointInterval* pInt1 = p1; + const SCheckpointInterval* pInt2 = p2; + if (pInt1->duration == pInt2->duration) { + return 0; + } + + return pInt1->duration > pInt2->duration? 
-1:1; +} + static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; SStreamObj *pStream = NULL; int32_t code = 0; + int32_t numOfCheckpointTrans = 0; if ((code = mndCheckNodeStatus(pMnode)) != 0) { return code; } - // make sure the time interval between two consecutive checkpoint trans is long enough - SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; + SArray* pList = taosArrayInit(4, sizeof(SCheckpointInterval)); + int64_t now = taosGetTimestampMs(); while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { - code = mndProcessStreamCheckpointTrans(pMnode, pStream, pMsg->checkpointId, 1, true); - sdbRelease(pSdb, pStream); - if (code == -1) { - break; - } - } - - return code; -} - -static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - void *pIter = NULL; - int32_t code = 0; - - taosThreadMutexLock(&execInfo.lock); - int32_t num = taosHashGetSize(execInfo.transMgmt.pWaitingList); - taosThreadMutexUnlock(&execInfo.lock); - if (num == 0) { - return code; - } - - if ((code = mndCheckNodeStatus(pMnode)) != 0) { - return code; - } - - SArray *pList = taosArrayInit(4, sizeof(int64_t)); - while ((pIter = taosHashIterate(execInfo.transMgmt.pWaitingList, pIter)) != NULL) { - SCheckpointCandEntry *pEntry = pIter; - - SStreamObj *ps = mndAcquireStream(pMnode, pEntry->pName); - if (ps == NULL) { + int64_t duration = now - pStream->checkpointFreq; + if (duration < tsStreamCheckpointInterval * 1000) { + sdbRelease(pSdb, pStream); continue; } - mDebug("start to launch checkpoint for stream:%s %" PRIx64 " in candidate list", pEntry->pName, pEntry->streamId); + SCheckpointInterval in = {.streamId = pStream->uid, .duration = duration}; + taosArrayPush(pList, &in); - code = mndProcessStreamCheckpointTrans(pMnode, ps, pEntry->checkpointId, 1, true); - mndReleaseStream(pMnode, ps); + int32_t currentSize = 
taosArrayGetSize(pList); + mDebug("stream:%s (uid:0x%" PRIx64 ") checkpoint interval beyond threshold: %" PRId64 "s(%" PRId64 + "s) beyond threshold:%d", + pStream->name, pStream->uid, tsStreamCheckpointInterval, duration / 1000, currentSize); - if (code == TSDB_CODE_SUCCESS) { - taosArrayPush(pList, &pEntry->streamId); + sdbRelease(pSdb, pStream); + } + + int32_t size = taosArrayGetSize(pList); + if (size == 0) { + taosArrayDestroy(pList); + return code; + } + + taosArraySort(pList, streamWaitComparFn); + mndStreamClearFinishedTrans(pMnode, &numOfCheckpointTrans); + int32_t numOfQual = taosArrayGetSize(pList); + + if (numOfCheckpointTrans > tsMaxConcurrentCheckpoint) { + mDebug( + "%d stream(s) checkpoint interval longer than %ds, ongoing checkpoint trans:%d reach maximum allowed:%d, new " + "checkpoint trans are not allowed, wait for 30s", + numOfQual, tsStreamCheckpointInterval, numOfCheckpointTrans, tsMaxConcurrentCheckpoint); + taosArrayDestroy(pList); + return code; + } + + int32_t capacity = tsMaxConcurrentCheckpoint - numOfCheckpointTrans; + mDebug( + "%d stream(s) checkpoint interval longer than %ds, %d ongoing checkpoint trans, %d new checkpoint trans allowed, " + "concurrent trans threshold:%d", + numOfQual, tsStreamCheckpointInterval, numOfCheckpointTrans, capacity, tsMaxConcurrentCheckpoint); + + int32_t started = 0; + int64_t checkpointId = mndStreamGenChkptId(pMnode, true); + + for (int32_t i = 0; i < numOfQual; ++i) { + SCheckpointInterval *pCheckpointInfo = taosArrayGet(pList, i); + + SStreamObj *p = mndGetStreamObj(pMnode, pCheckpointInfo->streamId); + if (p != NULL) { + code = mndProcessStreamCheckpointTrans(pMnode, p, checkpointId, 1, true); + sdbRelease(pSdb, p); + + if (code != -1) { + started += 1; + + if (started >= capacity) { + mDebug("already start %d new checkpoint trans, current active checkpoint trans:%d", started, + (started + numOfCheckpointTrans)); + break; + } + } } } - for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) { - 
int64_t *pId = taosArrayGet(pList, i); - - taosHashRemove(execInfo.transMgmt.pWaitingList, pId, sizeof(*pId)); - } - - int32_t remain = taosHashGetSize(execInfo.transMgmt.pWaitingList); - mDebug("%d in candidate list generated checkpoint, remaining:%d", (int32_t)taosArrayGetSize(pList), remain); taosArrayDestroy(pList); return code; } @@ -2349,10 +2357,10 @@ static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { return 0; } - SMStreamNodeCheckMsg *pMsg = rpcMallocCont(sizeof(SMStreamNodeCheckMsg)); + int32_t size = sizeof(SMStreamNodeCheckMsg); + SMStreamNodeCheckMsg *pMsg = rpcMallocCont(size); - SRpcMsg rpcMsg = { - .msgType = TDMT_MND_STREAM_NODECHANGE_CHECK, .pCont = pMsg, .contLen = sizeof(SMStreamNodeCheckMsg)}; + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_NODECHANGE_CHECK, .pCont = pMsg, .contLen = size}; tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); return 0; } diff --git a/source/dnode/mnode/impl/src/mndStreamTrans.c b/source/dnode/mnode/impl/src/mndStreamTrans.c index 74ad09c752..ac4cb08308 100644 --- a/source/dnode/mnode/impl/src/mndStreamTrans.c +++ b/source/dnode/mnode/impl/src/mndStreamTrans.c @@ -21,8 +21,6 @@ typedef struct SKeyInfo { int32_t keyLen; } SKeyInfo; -static int32_t clearFinishedTrans(SMnode* pMnode); - int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pTransName, int64_t streamId) { SStreamTransInfo info = { .transId = pTrans->id, .startTime = taosGetTimestampMs(), .name = pTransName, .streamId = streamId}; @@ -30,41 +28,54 @@ int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pTransName, int64_t s return 0; } -int32_t clearFinishedTrans(SMnode* pMnode) { +int32_t mndStreamClearFinishedTrans(SMnode* pMnode, int32_t* pNumOfActiveChkpt) { size_t keyLen = 0; void* pIter = NULL; SArray* pList = taosArrayInit(4, sizeof(SKeyInfo)); + int32_t num = 0; while ((pIter = taosHashIterate(execInfo.transMgmt.pDBTrans, pIter)) != NULL) { - SStreamTransInfo* pEntry = (SStreamTransInfo*)pIter; + SStreamTransInfo *pEntry = 
(SStreamTransInfo *)pIter; // let's clear the finished trans - STrans* pTrans = mndAcquireTrans(pMnode, pEntry->transId); + STrans *pTrans = mndAcquireTrans(pMnode, pEntry->transId); if (pTrans == NULL) { - void* pKey = taosHashGetKey(pEntry, &keyLen); + void *pKey = taosHashGetKey(pEntry, &keyLen); // key is the name of src/dst db name SKeyInfo info = {.pKey = pKey, .keyLen = keyLen}; - mDebug("transId:%d %s startTs:%" PRId64 " cleared since finished", pEntry->transId, pEntry->name, - pEntry->startTime); + mDebug("transId:%d %s startTs:%" PRId64 " cleared since finished", pEntry->transId, pEntry->name, pEntry->startTime); taosArrayPush(pList, &info); } else { + if (strcmp(pEntry->name, MND_STREAM_CHECKPOINT_NAME) == 0) { + num++; + } mndReleaseTrans(pMnode, pTrans); } } - size_t num = taosArrayGetSize(pList); - for (int32_t i = 0; i < num; ++i) { + int32_t size = taosArrayGetSize(pList); + for (int32_t i = 0; i < size; ++i) { SKeyInfo* pKey = taosArrayGet(pList, i); taosHashRemove(execInfo.transMgmt.pDBTrans, pKey->pKey, pKey->keyLen); } - mDebug("clear %d finished stream-trans, remained:%d", (int32_t)num, taosHashGetSize(execInfo.transMgmt.pDBTrans)); + mDebug("clear %d finished stream-trans, remained:%d, active checkpoint trans:%d", size, + taosHashGetSize(execInfo.transMgmt.pDBTrans), num); terrno = TSDB_CODE_SUCCESS; taosArrayDestroy(pList); + + if (pNumOfActiveChkpt != NULL) { + *pNumOfActiveChkpt = num; + } + return 0; } +// * Transactions of different streams are not related. Here only check the conflict of transaction for a given stream. +// For a given stream: +// 1. checkpoint trans is conflict with any other trans except for the drop and reset trans. +// 2. create/drop/reset/update trans are conflict with any other trans. 
bool mndStreamTransConflictCheck(SMnode* pMnode, int64_t streamId, const char* pTransName, bool lock) { if (lock) { taosThreadMutexLock(&execInfo.lock); @@ -78,7 +89,7 @@ bool mndStreamTransConflictCheck(SMnode* pMnode, int64_t streamId, const char* p return false; } - clearFinishedTrans(pMnode); + mndStreamClearFinishedTrans(pMnode, NULL); SStreamTransInfo *pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, &streamId, sizeof(streamId)); if (pEntry != NULL) { @@ -95,7 +106,7 @@ bool mndStreamTransConflictCheck(SMnode* pMnode, int64_t streamId, const char* p terrno = TSDB_CODE_MND_TRANS_CONFLICT; return true; } else { - mDebug("not conflict with checkpoint trans, name:%s, continue create trans", pTransName); + mDebug("not conflict with checkpoint trans, name:%s, continue creating trans", pTransName); } } else if ((strcmp(tInfo.name, MND_STREAM_CREATE_NAME) == 0) || (strcmp(tInfo.name, MND_STREAM_DROP_NAME) == 0) || (strcmp(tInfo.name, MND_STREAM_TASK_RESET_NAME) == 0) || @@ -106,7 +117,7 @@ bool mndStreamTransConflictCheck(SMnode* pMnode, int64_t streamId, const char* p return true; } } else { - mDebug("stream:0x%"PRIx64" no conflict trans existed, continue create trans", streamId); + mDebug("stream:0x%" PRIx64 " no conflict trans existed, continue create trans", streamId); } if (lock) { @@ -124,7 +135,7 @@ int32_t mndStreamGetRelTrans(SMnode* pMnode, int64_t streamUid) { return 0; } - clearFinishedTrans(pMnode); + mndStreamClearFinishedTrans(pMnode, NULL); SStreamTransInfo* pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, &streamUid, sizeof(streamUid)); if (pEntry != NULL) { SStreamTransInfo tInfo = *pEntry; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 76bf139c3f..6b02ae485f 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -377,8 +377,6 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpoin // drop task should not in the 
meta-lock, and drop the related fill-history task now if (pReq->dropRelHTask) { streamMetaUnregisterTask(pMeta, pReq->hStreamId, pReq->hTaskId); - - // commit the update int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); stDebug("s-task:%s vgId:%d related fill-history task:0x%x dropped, remain tasks:%d", id, vgId, pReq->taskId, numOfTasks); } From 0946f25e5f1ea19fe559022cd41cf211091b02d1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 29 May 2024 21:59:26 +0800 Subject: [PATCH 40/67] fix(stream): fix syntax error. --- source/dnode/mnode/impl/src/mndStream.c | 2 +- source/libs/stream/src/streamDispatch.c | 2 +- source/libs/stream/src/streamExec.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index ef29547e20..09e0e4e415 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -1156,7 +1156,7 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { taosArrayPush(pList, &in); int32_t currentSize = taosArrayGetSize(pList); - mDebug("stream:%s (uid:0x%" PRIx64 ") checkpoint interval beyond threshold: %" PRId64 "s(%" PRId64 + mDebug("stream:%s (uid:0x%" PRIx64 ") checkpoint interval beyond threshold: %ds(%" PRId64 "s) beyond threshold:%d", pStream->name, pStream->uid, tsStreamCheckpointInterval, duration / 1000, currentSize); diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 0a8a65544c..522ac9a910 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -1038,7 +1038,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i taosArrayPush(pTask->msgInfo.pRetryList, &pRsp->downstreamNodeId); taosThreadMutexUnlock(&pTask->lock); - stWarn("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch", id, + stTrace("s-task:%s inputQ of downstream 
task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch", id, pRsp->downstreamTaskId, pRsp->downstreamNodeId, DISPATCH_RETRY_INTERVAL_MS); } else if (pRsp->inputStatus == TASK_INPUT_STATUS__REFUSED) { // todo handle the agg task failure, add test case diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 2b92696b88..77f266bd6d 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -562,13 +562,13 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { } if (streamQueueIsFull(pTask->outputq.queue)) { - stWarn("s-task:%s outputQ is full, idle for 500ms and retry", id); + stTrace("s-task:%s outputQ is full, idle for 500ms and retry", id); streamTaskSetIdleInfo(pTask, 1000); return 0; } if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { - stWarn("s-task:%s downstream task inputQ blocked, idle for 1sec and retry", id); + stTrace("s-task:%s downstream task inputQ blocked, idle for 1sec and retry", id); streamTaskSetIdleInfo(pTask, 1000); return 0; } From cff5e753ef28b2656b603f1723f9764a0e2292ba Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 30 May 2024 14:12:23 +0800 Subject: [PATCH 41/67] fix(stream): discard the result data block if the primary timestamp of results is expired. 
--- source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/sma/smaTimeRange.c | 4 +- source/dnode/vnode/src/tq/tqSink.c | 50 ++++++++++++++++------- source/dnode/vnode/src/tsdb/tsdbOpen.c | 8 ++++ source/dnode/vnode/src/tsdb/tsdbRead2.c | 10 ++--- 6 files changed, 50 insertions(+), 25 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index e2ecdca59f..338a0b40bd 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -160,7 +160,7 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const void* pRsp, int32_t tqInitDataRsp(SMqDataRspCommon* pRsp, STqOffsetVal pOffset); void tqUpdateNodeStage(STQ* pTq, bool isLeader); int32_t tqSetDstTableDataPayload(uint64_t suid, const STSchema* pTSchema, int32_t blockIndex, SSDataBlock* pDataBlock, - SSubmitTbData* pTableData, const char* id); + SSubmitTbData* pTableData, int64_t earlyTs, const char* id); int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, const char* id); SVCreateTbReq* buildAutoCreateTableReq(const char* stbFullName, int64_t suid, int32_t numOfCols, diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index b369bd6039..8222af4d60 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -242,6 +242,7 @@ int tsdbInsertData(STsdb* pTsdb, int64_t version, SSubmitReq2* pMsg, SSubmit int32_t tsdbInsertTableData(STsdb* pTsdb, int64_t version, SSubmitTbData* pSubmitTbData, int32_t* affectedRows); int32_t tsdbDeleteTableData(STsdb* pTsdb, int64_t version, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKEY eKey); int32_t tsdbSetKeepCfg(STsdb* pTsdb, STsdbCfg* pCfg); +int64_t tsdbGetEarliestTs(STsdb* pTsdb); // tq STQ* tqOpen(const char* path, SVnode* pVnode); diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index fb898c02f8..d0913081ac 
100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -203,7 +203,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * int32_t *index = taosHashGet(pTableIndexMap, &groupId, sizeof(groupId)); if (index == NULL) { // no data yet, append it - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, ""); + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, INT64_MIN, ""); if (code != TSDB_CODE_SUCCESS) { continue; } @@ -213,7 +213,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * int32_t size = (int32_t)taosArrayGetSize(pReq->aSubmitTbData) - 1; taosHashPut(pTableIndexMap, &groupId, sizeof(groupId), &size, sizeof(size)); } else { - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, ""); + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, INT64_MIN, ""); if (code != TSDB_CODE_SUCCESS) { continue; } diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 5929c6b591..3fe124ea85 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -34,7 +34,7 @@ static int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSData static int32_t doBuildAndSendSubmitMsg(SVnode* pVnode, SStreamTask* pTask, SSubmitReq2* pReq, int32_t numOfBlocks); static int32_t buildSubmitMsgImpl(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen); static int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDataBlock* pDataBlock, - const char* id); + int64_t earlyTs, const char* id); static int32_t doWaitForDstTableCreated(SVnode* pVnode, SStreamTask* pTask, STableSinkInfo* pTableSinkInfo, const char* dstTableName, int64_t* uid); static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, @@ -552,7 +552,8 @@ int32_t tsAscendingSortFn(const 
void* p1, const void* p2) { } } -int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDataBlock* pDataBlock, const char* id) { +int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDataBlock* pDataBlock, int64_t earlyTs, + const char* id) { int32_t numOfRows = pDataBlock->info.rows; int32_t code = TSDB_CODE_SUCCESS; @@ -581,6 +582,14 @@ int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDat SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); ts = *(int64_t*)colDataGetData(pColData, j); tqTrace("s-task:%s sink row %d, col %d ts %" PRId64, id, j, k, ts); + + if (ts < earlyTs) { + tqError("s-task:%s ts:%" PRId64 " of generated results out of valid time range %" PRId64 " , discarded", id, + ts, earlyTs); + pTableData->aRowP = taosArrayDestroy(pTableData->aRowP); + taosArrayDestroy(pVals); + return TSDB_CODE_SUCCESS; + } } if (IS_SET_NULL(pCol)) { @@ -605,8 +614,7 @@ int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDat dataIndex++; } else { void* colData = colDataGetData(pColData, j); - if (IS_VAR_DATA_TYPE(pCol->type)) { - // address copy, no value + if (IS_VAR_DATA_TYPE(pCol->type)) { // address copy, no value SValue sv = (SValue){.type = pCol->type, .nData = varDataLen(colData), .pData = (uint8_t*)varDataVal(colData)}; SColVal cv = COL_VAL_VALUE(pCol->colId, sv); @@ -796,22 +804,25 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat } int32_t tqSetDstTableDataPayload(uint64_t suid, const STSchema *pTSchema, int32_t blockIndex, SSDataBlock* pDataBlock, - SSubmitTbData* pTableData, const char* id) { + SSubmitTbData* pTableData, int64_t earlyTs, const char* id) { int32_t numOfRows = pDataBlock->info.rows; + char* dstTableName = pDataBlock->info.parTbName; tqDebug("s-task:%s sink data pipeline, build submit msg from %dth resBlock, including %d rows, dst suid:%" PRId64, id, blockIndex + 1, numOfRows, suid); - char* 
dstTableName = pDataBlock->info.parTbName; // convert all rows - int32_t code = doConvertRows(pTableData, pTSchema, pDataBlock, id); + int32_t code = doConvertRows(pTableData, pTSchema, pDataBlock, earlyTs, id); if (code != TSDB_CODE_SUCCESS) { tqError("s-task:%s failed to convert rows from result block, code:%s", id, tstrerror(terrno)); return code; } - taosArraySort(pTableData->aRowP, tsAscendingSortFn); - tqTrace("s-task:%s build submit msg for dstTable:%s, numOfRows:%d", id, dstTableName, numOfRows); + if (pTableData->aRowP != NULL) { + taosArraySort(pTableData->aRowP, tsAscendingSortFn); + tqTrace("s-task:%s build submit msg for dstTable:%s, numOfRows:%d", id, dstTableName, numOfRows); + } + return code; } @@ -836,6 +847,7 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { int32_t numOfBlocks = taosArrayGetSize(pBlocks); int32_t code = TSDB_CODE_SUCCESS; const char* id = pTask->id.idStr; + int64_t earlyTs = tsdbGetEarliestTs(pVnode->pTsdb); bool onlySubmitData = hasOnlySubmitData(pBlocks, numOfBlocks); if (!onlySubmitData) { @@ -870,8 +882,8 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { continue; } - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); - if (code != TSDB_CODE_SUCCESS) { + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, earlyTs, id); + if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { continue; } @@ -918,8 +930,12 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { continue; } - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); - if (code != TSDB_CODE_SUCCESS) { + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, earlyTs, id); + if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { + if (tbData.pCreateTbReq != NULL) { + tdDestroySVCreateTbReq(tbData.pCreateTbReq); + tbData.pCreateTbReq = NULL; + } continue; } @@ -928,8 +944,12 @@ void 
tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { int32_t size = (int32_t)taosArrayGetSize(submitReq.aSubmitTbData) - 1; taosHashPut(pTableIndexMap, &groupId, sizeof(groupId), &size, sizeof(size)); } else { - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); - if (code != TSDB_CODE_SUCCESS) { + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, earlyTs, id); + if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { + if (tbData.pCreateTbReq != NULL) { + tdDestroySVCreateTbReq(tbData.pCreateTbReq); + tbData.pCreateTbReq = NULL; + } continue; } diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index ea3d285880..f9fede1d9b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -30,6 +30,14 @@ int32_t tsdbSetKeepCfg(STsdb *pTsdb, STsdbCfg *pCfg) { return 0; } +int64_t tsdbGetEarliestTs(STsdb *pTsdb) { + STsdbKeepCfg *pCfg = &pTsdb->keepCfg; + + int64_t now = taosGetTimestamp(pCfg->precision); + int64_t ts = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1; // needs to add one tick + return ts; +} + /** * @brief * diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 99520f7c92..d7c3eff571 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -226,14 +226,10 @@ static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) { return pWindow->skey // Update the query time window according to the data time to live(TTL) information, in order to avoid to return // the expired data to client, even it is queried already. 
static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) { - STsdbKeepCfg* pCfg = &pTsdb->keepCfg; - - int64_t now = taosGetTimestamp(pCfg->precision); - int64_t earilyTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1; // needs to add one tick - + int64_t earlyTs = tsdbGetEarliestTs(pTsdb); STimeWindow win = *pWindow; - if (win.skey < earilyTs) { - win.skey = earilyTs; + if (win.skey < earlyTs) { + win.skey = earlyTs; } return win; From b4956392d9cf4768968757d505ec74fd94059daa Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 30 May 2024 15:54:08 +0800 Subject: [PATCH 42/67] fix(stream): return in_progress code if not send retrieve rsp to downstream. --- source/dnode/vnode/src/tqCommon/tqCommon.c | 7 ++++--- source/dnode/vnode/src/vnd/vnodeSvr.c | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index ee3f1a3760..45e1a300d0 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -871,7 +871,7 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) tqError("vgId:%d process retrieve checkpoint trigger, checkpointId:%" PRId64 " from s-task:0x%x, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, pReq->checkpointId, (int32_t)pReq->downstreamTaskId, pReq->upstreamTaskId); - return TSDB_CODE_SUCCESS; + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } tqDebug("s-task:0x%x recv retrieve checkpoint-trigger msg from downstream s-task:0x%x, checkpointId:%" PRId64, @@ -890,6 +890,7 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) tqDebug("s-task:%s re-send checkpoint-trigger to:0x%x, checkpointId:%" PRId64 ", transId:%d", pTask->id.idStr, (int32_t)pReq->downstreamTaskId, checkpointId, transId); streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info); + return TSDB_CODE_SUCCESS; 
} else { // not send checkpoint-trigger yet, wait int32_t recv = 0, total = 0; streamTaskGetTriggerRecvStatus(pTask, &recv, &total); @@ -902,6 +903,7 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) "sending checkpoint-source/trigger", pTask->id.idStr, recv, total); } + return TSDB_CODE_ACTION_IN_PROGRESS; } } else { // upstream not recv the checkpoint-source/trigger till now ASSERT(pState->state == TASK_STATUS__READY || pState->state == TASK_STATUS__HALT); @@ -909,9 +911,8 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) "s-task:%s not recv checkpoint-source from mnode or checkpoint-trigger from upstream yet, wait for all " "upstream sending checkpoint-source/trigger", pTask->id.idStr); + return TSDB_CODE_ACTION_IN_PROGRESS; } - - return TSDB_CODE_SUCCESS; } int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 426f85fa5e..c0eeebaecb 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -842,16 +842,16 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) return tqProcessTaskScanHistory(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_CHECKPOINT_READY: return tqProcessTaskCheckpointReadyMsg(pVnode->pTq, pMsg); + case TDMT_STREAM_TASK_CHECKPOINT_READY_RSP: + return tqProcessTaskCheckpointReadyRsp(pVnode->pTq, pMsg); case TDMT_STREAM_RETRIEVE_TRIGGER: return tqProcessTaskRetrieveTriggerReq(pVnode->pTq, pMsg); + case TDMT_STREAM_RETRIEVE_TRIGGER_RSP: + return tqProcessTaskRetrieveTriggerRsp(pVnode->pTq, pMsg); case TDMT_MND_STREAM_HEARTBEAT_RSP: return tqProcessStreamHbRsp(pVnode->pTq, pMsg); case TDMT_MND_STREAM_REQ_CHKPT_RSP: return tqProcessStreamReqCheckpointRsp(pVnode->pTq, pMsg); - case TDMT_STREAM_TASK_CHECKPOINT_READY_RSP: - return tqProcessTaskCheckpointReadyRsp(pVnode->pTq, pMsg); - case 
TDMT_STREAM_RETRIEVE_TRIGGER_RSP: - return tqProcessTaskRetrieveTriggerRsp(pVnode->pTq, pMsg); case TDMT_VND_GET_STREAM_PROGRESS: return tqStreamProgressRetrieveReq(pVnode->pTq, pMsg); default: From e66283d99ccbefe442f05630b1c164d19062a557 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 30 May 2024 18:41:17 +0800 Subject: [PATCH 43/67] fix(stream): remove table meta in table-meta cache. --- source/dnode/vnode/src/tq/tqSink.c | 153 ++++++++++++++++------------- 1 file changed, 85 insertions(+), 68 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 3fe124ea85..96ccf18504 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -39,6 +39,7 @@ static int32_t doWaitForDstTableCreated(SVnode* pVnode, SStreamTask* pTask, STab const char* dstTableName, int64_t* uid); static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, const char* id); +static int32_t doRemoveFromCache(SSHashObj* pSinkTableMap, uint64_t groupId, const char* id); static bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, const char* ctbName, int64_t suid); static int32_t initCreateTableMsg(SVCreateTbReq* pCreateTableReq, uint64_t suid, const char* stbFullName, int32_t numOfTags); @@ -396,46 +397,6 @@ int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, c return TSDB_CODE_SUCCESS; } -int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, - int64_t suid) { - SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; - - int32_t code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, &deleteReq, pTask->id.idStr, - pTask->ver >= SSTREAM_TASK_SUBTABLE_CHANGED_VER && pTask->subtableWithoutMd5 != 1); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - if (taosArrayGetSize(deleteReq.deleteReqs) == 0) { - 
taosArrayDestroy(deleteReq.deleteReqs); - return TSDB_CODE_SUCCESS; - } - - int32_t len; - tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); - if (code != TSDB_CODE_SUCCESS) { - qError("s-task:%s failed to encode delete request", pTask->id.idStr); - return code; - } - - SEncoder encoder; - void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); - void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); - tEncoderInit(&encoder, abuf, len); - tEncodeSBatchDeleteReq(&encoder, &deleteReq); - tEncoderClear(&encoder); - taosArrayDestroy(deleteReq.deleteReqs); - - ((SMsgHead*)serializedDeleteReq)->vgId = TD_VID(pVnode); - - SRpcMsg msg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead)}; - if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { - tqDebug("failed to put delete req into write-queue since %s", terrstr()); - } - - return TSDB_CODE_SUCCESS; -} - bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, const char* ctbName, int64_t suid) { if (pReader->me.type != TSDB_CHILD_TABLE) { tqError("vgId:%d, failed to write into %s, since table type:%d incorrect", vgId, ctbName, pReader->me.type); @@ -484,23 +445,6 @@ SVCreateTbReq* buildAutoCreateTableReq(const char* stbFullName, int64_t suid, in return pCreateTbReq; } -int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, const char* id) { - if (tSimpleHashGetSize(pSinkTableMap) > MAX_CACHE_TABLE_INFO_NUM) { - taosMemoryFreeClear(pTableSinkInfo); // too many items, failed to cache it - return TSDB_CODE_FAILED; - } - - int32_t code = tSimpleHashPut(pSinkTableMap, &groupId, sizeof(uint64_t), &pTableSinkInfo, POINTER_BYTES); - if (code != TSDB_CODE_SUCCESS) { - taosMemoryFreeClear(pTableSinkInfo); - } else { - tqDebug("s-task:%s new dst table:%s(uid:%" PRIu64 ") added into cache, total:%d", id, pTableSinkInfo->name.data, - pTableSinkInfo->uid, tSimpleHashGetSize(pSinkTableMap)); - } - - 
return code; -} - int32_t buildSubmitMsgImpl(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen) { int32_t code = 0; void* pBuf = NULL; @@ -826,17 +770,6 @@ int32_t tqSetDstTableDataPayload(uint64_t suid, const STSchema *pTSchema, int32_ return code; } -bool hasOnlySubmitData(const SArray* pBlocks, int32_t numOfBlocks) { - for (int32_t i = 0; i < numOfBlocks; ++i) { - SSDataBlock* p = taosArrayGet(pBlocks, i); - if (p->info.type == STREAM_DELETE_RESULT || p->info.type == STREAM_CREATE_CHILD_TABLE) { - return false; - } - } - - return true; -} - void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { const SArray* pBlocks = (const SArray*)data; SVnode* pVnode = (SVnode*)vnode; @@ -884,6 +817,11 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, earlyTs, id); if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { + if (tbData.pCreateTbReq != NULL) { + tdDestroySVCreateTbReq(tbData.pCreateTbReq); + doRemoveFromCache(pTask->outputInfo.tbSink.pTblInfo, pDataBlock->info.id.groupId, id); + tbData.pCreateTbReq = NULL; + } continue; } @@ -934,6 +872,7 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { if (tbData.pCreateTbReq != NULL) { tdDestroySVCreateTbReq(tbData.pCreateTbReq); + doRemoveFromCache(pTask->outputInfo.tbSink.pTblInfo, groupId, id); tbData.pCreateTbReq = NULL; } continue; @@ -973,3 +912,81 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { } } } + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool hasOnlySubmitData(const SArray* pBlocks, int32_t numOfBlocks) { + for (int32_t i = 0; i < numOfBlocks; ++i) { + SSDataBlock* p = taosArrayGet(pBlocks, i); + if (p->info.type == STREAM_DELETE_RESULT || p->info.type == STREAM_CREATE_CHILD_TABLE) { + 
return false; + } + } + + return true; +} + +int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, const char* id) { + if (tSimpleHashGetSize(pSinkTableMap) > MAX_CACHE_TABLE_INFO_NUM) { + taosMemoryFreeClear(pTableSinkInfo); // too many items, failed to cache it + return TSDB_CODE_FAILED; + } + + int32_t code = tSimpleHashPut(pSinkTableMap, &groupId, sizeof(uint64_t), &pTableSinkInfo, POINTER_BYTES); + if (code != TSDB_CODE_SUCCESS) { + taosMemoryFreeClear(pTableSinkInfo); + } else { + tqDebug("s-task:%s new dst table:%s(uid:%" PRIu64 ") added into cache, total:%d", id, pTableSinkInfo->name.data, + pTableSinkInfo->uid, tSimpleHashGetSize(pSinkTableMap)); + } + + return code; +} + +int32_t doRemoveFromCache(SSHashObj* pSinkTableMap, uint64_t groupId, const char* id) { + if (tSimpleHashGetSize(pSinkTableMap) == 0) { + return TSDB_CODE_SUCCESS; + } + + int32_t code = tSimpleHashRemove(pSinkTableMap, &groupId, sizeof(groupId)); + tqDebug("s-task:%s remove cached table meta for groupId:%"PRId64, id, groupId); +} + +int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, + int64_t suid) { + SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; + + int32_t code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, &deleteReq, pTask->id.idStr, + pTask->ver >= SSTREAM_TASK_SUBTABLE_CHANGED_VER && pTask->subtableWithoutMd5 != 1); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (taosArrayGetSize(deleteReq.deleteReqs) == 0) { + taosArrayDestroy(deleteReq.deleteReqs); + return TSDB_CODE_SUCCESS; + } + + int32_t len; + tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); + if (code != TSDB_CODE_SUCCESS) { + qError("s-task:%s failed to encode delete request", pTask->id.idStr); + return code; + } + + SEncoder encoder; + void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); + void* abuf = 
POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); + tEncoderInit(&encoder, abuf, len); + tEncodeSBatchDeleteReq(&encoder, &deleteReq); + tEncoderClear(&encoder); + taosArrayDestroy(deleteReq.deleteReqs); + + ((SMsgHead*)serializedDeleteReq)->vgId = TD_VID(pVnode); + + SRpcMsg msg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead)}; + if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { + tqDebug("failed to put delete req into write-queue since %s", terrstr()); + } + + return TSDB_CODE_SUCCESS; +} \ No newline at end of file From 815a9e953ba31eb84f6495bc60fd7aa73d09b49d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 30 May 2024 18:41:56 +0800 Subject: [PATCH 44/67] fix(stream): desc the ref for checkpoint-trigger timer. --- source/dnode/vnode/src/tq/tqSink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 96ccf18504..5f3e1e3d14 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -948,7 +948,8 @@ int32_t doRemoveFromCache(SSHashObj* pSinkTableMap, uint64_t groupId, const char } int32_t code = tSimpleHashRemove(pSinkTableMap, &groupId, sizeof(groupId)); - tqDebug("s-task:%s remove cached table meta for groupId:%"PRId64, id, groupId); + tqDebug("s-task:%s remove cached table meta for groupId:%" PRId64, id, groupId); + return code; } int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, From ee4e4c0e6d4d75c43637746fff13ce5069b637ec Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 31 May 2024 09:17:20 +0800 Subject: [PATCH 45/67] refactor: do some internal refactor. 
--- include/libs/stream/tstream.h | 2 +- source/dnode/mnode/impl/inc/mndStream.h | 11 ---------- source/dnode/mnode/impl/src/mndStream.c | 8 +++---- source/dnode/mnode/impl/src/mndStreamTrans.c | 22 -------------------- source/dnode/mnode/impl/src/mndStreamUtil.c | 7 ------- source/dnode/snode/src/snode.c | 4 ++-- source/dnode/vnode/src/sma/smaRollup.c | 4 ++-- source/dnode/vnode/src/tq/tq.c | 8 +++---- source/libs/stream/inc/streamInt.h | 3 --- source/libs/stream/src/streamDispatch.c | 22 -------------------- source/libs/stream/src/streamMeta.c | 8 +++---- source/libs/stream/src/streamQueue.c | 2 +- source/libs/stream/src/streamSched.c | 8 +++---- source/libs/stream/src/streamTask.c | 2 +- source/libs/stream/src/streammsg.c | 4 ++-- 15 files changed, 24 insertions(+), 91 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 7e1c80c842..d007ea29a9 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -304,7 +304,7 @@ typedef struct SSTaskBasicInfo { int32_t totalLevel; int8_t taskLevel; int8_t fillHistory; // is fill history task or not - int64_t triggerParam; // in msec + int64_t delaySchedParam; // in msec } SSTaskBasicInfo; typedef struct SStreamRetrieveReq SStreamRetrieveReq; diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index bb10a9b9ad..2800aecdfa 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -46,18 +46,8 @@ typedef struct SVgroupChangeInfo { SArray *pUpdateNodeList; // SArray } SVgroupChangeInfo; -// time to generated the checkpoint, if now() - checkpointTs >= tsCheckpointInterval, this checkpoint will be discard -// to avoid too many checkpoints for a taskk in the waiting list -typedef struct SCheckpointCandEntry { - char *pName; - int64_t streamId; - int64_t checkpointTs; - int64_t checkpointId; -} SCheckpointCandEntry; - typedef struct SStreamTransMgmt { SHashObj *pDBTrans; - SHashObj 
*pWaitingList; // stream id list, of which timed checkpoint failed to be issued due to the trans conflict. } SStreamTransMgmt; typedef struct SStreamExecInfo { @@ -98,7 +88,6 @@ int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); int32_t mndPersistStream(STrans *pTrans, SStreamObj *pStream); int32_t mndStreamRegisterTrans(STrans *pTrans, const char *pTransName, int64_t streamId); int32_t mndStreamClearFinishedTrans(SMnode *pMnode, int32_t *pNumOfActiveChkpt); -int32_t mndAddtoCheckpointWaitingList(SStreamObj *pStream, int64_t checkpointId); bool mndStreamTransConflictCheck(SMnode *pMnode, int64_t streamId, const char *pTransName, bool lock); int32_t mndStreamGetRelTrans(SMnode *pMnode, int64_t streamId); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 09e0e4e415..b88c4a4665 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -45,7 +45,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessCreateStreamReqFromMNode(SRpcMsg *pReq); static int32_t mndProcessDropStreamReqFromMNode(SRpcMsg *pReq); -static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); +static int32_t mndProcessStreamCheckpoint(SRpcMsg *pReq); static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); static void mndCancelGetNextStream(SMnode *pMnode, void *pIter); static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); @@ -112,7 +112,7 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_STREAM_DROP_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); - mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamCheckpoint); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_REQ_CHKPT, 
mndProcessStreamReqCheckpoint); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); @@ -141,7 +141,6 @@ void mndCleanupStream(SMnode *pMnode) { taosArrayDestroy(execInfo.pTaskList); taosHashCleanup(execInfo.pTaskMap); taosHashCleanup(execInfo.transMgmt.pDBTrans); - taosHashCleanup(execInfo.transMgmt.pWaitingList); taosHashCleanup(execInfo.pTransferStateStreams); taosThreadMutexDestroy(&execInfo.lock); mDebug("mnd stream exec info cleanup"); @@ -967,7 +966,6 @@ static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStre bool conflict = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_CHECKPOINT_NAME, lock); if (conflict) { - mndAddtoCheckpointWaitingList(pStream, checkpointId); mWarn("checkpoint conflict with other trans in %s, ignore the checkpoint for stream:%s %" PRIx64, pStream->sourceDb, pStream->name, pStream->uid); return -1; @@ -1131,7 +1129,7 @@ static int32_t streamWaitComparFn(const void* p1, const void* p2) { return pInt1->duration > pInt2->duration? 
-1:1; } -static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { +static int32_t mndProcessStreamCheckpoint(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; diff --git a/source/dnode/mnode/impl/src/mndStreamTrans.c b/source/dnode/mnode/impl/src/mndStreamTrans.c index ac4cb08308..ff31aa0f7d 100644 --- a/source/dnode/mnode/impl/src/mndStreamTrans.c +++ b/source/dnode/mnode/impl/src/mndStreamTrans.c @@ -151,28 +151,6 @@ int32_t mndStreamGetRelTrans(SMnode* pMnode, int64_t streamUid) { return 0; } -int32_t mndAddtoCheckpointWaitingList(SStreamObj* pStream, int64_t checkpointId) { - SCheckpointCandEntry* pEntry = taosHashGet(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid)); - if (pEntry == NULL) { - SCheckpointCandEntry entry = {.streamId = pStream->uid, - .checkpointTs = taosGetTimestampMs(), - .checkpointId = checkpointId, - .pName = taosStrdup(pStream->name)}; - - taosHashPut(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid), &entry, sizeof(entry)); - int32_t size = taosHashGetSize(execInfo.transMgmt.pWaitingList); - - mDebug("stream:%" PRIx64 " add into waiting list due to conflict, ts:%" PRId64 " , checkpointId: %" PRId64 - ", total in waitingList:%d", - pStream->uid, entry.checkpointTs, checkpointId, size); - } else { - mDebug("stream:%" PRIx64 " ts:%" PRId64 ", checkpointId:%" PRId64 " already in waiting list, no need to add into", - pStream->uid, pEntry->checkpointTs, checkpointId); - } - - return TSDB_CODE_SUCCESS; -} - STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, ETrnConflct conflict, const char *name, const char *pMsg) { STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, conflict, pReq, name); if (pTrans == NULL) { diff --git a/source/dnode/mnode/impl/src/mndStreamUtil.c b/source/dnode/mnode/impl/src/mndStreamUtil.c index d5bc12f9df..e53908eeed 100644 --- a/source/dnode/mnode/impl/src/mndStreamUtil.c +++ 
b/source/dnode/mnode/impl/src/mndStreamUtil.c @@ -558,11 +558,6 @@ int32_t mndStreamSetResetTaskAction(SMnode *pMnode, STrans *pTrans, SStreamObj * return 0; } -static void freeCheckpointCandEntry(void *param) { - SCheckpointCandEntry *pEntry = param; - taosMemoryFreeClear(pEntry->pName); -} - static void freeTaskList(void* param) { SArray** pList = (SArray **)param; taosArrayDestroy(*pList); @@ -575,9 +570,7 @@ void mndInitExecInfo() { execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId)); execInfo.pTaskMap = taosHashInit(64, fn, true, HASH_NO_LOCK); execInfo.transMgmt.pDBTrans = taosHashInit(32, fn, true, HASH_NO_LOCK); - execInfo.transMgmt.pWaitingList = taosHashInit(32, fn, true, HASH_NO_LOCK); execInfo.pTransferStateStreams = taosHashInit(32, fn, true, HASH_NO_LOCK); - taosHashSetFreeFp(execInfo.transMgmt.pWaitingList, freeCheckpointCandEntry); taosHashSetFreeFp(execInfo.pTransferStateStreams, freeTaskList); } diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 89ab6d52c3..c61988574c 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -62,14 +62,14 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer " child id:%d, level:%d, status:%s fill-history:%d, related stream task:0x%x trigger:%" PRId64 " ms", SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, - (int32_t)pTask->streamTaskId.taskId, pTask->info.triggerParam); + (int32_t)pTask->streamTaskId.taskId, pTask->info.delaySchedParam); } else { sndInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, status:%s fill-history:%d, related fill-task:0x%x trigger:%" PRId64 " ms", SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, 
pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, - (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.triggerParam); + (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.delaySchedParam); } return 0; } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 7138ecbeaa..3cc7c6ec66 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -244,7 +244,7 @@ static void tdRSmaTaskInit(SStreamMeta *pMeta, SRSmaInfoItem *pItem, SStreamTask SStreamTask **ppTask = (SStreamTask **)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask && *ppTask) { pItem->submitReqVer = (*ppTask)->chkInfo.checkpointVer; - pItem->fetchResultVer = (*ppTask)->info.triggerParam; + pItem->fetchResultVer = (*ppTask)->info.delaySchedParam; } streamMetaRUnLock(pMeta); } @@ -1289,7 +1289,7 @@ _checkpoint: pTask->chkInfo.checkpointId = checkpointId; // 1pTask->checkpointingId; pTask->chkInfo.checkpointVer = pItem->submitReqVer; - pTask->info.triggerParam = pItem->fetchResultVer; + pTask->info.delaySchedParam = pItem->fetchResultVer; pTask->info.taskLevel = TASK_LEVEL_SMA; if (!checkpointBuilt) { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index d8460d2cb5..1e564ba467 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -770,19 +770,19 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { tqInfo("vgId:%d build stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, cur-status:%s, next-status:%s fill-history:%d, related stream task:0x%x " - "trigger:%" PRId64 " ms, inputVer:%" PRId64, + "delaySched:%" PRId64 " ms, inputVer:%" PRId64, vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, p, pNext, pTask->info.fillHistory, - 
(int32_t)pTask->streamTaskId.taskId, pTask->info.triggerParam, nextProcessVer); + (int32_t)pTask->streamTaskId.taskId, pTask->info.delaySchedParam, nextProcessVer); } else { tqInfo( "vgId:%d build stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 - " child id:%d, level:%d, cur-status:%s next-status:%s fill-history:%d, related fill-task:0x%x trigger:%" PRId64 + " child id:%d, level:%d, cur-status:%s next-status:%s fill-history:%d, related fill-task:0x%x delaySched:%" PRId64 " ms, inputVer:%" PRId64, vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, p, pNext, pTask->info.fillHistory, - (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.triggerParam, nextProcessVer); + (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.delaySchedParam, nextProcessVer); ASSERT(pChkInfo->checkpointVer <= pChkInfo->nextProcessVer); } diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 10db53ea38..d6cd5b528d 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -145,9 +145,6 @@ void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups); int32_t getNumOfDispatchBranch(SStreamTask* pTask); void clearBufferedDispatchMsg(SStreamTask* pTask); -int32_t streamTaskBuildAndSendTriggerMsg(SStreamTask* pTask, const SStreamDataBlock* pData, int32_t dstTaskId, - int32_t vgId, SEpSet* pEpset); - int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamTask* pTask, int64_t resultSize, diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 522ac9a910..a6cfbb063b 100644 --- 
a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -227,28 +227,6 @@ void clearBufferedDispatchMsg(SStreamTask* pTask) { pMsgInfo->dispatchMsgType = 0; } -int32_t streamTaskBuildAndSendTriggerMsg(SStreamTask* pTask, const SStreamDataBlock* pData, int32_t dstTaskId, - int32_t vgId, SEpSet* pEpset) { - SStreamDispatchReq* pReq = taosMemoryCalloc(1, sizeof(SStreamDispatchReq)); - - int32_t numOfBlocks = taosArrayGetSize(pData->blocks); - int32_t code = tInitStreamDispatchReq(pReq, pTask, pData->srcVgId, numOfBlocks, dstTaskId, pData->type); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - for (int32_t i = 0; i < numOfBlocks; i++) { - SSDataBlock* pDataBlock = taosArrayGet(pData->blocks, i); - code = streamAddBlockIntoDispatchMsg(pDataBlock, pReq); - if (code != TSDB_CODE_SUCCESS) { - destroyDispatchMsg(pReq, 1); - return code; - } - } - - return doSendDispatchMsg(pTask, pReq, vgId, pEpset); -} - static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pData) { int32_t code = 0; int32_t numOfBlocks = taosArrayGetSize(pData->blocks); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index e0822f60e7..ae1d86e317 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -458,10 +458,10 @@ void streamMetaClear(SStreamMeta* pMeta) { SStreamTask* p = *(SStreamTask**)pIter; // release the ref by timer - if (p->info.triggerParam != 0 && p->info.fillHistory == 0) { // one more ref in timer + if (p->info.delaySchedParam != 0 && p->info.fillHistory == 0) { // one more ref in timer stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", p->id.idStr, p->refCnt); taosTmrStop(p->schedInfo.pDelayTimer); - p->info.triggerParam = 0; + p->info.delaySchedParam = 0; streamMetaReleaseTask(pMeta, p); } @@ -752,10 +752,10 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t ASSERT(pTask->status.timerActive == 0); - 
if (pTask->info.triggerParam != 0 && pTask->info.fillHistory == 0) { + if (pTask->info.delaySchedParam != 0 && pTask->info.fillHistory == 0) { stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", pTask->id.idStr, pTask->refCnt); taosTmrStop(pTask->schedInfo.pDelayTimer); - pTask->info.triggerParam = 0; + pTask->info.delaySchedParam = 0; streamMetaReleaseTask(pMeta, pTask); } diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 461d53d5a9..9c5c230a3d 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -330,7 +330,7 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) } if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && type != STREAM_INPUT__CHECKPOINT_TRIGGER && - (pTask->info.triggerParam != 0)) { + (pTask->info.delaySchedParam != 0)) { atomic_val_compare_exchange_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); stDebug("s-task:%s new data arrived, active the sched-trigger, triggerStatus:%d", pTask->id.idStr, pTask->schedInfo.status); } diff --git a/source/libs/stream/src/streamSched.c b/source/libs/stream/src/streamSched.c index 9bd12a4fd8..9c817d565b 100644 --- a/source/libs/stream/src/streamSched.c +++ b/source/libs/stream/src/streamSched.c @@ -20,13 +20,13 @@ static void streamTaskResumeHelper(void* param, void* tmrId); static void streamTaskSchedHelper(void* param, void* tmrId); int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { - if (pTask->info.triggerParam != 0 && pTask->info.fillHistory == 0) { + if (pTask->info.delaySchedParam != 0 && pTask->info.fillHistory == 0) { int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1); ASSERT(ref == 2 && pTask->schedInfo.pDelayTimer == NULL); - stDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", pTask->id.idStr, pTask->info.triggerParam); + stDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", 
pTask->id.idStr, pTask->info.delaySchedParam); - pTask->schedInfo.pDelayTimer = taosTmrStart(streamTaskSchedHelper, (int32_t)pTask->info.triggerParam, pTask, streamTimer); + pTask->schedInfo.pDelayTimer = taosTmrStart(streamTaskSchedHelper, (int32_t)pTask->info.delaySchedParam, pTask, streamTimer); pTask->schedInfo.status = TASK_TRIGGER_STATUS__INACTIVE; } @@ -119,7 +119,7 @@ void streamTaskResumeHelper(void* param, void* tmrId) { void streamTaskSchedHelper(void* param, void* tmrId) { SStreamTask* pTask = (void*)param; const char* id = pTask->id.idStr; - int32_t nextTrigger = (int32_t)pTask->info.triggerParam; + int32_t nextTrigger = (int32_t)pTask->info.delaySchedParam; int8_t status = atomic_load_8(&pTask->schedInfo.status); stTrace("s-task:%s in scheduler, trigger status:%d, next:%dms", id, status, nextTrigger); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 6d30cc6759..3daadda687 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -107,7 +107,7 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, SEpSet* pEpset, pTask->info.taskLevel = taskLevel; pTask->info.fillHistory = fillHistory; - pTask->info.triggerParam = triggerParam; + pTask->info.delaySchedParam = triggerParam; pTask->subtableWithoutMd5 = subtableWithoutMd5; pTask->status.pSM = streamCreateStateMachine(pTask); diff --git a/source/libs/stream/src/streammsg.c b/source/libs/stream/src/streammsg.c index f8228a8f5f..705406f044 100644 --- a/source/libs/stream/src/streammsg.c +++ b/source/libs/stream/src/streammsg.c @@ -505,7 +505,7 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tSerializeSUseDbRspImp(pEncoder, &pTask->outputInfo.shuffleDispatcher.dbInfo) < 0) return -1; if (tEncodeCStr(pEncoder, pTask->outputInfo.shuffleDispatcher.stbFullName) < 0) return -1; } - if (tEncodeI64(pEncoder, pTask->info.triggerParam) < 0) return -1; + if (tEncodeI64(pEncoder, 
pTask->info.delaySchedParam) < 0) return -1; if (tEncodeI8(pEncoder, pTask->subtableWithoutMd5) < 0) return -1; if (tEncodeCStrWithLen(pEncoder, pTask->reserve, sizeof(pTask->reserve) - 1) < 0) return -1; @@ -588,7 +588,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDeserializeSUseDbRspImp(pDecoder, &pTask->outputInfo.shuffleDispatcher.dbInfo) < 0) return -1; if (tDecodeCStrTo(pDecoder, pTask->outputInfo.shuffleDispatcher.stbFullName) < 0) return -1; } - if (tDecodeI64(pDecoder, &pTask->info.triggerParam) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->info.delaySchedParam) < 0) return -1; if (pTask->ver >= SSTREAM_TASK_SUBTABLE_CHANGED_VER){ if (tDecodeI8(pDecoder, &pTask->subtableWithoutMd5) < 0) return -1; } From 76b43dc072a455abc9772fbae256918514358dc3 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 31 May 2024 14:32:55 +0800 Subject: [PATCH 46/67] fix(stream): 1. check the checkpoint-trigger rsp, 2. set the error code in the message body, 3. follower nodes not handle the checkpoint-trigger retrieve request. 
--- include/libs/stream/streammsg.h | 1 + include/libs/stream/tstream.h | 3 +- include/util/taoserror.h | 1 + source/dnode/vnode/src/tq/tq.c | 9 ++ source/dnode/vnode/src/tqCommon/tqCommon.c | 20 +++- source/libs/stream/inc/streamInt.h | 1 + source/libs/stream/src/streamCheckpoint.c | 124 ++++++++++++++------- source/libs/stream/src/streamDispatch.c | 39 ++----- source/libs/stream/src/streamExec.c | 4 - source/libs/stream/src/streamTask.c | 4 + 10 files changed, 129 insertions(+), 77 deletions(-) diff --git a/include/libs/stream/streammsg.h b/include/libs/stream/streammsg.h index 96701fe21d..91bfc6afc8 100644 --- a/include/libs/stream/streammsg.h +++ b/include/libs/stream/streammsg.h @@ -187,6 +187,7 @@ typedef struct SCheckpointTriggerRsp { int32_t upstreamTaskId; int32_t taskId; int32_t transId; + int32_t rspCode; } SCheckpointTriggerRsp; typedef struct { diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index d007ea29a9..0f546ee869 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -272,7 +272,6 @@ typedef struct SCheckpointInfo { int64_t processedVer; int64_t nextProcessVer; // current offset in WAL, not serialize it int32_t numOfNotReady; - SActiveCheckpointInfo* pActiveInfo; int64_t msgVer; } SCheckpointInfo; @@ -678,7 +677,7 @@ bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeI void streamTaskGetTriggerRecvStatus(SStreamTask* pTask, int32_t* pRecved, int32_t* pTotal); void streamTaskInitTriggerDispatchInfo(SStreamTask* pTask); void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId); -int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, SRpcHandleInfo* pRpcInfo); +int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, SRpcHandleInfo* pInfo, int32_t code); int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); int32_t streamQueueGetNumOfUnAccessedItems(const SStreamQueue* pQueue); diff --git 
a/include/util/taoserror.h b/include/util/taoserror.h index 9ae75bade2..8f8434dfc1 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -910,6 +910,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_STREAM_EXEC_CANCELLED TAOS_DEF_ERROR_CODE(0, 0x4102) #define TSDB_CODE_STREAM_INVALID_STATETRANS TAOS_DEF_ERROR_CODE(0, 0x4103) #define TSDB_CODE_STREAM_TASK_IVLD_STATUS TAOS_DEF_ERROR_CODE(0, 0x4104) +#define TSDB_CODE_STREAM_NOT_LEADER TAOS_DEF_ERROR_CODE(0, 0x4105) // TDLite #define TSDB_CODE_TDLITE_IVLD_OPEN_FLAGS TAOS_DEF_ERROR_CODE(0, 0x5100) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 1e564ba467..1076f1f2c6 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1241,6 +1241,15 @@ int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + + SRetrieveChkptTriggerReq* pReq = (SRetrieveChkptTriggerReq*) pMsg->pCont; + if (!vnodeIsRoleLeader(pTq->pVnode)) { + tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from 0x%x", vgId, + (int32_t)pReq->downstreamTaskId); + return TSDB_CODE_STREAM_NOT_LEADER; + } + return tqStreamTaskProcessRetrieveTriggerReq(pTq->pStreamMeta, pMsg); } diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index 45e1a300d0..c8ca324c5e 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -877,6 +877,16 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) tqDebug("s-task:0x%x recv retrieve checkpoint-trigger msg from downstream s-task:0x%x, checkpointId:%" PRId64, pReq->upstreamTaskId, (int32_t)pReq->downstreamTaskId, pReq->checkpointId); + if (pTask->status.downstreamReady != 1) { + tqError("s-task:%s not ready for checkpoint-trigger retrieve from 0x%x, since downstream not ready", + pTask->id.idStr, 
(int32_t)pReq->downstreamTaskId); + + streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info, TSDB_CODE_STREAM_TASK_IVLD_STATUS); + streamMetaReleaseTask(pMeta, pTask); + + return TSDB_CODE_SUCCESS; + } + SStreamTaskState* pState = streamTaskGetStatus(pTask); if (pState->state == TASK_STATUS__CK) { // recv the checkpoint-source/trigger already int32_t transId = 0; @@ -889,8 +899,7 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) // re-send the lost checkpoint-trigger msg to downstream task tqDebug("s-task:%s re-send checkpoint-trigger to:0x%x, checkpointId:%" PRId64 ", transId:%d", pTask->id.idStr, (int32_t)pReq->downstreamTaskId, checkpointId, transId); - streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info); - return TSDB_CODE_SUCCESS; + streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info, TSDB_CODE_SUCCESS); } else { // not send checkpoint-trigger yet, wait int32_t recv = 0, total = 0; streamTaskGetTriggerRecvStatus(pTask, &recv, &total); @@ -903,7 +912,7 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) "sending checkpoint-source/trigger", pTask->id.idStr, recv, total); } - return TSDB_CODE_ACTION_IN_PROGRESS; + streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info, TSDB_CODE_ACTION_IN_PROGRESS); } } else { // upstream not recv the checkpoint-source/trigger till now ASSERT(pState->state == TASK_STATUS__READY || pState->state == TASK_STATUS__HALT); @@ -911,8 +920,11 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) "s-task:%s not recv checkpoint-source from mnode or checkpoint-trigger from upstream yet, wait for all " "upstream sending checkpoint-source/trigger", pTask->id.idStr); - return TSDB_CODE_ACTION_IN_PROGRESS; + streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info, TSDB_CODE_ACTION_IN_PROGRESS); } + + streamMetaReleaseTask(pMeta, 
pTask); + return TSDB_CODE_SUCCESS; } int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index d6cd5b528d..9a3cbdc963 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -61,6 +61,7 @@ struct SActiveCheckpointInfo { bool dispatchTrigger; SArray* pDispatchTriggerList; // SArray SArray* pReadyMsgList; // SArray + int8_t allUpstreamTriggerRecv; int32_t checkCounter; tmr_h pCheckTmr; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 6b02ae485f..7f41629dac 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -34,11 +34,11 @@ static int32_t streamTaskUploadCheckpoint(const char* id, const char* path); static int32_t deleteCheckpoint(const char* id); static int32_t downloadCheckpointByNameS3(const char* id, const char* fname, const char* dstName); static int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask); -static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType); +static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId); static int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList); static void checkpointTriggerMonitorFn(void* param, void* tmrId); -static SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType); +static SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId); bool streamTaskIsAllUpstreamSendTrigger(SStreamTask* pTask) { SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; @@ -58,7 +58,8 @@ bool streamTaskIsAllUpstreamSendTrigger(SStreamTask* pTask) { return allSend; } -SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, 
int32_t checkpointType) { +SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, + int32_t transId) { SStreamDataBlock* pChkpoint = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); if (pChkpoint == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -75,8 +76,8 @@ SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpoint } pBlock->info.type = STREAM_CHECKPOINT; - pBlock->info.version = pTask->chkInfo.pActiveInfo->activeId; - pBlock->info.window.ekey = pBlock->info.window.skey = pTask->chkInfo.pActiveInfo->transId; // NOTE: set the transId + pBlock->info.version = checkpointId; + pBlock->info.window.ekey = pBlock->info.window.skey = transId; // NOTE: set the transId pBlock->info.rows = 1; pBlock->info.childId = pTask->info.selfChildId; @@ -89,10 +90,10 @@ SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpoint return pChkpoint; } -int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { - SStreamDataBlock* pChkpoint = createChkptTriggerBlock(pTask, checkpointType); +int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId) { + SStreamDataBlock* pCheckpoint = createChkptTriggerBlock(pTask, checkpointType, checkpointId, transId); - if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pChkpoint) < 0) { + if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pCheckpoint) < 0) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -116,39 +117,37 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo // 2. Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task // and this is the last item in the inputQ. 
- return appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); + return appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER, pReq->checkpointId, pReq->transId); } int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTriggerRsp* pRsp) { ASSERT(pTask->info.taskLevel != TASK_LEVEL__SOURCE); - SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; - if (pInfo->transId != pRsp->transId || pInfo->activeId != pRsp->checkpointId) { - // todo handle error - return -1; + if (pRsp->rspCode != TSDB_CODE_SUCCESS) { + stDebug("s-task:%s retrieve checkpoint-trgger rsp from upstream:0x%x invalid, code:%s", pTask->id.idStr, + pRsp->upstreamTaskId, tstrerror(pRsp->rspCode)); + return TSDB_CODE_SUCCESS; } - taosThreadMutexLock(&pTask->lock); - SStreamTaskState* pState = streamTaskGetStatus(pTask); - if (pState->state != TASK_STATUS__CK) { - // todo handle error - taosThreadMutexUnlock(&pTask->lock); - return -1; - } - - taosThreadMutexUnlock(&pTask->lock); - - appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER, pRsp->checkpointId, pRsp->transId); return TSDB_CODE_SUCCESS; } -int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, SRpcHandleInfo* pRpcInfo) { +int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, SRpcHandleInfo* pRpcInfo, int32_t code) { SCheckpointTriggerRsp* pRsp = rpcMallocCont(sizeof(SCheckpointTriggerRsp)); pRsp->streamId = pTask->id.streamId; pRsp->upstreamTaskId = pTask->id.taskId; pRsp->taskId = dstTaskId; - pRsp->checkpointId = pTask->chkInfo.pActiveInfo->activeId; - pRsp->transId = pTask->chkInfo.pActiveInfo->transId; + + if (code == TSDB_CODE_SUCCESS) { + pRsp->checkpointId = pTask->chkInfo.pActiveInfo->activeId; + pRsp->transId = pTask->chkInfo.pActiveInfo->transId; + } else { + pRsp->checkpointId = -1; + pRsp->transId = -1; + } + + pRsp->rspCode = code; SRpcMsg 
rspMsg = {.code = 0, .pCont = pRsp, .contLen = sizeof(SCheckpointTriggerRsp), .info = *pRpcInfo}; tmsgSendRsp(&rspMsg); @@ -178,15 +177,64 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock const char* id = pTask->id.idStr; int32_t code = TSDB_CODE_SUCCESS; int32_t vgId = pTask->pMeta->vgId; + int32_t taskLevel = pTask->info.taskLevel; + + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + + taosThreadMutexLock(&pTask->lock); + if (pTask->chkInfo.checkpointId >= checkpointId) { + stError("s-task:%s vgId:%d current checkpointId:%" PRId64 + " recv expired checkpoint-trigger block, checkpointId:%" PRId64 " transId:%d, discard", + id, vgId, pTask->chkInfo.checkpointId, checkpointId, transId); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } + + if (streamTaskGetStatus(pTask)->state == TASK_STATUS__CK) { + if (pActiveInfo->activeId != checkpointId) { + stError("s-task:%s vgId:%d active checkpointId:%" PRId64 ", recv invalid checkpoint-trigger checkpointId:%" PRId64 + " discard", + id, vgId, pActiveInfo->activeId, checkpointId); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } else { // checkpointId == pActiveInfo->activeId + if (pActiveInfo->allUpstreamTriggerRecv == 1) { + stDebug( + "s-task:%s vgId:%d all upstream checkpoint-trigger recv, discard this checkpoint-trigger, " + "checkpointId:%" PRId64 " transId:%d", + id, vgId, checkpointId, transId); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } + + if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { + // check if already recv or not, and duplicated checkpoint-trigger msg recv, discard it + for (int32_t i = 0; i < taosArrayGetSize(pActiveInfo->pReadyMsgList); ++i) { + SStreamChkptReadyInfo* p = taosArrayGet(pActiveInfo->pReadyMsgList, i); + if (p->upStreamTaskId == pBlock->srcTaskId) { + ASSERT(p->checkpointId == checkpointId); + stWarn("s-task:%s repeatly recv checkpoint-source msg from 
task:0x%x vgId:%d, checkpointId:%" PRId64 + ", prev recvTs:%" PRId64 " discard", + pTask->id.idStr, p->upStreamTaskId, p->nodeId, p->checkpointId, p->recvTs); + + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } + } + } + } + } + + taosThreadMutexUnlock(&pTask->lock); stDebug("s-task:%s vgId:%d start to handle the checkpoint-trigger block, checkpointId:%" PRId64 " ver:%" PRId64 - ", transId:%d current checkpointingId:%" PRId64, + ", transId:%d current active checkpointId:%" PRId64, id, vgId, pTask->chkInfo.checkpointId, pTask->chkInfo.checkpointVer, transId, checkpointId); // set task status if (streamTaskGetStatus(pTask)->state != TASK_STATUS__CK) { - pTask->chkInfo.pActiveInfo->activeId = checkpointId; - pTask->chkInfo.pActiveInfo->transId = transId; + pActiveInfo->activeId = checkpointId; + pActiveInfo->transId = transId; code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); if (code != TSDB_CODE_SUCCESS) { @@ -197,20 +245,18 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s start checkpoint-trigger monitor in 10s, ref:%d ", pTask->id.idStr, ref); - SActiveCheckpointInfo* pActive = pTask->chkInfo.pActiveInfo; streamMetaAcquireOneTask(pTask); - if (pActive->pCheckTmr == NULL) { - pActive->pCheckTmr = taosTmrStart(checkpointTriggerMonitorFn, 100, pTask, streamTimer); + if (pActiveInfo->pCheckTmr == NULL) { + pActiveInfo->pCheckTmr = taosTmrStart(checkpointTriggerMonitorFn, 100, pTask, streamTimer); } else { - taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActive->pCheckTmr); + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pCheckTmr); } } - // todo fix race condition: set the status and append checkpoint block - int32_t taskLevel = pTask->info.taskLevel; if (taskLevel == TASK_LEVEL__SOURCE) { int8_t type = pTask->outputInfo.type; + 
pActiveInfo->allUpstreamTriggerRecv = 1; if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) { stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); @@ -231,8 +277,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock streamAddCheckpointReadyMsg(pTask, pBlock->srcTaskId, pTask->info.selfChildId, checkpointId); // there are still some upstream tasks not send checkpoint request, do nothing and wait for then - bool allSend = streamTaskIsAllUpstreamSendTrigger(pTask); - if (!allSend) { + if (pActiveInfo->allUpstreamTriggerRecv != 1) { streamFreeQitem((SStreamQueueItem*)pBlock); return code; } @@ -272,7 +317,8 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { if (notReady == 0) { stDebug("s-task:%s all downstream tasks have completed the checkpoint, start to do checkpoint for current task", pTask->id.idStr); - appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT); + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pInfo->activeId, pInfo->transId); } else { int32_t total = streamTaskGetNumOfDownstream(pTask); stDebug("s-task:%s %d/%d downstream tasks are not ready, wait", pTask->id.idStr, notReady, total); diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index a6cfbb063b..f134224196 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -559,7 +559,7 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { } if (pTask->chkInfo.pActiveInfo->dispatchTrigger) { - stDebug("s-task:%s already send checkpoint trigger, not dispatch anymore", id); + stDebug("s-task:%s already send checkpoint-trigger, no longer dispatch any other data", id); atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); return 0; } @@ -874,29 +874,18 @@ int32_t 
streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, req.upstreamNodeId); SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + taosThreadMutexLock(&pActiveInfo->lock); + taosArrayPush(pActiveInfo->pReadyMsgList, &info); - bool recved = false; - int32_t size = taosArrayGetSize(pActiveInfo->pReadyMsgList); - for (int32_t i = 0; i < size; ++i) { - SStreamChkptReadyInfo* p = taosArrayGet(pActiveInfo->pReadyMsgList, i); - if (p->nodeId == req.upstreamNodeId) { - if (p->checkpointId == req.checkpointId) { - stWarn("s-task:%s repeatly recv checkpoint-source msg from task:0x%x vgId:%d, checkpointId:%" PRId64 ", ignore", - pTask->id.idStr, p->upStreamTaskId, p->nodeId, p->checkpointId); - } else { - stError("s-task:%s checkpointId:%" PRId64 " not completed, new checkpointId:%" PRId64 " recv", - pTask->id.idStr, p->checkpointId, checkpointId); - ASSERT(0); // failed to handle it - } - - recved = true; - break; - } - } - - if (!recved) { - taosArrayPush(pActiveInfo->pReadyMsgList, &info); + int32_t numOfRecv = taosArrayGetSize(pActiveInfo->pReadyMsgList); + int32_t total = streamTaskGetNumOfUpstream(pTask); + if (numOfRecv == total) { + stDebug("s-task:%s recv checkpoint-trigger from all upstream, continue", pTask->id.idStr); + pActiveInfo->allUpstreamTriggerRecv = 1; + } else { + ASSERT(numOfRecv <= total); + stDebug("s-task:%s %d/%d checkpoint-trigger recv", pTask->id.idStr, numOfRecv, total); } taosThreadMutexUnlock(&pActiveInfo->lock); @@ -1175,7 +1164,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S // This task has received the checkpoint req from the upstream task, from which all the messages should be // blocked. Note that there is no race condition here. 
if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { - atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); stDebug("s-task:%s close inputQ for upstream:0x%x, msgId:%d", id, pReq->upstreamTaskId, pReq->msgId); } else if (pReq->type == STREAM_INPUT__TRANS_STATE) { @@ -1187,11 +1175,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S } } - // disable the data from upstream tasks -// if (streamTaskGetStatus(pTask)->state == TASK_STATUS__HALT) { -// status = TASK_INPUT_STATUS__BLOCKED; -// } - { // do send response with the input status int32_t code = buildDispatchRsp(pTask, pReq, status, &pRsp->pCont); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 77f266bd6d..95634b2ff3 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -597,10 +597,6 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { // dispatch checkpoint msg to all downstream tasks int32_t type = pInput->type; if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { - if (pTask->pMeta->vgId == 2) { -// taosSsleep(20); - } - streamProcessCheckpointTriggerBlock(pTask, (SStreamDataBlock*)pInput); continue; } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 3daadda687..01e2f89d8c 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -626,6 +626,7 @@ void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); if (pInfo != NULL) { pInfo->dataAllowed = false; + int32_t t = atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); } } @@ -633,6 +634,8 @@ void streamTaskOpenUpstreamInput(SStreamTask* pTask, int32_t taskId) { SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); if (pInfo != NULL) { pInfo->dataAllowed = true; + int32_t t = 
atomic_sub_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); + ASSERT(t >= 0); } } @@ -1006,6 +1009,7 @@ void streamTaskClearActiveInfo(SActiveCheckpointInfo* pInfo) { pInfo->activeId = 0; // clear the checkpoint id pInfo->failedId = 0; pInfo->transId = 0; + pInfo->allUpstreamTriggerRecv = 0; pInfo->dispatchTrigger = false; taosArrayClear(pInfo->pReadyMsgList); From 923eaaa569a4729efa0ac0de99ca2f0f3e89fde3 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 31 May 2024 16:12:44 +0800 Subject: [PATCH 47/67] fix(test): update test cases. --- tests/system-test/0-others/information_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system-test/0-others/information_schema.py b/tests/system-test/0-others/information_schema.py index 66a69c4f9a..e8ed3db1b9 100644 --- a/tests/system-test/0-others/information_schema.py +++ b/tests/system-test/0-others/information_schema.py @@ -222,7 +222,7 @@ class TDTestCase: tdSql.query("select * from information_schema.ins_columns where db_name ='information_schema'") tdLog.info(len(tdSql.queryResult)) - tdSql.checkEqual(True, len(tdSql.queryResult) in range(259, 260)) + tdSql.checkEqual(True, len(tdSql.queryResult) in range(260, 261)) tdSql.query("select * from information_schema.ins_columns where db_name ='performance_schema'") tdSql.checkEqual(54, len(tdSql.queryResult)) From 5cbd733cab1911ec3a336bdf59d58dbcd855dfe5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 31 May 2024 18:41:20 +0800 Subject: [PATCH 48/67] fix(util): idle 500ms after kill rsync --- source/common/src/rsync.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/source/common/src/rsync.c b/source/common/src/rsync.c index 1b03b142e0..2ed21616dc 100644 --- a/source/common/src/rsync.c +++ b/source/common/src/rsync.c @@ -114,20 +114,24 @@ static int32_t execCommand(char* command){ void stopRsync() { int32_t code = #ifdef WINDOWS - system("taskkill /f /im rsync.exe"); + system("taskkill /f 
/im rsync.exe"); #else - system("pkill rsync"); + system("pkill rsync"); #endif - if(code != 0){ - uError("[rsync] stop rsync server failed,"ERRNO_ERR_FORMAT, ERRNO_ERR_DATA); - return; + + if (code != 0) { + uError("[rsync] stop rsync server failed," ERRNO_ERR_FORMAT, ERRNO_ERR_DATA); + } else { + uDebug("[rsync] stop rsync server successful"); } - uDebug("[rsync] stop rsync server successful"); + + taosMsleep(500); // sleep 500 ms to wait for the completion of kill operation. } void startRsync() { - if(taosMulMkDir(tsCheckpointBackupDir) != 0){ - uError("[rsync] build checkpoint backup dir failed, dir:%s,"ERRNO_ERR_FORMAT, tsCheckpointBackupDir, ERRNO_ERR_DATA); + if (taosMulMkDir(tsCheckpointBackupDir) != 0) { + uError("[rsync] build checkpoint backup dir failed, path:%s," ERRNO_ERR_FORMAT, tsCheckpointBackupDir, + ERRNO_ERR_DATA); return; } @@ -137,7 +141,7 @@ void startRsync() { snprintf(confDir, PATH_MAX, "%srsync.conf", tsCheckpointBackupDir); int32_t code = generateConfigFile(confDir); - if(code != 0){ + if (code != 0) { return; } @@ -145,12 +149,12 @@ void startRsync() { snprintf(cmd, PATH_MAX, "rsync --daemon --port=%d --config=%s", tsRsyncPort, confDir); // start rsync service to backup checkpoint code = system(cmd); - if(code != 0){ - uError("[rsync] start server failed, code:%d,"ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); - return; + if (code != 0) { + uError("[rsync] start server failed, code:%d," ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); + } else { + uDebug("[rsync] start server successful"); } - uDebug("[rsync] start server successful"); } int32_t uploadByRsync(const char* id, const char* path) { From 2a8270f9c835e55969f9f241e2c9bc4fa33eec48 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 1 Jun 2024 18:26:45 +0800 Subject: [PATCH 49/67] fix(stream): track the checkpoint-ready msg on the upstream tasks. 
--- include/libs/stream/tstream.h | 2 +- source/dnode/vnode/src/tqCommon/tqCommon.c | 2 +- source/libs/stream/inc/streamInt.h | 20 ++++-- source/libs/stream/src/streamCheckpoint.c | 81 +++++++++++++++++----- source/libs/stream/src/streamDispatch.c | 57 ++++++++------- source/libs/stream/src/streamTask.c | 5 +- 6 files changed, 115 insertions(+), 52 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 0f546ee869..99ca2104ff 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -753,7 +753,7 @@ tmr_h streamTimerGetInstance(); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTriggerRsp* pRsp); -int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); +int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int32_t downstreamNodeId, int32_t downstreamTaskId); int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg); int32_t streamAlignTransferState(SStreamTask* pTask); diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index c8ca324c5e..b779cbe932 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -492,7 +492,7 @@ int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg) tqDebug("vgId:%d s-task:%s received the checkpoint ready msg from task:0x%x (vgId:%d), handle it", vgId, pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId); - streamProcessCheckpointReadyMsg(pTask); + streamProcessCheckpointReadyMsg(pTask, req.downstreamTaskId, req.downstreamNodeId); streamMetaReleaseTask(pMeta, pTask); { // send checkpoint ready rsp diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 9a3cbdc963..c943f663e6 100644 --- 
a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -60,9 +60,9 @@ struct SActiveCheckpointInfo { int64_t failedId; bool dispatchTrigger; SArray* pDispatchTriggerList; // SArray - SArray* pReadyMsgList; // SArray + SArray* pReadyMsgList; // SArray int8_t allUpstreamTriggerRecv; - + SArray* pCheckpointReadyRecvList; // SArray int32_t checkCounter; tmr_h pCheckTmr; }; @@ -97,14 +97,14 @@ struct STokenBucket { }; typedef struct { - int32_t upStreamTaskId; + int32_t upstreamTaskId; SEpSet upstreamNodeEpset; int32_t nodeId; SRpcMsg msg; int64_t recvTs; int32_t transId; int64_t checkpointId; -} SStreamChkptReadyInfo; +} STaskCheckpointReadyInfo; typedef struct { int64_t sendTs; @@ -114,6 +114,15 @@ typedef struct { int32_t taskId; } STaskTriggerSendInfo; +typedef struct { + int64_t streamId; + int64_t recvTs; + int32_t downstreamNodeId; + int32_t downstreamTaskId; + int64_t checkpointId; + int32_t transId; +} STaskCheckpointReadyRecvInfo; + struct SStreamQueue { STaosQueue* pQueue; STaosQall* qall; @@ -203,6 +212,9 @@ int32_t streamTaskDownloadCheckpointData(const char* id, char* path); int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask); int32_t streamTaskOnScanHistoryTaskReady(SStreamTask* pTask); +int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, SStreamTask* pTask, int32_t upstreamNodeId, + int32_t upstreamTaskId, int32_t childId, SEpSet* pEpset, int64_t checkpointId); + typedef int32_t (*__stream_async_exec_fn_t)(void* param); int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code); diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 7f41629dac..6b6b740f01 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -182,7 +182,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock SActiveCheckpointInfo* pActiveInfo = 
pTask->chkInfo.pActiveInfo; taosThreadMutexLock(&pTask->lock); - if (pTask->chkInfo.checkpointId >= checkpointId) { + if (pTask->chkInfo.checkpointId > checkpointId) { stError("s-task:%s vgId:%d current checkpointId:%" PRId64 " recv expired checkpoint-trigger block, checkpointId:%" PRId64 " transId:%d, discard", id, vgId, pTask->chkInfo.checkpointId, checkpointId, transId); @@ -190,6 +190,26 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock return TSDB_CODE_SUCCESS; } + if (pTask->chkInfo.checkpointId == checkpointId) { + { // send checkpoint-ready msg to upstream + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pBlock->srcTaskId); + + STaskCheckpointReadyInfo info = {0}; + initCheckpointReadyInfo(&info, pTask, pInfo->nodeId, pInfo->taskId, pInfo->childId, &pInfo->epSet, checkpointId); + + tmsgSendReq(&info.upstreamNodeEpset, &info.msg); + } + + stWarn( + "s-task:%s vgId:%d recv already finished checkpoint msg, send checkpoint-ready to upstream:0x%x to resume the " + "interrupted checkpoint", + id, vgId, pBlock->srcTaskId); + + streamTaskOpenUpstreamInput(pTask, pBlock->srcTaskId); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } + if (streamTaskGetStatus(pTask)->state == TASK_STATUS__CK) { if (pActiveInfo->activeId != checkpointId) { stError("s-task:%s vgId:%d active checkpointId:%" PRId64 ", recv invalid checkpoint-trigger checkpointId:%" PRId64 @@ -210,12 +230,12 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { // check if already recv or not, and duplicated checkpoint-trigger msg recv, discard it for (int32_t i = 0; i < taosArrayGetSize(pActiveInfo->pReadyMsgList); ++i) { - SStreamChkptReadyInfo* p = taosArrayGet(pActiveInfo->pReadyMsgList, i); - if (p->upStreamTaskId == pBlock->srcTaskId) { + STaskCheckpointReadyInfo* p = taosArrayGet(pActiveInfo->pReadyMsgList, i); + if 
(p->upstreamTaskId == pBlock->srcTaskId) { ASSERT(p->checkpointId == checkpointId); stWarn("s-task:%s repeatly recv checkpoint-source msg from task:0x%x vgId:%d, checkpointId:%" PRId64 ", prev recvTs:%" PRId64 " discard", - pTask->id.idStr, p->upStreamTaskId, p->nodeId, p->checkpointId, p->recvTs); + pTask->id.idStr, p->upstreamTaskId, p->nodeId, p->checkpointId, p->recvTs); taosThreadMutexUnlock(&pTask->lock); return TSDB_CODE_SUCCESS; @@ -262,8 +282,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); continueDispatchCheckpointTriggerBlock(pBlock, pTask); } else { // only one task exists, no need to dispatch downstream info - atomic_add_fetch_32(&pTask->chkInfo.numOfNotReady, 1); - streamProcessCheckpointReadyMsg(pTask); + streamProcessCheckpointReadyMsg(pTask, 0, 0); streamFreeQitem((SStreamQueueItem*)pBlock); } } else if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { @@ -307,23 +326,47 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock * All down stream tasks have successfully completed the check point task. * Current stream task is allowed to start to do checkpoint things in ASYNC model. 
*/ -int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { +int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int32_t downstreamNodeId, int32_t downstreamTaskId) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG); + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + + const char* id = pTask->id.idStr; + bool received = false; + int32_t total = streamTaskGetNumOfDownstream(pTask); + + taosThreadMutexLock(&pInfo->lock); // only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task - int32_t notReady = atomic_sub_fetch_32(&pTask->chkInfo.numOfNotReady, 1); - ASSERT(notReady >= 0); - - if (notReady == 0) { - stDebug("s-task:%s all downstream tasks have completed the checkpoint, start to do checkpoint for current task", - pTask->id.idStr); - SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; - appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pInfo->activeId, pInfo->transId); - } else { - int32_t total = streamTaskGetNumOfDownstream(pTask); - stDebug("s-task:%s %d/%d downstream tasks are not ready, wait", pTask->id.idStr, notReady, total); + int32_t size = taosArrayGetSize(pInfo->pCheckpointReadyRecvList); + for (int32_t i = 0; i < size; ++i) { + STaskCheckpointReadyRecvInfo* p = taosArrayGet(pInfo->pCheckpointReadyRecvList, i); + if (p->downstreamTaskId == downstreamTaskId) { + received = true; + break; + } } + if (received) { + stDebug("s-task:%s already recv checkpoint-ready msg from downstream:0x%x, %d/%d downstream not ready", id, + downstreamTaskId, (int32_t)(total - taosArrayGetSize(pInfo->pCheckpointReadyRecvList)), total); + } else { + STaskCheckpointReadyRecvInfo info = {.recvTs = taosGetTimestampMs(), + .downstreamTaskId = downstreamTaskId, + .checkpointId = pInfo->activeId, + .transId = pInfo->transId, + .streamId = pTask->id.streamId, + .downstreamNodeId = downstreamNodeId}; + 
taosArrayPush(pInfo->pCheckpointReadyRecvList, &info); + } + + int32_t notReady = total - taosArrayGetSize(pInfo->pCheckpointReadyRecvList); + if (notReady == 0) { + stDebug("s-task:%s all downstream task(s) have completed build checkpoint, start to do checkpoint for current task", + id); + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pInfo->activeId, pInfo->transId); + } + + taosThreadMutexUnlock(&pInfo->lock); return 0; } @@ -685,7 +728,7 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { bool recved = false; for(int32_t j = 0; j < taosArrayGetSize(pActiveInfo->pReadyMsgList); ++j) { - SStreamChkptReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, j); + STaskCheckpointReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, j); if (pInfo->nodeId == pReady->nodeId) { recved = true; break; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index f134224196..aa0d7c3120 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -633,11 +633,11 @@ int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) == num); for (int32_t i = 0; i < num; ++i) { - SStreamChkptReadyInfo* pInfo = taosArrayGet(pList, i); + STaskCheckpointReadyInfo* pInfo = taosArrayGet(pList, i); tmsgSendReq(&pInfo->upstreamNodeEpset, &pInfo->msg); stDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", pTask->id.idStr, pTask->info.taskLevel, - pInfo->upStreamTaskId); + pInfo->upstreamTaskId); } taosArrayClear(pList); @@ -657,7 +657,7 @@ int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); if (taosArrayGetSize(pList) == 1) { - SStreamChkptReadyInfo* pInfo = taosArrayGet(pList, 0); + STaskCheckpointReadyInfo* pInfo = taosArrayGet(pList, 0); tmsgSendRsp(&pInfo->msg); taosArrayClear(pList); @@ -785,7 +785,7 @@ int32_t 
streamTaskBuildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRp } int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask) { - SStreamChkptReadyInfo info = { + STaskCheckpointReadyInfo info = { .recvTs = taosGetTimestampMs(), .transId = pReq->transId, .checkpointId = pReq->checkpointId}; streamTaskBuildCheckpointSourceRsp(pReq, pRpcInfo, &info.msg, TSDB_CODE_SUCCESS); @@ -797,7 +797,7 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa if (size > 0) { ASSERT(size == 1); - SStreamChkptReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, 0); + STaskCheckpointReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, 0); if (pReady->transId == pReq->transId) { stWarn("s-task:%s repeatly recv checkpoint source msg from mnode, checkpointId:%" PRId64 ", ignore", pTask->id.idStr, pReq->checkpointId); @@ -816,24 +816,20 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa return TSDB_CODE_SUCCESS; } -int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, int32_t index, int64_t checkpointId) { +int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, SStreamTask* pTask, int32_t upstreamNodeId, + int32_t upstreamTaskId, int32_t childId, SEpSet* pEpset, int64_t checkpointId) { int32_t code = 0; int32_t tlen = 0; void* buf = NULL; - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - return TSDB_CODE_SUCCESS; - } - - SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); SStreamCheckpointReadyMsg req = {0}; req.downstreamNodeId = pTask->pMeta->vgId; req.downstreamTaskId = pTask->id.taskId; req.streamId = pTask->id.streamId; req.checkpointId = checkpointId; - req.childId = pInfo->childId; - req.upstreamNodeId = pInfo->nodeId; - req.upstreamTaskId = pInfo->taskId; + req.childId = childId; + req.upstreamNodeId = upstreamNodeId; + req.upstreamTaskId = upstreamTaskId; 
tEncodeSize(tEncodeStreamCheckpointReadyMsg, &req, tlen, code); if (code < 0) { @@ -858,20 +854,29 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, ASSERT(req.upstreamTaskId != 0); - SStreamChkptReadyInfo info = { - .upStreamTaskId = pInfo->taskId, - .upstreamNodeEpset = pInfo->epSet, - .nodeId = req.upstreamNodeId, - .recvTs = taosGetTimestampMs(), - .checkpointId = req.checkpointId, - }; + pReadyInfo->upstreamTaskId = upstreamTaskId; + pReadyInfo->upstreamNodeEpset = *pEpset; + pReadyInfo->nodeId = req.upstreamNodeId; + pReadyInfo->recvTs = taosGetTimestampMs(); + pReadyInfo->checkpointId = req.checkpointId; - initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); + initRpcMsg(&pReadyInfo->msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); + return TSDB_CODE_SUCCESS; +} + +int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, int32_t index, int64_t checkpointId) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + return TSDB_CODE_SUCCESS; + } + + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); + + STaskCheckpointReadyInfo info = {0}; + initCheckpointReadyInfo(&info, pTask, pInfo->nodeId, pInfo->taskId, pInfo->childId, &pInfo->epSet, checkpointId); stDebug("s-task:%s (level:%d) prepare checkpoint-ready msg to upstream s-task:0x%" PRIx64 - ":0x%x (vgId:%d) idx:%d, vgId:%d", - pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index, - req.upstreamNodeId); + "-0x%x (vgId:%d) idx:%d", + pTask->id.idStr, pTask->info.taskLevel, pTask->id.streamId, pInfo->taskId, pInfo->nodeId, index); SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; @@ -899,7 +904,7 @@ void streamClearChkptReadyMsg(SStreamTask* pTask) { } for (int i = 0; i < taosArrayGetSize(pActiveInfo->pReadyMsgList); i++) { - SStreamChkptReadyInfo* pInfo = taosArrayGet(pActiveInfo->pReadyMsgList, i); + 
STaskCheckpointReadyInfo* pInfo = taosArrayGet(pActiveInfo->pReadyMsgList, i); rpcFreeCont(pInfo->msg.pCont); } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 01e2f89d8c..8084a978ef 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -984,7 +984,8 @@ SActiveCheckpointInfo* streamTaskCreateActiveChkptInfo() { taosThreadMutexInit(&pInfo->lock, NULL); pInfo->pDispatchTriggerList = taosArrayInit(4, sizeof(STaskTriggerSendInfo)); - pInfo->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); + pInfo->pReadyMsgList = taosArrayInit(4, sizeof(STaskCheckpointReadyInfo)); + pInfo->pCheckpointReadyRecvList = taosArrayInit(4, sizeof(STaskCheckpointReadyRecvInfo)); return pInfo; } @@ -996,6 +997,7 @@ void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) { taosThreadMutexDestroy(&pInfo->lock); pInfo->pDispatchTriggerList = taosArrayDestroy(pInfo->pDispatchTriggerList); pInfo->pReadyMsgList = taosArrayDestroy(pInfo->pReadyMsgList); + pInfo->pCheckpointReadyRecvList = taosArrayDestroy(pInfo->pCheckpointReadyRecvList); if (pInfo->pCheckTmr != NULL) { taosTmrStop(pInfo->pCheckTmr); @@ -1014,4 +1016,5 @@ void streamTaskClearActiveInfo(SActiveCheckpointInfo* pInfo) { taosArrayClear(pInfo->pReadyMsgList); taosArrayClear(pInfo->pDispatchTriggerList); + taosArrayClear(pInfo->pCheckpointReadyRecvList); } \ No newline at end of file From 31317c489508aae81268fdb0e5a95e3c4eb227f8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 1 Jun 2024 23:20:43 +0800 Subject: [PATCH 50/67] fix(stream): set the checkpoint ready info for only one task in stream. 
--- source/libs/stream/inc/streamInt.h | 9 +++++---- source/libs/stream/src/streamCheckpoint.c | 21 ++++++++++++++------- source/libs/stream/src/streamTask.c | 2 +- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index c943f663e6..c4c3298ea7 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -62,7 +62,7 @@ struct SActiveCheckpointInfo { SArray* pDispatchTriggerList; // SArray SArray* pReadyMsgList; // SArray int8_t allUpstreamTriggerRecv; - SArray* pCheckpointReadyRecvList; // SArray + SArray* pCheckpointReadyRecvList; // SArray int32_t checkCounter; tmr_h pCheckTmr; }; @@ -100,10 +100,11 @@ typedef struct { int32_t upstreamTaskId; SEpSet upstreamNodeEpset; int32_t nodeId; - SRpcMsg msg; - int64_t recvTs; int32_t transId; + SRpcMsg msg; int64_t checkpointId; + int64_t recvTs; + int32_t sendToUpstream; } STaskCheckpointReadyInfo; typedef struct { @@ -121,7 +122,7 @@ typedef struct { int32_t downstreamTaskId; int64_t checkpointId; int32_t transId; -} STaskCheckpointReadyRecvInfo; +} STaskDownstreamReadyInfo; struct SStreamQueue { STaosQueue* pQueue; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 6b6b740f01..f2868fea96 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -334,12 +334,19 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int32_t downstreamNo bool received = false; int32_t total = streamTaskGetNumOfDownstream(pTask); + // only one task in this stream + if (total == 0 && pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pInfo->activeId, pInfo->transId); + taosThreadMutexUnlock(&pInfo->lock); + return 0; + } + taosThreadMutexLock(&pInfo->lock); // only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the 
agg task int32_t size = taosArrayGetSize(pInfo->pCheckpointReadyRecvList); for (int32_t i = 0; i < size; ++i) { - STaskCheckpointReadyRecvInfo* p = taosArrayGet(pInfo->pCheckpointReadyRecvList, i); + STaskDownstreamReadyInfo* p = taosArrayGet(pInfo->pCheckpointReadyRecvList, i); if (p->downstreamTaskId == downstreamTaskId) { received = true; break; @@ -350,12 +357,12 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int32_t downstreamNo stDebug("s-task:%s already recv checkpoint-ready msg from downstream:0x%x, %d/%d downstream not ready", id, downstreamTaskId, (int32_t)(total - taosArrayGetSize(pInfo->pCheckpointReadyRecvList)), total); } else { - STaskCheckpointReadyRecvInfo info = {.recvTs = taosGetTimestampMs(), - .downstreamTaskId = downstreamTaskId, - .checkpointId = pInfo->activeId, - .transId = pInfo->transId, - .streamId = pTask->id.streamId, - .downstreamNodeId = downstreamNodeId}; + STaskDownstreamReadyInfo info = {.recvTs = taosGetTimestampMs(), + .downstreamTaskId = downstreamTaskId, + .checkpointId = pInfo->activeId, + .transId = pInfo->transId, + .streamId = pTask->id.streamId, + .downstreamNodeId = downstreamNodeId}; taosArrayPush(pInfo->pCheckpointReadyRecvList, &info); } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 8084a978ef..f6524a69ab 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -985,7 +985,7 @@ SActiveCheckpointInfo* streamTaskCreateActiveChkptInfo() { pInfo->pDispatchTriggerList = taosArrayInit(4, sizeof(STaskTriggerSendInfo)); pInfo->pReadyMsgList = taosArrayInit(4, sizeof(STaskCheckpointReadyInfo)); - pInfo->pCheckpointReadyRecvList = taosArrayInit(4, sizeof(STaskCheckpointReadyRecvInfo)); + pInfo->pCheckpointReadyRecvList = taosArrayInit(4, sizeof(STaskDownstreamReadyInfo)); return pInfo; } From f13dc1858ab7cac08983546ac01a686fb40a0113 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 3 Jun 2024 09:54:28 +0800 Subject: [PATCH 
51/67] fix(stream): disable stream checkpoint when related fill-history task exists. --- source/dnode/mnode/impl/src/mndStream.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index b88c4a4665..db4b345536 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -1108,6 +1108,15 @@ static int32_t mndCheckNodeStatus(SMnode *pMnode) { ready = false; break; } + + if (pEntry->hTaskId != 0) { + mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s related fill-history task:0x%" PRIx64 + " exists, checkpoint not issued", + pEntry->id.streamId, (int32_t)pEntry->id.taskId, pEntry->nodeId, streamTaskGetStatusStr(pEntry->status), + pEntry->hTaskId); + ready = false; + break; + } } taosThreadMutexUnlock(&execInfo.lock); From 3b3ed1c30c067ac5e647a13e55a4debc0c00b529 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 3 Jun 2024 16:45:37 +0800 Subject: [PATCH 52/67] fix(stream): add monitor for checkpoint-ready msg recv status. 
--- include/libs/stream/tstream.h | 4 +- source/dnode/vnode/src/tq/tq.c | 9 + source/dnode/vnode/src/tqCommon/tqCommon.c | 40 +++- source/libs/stream/inc/streamInt.h | 17 +- source/libs/stream/src/streamCheckpoint.c | 88 ++++++--- source/libs/stream/src/streamDispatch.c | 203 +++++++++++++++------ source/libs/stream/src/streamMeta.c | 5 +- source/libs/stream/src/streamTask.c | 7 +- 8 files changed, 273 insertions(+), 100 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 99ca2104ff..26c6631ee4 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -271,7 +271,6 @@ typedef struct SCheckpointInfo { int64_t checkpointTime; // latest checkpoint time int64_t processedVer; int64_t nextProcessVer; // current offset in WAL, not serialize it - int32_t numOfNotReady; SActiveCheckpointInfo* pActiveInfo; int64_t msgVer; } SCheckpointInfo; @@ -753,7 +752,8 @@ tmr_h streamTimerGetInstance(); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTriggerRsp* pRsp); -int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int32_t downstreamNodeId, int32_t downstreamTaskId); +int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId, int32_t downstreamNodeId, int32_t downstreamTaskId); +int32_t streamTaskProcessCheckpointReadyRsp(SStreamTask* pTask, int32_t upstreamTaskId, int64_t checkpointId); int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg); int32_t streamAlignTransferState(SStreamTask* pTask); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 1076f1f2c6..712cfbaa55 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1229,6 +1229,15 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* 
pRsp) // downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + + SRetrieveChkptTriggerReq* pReq = (SRetrieveChkptTriggerReq*) pMsg->pCont; + if (!vnodeIsRoleLeader(pTq->pVnode)) { + tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from 0x%x", vgId, + (int32_t)pReq->downstreamTaskId); + return TSDB_CODE_STREAM_NOT_LEADER; + } + return tqStreamTaskProcessCheckpointReadyMsg(pTq->pStreamMeta, pMsg); } diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index b779cbe932..c55745e5c5 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -19,6 +19,13 @@ typedef struct SMStreamCheckpointReadyRspMsg { SMsgHead head; + int64_t streamId; + int32_t upstreamTaskId; + int32_t upstreamNodeId; + int32_t downstreamTaskId; + int32_t downstreamNodeId; + int64_t checkpointId; + int32_t transId; } SMStreamCheckpointReadyRspMsg; static int32_t doProcessDummyRspMsg(SStreamMeta* pMeta, SRpcMsg* pMsg); @@ -486,21 +493,27 @@ int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg) SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId); if (pTask == NULL) { tqError("vgId:%d failed to find s-task:0x%x, it may have been destroyed already", vgId, req.downstreamTaskId); - return code; + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } - tqDebug("vgId:%d s-task:%s received the checkpoint ready msg from task:0x%x (vgId:%d), handle it", vgId, + tqDebug("vgId:%d s-task:%s received the checkpoint-ready msg from task:0x%x (vgId:%d), handle it", vgId, pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId); - streamProcessCheckpointReadyMsg(pTask, req.downstreamTaskId, req.downstreamNodeId); + streamProcessCheckpointReadyMsg(pTask, req.checkpointId, 
req.downstreamTaskId, req.downstreamNodeId); streamMetaReleaseTask(pMeta, pTask); { // send checkpoint ready rsp - SRpcMsg rsp = {.code = 0, .info = pMsg->info, .contLen = sizeof(SMStreamCheckpointReadyRspMsg)}; - rsp.pCont = rpcMallocCont(rsp.contLen); - SMsgHead* pHead = rsp.pCont; - pHead->vgId = htonl(req.downstreamNodeId); + SMStreamCheckpointReadyRspMsg* pReadyRsp = rpcMallocCont(sizeof(SMStreamCheckpointReadyRspMsg)); + pReadyRsp->upstreamTaskId = req.upstreamTaskId; + pReadyRsp->upstreamNodeId = req.upstreamNodeId; + pReadyRsp->downstreamTaskId = req.downstreamTaskId; + pReadyRsp->downstreamNodeId = req.downstreamNodeId; + pReadyRsp->checkpointId = req.checkpointId; + pReadyRsp->streamId = req.streamId; + pReadyRsp->head.vgId = htonl(req.downstreamNodeId); + + SRpcMsg rsp = {.code = 0, .info = pMsg->info, .pCont = pReadyRsp, .contLen = sizeof(SMStreamCheckpointReadyRspMsg)}; tmsgSendRsp(&rsp); pMsg->info.handle = NULL; // disable auto rsp @@ -1066,5 +1079,16 @@ int32_t tqStreamProcessStreamHbRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { return d int32_t tqStreamProcessReqCheckpointRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { return doProcessDummyRspMsg(pMeta, pMsg); } int32_t tqStreamProcessCheckpointReadyRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { - return doProcessDummyRspMsg(pMeta, pMsg); + SMStreamCheckpointReadyRspMsg* pRsp = pMsg->pCont; + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pRsp->streamId, pRsp->downstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d failed to acquire task:0x%x when handling checkpoint-ready msg, it may have been dropped", + pRsp->downstreamNodeId, pRsp->downstreamTaskId); + return TSDB_CODE_STREAM_TASK_NOT_EXIST; + } + + streamTaskProcessCheckpointReadyRsp(pTask, pRsp->upstreamTaskId, pRsp->checkpointId); + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; } \ No newline at end of file diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index c4c3298ea7..154f623b9d 100644 --- 
a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -64,7 +64,9 @@ struct SActiveCheckpointInfo { int8_t allUpstreamTriggerRecv; SArray* pCheckpointReadyRecvList; // SArray int32_t checkCounter; - tmr_h pCheckTmr; + tmr_h pChkptTriggerTmr; + int32_t sendReadyCheckCounter; + tmr_h pSendReadyMsgTmr; }; typedef struct { @@ -99,12 +101,13 @@ struct STokenBucket { typedef struct { int32_t upstreamTaskId; SEpSet upstreamNodeEpset; - int32_t nodeId; + int32_t upstreamNodeId; int32_t transId; - SRpcMsg msg; + int32_t childId; + SRpcMsg msg; // for mnode checkpoint-source rsp int64_t checkpointId; int64_t recvTs; - int32_t sendToUpstream; + int32_t sendCompleted; } STaskCheckpointReadyInfo; typedef struct { @@ -213,8 +216,10 @@ int32_t streamTaskDownloadCheckpointData(const char* id, char* path); int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask); int32_t streamTaskOnScanHistoryTaskReady(SStreamTask* pTask); -int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, SStreamTask* pTask, int32_t upstreamNodeId, - int32_t upstreamTaskId, int32_t childId, SEpSet* pEpset, int64_t checkpointId); +int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t upstreamNodeId, int32_t upstreamTaskId, + int32_t childId, SEpSet* pEpset, int64_t checkpointId); +int32_t initCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamNodeId, int32_t upstreamTaskId, int32_t childId, + int64_t checkpointId, SRpcMsg* pMsg); typedef int32_t (*__stream_async_exec_fn_t)(void* param); diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index f2868fea96..b0c4884c73 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -111,7 +111,6 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo pTask->chkInfo.pActiveInfo->transId = pReq->transId; pTask->chkInfo.pActiveInfo->activeId = pReq->checkpointId; - 
pTask->chkInfo.numOfNotReady = streamTaskGetNumOfDownstream(pTask); pTask->chkInfo.startTs = taosGetTimestampMs(); pTask->execInfo.checkpoint += 1; @@ -192,12 +191,11 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock if (pTask->chkInfo.checkpointId == checkpointId) { { // send checkpoint-ready msg to upstream + SRpcMsg msg ={0}; + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pBlock->srcTaskId); - - STaskCheckpointReadyInfo info = {0}; - initCheckpointReadyInfo(&info, pTask, pInfo->nodeId, pInfo->taskId, pInfo->childId, &pInfo->epSet, checkpointId); - - tmsgSendReq(&info.upstreamNodeEpset, &info.msg); + initCheckpointReadyMsg(pTask, pInfo->nodeId, pBlock->srcTaskId, pInfo->childId, checkpointId, &msg); + tmsgSendReq(&pInfo->epSet, &msg); } stWarn( @@ -235,7 +233,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock ASSERT(p->checkpointId == checkpointId); stWarn("s-task:%s repeatly recv checkpoint-source msg from task:0x%x vgId:%d, checkpointId:%" PRId64 ", prev recvTs:%" PRId64 " discard", - pTask->id.idStr, p->upstreamTaskId, p->nodeId, p->checkpointId, p->recvTs); + pTask->id.idStr, p->upstreamTaskId, p->upstreamNodeId, p->checkpointId, p->recvTs); taosThreadMutexUnlock(&pTask->lock); return TSDB_CODE_SUCCESS; @@ -267,10 +265,10 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock stDebug("s-task:%s start checkpoint-trigger monitor in 10s, ref:%d ", pTask->id.idStr, ref); streamMetaAcquireOneTask(pTask); - if (pActiveInfo->pCheckTmr == NULL) { - pActiveInfo->pCheckTmr = taosTmrStart(checkpointTriggerMonitorFn, 100, pTask, streamTimer); + if (pActiveInfo->pChkptTriggerTmr == NULL) { + pActiveInfo->pChkptTriggerTmr = taosTmrStart(checkpointTriggerMonitorFn, 100, pTask, streamTimer); } else { - taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pCheckTmr); + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, 
streamTimer, &pActiveInfo->pChkptTriggerTmr); } } @@ -282,7 +280,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); continueDispatchCheckpointTriggerBlock(pBlock, pTask); } else { // only one task exists, no need to dispatch downstream info - streamProcessCheckpointReadyMsg(pTask, 0, 0); + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pActiveInfo->activeId, pActiveInfo->transId); streamFreeQitem((SStreamQueueItem*)pBlock); } } else if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { @@ -308,11 +306,6 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock streamTaskBuildCheckpoint(pTask); } else { // source & agg tasks need to forward the checkpoint msg downwards stDebug("s-task:%s process checkpoint-trigger block, all %d upstreams sent, forwards to downstream", id, num); - - // set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task - // can start local checkpoint procedure - pTask->chkInfo.numOfNotReady = streamTaskGetNumOfDownstream(pTask); - // Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task // already. And then, dispatch check point msg to all downstream tasks code = continueDispatchCheckpointTriggerBlock(pBlock, pTask); @@ -326,19 +319,31 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock * All down stream tasks have successfully completed the check point task. * Current stream task is allowed to start to do checkpoint things in ASYNC model. 
*/ -int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int32_t downstreamNodeId, int32_t downstreamTaskId) { +int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId, int32_t downstreamNodeId, + int32_t downstreamTaskId) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG); SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; const char* id = pTask->id.idStr; bool received = false; int32_t total = streamTaskGetNumOfDownstream(pTask); + ASSERT(total > 0); - // only one task in this stream - if (total == 0 && pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pInfo->activeId, pInfo->transId); - taosThreadMutexUnlock(&pInfo->lock); - return 0; + // 1. not in checkpoint status now + SStreamTaskState* pStat = streamTaskGetStatus(pTask); + if (pStat->state != TASK_STATUS__CK) { + return TSDB_CODE_STREAM_TASK_IVLD_STATUS; + } + + // 2. expired checkpoint-ready msg + if (pTask->chkInfo.checkpointId > checkpointId) { + // discard it directly + return -1; + } + + // invalid checkpoint-ready msg + if (pInfo->activeId != checkpointId) { + return -1; } taosThreadMutexLock(&pInfo->lock); @@ -354,7 +359,7 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int32_t downstreamNo } if (received) { - stDebug("s-task:%s already recv checkpoint-ready msg from downstream:0x%x, %d/%d downstream not ready", id, + stDebug("s-task:%s already recv checkpoint-ready msg from downstream:0x%x, ignore. 
%d/%d downstream not ready", id, downstreamTaskId, (int32_t)(total - taosArrayGetSize(pInfo->pCheckpointReadyRecvList)), total); } else { STaskDownstreamReadyInfo info = {.recvTs = taosGetTimestampMs(), @@ -377,9 +382,38 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int32_t downstreamNo return 0; } +int32_t streamTaskProcessCheckpointReadyRsp(SStreamTask* pTask, int32_t upstreamTaskId, int64_t checkpointId) { + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + int64_t now = taosGetTimestampMs(); + int32_t numOfConfirmed = 0; + + taosThreadMutexLock(&pInfo->lock); + for(int32_t i = 0; i < taosArrayGetSize(pInfo->pReadyMsgList); ++i) { + STaskCheckpointReadyInfo* pReadyInfo = taosArrayGet(pInfo->pReadyMsgList, i); + if (pReadyInfo->upstreamTaskId == upstreamTaskId && pReadyInfo->checkpointId == checkpointId) { + pReadyInfo->sendCompleted = 1; + stDebug("s-task:%s send checkpoint-ready msg to upstream:0x%x confirmed, checkpointId:%" PRId64 " ts:%" PRId64, + pTask->id.idStr, upstreamTaskId, checkpointId, now); + break; + } + } + + for(int32_t i = 0; i < taosArrayGetSize(pInfo->pReadyMsgList); ++i) { + STaskCheckpointReadyInfo* pReadyInfo = taosArrayGet(pInfo->pReadyMsgList, i); + if (pReadyInfo->sendCompleted == 1) { + numOfConfirmed += 1; + } + } + + stDebug("s-task:%s send checkpoint-ready msg to %d upstream confirmed, checkpointId:%" PRId64, pTask->id.idStr, + numOfConfirmed, checkpointId); + + taosThreadMutexUnlock(&pInfo->lock); + return TSDB_CODE_SUCCESS; +} + void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { pTask->chkInfo.startTs = 0; // clear the recorded start time - pTask->chkInfo.numOfNotReady = 0; streamTaskClearActiveInfo(pTask->chkInfo.pActiveInfo); streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks @@ -703,7 +737,7 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { } if (++pActiveInfo->checkCounter < 100) { - taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, 
streamTimer, &pActiveInfo->pCheckTmr); + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr); return; } @@ -736,7 +770,7 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { bool recved = false; for(int32_t j = 0; j < taosArrayGetSize(pActiveInfo->pReadyMsgList); ++j) { STaskCheckpointReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, j); - if (pInfo->nodeId == pReady->nodeId) { + if (pInfo->nodeId == pReady->upstreamNodeId) { recved = true; break; } @@ -756,7 +790,7 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { // check every 100ms if (size > 0) { stDebug("s-task:%s start to monitor checkpoint-trigger in 10s", id); - taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pCheckTmr); + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr); } else { int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s all checkpoint-trigger recved, quit from monitor checkpoint-trigger tmr, ref:%d", id, ref); diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index aa0d7c3120..42a4e4e8fb 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -623,28 +623,163 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } -// this function is usually invoked by sink/agg task -int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { - SArray* pList = pTask->chkInfo.pActiveInfo->pReadyMsgList; +int32_t initCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamNodeId, int32_t upstreamTaskId, int32_t childId, + int64_t checkpointId, SRpcMsg* pMsg) { + int32_t code = 0; + int32_t tlen = 0; + void* buf = NULL; - taosThreadMutexLock(&pTask->chkInfo.pActiveInfo->lock); + SStreamCheckpointReadyMsg req = {0}; + req.downstreamNodeId = pTask->pMeta->vgId; + req.downstreamTaskId = pTask->id.taskId; 
+ req.streamId = pTask->id.streamId; + req.checkpointId = checkpointId; + req.childId = childId; + req.upstreamNodeId = upstreamNodeId; + req.upstreamTaskId = upstreamTaskId; + + tEncodeSize(tEncodeStreamCheckpointReadyMsg, &req, tlen, code); + if (code < 0) { + return -1; + } + + buf = rpcMallocCont(sizeof(SMsgHead) + tlen); + if (buf == NULL) { + return -1; + } + + ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + if ((code = tEncodeStreamCheckpointReadyMsg(&encoder, &req)) < 0) { + rpcFreeCont(buf); + return code; + } + tEncoderClear(&encoder); + + initRpcMsg(pMsg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); + return TSDB_CODE_SUCCESS; +} + +static void checkpointReadyMsgSendMonitorFn(void* param, void* tmrId) { + SStreamTask* pTask = param; + int32_t vgId = pTask->pMeta->vgId; + const char* id = pTask->id.idStr; + + // check the status every 100ms + if (streamTaskShouldStop(pTask)) { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s vgId:%d quit from monitor checkpoint-trigger, ref:%d", id, vgId, ref); + streamMetaReleaseTask(pTask->pMeta, pTask); + return; + } + + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + if (++pActiveInfo->sendReadyCheckCounter < 100) { + taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + return; + } + + pActiveInfo->sendReadyCheckCounter = 0; + stDebug("s-task:%s in sending checkpoint-ready msg monitor timer", id); + + taosThreadMutexLock(&pActiveInfo->lock); + + SArray* pList = pActiveInfo->pReadyMsgList; + SArray* pNotRspList = taosArrayInit(4, sizeof(int32_t)); int32_t num = taosArrayGetSize(pList); ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) == num); for (int32_t i = 0; i < num; ++i) { STaskCheckpointReadyInfo* pInfo = taosArrayGet(pList, i); - 
tmsgSendReq(&pInfo->upstreamNodeEpset, &pInfo->msg); + if (pInfo->sendCompleted == 1) { + continue; + } - stDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", pTask->id.idStr, pTask->info.taskLevel, + taosArrayPush(pNotRspList, &pInfo->upstreamTaskId); + stDebug("s-task:%s vgId:%d level:%d checkpoint-ready rsp from upstream:0x%x not confirmed yet", id, vgId, + pTask->info.taskLevel, pInfo->upstreamTaskId); + } + + int32_t checkpointId = pActiveInfo->activeId; + + int32_t notRsp = taosArrayGetSize(pNotRspList); + if (notRsp > 0) { // send checkpoint-ready msg again + for (int32_t i = 0; i < taosArrayGetSize(pNotRspList); ++i) { + int32_t taskId = *(int32_t*)taosArrayGet(pNotRspList, i); + + for (int32_t j = 0; j < num; ++j) { + STaskCheckpointReadyInfo* pReadyInfo = taosArrayGet(pList, j); + if (taskId == pReadyInfo->upstreamTaskId) { // send msg again + + SRpcMsg msg = {0}; + initCheckpointReadyMsg(pTask, pReadyInfo->upstreamNodeId, pReadyInfo->upstreamTaskId, pReadyInfo->childId, + checkpointId, &msg); + tmsgSendReq(&pReadyInfo->upstreamNodeEpset, &msg); + stDebug("s-task:%s level:%d checkpoint-ready msg sent to upstream:0x%x again", id, pTask->info.taskLevel, + pReadyInfo->upstreamTaskId); + } + } + } + + taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + taosThreadMutexUnlock(&pActiveInfo->lock); + } else { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug( + "s-task:%s vgId:%d recv of checkpoint-ready msg confirmed by all upstream task(s), quit from timer and clear " + "checkpoint-ready msg, ref:%d", + id, vgId, ref); + + streamClearChkptReadyMsg(pTask); + taosThreadMutexUnlock(&pActiveInfo->lock); + streamMetaReleaseTask(pTask->pMeta, pTask); + } + + taosArrayDestroy(pNotRspList); +} + +// this function is usually invoked by sink/agg task +int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { + SActiveCheckpointInfo* pActiveInfo = 
pTask->chkInfo.pActiveInfo; + + const char* id = pTask->id.idStr; + SArray* pList = pActiveInfo->pReadyMsgList; + + taosThreadMutexLock(&pActiveInfo->lock); + + int32_t num = taosArrayGetSize(pList); + ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) == num); + + for (int32_t i = 0; i < num; ++i) { + STaskCheckpointReadyInfo* pInfo = taosArrayGet(pList, i); + + SRpcMsg msg = {0}; + initCheckpointReadyMsg(pTask, pInfo->upstreamNodeId, pInfo->upstreamTaskId, pInfo->childId, pInfo->checkpointId, &msg); + tmsgSendReq(&pInfo->upstreamNodeEpset, &msg); + + stDebug("s-task:%s level:%d checkpoint-ready msg sent to upstream:0x%x", id, pTask->info.taskLevel, pInfo->upstreamTaskId); } - taosArrayClear(pList); + taosThreadMutexUnlock(&pActiveInfo->lock); + stDebug("s-task:%s level:%d checkpoint-ready msg sent to all %d upstreams", id, pTask->info.taskLevel, num); - taosThreadMutexUnlock(&pTask->chkInfo.pActiveInfo->lock); - stDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, - num); + // start to check if checkpoint ready msg has successfully received by upstream tasks. 
+ pActiveInfo->pSendReadyMsgTmr = NULL; + + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s start checkpoint-trigger monitor in 10s, ref:%d ", pTask->id.idStr, ref); + streamMetaAcquireOneTask(pTask); + + if (pActiveInfo->pSendReadyMsgTmr == NULL) { + pActiveInfo->pSendReadyMsgTmr = taosTmrStart(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer); + } else { + taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + } return TSDB_CODE_SUCCESS; } @@ -816,51 +951,17 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa return TSDB_CODE_SUCCESS; } -int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, SStreamTask* pTask, int32_t upstreamNodeId, - int32_t upstreamTaskId, int32_t childId, SEpSet* pEpset, int64_t checkpointId) { - int32_t code = 0; - int32_t tlen = 0; - void* buf = NULL; - - SStreamCheckpointReadyMsg req = {0}; - req.downstreamNodeId = pTask->pMeta->vgId; - req.downstreamTaskId = pTask->id.taskId; - req.streamId = pTask->id.streamId; - req.checkpointId = checkpointId; - req.childId = childId; - req.upstreamNodeId = upstreamNodeId; - req.upstreamTaskId = upstreamTaskId; - - tEncodeSize(tEncodeStreamCheckpointReadyMsg, &req, tlen, code); - if (code < 0) { - return -1; - } - - buf = rpcMallocCont(sizeof(SMsgHead) + tlen); - if (buf == NULL) { - return -1; - } - - ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); - void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - - SEncoder encoder; - tEncoderInit(&encoder, abuf, tlen); - if ((code = tEncodeStreamCheckpointReadyMsg(&encoder, &req)) < 0) { - rpcFreeCont(buf); - return code; - } - tEncoderClear(&encoder); - - ASSERT(req.upstreamTaskId != 0); +int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t upstreamNodeId, int32_t upstreamTaskId, + int32_t childId, SEpSet* pEpset, int64_t checkpointId) { + ASSERT(upstreamTaskId != 0); 
pReadyInfo->upstreamTaskId = upstreamTaskId; pReadyInfo->upstreamNodeEpset = *pEpset; - pReadyInfo->nodeId = req.upstreamNodeId; + pReadyInfo->upstreamNodeId = upstreamNodeId; pReadyInfo->recvTs = taosGetTimestampMs(); - pReadyInfo->checkpointId = req.checkpointId; + pReadyInfo->checkpointId = checkpointId; + pReadyInfo->childId = childId; - initRpcMsg(&pReadyInfo->msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); return TSDB_CODE_SUCCESS; } @@ -872,7 +973,7 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); STaskCheckpointReadyInfo info = {0}; - initCheckpointReadyInfo(&info, pTask, pInfo->nodeId, pInfo->taskId, pInfo->childId, &pInfo->epSet, checkpointId); + initCheckpointReadyInfo(&info, pInfo->nodeId, pInfo->taskId, pInfo->childId, &pInfo->epSet, checkpointId); stDebug("s-task:%s (level:%d) prepare checkpoint-ready msg to upstream s-task:0x%" PRIx64 "-0x%x (vgId:%d) idx:%d", diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index ae1d86e317..f6449829a3 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1121,6 +1121,7 @@ void metaHbToMnode(void* param, void* tmrId) { SStreamMeta* pMeta = taosAcquireRef(streamMetaId, rid); if (pMeta == NULL) { + stError("invalid rid:%" PRId64 " failed to acquired stream-meta", rid); return; } @@ -1341,8 +1342,8 @@ void streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool isLeader) streamMetaWUnLock(pMeta); if (isLeader) { - stInfo("vgId:%d update meta stage:%" PRId64 ", prev:%" PRId64 " leader:%d, start to send Hb", pMeta->vgId, - prevStage, stage, isLeader); + stInfo("vgId:%d update meta stage:%" PRId64 ", prev:%" PRId64 " leader:%d, start to send Hb, rid:%" PRId64, + pMeta->vgId, prevStage, stage, isLeader, pMeta->rid); streamMetaStartHb(pMeta); } else { stInfo("vgId:%d update meta stage:%" PRId64 
" prev:%" PRId64 " leader:%d sendMsg beforeClosing:%d", pMeta->vgId, diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index f6524a69ab..7fb45a884c 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -999,9 +999,9 @@ void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) { pInfo->pReadyMsgList = taosArrayDestroy(pInfo->pReadyMsgList); pInfo->pCheckpointReadyRecvList = taosArrayDestroy(pInfo->pCheckpointReadyRecvList); - if (pInfo->pCheckTmr != NULL) { - taosTmrStop(pInfo->pCheckTmr); - pInfo->pCheckTmr = NULL; + if (pInfo->pChkptTriggerTmr != NULL) { + taosTmrStop(pInfo->pChkptTriggerTmr); + pInfo->pChkptTriggerTmr = NULL; } taosMemoryFree(pInfo); @@ -1014,7 +1014,6 @@ void streamTaskClearActiveInfo(SActiveCheckpointInfo* pInfo) { pInfo->allUpstreamTriggerRecv = 0; pInfo->dispatchTrigger = false; - taosArrayClear(pInfo->pReadyMsgList); taosArrayClear(pInfo->pDispatchTriggerList); taosArrayClear(pInfo->pCheckpointReadyRecvList); } \ No newline at end of file From 407f79cfa69c08aec0822b3ab4cb8520b4f68a90 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 3 Jun 2024 19:14:45 +0800 Subject: [PATCH 53/67] fix(stream): fix dead-lock. --- source/libs/stream/src/streamCheckpoint.c | 24 +++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index b0c4884c73..94d2198e31 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -332,17 +332,15 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId // 1. not in checkpoint status now SStreamTaskState* pStat = streamTaskGetStatus(pTask); if (pStat->state != TASK_STATUS__CK) { + stError("s-task:%s status:%s discard checkpoint-ready msg from task:0x%x", id, pStat->name, downstreamTaskId); return TSDB_CODE_STREAM_TASK_IVLD_STATUS; } - // 2. 
expired checkpoint-ready msg - if (pTask->chkInfo.checkpointId > checkpointId) { - // discard it directly - return -1; - } - - // invalid checkpoint-ready msg - if (pInfo->activeId != checkpointId) { + // 2. expired checkpoint-ready msg, invalid checkpoint-ready msg + if (pTask->chkInfo.checkpointId > checkpointId || pInfo->activeId != checkpointId) { + stError("s-task:%s status:%s checkpointId:%" PRId64 " new arrival checkpoint-ready msg (checkpointId:%" PRId64 + ") from task:0x%x, expired and discard ", + id, pStat->name, pTask->chkInfo.checkpointId, checkpointId, downstreamTaskId); return -1; } @@ -372,13 +370,14 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId } int32_t notReady = total - taosArrayGetSize(pInfo->pCheckpointReadyRecvList); + int32_t transId = pInfo->transId; + taosThreadMutexUnlock(&pInfo->lock); + if (notReady == 0) { - stDebug("s-task:%s all downstream task(s) have completed build checkpoint, start to do checkpoint for current task", - id); - appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pInfo->activeId, pInfo->transId); + stDebug("s-task:%s all downstream task(s) have completed build checkpoint, start to do checkpoint for current task", id); + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, checkpointId, transId); } - taosThreadMutexUnlock(&pInfo->lock); return 0; } @@ -944,7 +943,6 @@ void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId) { SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; int32_t taskId = 0; - taosThreadMutexLock(&pInfo->lock); for (int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { From 5ab81da4a0abcedd640d0213b681156383951b7a Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao> Date: Tue, 4 Jun 2024 09:50:26 +0800 Subject: [PATCH 54/67] set scan mode for stream --- source/libs/executor/src/scanoperator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/executor/src/scanoperator.c 
b/source/libs/executor/src/scanoperator.c index 851d2f2735..3af738c63c 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1314,6 +1314,7 @@ void resetTableScanInfo(STableScanInfo* pTableScanInfo, STimeWindow* pWin, uint6 pTableScanInfo->tableEndIndex = -1; pTableScanInfo->base.readerAPI.tsdReaderClose(pTableScanInfo->base.dataReader); pTableScanInfo->base.dataReader = NULL; + pTableScanInfo->scanMode = TABLE_SCAN__BLOCK_ORDER; } static SSDataBlock* readPreVersionData(SOperatorInfo* pTableScanOp, uint64_t tbUid, TSKEY startTs, TSKEY endTs, From 4fc21cc8b0f5fce72bf540b51c9106353f157f16 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 4 Jun 2024 10:08:18 +0800 Subject: [PATCH 55/67] refactor: do some internal refactor. --- include/libs/stream/tstream.h | 3 ++- source/common/src/tglobal.c | 2 +- source/dnode/mnode/impl/src/mndMain.c | 2 +- source/dnode/mnode/impl/src/mndStream.c | 2 +- source/libs/stream/src/streamDispatch.c | 8 ++++---- source/libs/stream/src/streamTask.c | 11 ++++++++--- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 26c6631ee4..d07a302920 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -319,7 +319,8 @@ typedef struct SDispatchMsgInfo { int32_t retryCount; // retry send data count int64_t startTs; // dispatch start time, record total elapsed time for dispatch SArray* pRetryList; // current dispatch successfully completed node of downstream - void* pTimer; // used to dispatch data after a given time duration + void* pRetryTmr; // used to dispatch data after a given time duration + void* pRspTmr; // used to dispatch data after a given time duration } SDispatchMsgInfo; typedef struct STaskQueue { diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 2fefeb4cf2..f034244c69 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -273,7 
+273,7 @@ int32_t tsCompactPullupInterval = 10; int32_t tsMqRebalanceInterval = 2; int32_t tsStreamCheckpointInterval = 60; float tsSinkDataRate = 2.0; -int32_t tsStreamNodeCheckInterval = 16; +int32_t tsStreamNodeCheckInterval = 20; int32_t tsMaxConcurrentCheckpoint = 1; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index c82007fb59..cad8c6d745 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -345,7 +345,7 @@ void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) { mndCalMqRebalance(pMnode); } - if (sec % 30 == 0) { // send the checkpoint info every 10 sec + if (sec % 30 == 0) { // send the checkpoint info every 30 sec mndStreamCheckpointTimer(pMnode); } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index db4b345536..e108ba557a 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -1062,7 +1062,7 @@ static bool taskNodeIsUpdated(SMnode *pMnode) { bool allReady = true; SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); if (!allReady) { - mWarn("not all vnodes ready"); + mWarn("not all vnodes ready, quit from vnodes status check"); taosArrayDestroy(pNodeSnapshot); taosThreadMutexUnlock(&execInfo.lock); return 0; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 42a4e4e8fb..fb5f1e33c5 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -420,7 +420,7 @@ static void doRetryDispatchData(void* param, void* tmrId) { } } - stDebug("s-task:%s complete re-try shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, + stDebug("s-task:%s complete retry shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, numOfFailed, msgId); } else { int32_t vgId = 
pTask->outputInfo.fixedDispatcher.nodeId; @@ -461,10 +461,10 @@ void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration) { stTrace("s-task:%s retry send dispatch data in %" PRId64 "ms, in timer msgId:%d, retryTimes:%d", pTask->id.idStr, waitDuration, pTask->execInfo.dispatch, pTask->msgInfo.retryCount); - if (pTask->msgInfo.pTimer != NULL) { - taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamTimer, &pTask->msgInfo.pTimer); + if (pTask->msgInfo.pRetryTmr != NULL) { + taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamTimer, &pTask->msgInfo.pRetryTmr); } else { - pTask->msgInfo.pTimer = taosTmrStart(doRetryDispatchData, waitDuration, pTask, streamTimer); + pTask->msgInfo.pRetryTmr = taosTmrStart(doRetryDispatchData, waitDuration, pTask, streamTimer); } } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 7fb45a884c..834daf15d0 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -228,9 +228,9 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->hTaskInfo.pTimer = NULL; } - if (pTask->msgInfo.pTimer != NULL) { - /*bool ret = */taosTmrStop(pTask->msgInfo.pTimer); - pTask->msgInfo.pTimer = NULL; + if (pTask->msgInfo.pRetryTmr != NULL) { + /*bool ret = */taosTmrStop(pTask->msgInfo.pRetryTmr); + pTask->msgInfo.pRetryTmr = NULL; } if (pTask->inputq.queue) { @@ -1004,6 +1004,11 @@ void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) { pInfo->pChkptTriggerTmr = NULL; } + if (pInfo->pSendReadyMsgTmr != NULL) { + taosTmrStop(pInfo->pSendReadyMsgTmr); + pInfo->pSendReadyMsgTmr = NULL; + } + taosMemoryFree(pInfo); } From 61e6b25da6b3004ca614cf0b8209baeb9ef17433 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 4 Jun 2024 14:27:46 +0800 Subject: [PATCH 56/67] fix:add api document for tmq --- docs/en/08-client-libraries/03-cpp.mdx | 149 ++++++- docs/zh/08-connector/10-cpp.mdx | 567 +++++++++++++++---------- source/client/src/clientTmq.c | 4 +- 3 
files changed, 473 insertions(+), 247 deletions(-) diff --git a/docs/en/08-client-libraries/03-cpp.mdx b/docs/en/08-client-libraries/03-cpp.mdx index dda436f553..39f9688d86 100644 --- a/docs/en/08-client-libraries/03-cpp.mdx +++ b/docs/en/08-client-libraries/03-cpp.mdx @@ -451,6 +451,88 @@ In addition to writing data using the SQL method or the parameter binding API, w - Within _reqid interfaces can track the entire call chain by passing the reqid parameter. ### Subscription API +- `const char *tmq_err2str(int32_t code)` + + **Description** + - This interface is used to convert error codes for data subscriptions into error messages + + **Parameter description** + - code: error code + + **Return value** + - non NULL, return error message, error message may be empty + + +- `tmq_conf_t *tmq_conf_new()` +- `tmq_conf_res_t tmq_conf_set(tmq_conf_t *conf, const char *key, const char *value)` +- `void tmq_conf_set_auto_commit_cb(tmq_conf_t *conf, tmq_commit_cb *cb, void *param)` +- `void tmq_conf_destroy(tmq_conf_t *conf)` + + tmq_conf_res_t defined as follows: + ``` + typedef enum tmq_conf_res_t { + TMQ_CONF_UNKNOWN = -2, + TMQ_CONF_INVALID = -1, + TMQ_CONF_OK = 0, + } tmq_conf_res_t; + ``` + + commit callback function defined as follows: + ``` + typedef void(tmq_commit_cb(tmq_t *tmq, int32_t code, void *param)) + ``` + **Description** + - tmq_conf_new : create a tmq_conf_t structure to configure consumption parameters + - tmq_conf_set : set configuration, key is parameter name,value is parameter value + - tmq_conf_set_auto_commit_cb : set automatic commit callback function, cb is call back function, param is callback function parameter + - tmq_conf_destroy : destroy tmq_conf_t structure + + +- `tmq_list_t *tmq_list_new()` +- `int32_t tmq_list_append(tmq_list_t *, const char *)` +- `void tmq_list_destroy(tmq_list_t *)` +- `int32_t tmq_list_get_size(const tmq_list_t *)` +- `char **tmq_list_to_c_array(const tmq_list_t *)` + + **Description** + - tmq_list_new : build a 
tmq_list_t structure, used to save topic + - tmq_list_append : add topic to tmq_list_t + - tmq_list_destroy : destroy tmq_list_t + - tmq_list_get_size : get size of tmq_list_t + - tmq_list_to_c_array : convert tmq_list_t to c array, element is string pointer + + +- `tmq_t *tmq_consumer_new(tmq_conf_t *conf, char *errstr, int32_t errstrLen)` +- `int32_t tmq_subscribe(tmq_t *tmq, const tmq_list_t *topic_list)` +- `int32_t tmq_unsubscribe(tmq_t *tmq)` +- `int32_t tmq_subscription(tmq_t *tmq, tmq_list_t **topic_list)` +- `TAOS_RES *tmq_consumer_poll(tmq_t *tmq, int64_t timeout)` +- `int32_t tmq_consumer_close(tmq_t *tmq)` + + **Description** + - tmq_consumer_new : build a tmq_t structure, need to be used with tmq_consumer_close + - tmq_subscribe : subscribe topic, need to be used with tmq_unsubscribe + - tmq_unsubscribe : unsubscribe topic, need to be used with tmq_subscribe + - tmq_subscription : obtain a list of topics subscribed by consumer + - tmq_consumer_poll : used to consume data + - tmq_consumer_close : close tmq_t, need to be used with tmq_consumer_new + + **Parameter description** + - conf: used to configure consume parameters + - errstr: The error information is stored in this string. Allocation and release of memory are the responsibility of the caller + - errstrLen: the length of errstr + - tmq: structure of tmq_t returned by tmq_consumer_new + - topic_list: a list of topics subscribed by consumers, need to be freed by tmq_list_destroy + - timeout: the timeout time, measured in milliseconds, indicates how long it takes for data to expire. 
If it is negative, it will default to 1 second + + **Return value** + - tmq_consumer_new: structure of tmq_t, NULL failed + - tmq_subscribe: zero success, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - tmq_unsubscribe: zero success, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - tmq_subscription: zero success, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - tmq_consumer_poll: structure of TAOS_RES(same like taos_query), NULL if there is no data + - tmq_consumer_close: zero success, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - `int32_t tmq_get_topic_assignment(tmq_t *tmq, const char *pTopicName, tmq_topic_assignment **assignment, int32_t *numOfAssignment)` - `void tmq_free_assignment(tmq_topic_assignment* pAssignment)` @@ -474,6 +556,7 @@ In addition to writing data using the SQL method or the parameter binding API, w **Return value** - zero success,none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - `int64_t tmq_committed(tmq_t *tmq, const char *pTopicName, int32_t vgId)` **Function description** @@ -482,6 +565,7 @@ In addition to writing data using the SQL method or the parameter binding API, w **Return value** - the value of committed offset, -2147467247 means no committed value, Other values less than 0 indicate failure + - `int32_t tmq_commit_sync(tmq_t *tmq, const TAOS_RES *msg)` - `void tmq_commit_async(tmq_t *tmq, const TAOS_RES *msg, tmq_commit_cb *cb, void *param)` - `int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset)` @@ -499,6 +583,7 @@ In addition to writing data using the SQL method or the parameter binding API, w **Return value** - zero success, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - `int64_t tmq_position(tmq_t *tmq, const 
char *pTopicName, int32_t vgId)` **Function description** @@ -507,6 +592,7 @@ In addition to writing data using the SQL method or the parameter binding API, w **Return value** - the current consumption location, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - `int32_t tmq_offset_seek(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset)` **Function description** @@ -514,25 +600,52 @@ In addition to writing data using the SQL method or the parameter binding API, w **Return value** - zero success, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + + +- `int64_t tmq_get_vgroup_offset(TAOS_RES* res)` +- `int32_t tmq_get_vgroup_id(TAOS_RES *res)` + + **Description** + - tmq_get_vgroup_offset : Obtain the starting offset of the consumed data + - tmq_get_vgroup_id : Obtain the vgroup id of the consumed data -- `int32_t int64_t tmq_get_vgroup_offset(TAOS_RES* res)` - - **Function description** - - Obtain the starting offset of the consumed data - **Parameter description** - - msg:Message consumed - - **Return value** - - the starting offset of the consumed data, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - msg : Message consumed -- `int32_t int32_t tmq_subscription(tmq_t *tmq, tmq_list_t **topics)` - - **Function description** - - Obtain a list of topics subscribed by consumers - - **Parameter description** - - topics: a list of topics subscribed by consumers,need to be freed by tmq_list_destroy - **Return value** - - zero success,none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - tmq_get_vgroup_offset : the starting offset of the consumed data, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - tmq_get_vgroup_id : vgroup id of result, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + + +- `TAOS 
*tmq_get_connect(tmq_t *tmq)` +- `const char *tmq_get_table_name(TAOS_RES *res)` +- `tmq_res_t tmq_get_res_type(TAOS_RES *res)` +- `const char *tmq_get_topic_name(TAOS_RES *res)` +- `const char *tmq_get_db_name(TAOS_RES *res)` + + tmq_res_t indicates the type of data consumed, defined as follows: + ``` + typedef enum tmq_res_t { + TMQ_RES_INVALID = -1, // invalid + TMQ_RES_DATA = 1, // data + TMQ_RES_TABLE_META = 2, // meta + TMQ_RES_METADATA = 3 // data & meta + } tmq_res_t; + ``` + **Description** + - tmq_get_connect : when creating a consumer, a link will be automatically established and saved in the tmq_t structure. This interface allows users to obtain link information (same as taos_connect) from the tmq_t structure + - tmq_get_table_name : get the table name of result + - tmq_get_res_type : get the type of result + - tmq_get_topic_name : get the topic name of result + - tmq_get_db_name : get the db name of result + + **Parameter description** + - tmq : tmq_t structure created by tmq_consumer_new + - res : TAOS_RES structure returned by tmq_consumer_poll + + **Return value** + - tmq_get_connect : connection info in tmq, NULL if failed + - tmq_get_table_name : table name of result, NULL if failed + - tmq_get_res_type : result type tmq_res_t + - tmq_get_topic_name : topic name of result, NULL if failed + - tmq_get_db_name : db name of result, NULL if failed \ No newline at end of file diff --git a/docs/zh/08-connector/10-cpp.mdx b/docs/zh/08-connector/10-cpp.mdx index e5dc2ec8c8..16e6fc892f 100644 --- a/docs/zh/08-connector/10-cpp.mdx +++ b/docs/zh/08-connector/10-cpp.mdx @@ -68,143 +68,143 @@ TDengine 客户端驱动的安装请参考 [安装指南](../#安装步骤) ### 同步查询示例
-同步查询 + 同步查询 -```c -{{#include examples/c/demo.c}} -``` -格式化输出不同类型字段函数 taos_print_row -```c -int taos_print_row(char *str, TAOS_ROW row, TAOS_FIELD *fields, int num_fields) { - int32_t len = 0; - for (int i = 0; i < num_fields; ++i) { + ```c + {{#include examples/c/demo.c}} + ``` + 格式化输出不同类型字段函数 taos_print_row + ```c + int taos_print_row(char *str, TAOS_ROW row, TAOS_FIELD *fields, int num_fields) { + int32_t len = 0; + for (int i = 0; i < num_fields; ++i) { if (i > 0) { - str[len++] = ' '; - } + str[len++] = ' '; +} if (row[i] == NULL) { - len += sprintf(str + len, "%s", TSDB_DATA_NULL_STR); - continue; - } + len += sprintf(str + len, "%s", TSDB_DATA_NULL_STR); + continue; +} switch (fields[i].type) { - case TSDB_DATA_TYPE_TINYINT: - len += sprintf(str + len, "%d", *((int8_t *)row[i])); - break; + case TSDB_DATA_TYPE_TINYINT: + len += sprintf(str + len, "%d", *((int8_t *)row[i])); + break; - case TSDB_DATA_TYPE_UTINYINT: - len += sprintf(str + len, "%u", *((uint8_t *)row[i])); - break; + case TSDB_DATA_TYPE_UTINYINT: + len += sprintf(str + len, "%u", *((uint8_t *)row[i])); + break; - case TSDB_DATA_TYPE_SMALLINT: - len += sprintf(str + len, "%d", *((int16_t *)row[i])); - break; + case TSDB_DATA_TYPE_SMALLINT: + len += sprintf(str + len, "%d", *((int16_t *)row[i])); + break; - case TSDB_DATA_TYPE_USMALLINT: - len += sprintf(str + len, "%u", *((uint16_t *)row[i])); - break; + case TSDB_DATA_TYPE_USMALLINT: + len += sprintf(str + len, "%u", *((uint16_t *)row[i])); + break; - case TSDB_DATA_TYPE_INT: - len += sprintf(str + len, "%d", *((int32_t *)row[i])); - break; + case TSDB_DATA_TYPE_INT: + len += sprintf(str + len, "%d", *((int32_t *)row[i])); + break; - case TSDB_DATA_TYPE_UINT: - len += sprintf(str + len, "%u", *((uint32_t *)row[i])); - break; + case TSDB_DATA_TYPE_UINT: + len += sprintf(str + len, "%u", *((uint32_t *)row[i])); + break; - case TSDB_DATA_TYPE_BIGINT: - len += sprintf(str + len, "%" PRId64, *((int64_t *)row[i])); - break; + case 
TSDB_DATA_TYPE_BIGINT: + len += sprintf(str + len, "%" PRId64, *((int64_t *)row[i])); + break; - case TSDB_DATA_TYPE_UBIGINT: - len += sprintf(str + len, "%" PRIu64, *((uint64_t *)row[i])); - break; + case TSDB_DATA_TYPE_UBIGINT: + len += sprintf(str + len, "%" PRIu64, *((uint64_t *)row[i])); + break; - case TSDB_DATA_TYPE_FLOAT: { - float fv = 0; - fv = GET_FLOAT_VAL(row[i]); - len += sprintf(str + len, "%f", fv); - } break; + case TSDB_DATA_TYPE_FLOAT: { + float fv = 0; + fv = GET_FLOAT_VAL(row[i]); + len += sprintf(str + len, "%f", fv); +} break; - case TSDB_DATA_TYPE_DOUBLE: { - double dv = 0; - dv = GET_DOUBLE_VAL(row[i]); - len += sprintf(str + len, "%lf", dv); - } break; + case TSDB_DATA_TYPE_DOUBLE: { + double dv = 0; + dv = GET_DOUBLE_VAL(row[i]); + len += sprintf(str + len, "%lf", dv); +} break; - case TSDB_DATA_TYPE_BINARY: - case TSDB_DATA_TYPE_NCHAR: { - int32_t charLen = varDataLen((char *)row[i] - VARSTR_HEADER_SIZE); - if (fields[i].type == TSDB_DATA_TYPE_BINARY) { - assert(charLen <= fields[i].bytes && charLen >= 0); - } else { - assert(charLen <= fields[i].bytes * TSDB_NCHAR_SIZE && charLen >= 0); - } - - memcpy(str + len, row[i], charLen); - len += charLen; - } break; - - case TSDB_DATA_TYPE_TIMESTAMP: - len += sprintf(str + len, "%" PRId64, *((int64_t *)row[i])); - break; - - case TSDB_DATA_TYPE_BOOL: - len += sprintf(str + len, "%d", *((int8_t *)row[i])); - default: - break; - } - } - str[len] = 0; - - return len; + case TSDB_DATA_TYPE_BINARY: + case TSDB_DATA_TYPE_NCHAR: { + int32_t charLen = varDataLen((char *)row[i] - VARSTR_HEADER_SIZE); + if (fields[i].type == TSDB_DATA_TYPE_BINARY) { + assert(charLen <= fields[i].bytes && charLen >= 0); +} else { + assert(charLen <= fields[i].bytes * TSDB_NCHAR_SIZE && charLen >= 0); } - -``` - + + memcpy(str + len, row[i], charLen); + len += charLen; +} break; + + case TSDB_DATA_TYPE_TIMESTAMP: + len += sprintf(str + len, "%" PRId64, *((int64_t *)row[i])); + break; + + case TSDB_DATA_TYPE_BOOL: + len += 
sprintf(str + len, "%d", *((int8_t *)row[i])); + default: + break; +} +} + str[len] = 0; + + return len; +} + + ``` +
### 异步查询示例
-异步查询 + 异步查询 -```c -{{#include examples/c/asyncdemo.c}} -``` + ```c + {{#include examples/c/asyncdemo.c}} + ```
### 参数绑定示例
-参数绑定 + 参数绑定 -```c -{{#include examples/c/prepare.c}} -``` + ```c + {{#include examples/c/prepare.c}} + ```
### 无模式写入示例
-无模式写入 + 无模式写入 -```c -{{#include examples/c/schemaless.c}} -``` + ```c + {{#include examples/c/schemaless.c}} + ```
### 订阅和消费示例
-订阅和消费 + 订阅和消费 -```c - {{#include examples/c/tmq.c}} -``` + ```c + {{#include examples/c/tmq.c}} + ```
@@ -225,70 +225,70 @@ int taos_print_row(char *str, TAOS_ROW row, TAOS_FIELD *fields, int num_fields) - `int taos_init()` - 初始化运行环境。如果没有主动调用该 API,那么调用 `taos_connect()` 时驱动将自动调用该 API,故程序一般无需手动调用。 +初始化运行环境。如果没有主动调用该 API,那么调用 `taos_connect()` 时驱动将自动调用该 API,故程序一般无需手动调用。 - `void taos_cleanup()` - 清理运行环境,应用退出前应调用。 +清理运行环境,应用退出前应调用。 - `int taos_options(TSDB_OPTION option, const void * arg, ...)` - 设置客户端选项,目前支持区域设置(`TSDB_OPTION_LOCALE`)、字符集设置(`TSDB_OPTION_CHARSET`)、时区设置(`TSDB_OPTION_TIMEZONE`)、配置文件路径设置(`TSDB_OPTION_CONFIGDIR`)。区域设置、字符集、时区默认为操作系统当前设置。 +设置客户端选项,目前支持区域设置(`TSDB_OPTION_LOCALE`)、字符集设置(`TSDB_OPTION_CHARSET`)、时区设置(`TSDB_OPTION_TIMEZONE`)、配置文件路径设置(`TSDB_OPTION_CONFIGDIR`)。区域设置、字符集、时区默认为操作系统当前设置。 - `char *taos_get_client_info()` - 获取客户端版本信息。 +获取客户端版本信息。 - `TAOS *taos_connect(const char *host, const char *user, const char *pass, const char *db, int port)` - 创建数据库连接,初始化连接上下文。其中需要用户提供的参数包含: +创建数据库连接,初始化连接上下文。其中需要用户提供的参数包含: - - host:TDengine 集群中任一节点的 FQDN - - user:用户名 - - pass:密码 - - db: 数据库名字,如果用户没有提供,也可以正常连接,用户可以通过该连接创建新的数据库,如果用户提供了数据库名字,则说明该数据库用户已经创建好,缺省使用该数据库 - - port:taosd 程序监听的端口 +- host:TDengine 集群中任一节点的 FQDN +- user:用户名 +- pass:密码 +- db: 数据库名字,如果用户没有提供,也可以正常连接,用户可以通过该连接创建新的数据库,如果用户提供了数据库名字,则说明该数据库用户已经创建好,缺省使用该数据库 +- port:taosd 程序监听的端口 - 返回值为空表示失败。应用程序需要保存返回的参数,以便后续使用。 +返回值为空表示失败。应用程序需要保存返回的参数,以便后续使用。 - :::info - 同一进程可以根据不同的 host/port 连接多个 TDengine 集群 +:::info +同一进程可以根据不同的 host/port 连接多个 TDengine 集群 - ::: +::: -- `TAOS *taos_connect_auth(const char *host, const char *user, const char *auth, const char *db, uint16_t port)` +- `TAOS *taos_connect_auth(const char *host, const char *user, const char *auth, const char *db, uint16_t port)` - 功能同 taos_connect。除 pass 参数替换为 auth 外,其他参数同 taos_connect。 - - - auth: 原始密码取 32 位小写 md5 +功能同 taos_connect。除 pass 参数替换为 auth 外,其他参数同 taos_connect。 + +- auth: 原始密码取 32 位小写 md5 - `char *taos_get_server_info(TAOS *taos)` - 获取服务端版本信息。 +获取服务端版本信息。 - `int taos_select_db(TAOS *taos, const char *db)` - 将当前的缺省数据库设置为 `db`。 - +将当前的缺省数据库设置为 `db`。 
+ - `int taos_get_current_db(TAOS *taos, char *database, int len, int *required)` - - - database,len为用户在外面申请的空间,内部会把当前db赋值到database里。 - - 只要是没有正常把db名赋值到database中(包括截断),返回错误,返回值为-1,然后用户可以通过 taos_errstr(NULL) 来获取错误提示。 - - 如果,database == NULL 或者 len<=0 返回错误,required里保存存储db需要的空间(包含最后的'\0') - - 如果,len 小于 存储db需要的空间(包含最后的'\0'),返回错误,database里赋值截断的数据,以'\0'结尾。 - - 如果,len 大于等于 存储db需要的空间(包含最后的'\0'),返回正常0,database里赋值以'\0‘结尾的db名。 + +- database,len为用户在外面申请的空间,内部会把当前db赋值到database里。 +- 只要是没有正常把db名赋值到database中(包括截断),返回错误,返回值为-1,然后用户可以通过 taos_errstr(NULL) 来获取错误提示。 +- 如果,database == NULL 或者 len<=0 返回错误,required里保存存储db需要的空间(包含最后的'\0') +- 如果,len 小于 存储db需要的空间(包含最后的'\0'),返回错误,database里赋值截断的数据,以'\0'结尾。 +- 如果,len 大于等于 存储db需要的空间(包含最后的'\0'),返回正常0,database里赋值以'\0‘结尾的db名。 - `int taos_set_notify_cb(TAOS *taos, __taos_notify_fn_t fp, void *param, int type)` - 设置事件回调函数。 +设置事件回调函数。 - - fp 事件回调函数指针。函数声明:typedef void (*__taos_notify_fn_t)(void *param, void *ext, int type);其中, param 为用户自定义参数,ext 为扩展参数(依赖事件类型,针对 TAOS_NOTIFY_PASSVER 返回用户密码版本),type 为事件类型 - - param 用户自定义参数 - - type 事件类型。取值范围:1)TAOS_NOTIFY_PASSVER: 用户密码改变 +- fp 事件回调函数指针。函数声明:typedef void (*__taos_notify_fn_t)(void *param, void *ext, int type);其中, param 为用户自定义参数,ext 为扩展参数(依赖事件类型,针对 TAOS_NOTIFY_PASSVER 返回用户密码版本),type 为事件类型 +- param 用户自定义参数 +- type 事件类型。取值范围:1)TAOS_NOTIFY_PASSVER: 用户密码改变 - `void taos_close(TAOS *taos)` - 关闭连接,其中`taos`是 `taos_connect()` 返回的句柄。 +关闭连接,其中`taos`是 `taos_connect()` 返回的句柄。 ### 同步查询 API @@ -296,35 +296,35 @@ int taos_print_row(char *str, TAOS_ROW row, TAOS_FIELD *fields, int num_fields) - `TAOS_RES* taos_query(TAOS *taos, const char *sql)` - 执行 SQL 语句,可以是 DQL、DML 或 DDL 语句。 其中的 `taos` 参数是通过 `taos_connect()` 获得的句柄。不能通过返回值是否是 `NULL` 来判断执行结果是否失败,而是需要用 `taos_errno()` 函数解析结果集中的错误代码来进行判断。 +执行 SQL 语句,可以是 DQL、DML 或 DDL 语句。 其中的 `taos` 参数是通过 `taos_connect()` 获得的句柄。不能通过返回值是否是 `NULL` 来判断执行结果是否失败,而是需要用 `taos_errno()` 函数解析结果集中的错误代码来进行判断。 - `int taos_result_precision(TAOS_RES *res)` - 返回结果集时间戳字段的精度,`0` 代表毫秒,`1` 代表微秒,`2` 代表纳秒。 
+返回结果集时间戳字段的精度,`0` 代表毫秒,`1` 代表微秒,`2` 代表纳秒。 - `TAOS_ROW taos_fetch_row(TAOS_RES *res)` - 按行获取查询结果集中的数据。 +按行获取查询结果集中的数据。 - `int taos_fetch_block(TAOS_RES *res, TAOS_ROW *rows)` - 批量获取查询结果集中的数据,返回值为获取到的数据的行数。 +批量获取查询结果集中的数据,返回值为获取到的数据的行数。 - `int taos_num_fields(TAOS_RES *res)` 和 `int taos_field_count(TAOS_RES *res)` - 这两个 API 等价,用于获取查询结果集中的列数。 +这两个 API 等价,用于获取查询结果集中的列数。 - `int* taos_fetch_lengths(TAOS_RES *res)` - 获取结果集中每个字段的长度。返回值是一个数组,其长度为结果集的列数。 +获取结果集中每个字段的长度。返回值是一个数组,其长度为结果集的列数。 - `int taos_affected_rows(TAOS_RES *res)` - 获取被所执行的 SQL 语句影响的行数。 +获取被所执行的 SQL 语句影响的行数。 - `TAOS_FIELD *taos_fetch_fields(TAOS_RES *res)` - 获取查询结果集每列数据的属性(列的名称、列的数据类型、列的长度),与 `taos_num_fields()` 配合使用,可用来解析 `taos_fetch_row()` 返回的一个元组(一行)的数据。 `TAOS_FIELD` 的结构如下: +获取查询结果集每列数据的属性(列的名称、列的数据类型、列的长度),与 `taos_num_fields()` 配合使用,可用来解析 `taos_fetch_row()` 返回的一个元组(一行)的数据。 `TAOS_FIELD` 的结构如下: ```c typedef struct taosField { @@ -336,19 +336,19 @@ typedef struct taosField { - `void taos_stop_query(TAOS_RES *res)` - 停止当前查询的执行。 +停止当前查询的执行。 - `void taos_free_result(TAOS_RES *res)` - 释放查询结果集以及相关的资源。查询完成后,务必调用该 API 释放资源,否则可能导致应用内存泄露。但也需注意,释放资源后,如果再调用 `taos_consume()` 等获取查询结果的函数,将导致应用崩溃。 +释放查询结果集以及相关的资源。查询完成后,务必调用该 API 释放资源,否则可能导致应用内存泄露。但也需注意,释放资源后,如果再调用 `taos_consume()` 等获取查询结果的函数,将导致应用崩溃。 - `char *taos_errstr(TAOS_RES *res)` - 获取最近一次 API 调用失败的原因,返回值为字符串标识的错误提示信息。 +获取最近一次 API 调用失败的原因,返回值为字符串标识的错误提示信息。 - `int taos_errno(TAOS_RES *res)` - 获取最近一次 API 调用失败的原因,返回值为错误代码。 +获取最近一次 API 调用失败的原因,返回值为错误代码。 :::note 2.0 及以上版本 TDengine 推荐数据库应用的每个线程都建立一个独立的连接,或基于线程建立连接池。而不推荐在应用中将该连接 (TAOS\*) 结构体传递到不同的线程共享使用。基于 TAOS 结构体发出的查询、写入等操作具有多线程安全性,但 “USE statement” 等状态量有可能在线程之间相互干扰。此外,C 语言的连接器可以按照需求动态建立面向数据库的新连接(该过程对用户不可见),同时建议只有在程序最后退出的时候才调用 `taos_close()` 关闭连接。 @@ -366,19 +366,19 @@ TDengine 还提供性能更高的异步 API 处理数据插入、查询操作。 - `void taos_query_a(TAOS *taos, const char *sql, void (*fp)(void *param, TAOS_RES *, int code), void *param);` - 异步执行 SQL 语句。 +异步执行 SQL 语句。 - - taos:调用 `taos_connect()` 返回的数据库连接 - - sql:需要执行的 SQL 语句 - - 
fp:用户定义的回调函数,其第三个参数 `code` 用于指示操作是否成功,`0` 表示成功,负数表示失败(调用 `taos_errstr()` 可获取失败原因)。应用在定义回调函数的时候,主要处理第二个参数 `TAOS_RES *`,该参数是查询返回的结果集 - - param:应用提供一个用于回调的参数 +- taos:调用 `taos_connect()` 返回的数据库连接 +- sql:需要执行的 SQL 语句 +- fp:用户定义的回调函数,其第三个参数 `code` 用于指示操作是否成功,`0` 表示成功,负数表示失败(调用 `taos_errstr()` 可获取失败原因)。应用在定义回调函数的时候,主要处理第二个参数 `TAOS_RES *`,该参数是查询返回的结果集 +- param:应用提供一个用于回调的参数 - `void taos_fetch_rows_a(TAOS_RES *res, void (*fp)(void *param, TAOS_RES *, int numOfRows), void *param);` - 批量获取异步查询的结果集,只能与 `taos_query_a()` 配合使用。其中: +批量获取异步查询的结果集,只能与 `taos_query_a()` 配合使用。其中: - - res:`taos_query_a()` 回调时返回的结果集 - - fp:回调函数。其参数 `param` 是用户可定义的传递给回调函数的参数结构体;`numOfRows` 是获取到的数据的行数(不是整个查询结果集的函数)。 在回调函数中,应用可以通过调用 `taos_fetch_row()` 前向迭代获取批量记录中每一行记录。读完一块内的所有记录后,应用需要在回调函数中继续调用 `taos_fetch_rows_a()` 获取下一批记录进行处理,直到返回的记录数 `numOfRows` 为零(结果返回完成)或记录数为负值(查询出错)。 +- res:`taos_query_a()` 回调时返回的结果集 +- fp:回调函数。其参数 `param` 是用户可定义的传递给回调函数的参数结构体;`numOfRows` 是获取到的数据的行数(不是整个查询结果集的函数)。 在回调函数中,应用可以通过调用 `taos_fetch_row()` 前向迭代获取批量记录中每一行记录。读完一块内的所有记录后,应用需要在回调函数中继续调用 `taos_fetch_rows_a()` 获取下一批记录进行处理,直到返回的记录数 `numOfRows` 为零(结果返回完成)或记录数为负值(查询出错)。 TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多线程同时打开多张表,并可以同时对每张打开的表进行查询或者插入操作。需要指出的是,**客户端应用必须确保对同一张表的操作完全串行化**,即对同一个表的插入或查询操作未完成时(未返回时),不能够执行第二个插入或查询操作。 @@ -404,71 +404,71 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 - `TAOS_STMT* taos_stmt_init(TAOS *taos)` - 创建一个 TAOS_STMT 对象用于后续调用。 +创建一个 TAOS_STMT 对象用于后续调用。 - `int taos_stmt_prepare(TAOS_STMT *stmt, const char *sql, unsigned long length)` - 解析一条 SQL 语句,将解析结果和参数信息绑定到 stmt 上,如果参数 length 大于 0,将使用此参数作为 SQL 语句的长度,如等于 0,将自动判断 SQL 语句的长度。 +解析一条 SQL 语句,将解析结果和参数信息绑定到 stmt 上,如果参数 length 大于 0,将使用此参数作为 SQL 语句的长度,如等于 0,将自动判断 SQL 语句的长度。 - `int taos_stmt_bind_param(TAOS_STMT *stmt, TAOS_MULTI_BIND *bind)` - 不如 `taos_stmt_bind_param_batch()` 效率高,但可以支持非 INSERT 类型的 SQL 语句。 - 进行参数绑定,bind 指向一个数组(代表所要绑定的一行数据),需保证此数组中的元素数量和顺序与 SQL 语句中的参数完全一致。TAOS_MULTI_BIND 的使用方法与 MySQL 中的 MYSQL_BIND 类似,具体定义如下: +不如 `taos_stmt_bind_param_batch()` 效率高,但可以支持非 INSERT 类型的 
SQL 语句。 +进行参数绑定,bind 指向一个数组(代表所要绑定的一行数据),需保证此数组中的元素数量和顺序与 SQL 语句中的参数完全一致。TAOS_MULTI_BIND 的使用方法与 MySQL 中的 MYSQL_BIND 类似,具体定义如下: - ```c - typedef struct TAOS_MULTI_BIND { - int buffer_type; - void *buffer; - uintptr_t buffer_length; - uint32_t *length; - char *is_null; - int num; // the number of columns - } TAOS_MULTI_BIND; +```c +typedef struct TAOS_MULTI_BIND { + int buffer_type; + void *buffer; + uintptr_t buffer_length; + uint32_t *length; + char *is_null; + int num; // the number of columns +} TAOS_MULTI_BIND; ``` - `int taos_stmt_set_tbname(TAOS_STMT* stmt, const char* name)` - (2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) - 当 SQL 语句中的表名使用了 `?` 占位时,可以使用此函数绑定一个具体的表名。 +(2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) +当 SQL 语句中的表名使用了 `?` 占位时,可以使用此函数绑定一个具体的表名。 - `int taos_stmt_set_tbname_tags(TAOS_STMT* stmt, const char* name, TAOS_MULTI_BIND* tags)` - (2.1.2.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) - 当 SQL 语句中的表名和 TAGS 都使用了 `?` 占位时,可以使用此函数绑定具体的表名和具体的 TAGS 取值。最典型的使用场景是使用了自动建表功能的 INSERT 语句(目前版本不支持指定具体的 TAGS 列)。TAGS 参数中的列数量需要与 SQL 语句中要求的 TAGS 数量完全一致。 +(2.1.2.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) +当 SQL 语句中的表名和 TAGS 都使用了 `?` 占位时,可以使用此函数绑定具体的表名和具体的 TAGS 取值。最典型的使用场景是使用了自动建表功能的 INSERT 语句(目前版本不支持指定具体的 TAGS 列)。TAGS 参数中的列数量需要与 SQL 语句中要求的 TAGS 数量完全一致。 - `int taos_stmt_bind_param_batch(TAOS_STMT* stmt, TAOS_MULTI_BIND* bind)` - (2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) - 以多列的方式传递待绑定的数据,需要保证这里传递的数据列的顺序、列的数量与 SQL 语句中的 VALUES 参数完全一致。TAOS_MULTI_BIND 的具体定义如下: +(2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) +以多列的方式传递待绑定的数据,需要保证这里传递的数据列的顺序、列的数量与 SQL 语句中的 VALUES 参数完全一致。TAOS_MULTI_BIND 的具体定义如下: - `int taos_stmt_add_batch(TAOS_STMT *stmt)` - 将当前绑定的参数加入批处理中,调用此函数后,可以再次调用 `taos_stmt_bind_param()` 或 `taos_stmt_bind_param_batch()` 绑定新的参数。需要注意,此函数仅支持 INSERT/IMPORT 语句,如果是 SELECT 等其他 SQL 语句,将返回错误。 +将当前绑定的参数加入批处理中,调用此函数后,可以再次调用 `taos_stmt_bind_param()` 或 `taos_stmt_bind_param_batch()` 绑定新的参数。需要注意,此函数仅支持 INSERT/IMPORT 语句,如果是 SELECT 等其他 SQL 语句,将返回错误。 - `int taos_stmt_execute(TAOS_STMT *stmt)` - 执行准备好的语句。目前,一条语句只能执行一次。 
+执行准备好的语句。目前,一条语句只能执行一次。 - `int taos_stmt_affected_rows(TAOS_STMT *stmt)` - - 获取执行多次绑定语句影响的行数。 + +获取执行多次绑定语句影响的行数。 - `int taos_stmt_affected_rows_once(TAOS_STMT *stmt)` - 获取执行一次绑定语句影响的行数。 +获取执行一次绑定语句影响的行数。 - `TAOS_RES* taos_stmt_use_result(TAOS_STMT *stmt)` - 获取语句的结果集。结果集的使用方式与非参数化调用时一致,使用完成后,应对此结果集调用 `taos_free_result()` 以释放资源。 +获取语句的结果集。结果集的使用方式与非参数化调用时一致,使用完成后,应对此结果集调用 `taos_free_result()` 以释放资源。 - `int taos_stmt_close(TAOS_STMT *stmt)` - 执行完毕,释放所有资源。 +执行完毕,释放所有资源。 - `char * taos_stmt_errstr(TAOS_STMT *stmt)` - (2.1.3.0 版本新增) - 用于在其他 STMT API 返回错误(返回错误码或空指针)时获取错误信息。 +(2.1.3.0 版本新增) +用于在其他 STMT API 返回错误(返回错误码或空指针)时获取错误信息。 ### 无模式(schemaless)写入 API @@ -476,43 +476,43 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 - `TAOS_RES* taos_schemaless_insert(TAOS* taos, const char* lines[], int numLines, int protocol, int precision)` - **功能说明** - - 该接口将行协议的文本数据写入到 TDengine 中。 +**功能说明** +- 该接口将行协议的文本数据写入到 TDengine 中。 - **参数说明** - - taos: 数据库连接,通过 `taos_connect()` 函数建立的数据库连接。 - - lines:文本数据。满足解析格式要求的无模式文本字符串。 - - numLines:文本数据的行数,不能为 0 。 - - protocol: 行协议类型,用于标识文本数据格式。 - - precision:文本数据中的时间戳精度字符串。 +**参数说明** +- taos: 数据库连接,通过 `taos_connect()` 函数建立的数据库连接。 +- lines:文本数据。满足解析格式要求的无模式文本字符串。 +- numLines:文本数据的行数,不能为 0 。 +- protocol: 行协议类型,用于标识文本数据格式。 +- precision:文本数据中的时间戳精度字符串。 - **返回值** - - TAOS_RES 结构体,应用可以通过使用 `taos_errstr()` 获得错误信息,也可以使用 `taos_errno()` 获得错误码。 - 在某些情况下,返回的 TAOS_RES 为 `NULL`,此时仍然可以调用 `taos_errno()` 来安全地获得错误码信息。 - 返回的 TAOS_RES 需要调用方来负责释放,否则会出现内存泄漏。 +**返回值** +- TAOS_RES 结构体,应用可以通过使用 `taos_errstr()` 获得错误信息,也可以使用 `taos_errno()` 获得错误码。 +在某些情况下,返回的 TAOS_RES 为 `NULL`,此时仍然可以调用 `taos_errno()` 来安全地获得错误码信息。 +返回的 TAOS_RES 需要调用方来负责释放,否则会出现内存泄漏。 - **说明** - - 协议类型是枚举类型,包含以下三种格式: +**说明** - - TSDB_SML_LINE_PROTOCOL:InfluxDB 行协议(Line Protocol) - - TSDB_SML_TELNET_PROTOCOL: OpenTSDB Telnet 文本行协议 - - TSDB_SML_JSON_PROTOCOL: OpenTSDB Json 协议格式 +协议类型是枚举类型,包含以下三种格式: - 时间戳分辨率的定义,定义在 `taos.h` 文件中,具体内容如下: +- TSDB_SML_LINE_PROTOCOL:InfluxDB 行协议(Line Protocol) +- TSDB_SML_TELNET_PROTOCOL: 
OpenTSDB Telnet 文本行协议 +- TSDB_SML_JSON_PROTOCOL: OpenTSDB Json 协议格式 - - TSDB_SML_TIMESTAMP_NOT_CONFIGURED = 0, - - TSDB_SML_TIMESTAMP_HOURS, - - TSDB_SML_TIMESTAMP_MINUTES, - - TSDB_SML_TIMESTAMP_SECONDS, - - TSDB_SML_TIMESTAMP_MILLI_SECONDS, - - TSDB_SML_TIMESTAMP_MICRO_SECONDS, - - TSDB_SML_TIMESTAMP_NANO_SECONDS +时间戳分辨率的定义,定义在 `taos.h` 文件中,具体内容如下: - 需要注意的是,时间戳分辨率参数只在协议类型为 `SML_LINE_PROTOCOL` 的时候生效。 - 对于 OpenTSDB 的文本协议,时间戳的解析遵循其官方解析规则 — 按照时间戳包含的字符的数量来确认时间精度。 +- TSDB_SML_TIMESTAMP_NOT_CONFIGURED = 0, +- TSDB_SML_TIMESTAMP_HOURS, +- TSDB_SML_TIMESTAMP_MINUTES, +- TSDB_SML_TIMESTAMP_SECONDS, +- TSDB_SML_TIMESTAMP_MILLI_SECONDS, +- TSDB_SML_TIMESTAMP_MICRO_SECONDS, +- TSDB_SML_TIMESTAMP_NANO_SECONDS - **schemaless 其他相关的接口** +需要注意的是,时间戳分辨率参数只在协议类型为 `SML_LINE_PROTOCOL` 的时候生效。 +对于 OpenTSDB 的文本协议,时间戳的解析遵循其官方解析规则 — 按照时间戳包含的字符的数量来确认时间精度。 + +**schemaless 其他相关的接口** - `TAOS_RES *taos_schemaless_insert_with_reqid(TAOS *taos, char *lines[], int numLines, int protocol, int precision, int64_t reqid)` - `TAOS_RES *taos_schemaless_insert_raw(TAOS *taos, char *lines, int len, int32_t *totalRows, int protocol, int precision)` - `TAOS_RES *taos_schemaless_insert_raw_with_reqid(TAOS *taos, char *lines, int len, int32_t *totalRows, int protocol, int precision, int64_t reqid)` @@ -521,17 +521,97 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 - `TAOS_RES *taos_schemaless_insert_raw_ttl(TAOS *taos, char *lines, int len, int32_t *totalRows, int protocol, int precision, int32_t ttl)` - `TAOS_RES *taos_schemaless_insert_raw_ttl_with_reqid(TAOS *taos, char *lines, int len, int32_t *totalRows, int protocol, int precision, int32_t ttl, int64_t reqid)` - **说明** - - 上面这7个接口是扩展接口,主要用于在schemaless写入时传递ttl、reqid参数,可以根据需要使用。 - - 带_raw的接口通过传递的参数lines指针和长度len来表示数据,为了解决原始接口数据包含'\0'而被截断的问题。totalRows指针返回解析出来的数据行数。 - - 带_ttl的接口可以传递ttl参数来控制建表的ttl到期时间。 - - 带_reqid的接口可以通过传递reqid参数来追踪整个的调用链。 +**说明** +- 上面这7个接口是扩展接口,主要用于在schemaless写入时传递ttl、reqid参数,可以根据需要使用。 +- 
带_raw的接口通过传递的参数lines指针和长度len来表示数据,为了解决原始接口数据包含'\0'而被截断的问题。totalRows指针返回解析出来的数据行数。 +- 带_ttl的接口可以传递ttl参数来控制建表的ttl到期时间。 +- 带_reqid的接口可以通过传递reqid参数来追踪整个的调用链。 ### 数据订阅 API +- `const char *tmq_err2str(int32_t code)` + + **功能说明** + - 该接口用于将数据订阅的错误码转换为错误信息 + + **参数说明** + - code: 数据订阅的错误码 + + **返回值** + - 非NULL,返回错误信息,错误信息可能为空字符串 + + +- `tmq_conf_t *tmq_conf_new()` +- `tmq_conf_res_t tmq_conf_set(tmq_conf_t *conf, const char *key, const char *value)` +- `void tmq_conf_set_auto_commit_cb(tmq_conf_t *conf, tmq_commit_cb *cb, void *param)` +- `void tmq_conf_destroy(tmq_conf_t *conf)` + tmq_conf_res_t 错误码定义如下: + ``` + typedef enum tmq_conf_res_t { + TMQ_CONF_UNKNOWN = -2, + TMQ_CONF_INVALID = -1, + TMQ_CONF_OK = 0, + } tmq_conf_res_t; + ``` + 设置自动提交回调函数的定义如下: + ``` + typedef void(tmq_commit_cb(tmq_t *tmq, int32_t code, void *param)) + ``` + **功能说明** + - tmq_conf_new 接口用于创建一个 tmq_conf_t 结构体,用于配置消费参数。 + - tmq_conf_set 接口用于设置消费参数,key 为参数名,value 为参数值。 + - tmq_conf_set_auto_commit_cb 接口用于设置自动提交回调函数,参数为回调函数和回调函数的参数。 + - tmq_conf_destroy 接口用于销毁 tmq_conf_t 结构体。 + + +- `tmq_list_t *tmq_list_new()` +- `int32_t tmq_list_append(tmq_list_t *, const char *)` +- `void tmq_list_destroy(tmq_list_t *)` +- `int32_t tmq_list_get_size(const tmq_list_t *)` +- `char **tmq_list_to_c_array(const tmq_list_t *)` + + **功能说明** + - tmq_list_new 接口用于创建一个 tmq_list_t 结构体,用于存储订阅的 topic。 + - tmq_list_append 接口用于向 tmq_list_t 结构体中添加一个 topic。 + - tmq_list_destroy 接口用于销毁 tmq_list_t 结构体,tmq_list_new 的结果需要通过该接口销毁。 + - tmq_list_get_size 接口用于获取 tmq_list_t 结构体中 topic 的个数。 + - tmq_list_to_c_array 接口用于将 tmq_list_t 结构体转换为 C 数组,数组每个元素为字符串指针。 + + +- `tmq_t *tmq_consumer_new(tmq_conf_t *conf, char *errstr, int32_t errstrLen)` +- `int32_t tmq_subscribe(tmq_t *tmq, const tmq_list_t *topic_list)` +- `int32_t tmq_unsubscribe(tmq_t *tmq)` +- `int32_t tmq_subscription(tmq_t *tmq, tmq_list_t **topic_list)` +- `TAOS_RES *tmq_consumer_poll(tmq_t *tmq, int64_t timeout)` +- `int32_t tmq_consumer_close(tmq_t *tmq)` + + **功能说明** + - 
tmq_consumer_new 接口用于创建一个 tmq_t 结构体,用于消费数据,消费完数据后需调用 tmq_consumer_close 关闭消费者。 + - tmq_subscribe 接口用于订阅 topic 列表,消费完数据后,需调用 tmq_unsubscribe 取消订阅。 + - tmq_unsubscribe 接口用于取消订阅的 topic 列表。需与 tmq_subscribe 配合使用。 + - tmq_subscription 接口用于获取订阅的 topic 列表。 + - tmq_consumer_poll 接口用于轮询消费数据,每一个消费者,只能单线程调用该接口。 + - tmq_consumer_close 接口用于关闭 tmq_t 结构体。需与 tmq_consumer_new 配合使用。 + + **参数说明** + - conf: 参数用于配置消费参数 + - errstr: 错误信息存储在这个字符串中,需自行分配内存,释放内存由调用者负责 + - errstrLen: errstr 字符串的长度 + - tmq: tmq_consumer_new 函数返回的 tmq_t 结构体 + - topic_list: topic 列表 + - timeout: 超时时间,单位为毫秒,表示多久没数据的话自动返回 NULL,负数的话默认超时1秒 + + **返回值** + - tmq_consumer_new 返回 tmq_t 结构体,失败返回 NULL + - tmq_subscribe 返回错误码,0 表示成功,非0表示失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息。 + - tmq_unsubscribe 返回错误码,0 表示成功,非0表示失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息。 + - tmq_subscription 返回错误码,0 表示成功,非0表示失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息。 + - tmq_consumer_poll 返回 TAOS_RES 结构体,NULL 表示没有数据,非 NULL 表示有数据,TAOS_RES 结果和 taos_query 返回结果一致,可通过查询的各种接口获取 TAOS_RES 里的信息,比如 schema 等。 + - tmq_consumer_close 返回错误码,0 表示成功,非0表示失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息。 + - `int32_t tmq_get_topic_assignment(tmq_t *tmq, const char *pTopicName, tmq_topic_assignment **assignment, int32_t *numOfAssignment)` - `void tmq_free_assignment(tmq_topic_assignment* pAssignment)` - + tmq_topic_assignment结构体定义如下: ```c typedef struct tmq_topic_assignment { @@ -541,6 +621,7 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 int64_t end; } tmq_topic_assignment; ``` + **功能说明** - tmq_get_topic_assignment 接口返回当前consumer分配的vgroup的信息,每个vgroup的信息包括vgId,wal的最大最小offset,以及当前消费到的offset。 @@ -551,65 +632,97 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 **返回值** - 错误码,0成功,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息。 + - `int64_t tmq_committed(tmq_t *tmq, const char *pTopicName, int32_t vgId)` - + **功能说明** - 获取当前 consumer 在某个 topic 和 vgroup上的 commit 位置。 **返回值** - 当前commit的位置,-2147467247表示没有消费进度,其他小于0的值表示失败,错误码就是返回值 + - `int32_t tmq_commit_sync(tmq_t *tmq, 
const TAOS_RES *msg)` - `void tmq_commit_async(tmq_t *tmq, const TAOS_RES *msg, tmq_commit_cb *cb, void *param)` - `int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset)` - `void tmq_commit_offset_async(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset, tmq_commit_cb *cb, void *param)` - + **功能说明** - + - commit接口分为两种类型,每种类型有同步和异步接口: - - 第一种类型:根据消息提交,提交消息里的进度,如果消息传NULL,提交当前consumer所有消费的vgroup的当前进度 : tmq_commit_sync/tmq_commit_async - - 第二种类型:根据某个topic的某个vgroup的offset提交 : tmq_commit_offset_sync/tmq_commit_offset_async - + - 第一种类型:根据消息提交,提交消息里的进度,如果消息传NULL,提交当前consumer所有消费的vgroup的当前进度 : tmq_commit_sync/tmq_commit_async + - 第二种类型:根据某个topic的某个vgroup的offset提交 : tmq_commit_offset_sync/tmq_commit_offset_async + **参数说明** - msg:消费到的消息结构,如果msg传NULL,提交当前consumer所有消费的vgroup的当前进度 **返回值** - 错误码,0成功,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 + - `int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId)` **功能说明** - 获取当前消费位置,为消费到的数据位置的下一个位置 - + **返回值** - 消费位置,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 -- `int32_t tmq_offset_seek(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset)` + + - `int32_t tmq_offset_seek(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset)` **功能说明** - 设置 consumer 在某个topic的某个vgroup的 offset位置,开始消费 **返回值** - 错误码,0成功,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 - -- `int32_t int64_t tmq_get_vgroup_offset(TAOS_RES* res)` + + +- `int64_t tmq_get_vgroup_offset(TAOS_RES* res)` +- `int32_t tmq_get_vgroup_id(TAOS_RES *res)` **功能说明** - - 获取 poll 消费到的数据的起始offset - + - tmq_get_vgroup_offset 获取 poll 消费到的数据的起始offset + - tmq_get_vgroup_id 获取 poll 消费到的数据的所属的vgroup id + **参数说明** - msg:消费到的消息结构 **返回值** - - 消费到的offset,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 - -- `int32_t int32_t tmq_subscription(tmq_t *tmq, tmq_list_t **topics)` + - tmq_get_vgroup_offset 返回值为消费到的offset,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 + - tmq_get_vgroup_id 
返回值为消费到的数据所属的vgroup id,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 + + +- `TAOS *tmq_get_connect(tmq_t *tmq)` +- `const char *tmq_get_table_name(TAOS_RES *res)` +- `tmq_res_t tmq_get_res_type(TAOS_RES *res)` +- `const char *tmq_get_topic_name(TAOS_RES *res)` +- `const char *tmq_get_db_name(TAOS_RES *res)` + + tmq_res_t 表示消费到的数据类型,定义如下: + ``` + typedef enum tmq_res_t { + TMQ_RES_INVALID = -1, // invalid + TMQ_RES_DATA = 1, // 数据 + TMQ_RES_TABLE_META = 2, // 元数据 + TMQ_RES_METADATA = 3 // 既有元数据又有数据,即自动建表 + } tmq_res_t; + ``` **功能说明** - - 获取消费者订阅的 topic 列表 - + - tmq_get_connect 创建consumer时,会自动建立链接保存在 tmq_t 结构体中,该接口用于获取 tmq_t 结构体中的链接信息,类似taos_connect + - tmq_get_table_name 获取返回结果所属的表名 + - tmq_get_res_type 获取返回结果的类型 + - tmq_get_topic_name 获取返回结果所属的topic名 + - tmq_get_db_name 获取返回结果所属的数据库名 + **参数说明** - - topics: 获取的 topic 列表存储在这个结构中,接口内分配内存,需调用tmq_list_destroy释放 + - tmq:tmq_consumer_new 返回的消费者handle + - res:tmq_consumer_poll 返回的消费到的消息 **返回值** - - 错误码,0成功,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 + - tmq_get_connect 返回值为tmq_t结构体中的连接,非 NULL 正常,NULL 失败 + - tmq_get_table_name 返回值为消费到的数据所属的表名,非 NULL 正常,NULL 失败 + - tmq_get_res_type 返回值为消费到的数据所属的类型,具体见上面 tmq_res_t 的注释说明 + - tmq_get_topic_name 返回值为消费到的数据所属的 topic 名,非 NULL 正常,NULL 失败 + - tmq_get_db_name 返回值为消费到的数据所属的数据库名,非 NULL 正常,NULL 失败 diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index fde1b12be0..d772706306 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -2342,7 +2342,7 @@ const char* tmq_get_db_name(TAOS_RES* res) { int32_t tmq_get_vgroup_id(TAOS_RES* res) { if (res == NULL) { - return -1; + return TSDB_CODE_INVALID_PARA; } if (TD_RES_TMQ(res) || TD_RES_TMQ_METADATA(res) || TD_RES_TMQ_BATCH_META(res)) { return ((SMqRspObjCommon*)res)->vgId; @@ -2350,7 +2350,7 @@ int32_t tmq_get_vgroup_id(TAOS_RES* res) { SMqMetaRspObj* pMetaRspObj = (SMqMetaRspObj*)res; return pMetaRspObj->vgId; } else { - return -1; + return TSDB_CODE_INVALID_PARA; } } From 
6f696a3933d83a15704f969234ff06c30dc6855e Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 4 Jun 2024 16:27:51 +0800 Subject: [PATCH 57/67] fix: insert into select constant issue --- source/libs/scalar/src/scalar.c | 4 ++++ tests/script/tsim/insert/insert_select.sim | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/source/libs/scalar/src/scalar.c b/source/libs/scalar/src/scalar.c index 5f43ae9f3c..50de5e760d 100644 --- a/source/libs/scalar/src/scalar.c +++ b/source/libs/scalar/src/scalar.c @@ -1213,6 +1213,7 @@ EDealRes sclRewriteFunction(SNode **pNode, SScalarCtx *ctx) { res->translate = true; + strcpy(res->node.aliasName, node->node.aliasName); res->node.resType.type = output.columnData->info.type; res->node.resType.bytes = output.columnData->info.bytes; res->node.resType.scale = output.columnData->info.scale; @@ -1268,6 +1269,7 @@ EDealRes sclRewriteLogic(SNode **pNode, SScalarCtx *ctx) { res->node.resType = node->node.resType; res->translate = true; + strcpy(res->node.aliasName, node->node.aliasName); int32_t type = output.columnData->info.type; if (IS_VAR_DATA_TYPE(type)) { res->datum.p = output.columnData->pData; @@ -1309,6 +1311,7 @@ EDealRes sclRewriteOperator(SNode **pNode, SScalarCtx *ctx) { res->translate = true; + strcpy(res->node.aliasName, node->node.aliasName); res->node.resType = node->node.resType; if (colDataIsNull_s(output.columnData, 0)) { res->isNull = true; @@ -1364,6 +1367,7 @@ EDealRes sclRewriteCaseWhen(SNode **pNode, SScalarCtx *ctx) { res->translate = true; + strcpy(res->node.aliasName, node->node.aliasName); res->node.resType = node->node.resType; if (colDataIsNull_s(output.columnData, 0)) { res->isNull = true; diff --git a/tests/script/tsim/insert/insert_select.sim b/tests/script/tsim/insert/insert_select.sim index 333964b1d6..888f349fbb 100644 --- a/tests/script/tsim/insert/insert_select.sim +++ b/tests/script/tsim/insert/insert_select.sim @@ -61,5 +61,23 @@ if $data02 != 1 then return -1 endi +sql insert into t2 
(ts, b, a) select ts + 1, 11, 12 from t1; +sql select * from t2; +if $rows != 2 then + return -1 +endi +if $data01 != 2 then + return -1 +endi +if $data02 != 1 then + return -1 +endi +if $data11 != 12 then + return -1 +endi +if $data12 != 11 then + return -1 +endi + system sh/exec.sh -n dnode1 -s stop -x SIGINT From fe3f285ef95f8f55592e82e637a4890692689325 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 4 Jun 2024 16:37:38 +0800 Subject: [PATCH 58/67] fix:Spelling errors --- docs/zh/12-taos-sql/14-stream.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/12-taos-sql/14-stream.md b/docs/zh/12-taos-sql/14-stream.md index 3d48116ec8..cc057c3b72 100644 --- a/docs/zh/12-taos-sql/14-stream.md +++ b/docs/zh/12-taos-sql/14-stream.md @@ -91,7 +91,7 @@ SELECT _wstart, count(*), avg(voltage) from meters PARTITION BY tbname COUNT_WIN CREATE STREAM avg_vol_s INTO avg_vol SUBTABLE(CONCAT('new-', tname)) AS SELECT _wstart, count(*), avg(voltage) FROM meters PARTITION BY tbname tname INTERVAL(1m); ``` -PARTITION 子句中,为 tbname 定义了一个别名 tname, 在PARTITION 子句中的别名可以用于 SUBTABLE 子句中的表达式计算,在上述示例中,流新创建的子表将以前缀 'new-' 连接原表名作为表名(从3.2.3.0开始,为了避免 sutable 中的表达式无法区分各个子表,即误将多个相同时间线写入一个子表,在指定的子表名后面加上 __stableName_groupId)。 +PARTITION 子句中,为 tbname 定义了一个别名 tname, 在PARTITION 子句中的别名可以用于 SUBTABLE 子句中的表达式计算,在上述示例中,流新创建的子表将以前缀 'new-' 连接原表名作为表名(从3.2.3.0开始,为了避免 SUBTABLE 中的表达式无法区分各个子表,即误将多个相同时间线写入一个子表,在指定的子表名后面加上 __stableName_groupId)。 注意,子表名的长度若超过 TDengine 的限制,将被截断。若要生成的子表名已经存在于另一超级表,由于 TDengine 的子表名是唯一的,因此对应新子表的创建以及数据的写入将会失败。 From 144739c82b57d2b6e6cde9678fe9eeab68a47452 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 4 Jun 2024 16:49:23 +0800 Subject: [PATCH 59/67] fix:modify format --- docs/en/08-client-libraries/03-cpp.mdx | 2 +- docs/zh/08-connector/10-cpp.mdx | 416 ++++++++++++------------- 2 files changed, 209 insertions(+), 209 deletions(-) diff --git a/docs/en/08-client-libraries/03-cpp.mdx b/docs/en/08-client-libraries/03-cpp.mdx index 39f9688d86..e343f59400 
100644 --- a/docs/en/08-client-libraries/03-cpp.mdx +++ b/docs/en/08-client-libraries/03-cpp.mdx @@ -623,7 +623,7 @@ In addition to writing data using the SQL method or the parameter binding API, w - `const char *tmq_get_topic_name(TAOS_RES *res)` - `const char *tmq_get_db_name(TAOS_RES *res)` - tmq_res_t 表示消费到的数据类型,定义如下: + tmq_res_t the type of consumed result, defined as follows: ``` typedef enum tmq_res_t { TMQ_RES_INVALID = -1, // invalid diff --git a/docs/zh/08-connector/10-cpp.mdx b/docs/zh/08-connector/10-cpp.mdx index 16e6fc892f..567f45aa60 100644 --- a/docs/zh/08-connector/10-cpp.mdx +++ b/docs/zh/08-connector/10-cpp.mdx @@ -1,5 +1,5 @@ --- -sidebar_label: C/C++ +idebar_label: C/C++ title: C/C++ Connector --- @@ -68,143 +68,143 @@ TDengine 客户端驱动的安装请参考 [安装指南](../#安装步骤) ### 同步查询示例
- 同步查询 +同步查询 - ```c - {{#include examples/c/demo.c}} - ``` - 格式化输出不同类型字段函数 taos_print_row - ```c - int taos_print_row(char *str, TAOS_ROW row, TAOS_FIELD *fields, int num_fields) { - int32_t len = 0; - for (int i = 0; i < num_fields; ++i) { +```c +{{#include examples/c/demo.c}} +``` +格式化输出不同类型字段函数 taos_print_row +```c +int taos_print_row(char *str, TAOS_ROW row, TAOS_FIELD *fields, int num_fields) { + int32_t len = 0; + for (int i = 0; i < num_fields; ++i) { if (i > 0) { - str[len++] = ' '; -} + str[len++] = ' '; + } if (row[i] == NULL) { - len += sprintf(str + len, "%s", TSDB_DATA_NULL_STR); - continue; -} + len += sprintf(str + len, "%s", TSDB_DATA_NULL_STR); + continue; + } switch (fields[i].type) { - case TSDB_DATA_TYPE_TINYINT: - len += sprintf(str + len, "%d", *((int8_t *)row[i])); - break; + case TSDB_DATA_TYPE_TINYINT: + len += sprintf(str + len, "%d", *((int8_t *)row[i])); + break; - case TSDB_DATA_TYPE_UTINYINT: - len += sprintf(str + len, "%u", *((uint8_t *)row[i])); - break; + case TSDB_DATA_TYPE_UTINYINT: + len += sprintf(str + len, "%u", *((uint8_t *)row[i])); + break; - case TSDB_DATA_TYPE_SMALLINT: - len += sprintf(str + len, "%d", *((int16_t *)row[i])); - break; + case TSDB_DATA_TYPE_SMALLINT: + len += sprintf(str + len, "%d", *((int16_t *)row[i])); + break; - case TSDB_DATA_TYPE_USMALLINT: - len += sprintf(str + len, "%u", *((uint16_t *)row[i])); - break; + case TSDB_DATA_TYPE_USMALLINT: + len += sprintf(str + len, "%u", *((uint16_t *)row[i])); + break; - case TSDB_DATA_TYPE_INT: - len += sprintf(str + len, "%d", *((int32_t *)row[i])); - break; + case TSDB_DATA_TYPE_INT: + len += sprintf(str + len, "%d", *((int32_t *)row[i])); + break; - case TSDB_DATA_TYPE_UINT: - len += sprintf(str + len, "%u", *((uint32_t *)row[i])); - break; + case TSDB_DATA_TYPE_UINT: + len += sprintf(str + len, "%u", *((uint32_t *)row[i])); + break; - case TSDB_DATA_TYPE_BIGINT: - len += sprintf(str + len, "%" PRId64, *((int64_t *)row[i])); - break; + case 
TSDB_DATA_TYPE_BIGINT: + len += sprintf(str + len, "%" PRId64, *((int64_t *)row[i])); + break; - case TSDB_DATA_TYPE_UBIGINT: - len += sprintf(str + len, "%" PRIu64, *((uint64_t *)row[i])); - break; + case TSDB_DATA_TYPE_UBIGINT: + len += sprintf(str + len, "%" PRIu64, *((uint64_t *)row[i])); + break; - case TSDB_DATA_TYPE_FLOAT: { - float fv = 0; - fv = GET_FLOAT_VAL(row[i]); - len += sprintf(str + len, "%f", fv); -} break; + case TSDB_DATA_TYPE_FLOAT: { + float fv = 0; + fv = GET_FLOAT_VAL(row[i]); + len += sprintf(str + len, "%f", fv); + } break; - case TSDB_DATA_TYPE_DOUBLE: { - double dv = 0; - dv = GET_DOUBLE_VAL(row[i]); - len += sprintf(str + len, "%lf", dv); -} break; + case TSDB_DATA_TYPE_DOUBLE: { + double dv = 0; + dv = GET_DOUBLE_VAL(row[i]); + len += sprintf(str + len, "%lf", dv); + } break; - case TSDB_DATA_TYPE_BINARY: - case TSDB_DATA_TYPE_NCHAR: { - int32_t charLen = varDataLen((char *)row[i] - VARSTR_HEADER_SIZE); - if (fields[i].type == TSDB_DATA_TYPE_BINARY) { - assert(charLen <= fields[i].bytes && charLen >= 0); -} else { - assert(charLen <= fields[i].bytes * TSDB_NCHAR_SIZE && charLen >= 0); + case TSDB_DATA_TYPE_BINARY: + case TSDB_DATA_TYPE_NCHAR: { + int32_t charLen = varDataLen((char *)row[i] - VARSTR_HEADER_SIZE); + if (fields[i].type == TSDB_DATA_TYPE_BINARY) { + assert(charLen <= fields[i].bytes && charLen >= 0); + } else { + assert(charLen <= fields[i].bytes * TSDB_NCHAR_SIZE && charLen >= 0); + } + + memcpy(str + len, row[i], charLen); + len += charLen; + } break; + + case TSDB_DATA_TYPE_TIMESTAMP: + len += sprintf(str + len, "%" PRId64, *((int64_t *)row[i])); + break; + + case TSDB_DATA_TYPE_BOOL: + len += sprintf(str + len, "%d", *((int8_t *)row[i])); + default: + break; + } + } + str[len] = 0; + + return len; } - - memcpy(str + len, row[i], charLen); - len += charLen; -} break; - - case TSDB_DATA_TYPE_TIMESTAMP: - len += sprintf(str + len, "%" PRId64, *((int64_t *)row[i])); - break; - - case TSDB_DATA_TYPE_BOOL: - len += 
sprintf(str + len, "%d", *((int8_t *)row[i])); - default: - break; -} -} - str[len] = 0; - - return len; -} - - ``` - + +``` +
### 异步查询示例
- 异步查询 +异步查询 - ```c - {{#include examples/c/asyncdemo.c}} - ``` +```c +{{#include examples/c/asyncdemo.c}} +```
### 参数绑定示例
- 参数绑定 +参数绑定 - ```c - {{#include examples/c/prepare.c}} - ``` +```c +{{#include examples/c/prepare.c}} +```
### 无模式写入示例
- 无模式写入 +无模式写入 - ```c - {{#include examples/c/schemaless.c}} - ``` +```c +{{#include examples/c/schemaless.c}} +```
### 订阅和消费示例
- 订阅和消费 +订阅和消费 - ```c - {{#include examples/c/tmq.c}} - ``` +```c + {{#include examples/c/tmq.c}} +```
@@ -225,70 +225,70 @@ TDengine 客户端驱动的安装请参考 [安装指南](../#安装步骤) - `int taos_init()` -初始化运行环境。如果没有主动调用该 API,那么调用 `taos_connect()` 时驱动将自动调用该 API,故程序一般无需手动调用。 + 初始化运行环境。如果没有主动调用该 API,那么调用 `taos_connect()` 时驱动将自动调用该 API,故程序一般无需手动调用。 - `void taos_cleanup()` -清理运行环境,应用退出前应调用。 + 清理运行环境,应用退出前应调用。 - `int taos_options(TSDB_OPTION option, const void * arg, ...)` -设置客户端选项,目前支持区域设置(`TSDB_OPTION_LOCALE`)、字符集设置(`TSDB_OPTION_CHARSET`)、时区设置(`TSDB_OPTION_TIMEZONE`)、配置文件路径设置(`TSDB_OPTION_CONFIGDIR`)。区域设置、字符集、时区默认为操作系统当前设置。 + 设置客户端选项,目前支持区域设置(`TSDB_OPTION_LOCALE`)、字符集设置(`TSDB_OPTION_CHARSET`)、时区设置(`TSDB_OPTION_TIMEZONE`)、配置文件路径设置(`TSDB_OPTION_CONFIGDIR`)。区域设置、字符集、时区默认为操作系统当前设置。 - `char *taos_get_client_info()` -获取客户端版本信息。 + 获取客户端版本信息。 - `TAOS *taos_connect(const char *host, const char *user, const char *pass, const char *db, int port)` -创建数据库连接,初始化连接上下文。其中需要用户提供的参数包含: + 创建数据库连接,初始化连接上下文。其中需要用户提供的参数包含: -- host:TDengine 集群中任一节点的 FQDN -- user:用户名 -- pass:密码 -- db: 数据库名字,如果用户没有提供,也可以正常连接,用户可以通过该连接创建新的数据库,如果用户提供了数据库名字,则说明该数据库用户已经创建好,缺省使用该数据库 -- port:taosd 程序监听的端口 + - host:TDengine 集群中任一节点的 FQDN + - user:用户名 + - pass:密码 + - db: 数据库名字,如果用户没有提供,也可以正常连接,用户可以通过该连接创建新的数据库,如果用户提供了数据库名字,则说明该数据库用户已经创建好,缺省使用该数据库 + - port:taosd 程序监听的端口 -返回值为空表示失败。应用程序需要保存返回的参数,以便后续使用。 + 返回值为空表示失败。应用程序需要保存返回的参数,以便后续使用。 -:::info -同一进程可以根据不同的 host/port 连接多个 TDengine 集群 + :::info + 同一进程可以根据不同的 host/port 连接多个 TDengine 集群 -::: + ::: -- `TAOS *taos_connect_auth(const char *host, const char *user, const char *auth, const char *db, uint16_t port)` +- `TAOS *taos_connect_auth(const char *host, const char *user, const char *auth, const char *db, uint16_t port)` -功能同 taos_connect。除 pass 参数替换为 auth 外,其他参数同 taos_connect。 - -- auth: 原始密码取 32 位小写 md5 + 功能同 taos_connect。除 pass 参数替换为 auth 外,其他参数同 taos_connect。 + + - auth: 原始密码取 32 位小写 md5 - `char *taos_get_server_info(TAOS *taos)` -获取服务端版本信息。 + 获取服务端版本信息。 - `int taos_select_db(TAOS *taos, const char *db)` -将当前的缺省数据库设置为 `db`。 - + 将当前的缺省数据库设置为 `db`。 + - `int taos_get_current_db(TAOS *taos, 
char *database, int len, int *required)` - -- database,len为用户在外面申请的空间,内部会把当前db赋值到database里。 -- 只要是没有正常把db名赋值到database中(包括截断),返回错误,返回值为-1,然后用户可以通过 taos_errstr(NULL) 来获取错误提示。 -- 如果,database == NULL 或者 len<=0 返回错误,required里保存存储db需要的空间(包含最后的'\0') -- 如果,len 小于 存储db需要的空间(包含最后的'\0'),返回错误,database里赋值截断的数据,以'\0'结尾。 -- 如果,len 大于等于 存储db需要的空间(包含最后的'\0'),返回正常0,database里赋值以'\0‘结尾的db名。 + + - database,len为用户在外面申请的空间,内部会把当前db赋值到database里。 + - 只要是没有正常把db名赋值到database中(包括截断),返回错误,返回值为-1,然后用户可以通过 taos_errstr(NULL) 来获取错误提示。 + - 如果,database == NULL 或者 len<=0 返回错误,required里保存存储db需要的空间(包含最后的'\0') + - 如果,len 小于 存储db需要的空间(包含最后的'\0'),返回错误,database里赋值截断的数据,以'\0'结尾。 + - 如果,len 大于等于 存储db需要的空间(包含最后的'\0'),返回正常0,database里赋值以'\0‘结尾的db名。 - `int taos_set_notify_cb(TAOS *taos, __taos_notify_fn_t fp, void *param, int type)` -设置事件回调函数。 + 设置事件回调函数。 -- fp 事件回调函数指针。函数声明:typedef void (*__taos_notify_fn_t)(void *param, void *ext, int type);其中, param 为用户自定义参数,ext 为扩展参数(依赖事件类型,针对 TAOS_NOTIFY_PASSVER 返回用户密码版本),type 为事件类型 -- param 用户自定义参数 -- type 事件类型。取值范围:1)TAOS_NOTIFY_PASSVER: 用户密码改变 + - fp 事件回调函数指针。函数声明:typedef void (*__taos_notify_fn_t)(void *param, void *ext, int type);其中, param 为用户自定义参数,ext 为扩展参数(依赖事件类型,针对 TAOS_NOTIFY_PASSVER 返回用户密码版本),type 为事件类型 + - param 用户自定义参数 + - type 事件类型。取值范围:1)TAOS_NOTIFY_PASSVER: 用户密码改变 - `void taos_close(TAOS *taos)` -关闭连接,其中`taos`是 `taos_connect()` 返回的句柄。 + 关闭连接,其中`taos`是 `taos_connect()` 返回的句柄。 ### 同步查询 API @@ -296,35 +296,35 @@ TDengine 客户端驱动的安装请参考 [安装指南](../#安装步骤) - `TAOS_RES* taos_query(TAOS *taos, const char *sql)` -执行 SQL 语句,可以是 DQL、DML 或 DDL 语句。 其中的 `taos` 参数是通过 `taos_connect()` 获得的句柄。不能通过返回值是否是 `NULL` 来判断执行结果是否失败,而是需要用 `taos_errno()` 函数解析结果集中的错误代码来进行判断。 + 执行 SQL 语句,可以是 DQL、DML 或 DDL 语句。 其中的 `taos` 参数是通过 `taos_connect()` 获得的句柄。不能通过返回值是否是 `NULL` 来判断执行结果是否失败,而是需要用 `taos_errno()` 函数解析结果集中的错误代码来进行判断。 - `int taos_result_precision(TAOS_RES *res)` -返回结果集时间戳字段的精度,`0` 代表毫秒,`1` 代表微秒,`2` 代表纳秒。 + 返回结果集时间戳字段的精度,`0` 代表毫秒,`1` 代表微秒,`2` 代表纳秒。 - `TAOS_ROW taos_fetch_row(TAOS_RES *res)` 
-按行获取查询结果集中的数据。 + 按行获取查询结果集中的数据。 - `int taos_fetch_block(TAOS_RES *res, TAOS_ROW *rows)` -批量获取查询结果集中的数据,返回值为获取到的数据的行数。 + 批量获取查询结果集中的数据,返回值为获取到的数据的行数。 - `int taos_num_fields(TAOS_RES *res)` 和 `int taos_field_count(TAOS_RES *res)` -这两个 API 等价,用于获取查询结果集中的列数。 + 这两个 API 等价,用于获取查询结果集中的列数。 - `int* taos_fetch_lengths(TAOS_RES *res)` -获取结果集中每个字段的长度。返回值是一个数组,其长度为结果集的列数。 + 获取结果集中每个字段的长度。返回值是一个数组,其长度为结果集的列数。 - `int taos_affected_rows(TAOS_RES *res)` -获取被所执行的 SQL 语句影响的行数。 + 获取被所执行的 SQL 语句影响的行数。 - `TAOS_FIELD *taos_fetch_fields(TAOS_RES *res)` -获取查询结果集每列数据的属性(列的名称、列的数据类型、列的长度),与 `taos_num_fields()` 配合使用,可用来解析 `taos_fetch_row()` 返回的一个元组(一行)的数据。 `TAOS_FIELD` 的结构如下: + 获取查询结果集每列数据的属性(列的名称、列的数据类型、列的长度),与 `taos_num_fields()` 配合使用,可用来解析 `taos_fetch_row()` 返回的一个元组(一行)的数据。 `TAOS_FIELD` 的结构如下: ```c typedef struct taosField { @@ -336,19 +336,19 @@ typedef struct taosField { - `void taos_stop_query(TAOS_RES *res)` -停止当前查询的执行。 + 停止当前查询的执行。 - `void taos_free_result(TAOS_RES *res)` -释放查询结果集以及相关的资源。查询完成后,务必调用该 API 释放资源,否则可能导致应用内存泄露。但也需注意,释放资源后,如果再调用 `taos_consume()` 等获取查询结果的函数,将导致应用崩溃。 + 释放查询结果集以及相关的资源。查询完成后,务必调用该 API 释放资源,否则可能导致应用内存泄露。但也需注意,释放资源后,如果再调用 `taos_consume()` 等获取查询结果的函数,将导致应用崩溃。 - `char *taos_errstr(TAOS_RES *res)` -获取最近一次 API 调用失败的原因,返回值为字符串标识的错误提示信息。 + 获取最近一次 API 调用失败的原因,返回值为字符串标识的错误提示信息。 - `int taos_errno(TAOS_RES *res)` -获取最近一次 API 调用失败的原因,返回值为错误代码。 + 获取最近一次 API 调用失败的原因,返回值为错误代码。 :::note 2.0 及以上版本 TDengine 推荐数据库应用的每个线程都建立一个独立的连接,或基于线程建立连接池。而不推荐在应用中将该连接 (TAOS\*) 结构体传递到不同的线程共享使用。基于 TAOS 结构体发出的查询、写入等操作具有多线程安全性,但 “USE statement” 等状态量有可能在线程之间相互干扰。此外,C 语言的连接器可以按照需求动态建立面向数据库的新连接(该过程对用户不可见),同时建议只有在程序最后退出的时候才调用 `taos_close()` 关闭连接。 @@ -366,19 +366,19 @@ TDengine 还提供性能更高的异步 API 处理数据插入、查询操作。 - `void taos_query_a(TAOS *taos, const char *sql, void (*fp)(void *param, TAOS_RES *, int code), void *param);` -异步执行 SQL 语句。 + 异步执行 SQL 语句。 -- taos:调用 `taos_connect()` 返回的数据库连接 -- sql:需要执行的 SQL 语句 -- fp:用户定义的回调函数,其第三个参数 `code` 用于指示操作是否成功,`0` 表示成功,负数表示失败(调用 `taos_errstr()` 
可获取失败原因)。应用在定义回调函数的时候,主要处理第二个参数 `TAOS_RES *`,该参数是查询返回的结果集 -- param:应用提供一个用于回调的参数 + - taos:调用 `taos_connect()` 返回的数据库连接 + - sql:需要执行的 SQL 语句 + - fp:用户定义的回调函数,其第三个参数 `code` 用于指示操作是否成功,`0` 表示成功,负数表示失败(调用 `taos_errstr()` 可获取失败原因)。应用在定义回调函数的时候,主要处理第二个参数 `TAOS_RES *`,该参数是查询返回的结果集 + - param:应用提供一个用于回调的参数 - `void taos_fetch_rows_a(TAOS_RES *res, void (*fp)(void *param, TAOS_RES *, int numOfRows), void *param);` -批量获取异步查询的结果集,只能与 `taos_query_a()` 配合使用。其中: + 批量获取异步查询的结果集,只能与 `taos_query_a()` 配合使用。其中: -- res:`taos_query_a()` 回调时返回的结果集 -- fp:回调函数。其参数 `param` 是用户可定义的传递给回调函数的参数结构体;`numOfRows` 是获取到的数据的行数(不是整个查询结果集的函数)。 在回调函数中,应用可以通过调用 `taos_fetch_row()` 前向迭代获取批量记录中每一行记录。读完一块内的所有记录后,应用需要在回调函数中继续调用 `taos_fetch_rows_a()` 获取下一批记录进行处理,直到返回的记录数 `numOfRows` 为零(结果返回完成)或记录数为负值(查询出错)。 + - res:`taos_query_a()` 回调时返回的结果集 + - fp:回调函数。其参数 `param` 是用户可定义的传递给回调函数的参数结构体;`numOfRows` 是获取到的数据的行数(不是整个查询结果集的函数)。 在回调函数中,应用可以通过调用 `taos_fetch_row()` 前向迭代获取批量记录中每一行记录。读完一块内的所有记录后,应用需要在回调函数中继续调用 `taos_fetch_rows_a()` 获取下一批记录进行处理,直到返回的记录数 `numOfRows` 为零(结果返回完成)或记录数为负值(查询出错)。 TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多线程同时打开多张表,并可以同时对每张打开的表进行查询或者插入操作。需要指出的是,**客户端应用必须确保对同一张表的操作完全串行化**,即对同一个表的插入或查询操作未完成时(未返回时),不能够执行第二个插入或查询操作。 @@ -404,71 +404,71 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 - `TAOS_STMT* taos_stmt_init(TAOS *taos)` -创建一个 TAOS_STMT 对象用于后续调用。 + 创建一个 TAOS_STMT 对象用于后续调用。 - `int taos_stmt_prepare(TAOS_STMT *stmt, const char *sql, unsigned long length)` -解析一条 SQL 语句,将解析结果和参数信息绑定到 stmt 上,如果参数 length 大于 0,将使用此参数作为 SQL 语句的长度,如等于 0,将自动判断 SQL 语句的长度。 + 解析一条 SQL 语句,将解析结果和参数信息绑定到 stmt 上,如果参数 length 大于 0,将使用此参数作为 SQL 语句的长度,如等于 0,将自动判断 SQL 语句的长度。 - `int taos_stmt_bind_param(TAOS_STMT *stmt, TAOS_MULTI_BIND *bind)` -不如 `taos_stmt_bind_param_batch()` 效率高,但可以支持非 INSERT 类型的 SQL 语句。 -进行参数绑定,bind 指向一个数组(代表所要绑定的一行数据),需保证此数组中的元素数量和顺序与 SQL 语句中的参数完全一致。TAOS_MULTI_BIND 的使用方法与 MySQL 中的 MYSQL_BIND 类似,具体定义如下: + 不如 `taos_stmt_bind_param_batch()` 效率高,但可以支持非 INSERT 类型的 SQL 语句。 + 进行参数绑定,bind 指向一个数组(代表所要绑定的一行数据),需保证此数组中的元素数量和顺序与 SQL 
语句中的参数完全一致。TAOS_MULTI_BIND 的使用方法与 MySQL 中的 MYSQL_BIND 类似,具体定义如下: -```c -typedef struct TAOS_MULTI_BIND { - int buffer_type; - void *buffer; - uintptr_t buffer_length; - uint32_t *length; - char *is_null; - int num; // the number of columns -} TAOS_MULTI_BIND; + ```c + typedef struct TAOS_MULTI_BIND { + int buffer_type; + void *buffer; + uintptr_t buffer_length; + uint32_t *length; + char *is_null; + int num; // the number of columns + } TAOS_MULTI_BIND; ``` - `int taos_stmt_set_tbname(TAOS_STMT* stmt, const char* name)` -(2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) -当 SQL 语句中的表名使用了 `?` 占位时,可以使用此函数绑定一个具体的表名。 + (2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) + 当 SQL 语句中的表名使用了 `?` 占位时,可以使用此函数绑定一个具体的表名。 - `int taos_stmt_set_tbname_tags(TAOS_STMT* stmt, const char* name, TAOS_MULTI_BIND* tags)` -(2.1.2.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) -当 SQL 语句中的表名和 TAGS 都使用了 `?` 占位时,可以使用此函数绑定具体的表名和具体的 TAGS 取值。最典型的使用场景是使用了自动建表功能的 INSERT 语句(目前版本不支持指定具体的 TAGS 列)。TAGS 参数中的列数量需要与 SQL 语句中要求的 TAGS 数量完全一致。 + (2.1.2.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) + 当 SQL 语句中的表名和 TAGS 都使用了 `?` 占位时,可以使用此函数绑定具体的表名和具体的 TAGS 取值。最典型的使用场景是使用了自动建表功能的 INSERT 语句(目前版本不支持指定具体的 TAGS 列)。TAGS 参数中的列数量需要与 SQL 语句中要求的 TAGS 数量完全一致。 - `int taos_stmt_bind_param_batch(TAOS_STMT* stmt, TAOS_MULTI_BIND* bind)` -(2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) -以多列的方式传递待绑定的数据,需要保证这里传递的数据列的顺序、列的数量与 SQL 语句中的 VALUES 参数完全一致。TAOS_MULTI_BIND 的具体定义如下: + (2.1.1.0 版本新增,仅支持用于替换 INSERT 语句中的参数值) + 以多列的方式传递待绑定的数据,需要保证这里传递的数据列的顺序、列的数量与 SQL 语句中的 VALUES 参数完全一致。TAOS_MULTI_BIND 的具体定义如下: - `int taos_stmt_add_batch(TAOS_STMT *stmt)` -将当前绑定的参数加入批处理中,调用此函数后,可以再次调用 `taos_stmt_bind_param()` 或 `taos_stmt_bind_param_batch()` 绑定新的参数。需要注意,此函数仅支持 INSERT/IMPORT 语句,如果是 SELECT 等其他 SQL 语句,将返回错误。 + 将当前绑定的参数加入批处理中,调用此函数后,可以再次调用 `taos_stmt_bind_param()` 或 `taos_stmt_bind_param_batch()` 绑定新的参数。需要注意,此函数仅支持 INSERT/IMPORT 语句,如果是 SELECT 等其他 SQL 语句,将返回错误。 - `int taos_stmt_execute(TAOS_STMT *stmt)` -执行准备好的语句。目前,一条语句只能执行一次。 + 执行准备好的语句。目前,一条语句只能执行一次。 - `int taos_stmt_affected_rows(TAOS_STMT *stmt)` - 
-获取执行多次绑定语句影响的行数。 + + 获取执行多次绑定语句影响的行数。 - `int taos_stmt_affected_rows_once(TAOS_STMT *stmt)` -获取执行一次绑定语句影响的行数。 + 获取执行一次绑定语句影响的行数。 - `TAOS_RES* taos_stmt_use_result(TAOS_STMT *stmt)` -获取语句的结果集。结果集的使用方式与非参数化调用时一致,使用完成后,应对此结果集调用 `taos_free_result()` 以释放资源。 + 获取语句的结果集。结果集的使用方式与非参数化调用时一致,使用完成后,应对此结果集调用 `taos_free_result()` 以释放资源。 - `int taos_stmt_close(TAOS_STMT *stmt)` -执行完毕,释放所有资源。 + 执行完毕,释放所有资源。 - `char * taos_stmt_errstr(TAOS_STMT *stmt)` -(2.1.3.0 版本新增) -用于在其他 STMT API 返回错误(返回错误码或空指针)时获取错误信息。 + (2.1.3.0 版本新增) + 用于在其他 STMT API 返回错误(返回错误码或空指针)时获取错误信息。 ### 无模式(schemaless)写入 API @@ -476,43 +476,43 @@ typedef struct TAOS_MULTI_BIND { - `TAOS_RES* taos_schemaless_insert(TAOS* taos, const char* lines[], int numLines, int protocol, int precision)` -**功能说明** -- 该接口将行协议的文本数据写入到 TDengine 中。 + **功能说明** + - 该接口将行协议的文本数据写入到 TDengine 中。 -**参数说明** -- taos: 数据库连接,通过 `taos_connect()` 函数建立的数据库连接。 -- lines:文本数据。满足解析格式要求的无模式文本字符串。 -- numLines:文本数据的行数,不能为 0 。 -- protocol: 行协议类型,用于标识文本数据格式。 -- precision:文本数据中的时间戳精度字符串。 + **参数说明** + - taos: 数据库连接,通过 `taos_connect()` 函数建立的数据库连接。 + - lines:文本数据。满足解析格式要求的无模式文本字符串。 + - numLines:文本数据的行数,不能为 0 。 + - protocol: 行协议类型,用于标识文本数据格式。 + - precision:文本数据中的时间戳精度字符串。 -**返回值** -- TAOS_RES 结构体,应用可以通过使用 `taos_errstr()` 获得错误信息,也可以使用 `taos_errno()` 获得错误码。 -在某些情况下,返回的 TAOS_RES 为 `NULL`,此时仍然可以调用 `taos_errno()` 来安全地获得错误码信息。 -返回的 TAOS_RES 需要调用方来负责释放,否则会出现内存泄漏。 + **返回值** + - TAOS_RES 结构体,应用可以通过使用 `taos_errstr()` 获得错误信息,也可以使用 `taos_errno()` 获得错误码。 + 在某些情况下,返回的 TAOS_RES 为 `NULL`,此时仍然可以调用 `taos_errno()` 来安全地获得错误码信息。 + 返回的 TAOS_RES 需要调用方来负责释放,否则会出现内存泄漏。 -**说明** + **说明** + + 协议类型是枚举类型,包含以下三种格式: -协议类型是枚举类型,包含以下三种格式: + - TSDB_SML_LINE_PROTOCOL:InfluxDB 行协议(Line Protocol) + - TSDB_SML_TELNET_PROTOCOL: OpenTSDB Telnet 文本行协议 + - TSDB_SML_JSON_PROTOCOL: OpenTSDB Json 协议格式 -- TSDB_SML_LINE_PROTOCOL:InfluxDB 行协议(Line Protocol) -- TSDB_SML_TELNET_PROTOCOL: OpenTSDB Telnet 文本行协议 -- TSDB_SML_JSON_PROTOCOL: OpenTSDB Json 协议格式 + 时间戳分辨率的定义,定义在 `taos.h` 文件中,具体内容如下: -时间戳分辨率的定义,定义在 
`taos.h` 文件中,具体内容如下: + - TSDB_SML_TIMESTAMP_NOT_CONFIGURED = 0, + - TSDB_SML_TIMESTAMP_HOURS, + - TSDB_SML_TIMESTAMP_MINUTES, + - TSDB_SML_TIMESTAMP_SECONDS, + - TSDB_SML_TIMESTAMP_MILLI_SECONDS, + - TSDB_SML_TIMESTAMP_MICRO_SECONDS, + - TSDB_SML_TIMESTAMP_NANO_SECONDS -- TSDB_SML_TIMESTAMP_NOT_CONFIGURED = 0, -- TSDB_SML_TIMESTAMP_HOURS, -- TSDB_SML_TIMESTAMP_MINUTES, -- TSDB_SML_TIMESTAMP_SECONDS, -- TSDB_SML_TIMESTAMP_MILLI_SECONDS, -- TSDB_SML_TIMESTAMP_MICRO_SECONDS, -- TSDB_SML_TIMESTAMP_NANO_SECONDS + 需要注意的是,时间戳分辨率参数只在协议类型为 `SML_LINE_PROTOCOL` 的时候生效。 + 对于 OpenTSDB 的文本协议,时间戳的解析遵循其官方解析规则 — 按照时间戳包含的字符的数量来确认时间精度。 -需要注意的是,时间戳分辨率参数只在协议类型为 `SML_LINE_PROTOCOL` 的时候生效。 -对于 OpenTSDB 的文本协议,时间戳的解析遵循其官方解析规则 — 按照时间戳包含的字符的数量来确认时间精度。 - -**schemaless 其他相关的接口** + **schemaless 其他相关的接口** - `TAOS_RES *taos_schemaless_insert_with_reqid(TAOS *taos, char *lines[], int numLines, int protocol, int precision, int64_t reqid)` - `TAOS_RES *taos_schemaless_insert_raw(TAOS *taos, char *lines, int len, int32_t *totalRows, int protocol, int precision)` - `TAOS_RES *taos_schemaless_insert_raw_with_reqid(TAOS *taos, char *lines, int len, int32_t *totalRows, int protocol, int precision, int64_t reqid)` @@ -521,11 +521,11 @@ typedef struct TAOS_MULTI_BIND { - `TAOS_RES *taos_schemaless_insert_raw_ttl(TAOS *taos, char *lines, int len, int32_t *totalRows, int protocol, int precision, int32_t ttl)` - `TAOS_RES *taos_schemaless_insert_raw_ttl_with_reqid(TAOS *taos, char *lines, int len, int32_t *totalRows, int protocol, int precision, int32_t ttl, int64_t reqid)` -**说明** -- 上面这7个接口是扩展接口,主要用于在schemaless写入时传递ttl、reqid参数,可以根据需要使用。 -- 带_raw的接口通过传递的参数lines指针和长度len来表示数据,为了解决原始接口数据包含'\0'而被截断的问题。totalRows指针返回解析出来的数据行数。 -- 带_ttl的接口可以传递ttl参数来控制建表的ttl到期时间。 -- 带_reqid的接口可以通过传递reqid参数来追踪整个的调用链。 + **说明** + - 上面这7个接口是扩展接口,主要用于在schemaless写入时传递ttl、reqid参数,可以根据需要使用。 + - 带_raw的接口通过传递的参数lines指针和长度len来表示数据,为了解决原始接口数据包含'\0'而被截断的问题。totalRows指针返回解析出来的数据行数。 + - 带_ttl的接口可以传递ttl参数来控制建表的ttl到期时间。 + - 
带_reqid的接口可以通过传递reqid参数来追踪整个的调用链。 ### 数据订阅 API - `const char *tmq_err2str(int32_t code)` From 6ac60e225e4b4d58fbea87baa0799718e7b0e625 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 4 Jun 2024 16:50:17 +0800 Subject: [PATCH 60/67] fix:modify format --- docs/zh/08-connector/10-cpp.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/08-connector/10-cpp.mdx b/docs/zh/08-connector/10-cpp.mdx index 567f45aa60..746c09f59d 100644 --- a/docs/zh/08-connector/10-cpp.mdx +++ b/docs/zh/08-connector/10-cpp.mdx @@ -1,5 +1,5 @@ --- -idebar_label: C/C++ +sidebar_label: C/C++ title: C/C++ Connector --- From 61475a0a7bc3854f19698417852d40d0cba22c4e Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 4 Jun 2024 17:24:10 +0800 Subject: [PATCH 61/67] docs:[TD-30393]add api document for tmq --- docs/en/08-client-libraries/03-cpp.mdx | 17 +++++++++++++++-- docs/zh/08-connector/10-cpp.mdx | 17 +++++++++++++++-- source/client/src/clientTmq.c | 8 ++++---- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/docs/en/08-client-libraries/03-cpp.mdx b/docs/en/08-client-libraries/03-cpp.mdx index e343f59400..97fea49902 100644 --- a/docs/en/08-client-libraries/03-cpp.mdx +++ b/docs/en/08-client-libraries/03-cpp.mdx @@ -483,10 +483,18 @@ In addition to writing data using the SQL method or the parameter binding API, w ``` **Description** - tmq_conf_new : create a tmq_conf_t structure to configure consumption parameters - - tmq_conf_set : set configuration, key is parameter name,value is parameter value - - tmq_conf_set_auto_commit_cb : set automatic commit callback function, cb is call back function, param is callback function parameter + - tmq_conf_set : set configuration, configuration is key-value pair + - tmq_conf_set_auto_commit_cb : set auto commit callback function - tmq_conf_destroy : destroy tmq_conf_t structure + **Parameter description** + - tmq_conf_set : key is parameter name,value is parameter value + - tmq_conf_set_auto_commit_cb : cb is 
callback function, param is callback function parameter + + **Return value** + - tmq_conf_new: structure of tmq_conf_t, NULL failed + - tmq_conf_set: tmq_conf_res_t, TMQ_CONF_OK means success, others means failure + - `tmq_list_t *tmq_list_new()` - `int32_t tmq_list_append(tmq_list_t *, const char *)` @@ -501,6 +509,11 @@ In addition to writing data using the SQL method or the parameter binding API, w - tmq_list_get_size : get size of tmq_list_t - tmq_list_to_c_array : convert tmq_list_t to c array, element is string pointer + **Return value** + - tmq_list_new : structure of tmq_list_t, tmq_list_t is a list of strings, NULL failed + - tmq_list_append : zero success, none zero failed, wrong message can be obtained through `char *tmq_err2str(int32_t code)` + - tmq_list_get_size : size of tmq_list_t, -1 failed + - tmq_list_to_c_array : c array, element is pointer of string, NULL failed - `tmq_t *tmq_consumer_new(tmq_conf_t *conf, char *errstr, int32_t errstrLen)` - `int32_t tmq_subscribe(tmq_t *tmq, const tmq_list_t *topic_list)` diff --git a/docs/zh/08-connector/10-cpp.mdx b/docs/zh/08-connector/10-cpp.mdx index 746c09f59d..f03777ac4b 100644 --- a/docs/zh/08-connector/10-cpp.mdx +++ b/docs/zh/08-connector/10-cpp.mdx @@ -558,10 +558,17 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 ``` **功能说明** - tmq_conf_new 接口用于创建一个 tmq_conf_t 结构体,用于配置消费参数。 - - tmq_conf_set 接口用于设置消费参数,key 为参数名,value 为参数值。 - - tmq_conf_set_auto_commit_cb 接口用于设置自动提交回调函数,参数为回调函数和回调函数的参数。 + - tmq_conf_set 接口用于设置消费参数。 + - tmq_conf_set_auto_commit_cb 接口用于设置自动提交回调函数。 - tmq_conf_destroy 接口用于销毁 tmq_conf_t 结构体。 + **参数说明** + - tmq_conf_set : key 为参数名,value 为参数值 + - tmq_conf_set_auto_commit_cb : cb 回调函数, param 回调函数参数 + + **返回值** + - tmq_conf_new: 配置 tmq_conf_t 类型指针, NULL 失败 + - tmq_conf_set: 结果 tmq_conf_res_t 类型, TMQ_CONF_OK 成功, 其他失败 - `tmq_list_t *tmq_list_new()` - `int32_t tmq_list_append(tmq_list_t *, const char *)` @@ -576,6 +583,12 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 - tmq_list_get_size 接口用于获取 tmq_list_t 结构体中 
topic 的个数。 - tmq_list_to_c_array 接口用于将 tmq_list_t 结构体转换为 C 数组,数组每个元素为字符串指针。 + **返回值** + - tmq_list_new : 返回 tmq_list_t 结果指针, tmq_list_t 是一个数组,每个元素是一个字符串, NULL 失败 + - tmq_list_append : 返回 0 表示成功,非0表示失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息。 + - tmq_list_get_size : 返回 tmq_list_t 结构体中 topic 的个数, -1 失败 + - tmq_list_to_c_array : 返回 c 数组, 每个元素是字符串指针, NULL 失败 + - `tmq_t *tmq_consumer_new(tmq_conf_t *conf, char *errstr, int32_t errstrLen)` - `int32_t tmq_subscribe(tmq_t *tmq, const tmq_list_t *topic_list)` diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 8fe3dc4afa..6c06ee853c 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -412,11 +412,11 @@ tmq_conf_res_t tmq_conf_set(tmq_conf_t* conf, const char* key, const char* value tmq_list_t* tmq_list_new() { return (tmq_list_t*)taosArrayInit(0, sizeof(void*)); } int32_t tmq_list_append(tmq_list_t* list, const char* src) { - if (list == NULL) return -1; + if (list == NULL) return TSDB_CODE_INVALID_PARA; SArray* container = &list->container; - if (src == NULL || src[0] == 0) return -1; + if (src == NULL || src[0] == 0) return TSDB_CODE_INVALID_PARA; char* topic = taosStrdup(src); - if (taosArrayPush(container, &topic) == NULL) return -1; + if (taosArrayPush(container, &topic) == NULL) return TSDB_CODE_INVALID_PARA; return 0; } @@ -2708,7 +2708,7 @@ int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet) { if (tmq == NULL) { taosMemoryFree(pParamSet); terrno = TSDB_CODE_TMQ_CONSUMER_CLOSED; - return -1; + return terrno; } // if no more waiting rsp From 29648be30ded486ccba66b18df204220a48fcaca Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 4 Jun 2024 23:28:08 +0800 Subject: [PATCH 62/67] fix(stream): add the new node info when adding stream tasks. 
--- source/dnode/mnode/impl/inc/mndStream.h | 1 + source/dnode/mnode/impl/src/mndStream.c | 89 ++++++++++++--------- source/dnode/mnode/impl/src/mndStreamHb.c | 11 ++- source/dnode/mnode/impl/src/mndStreamUtil.c | 24 ++++++ 4 files changed, 83 insertions(+), 42 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 2800aecdfa..6d2a89ddc9 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -120,6 +120,7 @@ void destroyStreamTaskIter(SStreamTaskIter *pIter); bool streamTaskIterNextTask(SStreamTaskIter *pIter); SStreamTask *streamTaskIterGetCurrent(SStreamTaskIter *pIter); void mndInitExecInfo(); +void removeExpiredNodeInfo(const SArray *pNodeSnapshot); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index e108ba557a..bbf2ad63ce 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -62,7 +62,7 @@ static int32_t mndProcessStreamReqCheckpoint(SRpcMsg *pReq); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); -static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); +static int32_t removeExpiredNodeEntryAndTask(SArray *pNodeSnapshot); static int32_t doKillCheckpointTrans(SMnode *pMnode, const char *pDbName, size_t len); static SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); @@ -692,6 +692,8 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { SStreamObj streamObj = {0}; char *sql = NULL; int32_t sqlLen = 0; + const char* pMsg = "create stream tasks on dnodes"; + terrno = TSDB_CODE_SUCCESS; SCMCreateStreamReq createReq = {0}; @@ -704,8 +706,8 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { terrno = TSDB_CODE_MND_INVALID_PLATFORM; goto _OVER; #endif - mInfo("stream:%s, 
start to create stream, sql:%s", createReq.name, createReq.sql); + mInfo("stream:%s, start to create stream, sql:%s", createReq.name, createReq.sql); if (mndCheckCreateStreamReq(&createReq) != 0) { mError("stream:%s, failed to create since %s", createReq.name, terrstr()); goto _OVER; @@ -745,8 +747,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - STrans *pTrans = - doCreateTrans(pMnode, &streamObj, pReq, TRN_CONFLICT_DB, MND_STREAM_CREATE_NAME, "create stream tasks on dnodes"); + STrans *pTrans = doCreateTrans(pMnode, &streamObj, pReq, TRN_CONFLICT_DB, MND_STREAM_CREATE_NAME, pMsg); if (pTrans == NULL) { goto _OVER; } @@ -789,7 +790,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { // add into buffer firstly // to make sure when the hb from vnode arrived, the newly created tasks have been in the task map already. taosThreadMutexLock(&execInfo.lock); - mDebug("stream stream:%s start to register tasks into task_node_list", createReq.name); + mDebug("stream stream:%s start to register tasks into task nodeList", createReq.name); saveStreamTasksInfo(&streamObj, &execInfo); taosThreadMutexUnlock(&execInfo.lock); @@ -1030,7 +1031,7 @@ _ERR: } int32_t initStreamNodeList(SMnode *pMnode) { - if (execInfo.pNodeList == NULL || (taosArrayGetSize(execInfo.pNodeList) == 0)) { + if (taosArrayGetSize(execInfo.pNodeList) == 0) { execInfo.pNodeList = taosArrayDestroy(execInfo.pNodeList); execInfo.pNodeList = extractNodeListFromStream(pMnode); } @@ -2203,8 +2204,8 @@ static SArray *extractNodeListFromStream(SMnode *pMnode) { epsetToStr(&pEntry->epset, buf, tListLen(buf)); mDebug("extract nodeInfo from stream obj, nodeId:%d, %s", pEntry->nodeId, buf); } - taosHashCleanup(pHash); + taosHashCleanup(pHash); return plist; } @@ -2242,15 +2243,17 @@ static bool taskNodeExists(SArray *pList, int32_t nodeId) { return false; } -int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { +int32_t removeExpiredNodeEntryAndTask(SArray *pNodeSnapshot) { 
SArray *pRemovedTasks = taosArrayInit(4, sizeof(STaskId)); int32_t numOfTask = taosArrayGetSize(execInfo.pTaskList); for (int32_t i = 0; i < numOfTask; ++i) { - STaskId *pId = taosArrayGet(execInfo.pTaskList, i); - STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); + STaskId *pId = taosArrayGet(execInfo.pTaskList, i); - if (pEntry->nodeId == SNODE_HANDLE) continue; + STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); + if (pEntry->nodeId == SNODE_HANDLE) { + continue; + } bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId); if (!existed) { @@ -2266,24 +2269,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { mDebug("remove invalid stream tasks:%d, remain:%d", (int32_t)taosArrayGetSize(pRemovedTasks), (int32_t)taosArrayGetSize(execInfo.pTaskList)); - int32_t size = taosArrayGetSize(pNodeSnapshot); - SArray *pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); - for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { - SNodeEntry *p = taosArrayGet(execInfo.pNodeList, i); + removeExpiredNodeInfo(pNodeSnapshot); - for (int32_t j = 0; j < size; ++j) { - SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j); - if (pEntry->nodeId == p->nodeId) { - taosArrayPush(pValidNodeEntryList, p); - break; - } - } - } - - taosArrayDestroy(execInfo.pNodeList); - execInfo.pNodeList = pValidNodeEntryList; - - mDebug("remain %d valid node entries", (int32_t)taosArrayGetSize(pValidNodeEntryList)); taosArrayDestroy(pRemovedTasks); return 0; } @@ -2314,9 +2301,9 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { return 0; } - bool allVgroupsReady = true; - SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVgroupsReady); - if (!allVgroupsReady) { + bool allReady = true; + SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); + if (!allReady) { taosArrayDestroy(pNodeSnapshot); atomic_store_32(&mndNodeCheckSentinel, 0); mWarn("not all vnodes are ready, ignore the exec 
nodeUpdate check"); @@ -2324,31 +2311,30 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { } taosThreadMutexLock(&execInfo.lock); - removeExpirednodeEntryAndTask(pNodeSnapshot); + removeExpiredNodeEntryAndTask(pNodeSnapshot); SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot); if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { // kill current active checkpoint transaction, since the transaction is vnode wide. killAllCheckpointTrans(pMnode, &changeInfo); - code = mndProcessVgroupChange(pMnode, &changeInfo); // keep the new vnode snapshot if success if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { - mDebug("create trans successfully, update cached node list"); taosArrayDestroy(execInfo.pNodeList); - execInfo.pNodeList = pNodeSnapshot; + execInfo.pNodeList = extractNodeListFromStream(pMnode); execInfo.ts = ts; + mDebug("create trans successfully, update cached node list, numOfNodes:%d", taosArrayGetSize(execInfo.pNodeList)); } else { mError("unexpected code during create nodeUpdate trans, code:%s", tstrerror(code)); - taosArrayDestroy(pNodeSnapshot); } } else { mDebug("no update found in nodeList"); - taosArrayDestroy(pNodeSnapshot); } + taosArrayDestroy(pNodeSnapshot); taosThreadMutexUnlock(&execInfo.lock); + taosArrayDestroy(changeInfo.pUpdateNodeList); taosHashCleanup(changeInfo.pDBMap); @@ -2385,8 +2371,27 @@ void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode) { taosHashPut(pExecNode->pTaskMap, &id, sizeof(id), &entry, sizeof(entry)); taosArrayPush(pExecNode->pTaskList, &id); - mInfo("s-task:0x%x add into task buffer, total:%d", (int32_t)entry.id.taskId, - (int32_t)taosArrayGetSize(pExecNode->pTaskList)); + + int32_t num = (int32_t)taosArrayGetSize(pExecNode->pTaskList); + mInfo("s-task:0x%x add into task buffer, total:%d", (int32_t)entry.id.taskId, num); + + // add the new vgroups if not added yet + bool exist = false; + for(int32_t j = 0; j < 
taosArrayGetSize(pExecNode->pNodeList); ++j) { + SNodeEntry* pEntry = taosArrayGet(pExecNode->pNodeList, j); + if (pEntry->nodeId == pTask->info.nodeId) { + exist = true; + break; + } + } + + if (!exist) { + SNodeEntry nodeEntry = {.hbTimestamp = -1, .nodeId = pTask->info.nodeId}; + epsetAssign(&nodeEntry.epset, &pTask->info.epSet); + + taosArrayPush(pExecNode->pNodeList, &nodeEntry); + mInfo("vgId:%d added into nodeList, total:%d", nodeEntry.nodeId, (int)taosArrayGetSize(pExecNode->pNodeList)); + } } } @@ -2394,6 +2399,8 @@ void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode) { } void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { + taosThreadMutexLock(&pExecNode->lock); + SStreamTaskIter *pIter = createStreamTaskIter(pStream); while (streamTaskIterNextTask(pIter)) { SStreamTask *pTask = streamTaskIterGetCurrent(pIter); @@ -2416,8 +2423,10 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { } } - destroyStreamTaskIter(pIter); ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList)); + taosThreadMutexUnlock(&pExecNode->lock); + + destroyStreamTaskIter(pIter); } static void doAddTaskId(SArray *pList, int32_t taskId, int64_t uid, int32_t numOfTotal) { diff --git a/source/dnode/mnode/impl/src/mndStreamHb.c b/source/dnode/mnode/impl/src/mndStreamHb.c index 9bd7b3b18f..778fd295f7 100644 --- a/source/dnode/mnode/impl/src/mndStreamHb.c +++ b/source/dnode/mnode/impl/src/mndStreamHb.c @@ -131,18 +131,26 @@ static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { int32_t *pVgId = taosArrayGet(pNodeList, k); mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num); + bool setFlag = false; int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); + for (int i = 0; i < numOfNodes; ++i) { SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, i); if (pNodeEntry->nodeId == *pVgId) { mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", 
*pVgId); pNodeEntry->stageUpdated = true; + setFlag = true; break; } } - } + if (!setFlag) { + mError("failed to set nodeUpdate flag, nodeId:%d not exists in nodelist, update it", *pVgId); + ASSERT(0); + return TSDB_CODE_FAILED; + } + } return TSDB_CODE_SUCCESS; } @@ -361,7 +369,6 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { pHead->vgId = htonl(req.vgId); tmsgSendRsp(&rsp); - pReq->info.handle = NULL; // disable auto rsp } diff --git a/source/dnode/mnode/impl/src/mndStreamUtil.c b/source/dnode/mnode/impl/src/mndStreamUtil.c index e53908eeed..54279161ab 100644 --- a/source/dnode/mnode/impl/src/mndStreamUtil.c +++ b/source/dnode/mnode/impl/src/mndStreamUtil.c @@ -135,6 +135,7 @@ SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { char buf[256] = {0}; epsetToStr(&entry.epset, buf, tListLen(buf)); mDebug("take snode snapshot, nodeId:%d %s", entry.nodeId, buf); + taosArrayPush(pVgroupListSnapshot, &entry); sdbRelease(pSdb, pObj); } @@ -571,6 +572,29 @@ void mndInitExecInfo() { execInfo.pTaskMap = taosHashInit(64, fn, true, HASH_NO_LOCK); execInfo.transMgmt.pDBTrans = taosHashInit(32, fn, true, HASH_NO_LOCK); execInfo.pTransferStateStreams = taosHashInit(32, fn, true, HASH_NO_LOCK); + execInfo.pNodeList = taosArrayInit(4, sizeof(SNodeEntry)); taosHashSetFreeFp(execInfo.pTransferStateStreams, freeTaskList); } + +void removeExpiredNodeInfo(const SArray *pNodeSnapshot) { + SArray *pValidList = taosArrayInit(4, sizeof(SNodeEntry)); + int32_t size = taosArrayGetSize(pNodeSnapshot); + + for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { + SNodeEntry *p = taosArrayGet(execInfo.pNodeList, i); + + for (int32_t j = 0; j < size; ++j) { + SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j); + if (pEntry->nodeId == p->nodeId) { + taosArrayPush(pValidList, p); + break; + } + } + } + + taosArrayDestroy(execInfo.pNodeList); + execInfo.pNodeList = pValidList; + + mDebug("remain %d valid node entries after clean expired nodes info", 
(int32_t)taosArrayGetSize(pValidList)); +} \ No newline at end of file From bf9a5135fb13b9333933ec848864e3d13ae2dfb1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 5 Jun 2024 00:13:57 +0800 Subject: [PATCH 63/67] fix(stream): disable timer for checkpoint-ready msg in rsma. --- source/libs/stream/src/streamDispatch.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index fb5f1e33c5..2e776313e0 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -769,16 +769,16 @@ int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { stDebug("s-task:%s level:%d checkpoint-ready msg sent to all %d upstreams", id, pTask->info.taskLevel, num); // start to check if checkpoint ready msg has successfully received by upstream tasks. - pActiveInfo->pSendReadyMsgTmr = NULL; + if (pTask->info.taskLevel == TASK_LEVEL__SINK || pTask->info.taskLevel == TASK_LEVEL__AGG) { + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s start checkpoint-ready monitor in 10s, ref:%d ", pTask->id.idStr, ref); + streamMetaAcquireOneTask(pTask); - int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s start checkpoint-trigger monitor in 10s, ref:%d ", pTask->id.idStr, ref); - streamMetaAcquireOneTask(pTask); - - if (pActiveInfo->pSendReadyMsgTmr == NULL) { - pActiveInfo->pSendReadyMsgTmr = taosTmrStart(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer); - } else { - taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + if (pActiveInfo->pSendReadyMsgTmr == NULL) { + pActiveInfo->pSendReadyMsgTmr = taosTmrStart(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer); + } else { + taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + } } return 
TSDB_CODE_SUCCESS; From 68915197ee04b509da58aadefe1ea85a1e2fcd83 Mon Sep 17 00:00:00 2001 From: charles Date: Wed, 5 Jun 2024 08:25:39 +0800 Subject: [PATCH 64/67] update test cases to fix uncertainty results by charles --- tests/army/community/insert/insert_basic.py | 24 +++--- tests/system-test/1-insert/insert_double.py | 90 ++++++++++----------- tests/system-test/2-query/td-28068.py | 16 ++-- 3 files changed, 65 insertions(+), 65 deletions(-) diff --git a/tests/army/community/insert/insert_basic.py b/tests/army/community/insert/insert_basic.py index 481db4eadd..1f2488a756 100644 --- a/tests/army/community/insert/insert_basic.py +++ b/tests/army/community/insert/insert_basic.py @@ -35,18 +35,18 @@ class TDTestCase(TBase): tdSql.execute("create database db_geometry;") tdSql.execute("use db_geometry;") tdSql.execute("create table t_ge (ts timestamp, id int, c1 GEOMETRY(512));") - tdSql.execute("insert into t_ge values(now, 1, 'MULTIPOINT ((0 0), (1 1))');") - tdSql.execute("insert into t_ge values(now, 1, 'MULTIPOINT (0 0, 1 1)');") - tdSql.execute("insert into t_ge values(now, 2, 'POINT (0 0)');") - tdSql.execute("insert into t_ge values(now, 2, 'POINT EMPTY');") - tdSql.execute("insert into t_ge values(now, 3, 'LINESTRING (0 0, 0 1, 1 2)');") - tdSql.execute("insert into t_ge values(now, 3, 'LINESTRING EMPTY');") - tdSql.execute("insert into t_ge values(now, 4, 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))');") - tdSql.execute("insert into t_ge values(now, 4, 'POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))');") - tdSql.execute("insert into t_ge values(now, 4, 'POLYGON EMPTY');") - tdSql.execute("insert into t_ge values(now, 5, 'MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))');") - tdSql.execute("insert into t_ge values(now, 6, 'MULTIPOLYGON (((1 1, 1 3, 3 3, 3 1, 1 1)), ((4 3, 6 3, 6 1, 4 1, 4 3)))');") - tdSql.execute("insert into t_ge values(now, 7, 'GEOMETRYCOLLECTION (MULTIPOINT((0 0), (1 1)), POINT(3 4), LINESTRING(2 3, 3 4))');") + tdSql.execute("insert 
into t_ge values(1717122943000, 1, 'MULTIPOINT ((0 0), (1 1))');") + tdSql.execute("insert into t_ge values(1717122944000, 1, 'MULTIPOINT (0 0, 1 1)');") + tdSql.execute("insert into t_ge values(1717122945000, 2, 'POINT (0 0)');") + tdSql.execute("insert into t_ge values(1717122946000, 2, 'POINT EMPTY');") + tdSql.execute("insert into t_ge values(1717122947000, 3, 'LINESTRING (0 0, 0 1, 1 2)');") + tdSql.execute("insert into t_ge values(1717122948000, 3, 'LINESTRING EMPTY');") + tdSql.execute("insert into t_ge values(1717122949000, 4, 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))');") + tdSql.execute("insert into t_ge values(1717122950000, 4, 'POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))');") + tdSql.execute("insert into t_ge values(1717122951000, 4, 'POLYGON EMPTY');") + tdSql.execute("insert into t_ge values(1717122952000, 5, 'MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))');") + tdSql.execute("insert into t_ge values(1717122953000, 6, 'MULTIPOLYGON (((1 1, 1 3, 3 3, 3 1, 1 1)), ((4 3, 6 3, 6 1, 4 1, 4 3)))');") + tdSql.execute("insert into t_ge values(1717122954000, 7, 'GEOMETRYCOLLECTION (MULTIPOINT((0 0), (1 1)), POINT(3 4), LINESTRING(2 3, 3 4))');") tdSql.query("select * from t_ge;") tdSql.checkRows(12) tdSql.query("select * from t_ge where id=1;") diff --git a/tests/system-test/1-insert/insert_double.py b/tests/system-test/1-insert/insert_double.py index b7af7237db..d6dada32cd 100644 --- a/tests/system-test/1-insert/insert_double.py +++ b/tests/system-test/1-insert/insert_double.py @@ -26,30 +26,30 @@ class TDTestCase: tdSql.execute(f"drop table if exists {table_name}") tdSql.execute(f"create table {table_name}(ts timestamp, i1 {dtype}, i2 {dtype} unsigned)") - tdSql.execute(f"insert into {table_name} values(now, -16, +6)") - tdSql.execute(f"insert into {table_name} values(now, 80.99, +0042)") - tdSql.execute(f"insert into {table_name} values(now, -0042, +80.99)") - tdSql.execute(f"insert into {table_name} values(now, 52.34354, 18.6)") - 
tdSql.execute(f"insert into {table_name} values(now, -12., +3.)") - tdSql.execute(f"insert into {table_name} values(now, -0.12, +3.0)") - tdSql.execute(f"insert into {table_name} values(now, -2.3e1, +2.324e2)") - tdSql.execute(f"insert into {table_name} values(now, -2e1, +2e2)") - tdSql.execute(f"insert into {table_name} values(now, -2.e1, +2.e2)") - tdSql.execute(f"insert into {table_name} values(now, -0x40, +0b10000)") - tdSql.execute(f"insert into {table_name} values(now, -0b10000, +0x40)") + tdSql.execute(f"insert into {table_name} values(1717122943000, -16, +6)") + tdSql.execute(f"insert into {table_name} values(1717122944000, 80.99, +0042)") + tdSql.execute(f"insert into {table_name} values(1717122945000, -0042, +80.99)") + tdSql.execute(f"insert into {table_name} values(1717122946000, 52.34354, 18.6)") + tdSql.execute(f"insert into {table_name} values(1717122947000, -12., +3.)") + tdSql.execute(f"insert into {table_name} values(1717122948000, -0.12, +3.0)") + tdSql.execute(f"insert into {table_name} values(1717122949000, -2.3e1, +2.324e2)") + tdSql.execute(f"insert into {table_name} values(1717122950000, -2e1, +2e2)") + tdSql.execute(f"insert into {table_name} values(1717122951000, -2.e1, +2.e2)") + tdSql.execute(f"insert into {table_name} values(1717122952000, -0x40, +0b10000)") + tdSql.execute(f"insert into {table_name} values(1717122953000, -0b10000, +0x40)") # str support - tdSql.execute(f"insert into {table_name} values(now, '-16', '+6')") - tdSql.execute(f"insert into {table_name} values(now, ' -80.99', ' +0042')") - tdSql.execute(f"insert into {table_name} values(now, ' -0042', ' +80.99')") - tdSql.execute(f"insert into {table_name} values(now, '52.34354', '18.6')") - tdSql.execute(f"insert into {table_name} values(now, '-12.', '+5.')") - tdSql.execute(f"insert into {table_name} values(now, '-.12', '+.5')") - tdSql.execute(f"insert into {table_name} values(now, '-2.e1', '+2.e2')") - tdSql.execute(f"insert into {table_name} values(now, '-2e1', 
'+2e2')") - tdSql.execute(f"insert into {table_name} values(now, '-2.3e1', '+2.324e2')") - tdSql.execute(f"insert into {table_name} values(now, '-0x40', '+0b10010')") - tdSql.execute(f"insert into {table_name} values(now, '-0b10010', '+0x40')") + tdSql.execute(f"insert into {table_name} values(1717122954000, '-16', '+6')") + tdSql.execute(f"insert into {table_name} values(1717122955000, ' -80.99', ' +0042')") + tdSql.execute(f"insert into {table_name} values(1717122956000, ' -0042', ' +80.99')") + tdSql.execute(f"insert into {table_name} values(1717122957000, '52.34354', '18.6')") + tdSql.execute(f"insert into {table_name} values(1717122958000, '-12.', '+5.')") + tdSql.execute(f"insert into {table_name} values(1717122959000, '-.12', '+.5')") + tdSql.execute(f"insert into {table_name} values(1717122960000, '-2.e1', '+2.e2')") + tdSql.execute(f"insert into {table_name} values(1717122961000, '-2e1', '+2e2')") + tdSql.execute(f"insert into {table_name} values(1717122962000, '-2.3e1', '+2.324e2')") + tdSql.execute(f"insert into {table_name} values(1717122963000, '-0x40', '+0b10010')") + tdSql.execute(f"insert into {table_name} values(1717122964000, '-0b10010', '+0x40')") tdSql.query(f"select * from {table_name}") tdSql.checkRows(22) @@ -64,22 +64,22 @@ class TDTestCase: min_u = 0 print("val:", baseval, negval, posval, max_i) - tdSql.execute(f"insert into {table_name} values(now, {negval}, {posval})") - tdSql.execute(f"insert into {table_name} values(now, -{baseval}, {baseval})") - tdSql.execute(f"insert into {table_name} values(now, {max_i}, {max_u})") - tdSql.execute(f"insert into {table_name} values(now, {min_i}, {min_u})") + tdSql.execute(f"insert into {table_name} values(1717122965000, {negval}, {posval})") + tdSql.execute(f"insert into {table_name} values(1717122966000, -{baseval}, {baseval})") + tdSql.execute(f"insert into {table_name} values(1717122967000, {max_i}, {max_u})") + tdSql.execute(f"insert into {table_name} values(1717122968000, {min_i}, {min_u})") 
tdSql.query(f"select * from {table_name}") tdSql.checkRows(26) # error case - tdSql.error(f"insert into {table_name} values(now, 0, {max_u+1})") - tdSql.error(f"insert into {table_name} values(now, 0, -1)") - tdSql.error(f"insert into {table_name} values(now, 0, -2.0)") - tdSql.error(f"insert into {table_name} values(now, 0, '-2.0')") - tdSql.error(f"insert into {table_name} values(now, {max_i+1}, 0)") - tdSql.error(f"insert into {table_name} values(now, {min_i-1}, 0)") - tdSql.error(f"insert into {table_name} values(now, '{min_i-1}', 0)") + tdSql.error(f"insert into {table_name} values(1717122969000, 0, {max_u+1})") + tdSql.error(f"insert into {table_name} values(1717122970000, 0, -1)") + tdSql.error(f"insert into {table_name} values(1717122971000, 0, -2.0)") + tdSql.error(f"insert into {table_name} values(1717122972000, 0, '-2.0')") + tdSql.error(f"insert into {table_name} values(1717122973000, {max_i+1}, 0)") + tdSql.error(f"insert into {table_name} values(1717122974000, {min_i-1}, 0)") + tdSql.error(f"insert into {table_name} values(1717122975000, '{min_i-1}', 0)") def test_tags(self, stable_name, dtype, bits): tdSql.execute(f"create stable {stable_name}(ts timestamp, i1 {dtype}, i2 {dtype} unsigned) tags(id {dtype})") @@ -93,20 +93,20 @@ class TDTestCase: max_u = 2*bigval - 1 min_u = 0 - tdSql.execute(f"insert into {stable_name}_1 using {stable_name} tags('{negval}') values(now, {negval}, {posval})") - tdSql.execute(f"insert into {stable_name}_2 using {stable_name} tags({posval}) values(now, -{baseval} , {baseval})") - tdSql.execute(f"insert into {stable_name}_3 using {stable_name} tags('0x40') values(now, {max_i}, {max_u})") - tdSql.execute(f"insert into {stable_name}_4 using {stable_name} tags(0b10000) values(now, {min_i}, {min_u})") + tdSql.execute(f"insert into {stable_name}_1 using {stable_name} tags('{negval}') values(1717122976000, {negval}, {posval})") + tdSql.execute(f"insert into {stable_name}_2 using {stable_name} tags({posval}) 
values(1717122977000, -{baseval} , {baseval})") + tdSql.execute(f"insert into {stable_name}_3 using {stable_name} tags('0x40') values(1717122978000, {max_i}, {max_u})") + tdSql.execute(f"insert into {stable_name}_4 using {stable_name} tags(0b10000) values(1717122979000, {min_i}, {min_u})") - tdSql.execute(f"insert into {stable_name}_5 using {stable_name} tags({max_i}) values(now, '{negval}', '{posval}')") - tdSql.execute(f"insert into {stable_name}_6 using {stable_name} tags('{min_i}') values(now, '-{baseval}' , '{baseval}')") - tdSql.execute(f"insert into {stable_name}_7 using {stable_name} tags(-0x40) values(now, '{max_i}', '{max_u}')") - tdSql.execute(f"insert into {stable_name}_8 using {stable_name} tags('-0b10000') values(now, '{min_i}', '{min_u}')") + tdSql.execute(f"insert into {stable_name}_5 using {stable_name} tags({max_i}) values(1717122980000, '{negval}', '{posval}')") + tdSql.execute(f"insert into {stable_name}_6 using {stable_name} tags('{min_i}') values(1717122981000, '-{baseval}' , '{baseval}')") + tdSql.execute(f"insert into {stable_name}_7 using {stable_name} tags(-0x40) values(1717122982000, '{max_i}', '{max_u}')") + tdSql.execute(f"insert into {stable_name}_8 using {stable_name} tags('-0b10000') values(1717122983000, '{min_i}', '{min_u}')") - tdSql.execute(f"insert into {stable_name}_9 using {stable_name} tags(12.) values(now, {negval}, {posval})") - tdSql.execute(f"insert into {stable_name}_10 using {stable_name} tags('-8.3') values(now, -{baseval} , {baseval})") - tdSql.execute(f"insert into {stable_name}_11 using {stable_name} tags(2.e1) values(now, {max_i}, {max_u})") - tdSql.execute(f"insert into {stable_name}_12 using {stable_name} tags('-2.3e1') values(now, {min_i}, {min_u})") + tdSql.execute(f"insert into {stable_name}_9 using {stable_name} tags(12.) 
values(1717122984000, {negval}, {posval})") + tdSql.execute(f"insert into {stable_name}_10 using {stable_name} tags('-8.3') values(1717122985000, -{baseval} , {baseval})") + tdSql.execute(f"insert into {stable_name}_11 using {stable_name} tags(2.e1) values(1717122986000, {max_i}, {max_u})") + tdSql.execute(f"insert into {stable_name}_12 using {stable_name} tags('-2.3e1') values(1717122987000, {min_i}, {min_u})") tdSql.query(f"select * from {stable_name}") tdSql.checkRows(12) diff --git a/tests/system-test/2-query/td-28068.py b/tests/system-test/2-query/td-28068.py index 0dfaf8e126..0a7e75fef2 100644 --- a/tests/system-test/2-query/td-28068.py +++ b/tests/system-test/2-query/td-28068.py @@ -10,14 +10,14 @@ class TDTestCase: tdSql.execute("create database td_28068;") tdSql.execute("create database if not exists td_28068;") tdSql.execute("create stable td_28068.st (ts timestamp, test_case nchar(10), time_cost float, num float) tags (branch nchar(10), scenario nchar(10));") - tdSql.execute("insert into td_28068.ct1 using td_28068.st (branch, scenario) tags ('3.0', 'scenario1') values (now(), 'query1', 1,2);") - tdSql.execute("insert into td_28068.ct1 using td_28068.st (branch, scenario) tags ('3.0', 'scenario1') values (now(), 'query1', 2,3);") - tdSql.execute("insert into td_28068.ct2 using td_28068.st (branch, scenario) tags ('3.0', 'scenario2') values (now(), 'query1', 10,1);") - tdSql.execute("insert into td_28068.ct2 using td_28068.st (branch, scenario) tags ('3.0', 'scenario2') values (now(), 'query1', 11,5);") - tdSql.execute("insert into td_28068.ct3 using td_28068.st (branch, scenario) tags ('3.1', 'scenario1') values (now(), 'query1', 20,4);") - tdSql.execute("insert into td_28068.ct3 using td_28068.st (branch, scenario) tags ('3.1', 'scenario1') values (now(), 'query1', 30,1);") - tdSql.execute("insert into td_28068.ct4 using td_28068.st (branch, scenario) tags ('3.1', 'scenario2') values (now(), 'query1', 8,8);") - tdSql.execute("insert into td_28068.ct4 
using td_28068.st (branch, scenario) tags ('3.1', 'scenario2') values (now(), 'query1', 9,10);") + tdSql.execute("insert into td_28068.ct1 using td_28068.st (branch, scenario) tags ('3.0', 'scenario1') values (1717122943000, 'query1', 1,2);") + tdSql.execute("insert into td_28068.ct1 using td_28068.st (branch, scenario) tags ('3.0', 'scenario1') values (1717122944000, 'query1', 2,3);") + tdSql.execute("insert into td_28068.ct2 using td_28068.st (branch, scenario) tags ('3.0', 'scenario2') values (1717122945000, 'query1', 10,1);") + tdSql.execute("insert into td_28068.ct2 using td_28068.st (branch, scenario) tags ('3.0', 'scenario2') values (1717122946000, 'query1', 11,5);") + tdSql.execute("insert into td_28068.ct3 using td_28068.st (branch, scenario) tags ('3.1', 'scenario1') values (1717122947000, 'query1', 20,4);") + tdSql.execute("insert into td_28068.ct3 using td_28068.st (branch, scenario) tags ('3.1', 'scenario1') values (1717122948000, 'query1', 30,1);") + tdSql.execute("insert into td_28068.ct4 using td_28068.st (branch, scenario) tags ('3.1', 'scenario2') values (1717122949000, 'query1', 8,8);") + tdSql.execute("insert into td_28068.ct4 using td_28068.st (branch, scenario) tags ('3.1', 'scenario2') values (1717122950000, 'query1', 9,10);") def run(self): tdSql.query('select last(ts) as ts, last(branch) as branch, last(scenario) as scenario, last(test_case) as test_case from td_28068.st group by branch, scenario order by last(branch);') From 82293406f131f6bbffa2ef1f90fb55fb4323c9a7 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 5 Jun 2024 09:14:33 +0800 Subject: [PATCH 65/67] fix(stream): fix syntax error. 
--- source/dnode/mnode/impl/src/mndStream.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index bbf2ad63ce..9bfda65108 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -2324,7 +2324,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { taosArrayDestroy(execInfo.pNodeList); execInfo.pNodeList = extractNodeListFromStream(pMnode); execInfo.ts = ts; - mDebug("create trans successfully, update cached node list, numOfNodes:%d", taosArrayGetSize(execInfo.pNodeList)); + mDebug("create trans successfully, update cached node list, numOfNodes:%d", + (int)taosArrayGetSize(execInfo.pNodeList)); } else { mError("unexpected code during create nodeUpdate trans, code:%s", tstrerror(code)); } From 545fa5ebcb0bedd8d9073213cd145679cf3cc212 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao> Date: Wed, 5 Jun 2024 11:38:38 +0800 Subject: [PATCH 66/67] add ts column for last cache --- source/libs/planner/src/planOptimizer.c | 6 +- tests/script/tsim/query/cache_last.sim | 243 ++++++++++++++++++++++++ 2 files changed, 247 insertions(+), 2 deletions(-) diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 7d10c02529..e9861c29b0 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -4159,8 +4159,10 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic lastRowScanOptSetLastTargets(pScan->node.pTargets, cxt.pLastCols, pLastRowCols, false, cxt.pkBytes); lastRowScanOptRemoveUslessTargets(pScan->node.pTargets, cxt.pLastCols, cxt.pOtherCols, pLastRowCols); - if (pPKTsCol && ((pScan->node.pTargets->length == 1) || (pScan->node.pTargets->length == 2 && cxt.pkBytes > 0))) { - // when select last(ts),ts from ..., we add another ts to targets + if (pPKTsCol && + ((cxt.pLastCols->length == 1 && 
nodesEqualNode((SNode*)pPKTsCol, nodesListGetNode(cxt.pLastCols, 0))) || + (pScan->node.pTargets->length == 2 && cxt.pkBytes > 0))) { + // when select last(ts),tbname,ts from ..., we add another ts to targets sprintf(pPKTsCol->colName, "#sel_val.%p", pPKTsCol); nodesListAppend(pScan->node.pTargets, nodesCloneNode((SNode*)pPKTsCol)); } diff --git a/tests/script/tsim/query/cache_last.sim b/tests/script/tsim/query/cache_last.sim index 50199117b0..b2d5ad8aa0 100644 --- a/tests/script/tsim/query/cache_last.sim +++ b/tests/script/tsim/query/cache_last.sim @@ -101,5 +101,248 @@ if $rows != 1 then return -1 endi +print step 2------------------------------- + +sql drop database if exists test; +sql create database test cachemodel 'both'; +sql use test; +sql create table stb (ts timestamp,a int,b int,c int) tags(ta int,tb int,tc int); + +sql create table t1 using stb tags(1,1,1); +sql create table t2 using stb tags(2,2,2); +sql insert into t1 values('2024-06-05 11:00:00',1,2,3); +sql insert into t1 values('2024-06-05 12:00:00',2,2,3); +sql insert into t2 values('2024-06-05 13:00:00',3,2,3); +sql insert into t2 values('2024-06-05 14:00:00',4,2,3); + +sql select last(ts) ts1,ts from stb; + +if $data00 != $data01 then + print $data00 + return -1 +endi + +sql select last(ts) ts1,ts from stb group by tbname; + +if $data00 != $data01 then + print $data00 + return -1 +endi + +sql select last(ts) ts1,tbname, ts from stb; + +if $data00 != $data02 then + print $data00 + return -1 +endi + +if $data01 != t2 then + print $data01 + return -1 +endi + +sql select last(ts) ts1,tbname, ts from stb group by tbname; + +if $data00 != $data02 then + print $data00 + return -1 +endi + +if $data01 != t2 then + print $data01 + return -1 +endi + +print step 3------------------------------- + +sql drop database if exists test1; +sql create database test1 cachemodel 'both'; +sql use test1; +sql create table stb (ts timestamp,a int primary key,b int,c int) tags(ta int,tb int,tc int); + +sql create table 
t1 using stb tags(1,1,1); +sql create table t2 using stb tags(2,2,2); +sql insert into t1 values('2024-06-05 11:00:00',1,2,3); +sql insert into t1 values('2024-06-05 12:00:00',2,2,3); +sql insert into t2 values('2024-06-05 13:00:00',3,2,3); +sql insert into t2 values('2024-06-05 14:00:00',4,2,3); + +sql select last(ts) ts1,ts from stb; + +if $data00 != $data01 then + print $data00 + return -1 +endi + +if $data00 != @24-06-05 14:00:00.000@ then + print $data00 + return -1 +endi + +sql select last(ts) ts1,ts from stb group by tbname; + +if $data00 != $data01 then + print $data00 + return -1 +endi + +if $data00 != @24-06-05 14:00:00.000@ then + print $data00 + return -1 +endi + +sql select last(ts) ts1,tbname, ts from stb; + +if $data00 != $data02 then + print $data00 + return -1 +endi + +if $data00 != @24-06-05 14:00:00.000@ then + print $data00 + return -1 +endi + +if $data01 != t2 then + print $data01 + return -1 +endi + +sql select last(ts) ts1,tbname, ts from stb group by tbname; + +if $data00 != $data02 then + print $data00 + return -1 +endi + +if $data00 != @24-06-05 14:00:00.000@ then + print $data00 + return -1 +endi + +if $data01 != t2 then + print $data01 + return -1 +endi + +print step 4------------------------------- + +sql select last(a) a,ts from stb; + +if $data00 != 4 then + print $data00 + return -1 +endi + +if $data01 != @24-06-05 14:00:00.000@ then + print $data01 + return -1 +endi + +sql select last(a) a,ts from stb group by tbname; + +if $data00 != 4 then + print $data00 + return -1 +endi + +if $data01 != @24-06-05 14:00:00.000@ then + print $data01 + return -1 +endi + +sql select last(a) a,tbname, ts from stb; + +if $data00 != 4 then + print $data00 + return -1 +endi + +if $data01 != t2 then + print $data01 + return -1 +endi + +if $data02 != @24-06-05 14:00:00.000@ then + print $data02 + return -1 +endi + +sql select last(a) a,tbname, ts from stb group by tbname; + +if $data00 != 4 then + print $data00 + return -1 +endi + +if $data01 != t2 then 
+ print $data01 + return -1 +endi + +if $data02 != @24-06-05 14:00:00.000@ then + print $data02 + return -1 +endi + +print step 5------------------------------- + +sql select last(ts) ts1,a from stb; + +if $data00 != @24-06-05 14:00:00.000@ then + print $data00 + return -1 +endi + +if $data01 != 4 then + print $data01 + return -1 +endi + +sql select last(ts) ts1,a from stb group by tbname; + +if $data00 != @24-06-05 14:00:00.000@ then + print $data00 + return -1 +endi + +if $data01 != 4 then + print $data01 + return -1 +endi + +sql select last(ts) ts1,tbname, a from stb; + +if $data00 != @24-06-05 14:00:00.000@ then + print $data00 + return -1 +endi + +if $data01 != t2 then + print $data01 + return -1 +endi + +if $data02 != 4 then + print $data02 + return -1 +endi + +sql select last(ts) ts1,tbname, a from stb group by tbname; + +if $data00 != @24-06-05 14:00:00.000@ then + print $data00 + return -1 +endi + +if $data01 != t2 then + print $data01 + return -1 +endi + +if $data02 != 4 then + print $data02 + return -1 +endi system sh/exec.sh -n dnode1 -s stop -x SIGINT From 1e33a1f4088aefbdb0432a82ff5e2aedee6adefa Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 5 Jun 2024 11:43:39 +0800 Subject: [PATCH 67/67] fix(stream): fix memory leak. --- source/dnode/mnode/impl/src/mndStream.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 9bfda65108..9c8f3f26ff 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -139,6 +139,7 @@ int32_t mndInitStream(SMnode *pMnode) { void mndCleanupStream(SMnode *pMnode) { taosArrayDestroy(execInfo.pTaskList); + taosArrayDestroy(execInfo.pNodeList); taosHashCleanup(execInfo.pTaskMap); taosHashCleanup(execInfo.transMgmt.pDBTrans); taosHashCleanup(execInfo.pTransferStateStreams);