diff --git a/include/common/rsync.h b/include/common/rsync.h index f613a35f48..0840b51793 100644 --- a/include/common/rsync.h +++ b/include/common/rsync.h @@ -13,7 +13,7 @@ extern "C" { void stopRsync(); void startRsync(); -int32_t uploadRsync(const char* id, const char* path); +int32_t uploadByRsync(const char* id, const char* path); int32_t downloadRsync(const char* id, const char* path); int32_t deleteRsync(const char* id); diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 95b7591263..90ee6f7cc0 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -235,6 +235,7 @@ extern int32_t tsMqRebalanceInterval; extern int32_t tsStreamCheckpointInterval; extern float tsSinkDataRate; extern int32_t tsStreamNodeCheckInterval; +extern int32_t tsMaxConcurrentCheckpoint; extern int32_t tsTtlUnit; extern int32_t tsTtlPushIntervalSec; extern int32_t tsTtlBatchDropNum; diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 3c6136b624..81c59b5538 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -3466,23 +3466,24 @@ int32_t tSerializeSMDropStreamReq(void* buf, int32_t bufLen, const SMDropStreamR int32_t tDeserializeSMDropStreamReq(void* buf, int32_t bufLen, SMDropStreamReq* pReq); void tFreeMDropStreamReq(SMDropStreamReq* pReq); -typedef struct { - char name[TSDB_STREAM_FNAME_LEN]; - int8_t igNotExists; -} SMRecoverStreamReq; - -typedef struct { - int8_t reserved; -} SMRecoverStreamRsp; - typedef struct { int64_t recoverObjUid; int32_t taskId; int32_t hasCheckPoint; } SMVStreamGatherInfoReq; -// int32_t tSerializeSMRecoverStreamReq(void* buf, int32_t bufLen, const SMRecoverStreamReq* pReq); -// int32_t tDeserializeSMRecoverStreamReq(void* buf, int32_t bufLen, SMRecoverStreamReq* pReq); +typedef struct SVUpdateCheckpointInfoReq { + SMsgHead head; + int64_t streamId; + int32_t taskId; + int64_t checkpointId; + int64_t checkpointVer; + int64_t checkpointTs; + int32_t transId; + int8_t dropRelHTask; + int64_t hStreamId; + int64_t hTaskId; +} SVUpdateCheckpointInfoReq; typedef struct { int64_t leftForVer; diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 6f15d7df70..d1ac2c79c3 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -310,7 +310,7 @@ TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DROP, "stream-task-drop", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_RUN, "stream-task-run", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_UPDATE_CHKPT, "stream-update-chkptinfo", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) //1035 1036 TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT_READY, "stream-checkpoint-ready", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) @@ -321,6 +321,8 @@ TD_DEF_MSG_TYPE(TDMT_STREAM_MAX_MSG, "stream-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_CREATE, "stream-create", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_DROP, "stream-drop", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE_TRIGGER, "stream-retri-trigger", NULL, NULL) + TD_CLOSE_MSG_SEG(TDMT_END_STREAM_MSG) TD_NEW_MSG_SEG(TDMT_MON_MSG) //5 << 8 diff --git a/include/dnode/vnode/tqCommon.h b/include/dnode/vnode/tqCommon.h index ce04ec6953..0076d79312 100644 --- a/include/dnode/vnode/tqCommon.h +++ b/include/dnode/vnode/tqCommon.h @@ -36,10 +36,13 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta); int32_t tqStreamTasksGetTotalNum(SStreamMeta* pMeta); int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg); +int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg); int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg); int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* pMsg, bool fromVnode); +int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, char* msg, int32_t msgLen); -int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode); +int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta); void tqSetRestoreVersionInfo(SStreamTask* pTask); #endif // TDENGINE_TQ_COMMON_H diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index 2d507ab6bb..330ba31c65 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -410,7 +410,7 @@ typedef struct SStateStore { void (*streamFileStateClear)(struct SStreamFileState* pFileState); bool (*needClearDiskBuff)(struct SStreamFileState* pFileState); - SStreamState* (*streamStateOpen)(char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages); + SStreamState* (*streamStateOpen)(const char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages); void (*streamStateClose)(SStreamState* pState, bool remove); int32_t (*streamStateBegin)(SStreamState* pState); int32_t (*streamStateCommit)(SStreamState* pState); diff --git a/include/libs/function/function.h b/include/libs/function/function.h index 0afda2e160..87bbe21133 100644 --- a/include/libs/function/function.h +++ b/include/libs/function/function.h @@ -150,6 +150,7 @@ typedef struct SBackendCfWrapper { int64_t backendId; char idstr[64]; } SBackendCfWrapper; + typedef struct STdbState { SBackendCfWrapper *pBackendCfWrapper; int64_t backendCfWrapperId; diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index 7813b2cc9a..ae5a733ae9 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -29,7 +29,7 @@ extern "C" { #include "storageapi.h" -SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages); +SStreamState* streamStateOpen(const char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages); void streamStateClose(SStreamState* pState, bool remove); int32_t streamStateBegin(SStreamState* pState); int32_t streamStateCommit(SStreamState* pState); diff --git a/include/libs/stream/streammsg.h b/include/libs/stream/streammsg.h index 5436442284..91bfc6afc8 100644 --- a/include/libs/stream/streammsg.h +++ b/include/libs/stream/streammsg.h @@ -22,17 +22,17 @@ extern "C" { #endif -typedef struct SStreamChildEpInfo { +typedef struct SStreamUpstreamEpInfo { int32_t nodeId; int32_t childId; int32_t taskId; SEpSet epSet; bool dataAllowed; // denote if the data from this upstream task is allowed to put into inputQ, not serialize it int64_t stage; // upstream task stage value, to denote if the upstream node has restart/replica changed/transfer -} SStreamChildEpInfo; +} SStreamUpstreamEpInfo; -int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); -int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo); +int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamUpstreamEpInfo* pInfo); +int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamUpstreamEpInfo* pInfo); // mndTrigger: denote if this checkpoint is triggered by mnode or as requested from tasks when transfer-state finished typedef struct { @@ -171,6 +171,25 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pRsp); int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pRsp); void tCleanupStreamHbMsg(SStreamHbMsg* pMsg); +typedef struct SRetrieveChkptTriggerReq { + SMsgHead head; + int64_t streamId; + int64_t checkpointId; + int32_t upstreamNodeId; + int32_t upstreamTaskId; + int32_t downstreamNodeId; + int64_t downstreamTaskId; +} SRetrieveChkptTriggerReq; + +typedef struct SCheckpointTriggerRsp { + int64_t streamId; + int64_t checkpointId; + int32_t upstreamTaskId; + int32_t taskId; + int32_t transId; + int32_t rspCode; +} SCheckpointTriggerRsp; + typedef struct { SMsgHead head; int64_t streamId; diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 3c5d6d6e4c..d07a302920 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -58,11 +58,14 @@ extern "C" { #define STREAM_EXEC_T_STOP_ALL_TASKS (-5) #define STREAM_EXEC_T_RESUME_TASK (-6) #define STREAM_EXEC_T_ADD_FAILED_TASK (-7) +// the load and start stream task should be executed after snode has started successfully, since the load of stream +// tasks may incur the download of checkpoint data from remote, which may consume significant network and CPU resources. -typedef struct SStreamTask SStreamTask; -typedef struct SStreamQueue SStreamQueue; -typedef struct SStreamTaskSM SStreamTaskSM; -typedef struct SStreamQueueItem SStreamQueueItem; +typedef struct SStreamTask SStreamTask; +typedef struct SStreamQueue SStreamQueue; +typedef struct SStreamTaskSM SStreamTaskSM; +typedef struct SStreamQueueItem SStreamQueueItem; +typedef struct SActiveCheckpointInfo SActiveCheckpointInfo; #define SSTREAM_TASK_VER 4 #define SSTREAM_TASK_INCOMPATIBLE_VER 1 @@ -152,8 +155,6 @@ typedef enum EStreamTaskEvent { TASK_EVENT_DROPPING = 0xA, } EStreamTaskEvent; -typedef int32_t (*__state_trans_user_fn)(SStreamTask*, void* param); - typedef void FTbSink(SStreamTask* pTask, void* vnode, void* data); typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data); typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); @@ -270,13 +271,8 @@ typedef struct SCheckpointInfo { int64_t checkpointTime; // latest checkpoint time int64_t processedVer; int64_t nextProcessVer; // current offset in WAL, not serialize it - int64_t failedId; // record the latest failed checkpoint id - int64_t checkpointingId; - int32_t downstreamAlignNum; - int32_t numOfNotReady; - bool dispatchCheckpointTrigger; + SActiveCheckpointInfo* pActiveInfo; int64_t msgVer; - int32_t transId; } SCheckpointInfo; typedef struct SStreamStatus { @@ -290,6 +286,7 @@ typedef struct SStreamStatus { int64_t lastExecTs; // last exec time stamp int32_t inScanHistorySentinel; bool appendTranstateBlock; // has append the transfer state data block already + bool removeBackendFiles; // remove backend files on disk when free stream tasks } SStreamStatus; typedef struct SDataRange { @@ -305,7 +302,7 @@ typedef struct SSTaskBasicInfo { int32_t totalLevel; int8_t taskLevel; int8_t fillHistory; // is fill history task or not - int64_t triggerParam; // in msec + int64_t delaySchedParam; // in msec } SSTaskBasicInfo; typedef struct SStreamRetrieveReq SStreamRetrieveReq; @@ -322,7 +319,8 @@ typedef struct SDispatchMsgInfo { int32_t retryCount; // retry send data count int64_t startTs; // dispatch start time, record total elapsed time for dispatch SArray* pRetryList; // current dispatch successfully completed node of downstream - void* pTimer; // used to dispatch data after a given time duration + void* pRetryTmr; // used to dispatch data after a given time duration + void* pRspTmr; // used to dispatch data after a given time duration } SDispatchMsgInfo; typedef struct STaskQueue { @@ -356,8 +354,12 @@ typedef struct STaskExecStatisInfo { double step2El; int32_t updateCount; int64_t latestUpdateTs; - int32_t processDataBlocks; - int64_t processDataSize; + int32_t inputDataBlocks; + int64_t inputDataSize; + double procsThroughput; + int64_t outputDataBlocks; + int64_t outputDataSize; + double outputThroughput; int32_t dispatch; int64_t dispatchDataSize; int32_t checkpoint; @@ -431,7 +433,6 @@ struct SStreamTask { SHistoryTaskInfo hTaskInfo; STaskId streamTaskId; STaskExecStatisInfo execInfo; - SArray* pReadyMsgList; // SArray TdThreadMutex lock; // secure the operation of set task status and puting data into inputQ SMsgCb* pMsgCb; // msg handle SStreamState* pState; // state backend @@ -517,6 +518,9 @@ typedef struct STaskUpdateEntry { int32_t transId; } STaskUpdateEntry; +typedef int32_t (*__state_trans_user_fn)(SStreamTask*, void* param); +typedef int32_t (*__stream_task_expand_fn)(struct SStreamTask* pTask); + SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, SEpSet* pEpset, bool fillHistory, int64_t triggerParam, SArray* pTaskList, bool hasFillhistory, int8_t subtableWithoutMd5); int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); @@ -562,6 +566,8 @@ typedef struct STaskCkptInfo { int64_t latestId; // saved checkpoint id int64_t latestVer; // saved checkpoint ver int64_t latestTime; // latest checkpoint time + int64_t latestSize; // latest checkpoint size + int8_t remoteBackup; // latest checkpoint backup done int64_t activeId; // current active checkpoint id int32_t activeTransId; // checkpoint trans id int8_t failed; // denote if the checkpoint is failed or not @@ -579,8 +585,12 @@ typedef struct STaskStatusEntry { int64_t inputQUnchangeCounter; double inputQUsed; // in MiB double inputRate; - double sinkQuota; // existed quota size for sink task - double sinkDataSize; // sink to dst data size + double procsThroughput; // duration between one element put into input queue and being processed. + double procsTotal; // duration between one element put into input queue and being processed. + double outputThroughput; // the size of dispatched result blocks in bytes + double outputTotal; // the size of dispatched result blocks in bytes + double sinkQuota; // existed quota size for sink task + double sinkDataSize; // sink to dst data size int64_t startTime; int64_t startCheckpointId; int64_t startCheckpointVer; @@ -605,7 +615,8 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask); int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg); int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code); -SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); +SStreamUpstreamEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); +SEpSet* streamTaskGetDownstreamEpInfo(SStreamTask* pTask, int32_t taskId); void streamTaskInputFail(SStreamTask* pTask); @@ -658,7 +669,18 @@ int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated); int32_t streamExecScanHistoryInFuture(SStreamTask* pTask, int32_t idleDuration); bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer); +// checkpoint related +int32_t streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTransId, int64_t* pCheckpointId); +int32_t streamTaskSetActiveCheckpointInfo(SStreamTask* pTask, int64_t activeCheckpointId); +int32_t streamTaskSetFailedChkptInfo(SStreamTask* pTask, int32_t transId, int64_t checkpointId); +bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId); +void streamTaskGetTriggerRecvStatus(SStreamTask* pTask, int32_t* pRecved, int32_t* pTotal); +void streamTaskInitTriggerDispatchInfo(SStreamTask* pTask); +void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId); +int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, SRpcHandleInfo* pInfo, int32_t code); + int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); +int32_t streamQueueGetNumOfUnAccessedItems(const SStreamQueue* pQueue); // common void streamTaskPause(SStreamTask* pTask); @@ -668,10 +690,12 @@ int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstre void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDownstreamTask); int32_t streamTaskReleaseState(SStreamTask* pTask); int32_t streamTaskReloadState(SStreamTask* pTask); +void streamTaskOpenUpstreamInput(SStreamTask* pTask, int32_t taskId); void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); -int32_t streamTaskSetDb(SStreamMeta* pMeta, void* pTask, char* key); +int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key); bool streamTaskIsSinkTask(const SStreamTask* pTask); +void streamTaskSetRemoveBackendFiles(SStreamTask* pTask); void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask); void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc); @@ -718,9 +742,9 @@ void streamMetaResetStartInfo(STaskStartInfo* pMeta); SArray* streamMetaSendMsgBeforeCloseTasks(SStreamMeta* pMeta); void streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool isLeader); void streamMetaLoadAllTasks(SStreamMeta* pMeta); -int32_t streamMetaStartAllTasks(SStreamMeta* pMeta); +int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn fn); int32_t streamMetaStopAllTasks(SStreamMeta* pMeta); -int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId); +int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId, __stream_task_expand_fn fn); bool streamMetaAllTasksReady(const SStreamMeta* pMeta); // timer @@ -728,7 +752,9 @@ tmr_h streamTimerGetInstance(); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); -int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); +int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTriggerRsp* pRsp); +int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId, int32_t downstreamNodeId, int32_t downstreamTaskId); +int32_t streamTaskProcessCheckpointReadyRsp(SStreamTask* pTask, int32_t upstreamTaskId, int64_t checkpointId); int32_t streamTaskBuildCheckpoint(SStreamTask* pTask); void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg); int32_t streamAlignTransferState(SStreamTask* pTask); @@ -736,6 +762,10 @@ int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskI int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask); int32_t streamTaskBuildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SRpcMsg* pMsg, int32_t setCode); +int32_t streamBuildAndSendCheckpointUpdateMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId, STaskId* pHTaskId, + SCheckpointInfo* pCheckpointInfo, int8_t dropRelHTask); +int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpointInfoReq* pReq); +SActiveCheckpointInfo* streamTaskCreateActiveChkptInfo(); // stream task state machine, and event handling SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask); diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 9ae75bade2..8f8434dfc1 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -910,6 +910,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_STREAM_EXEC_CANCELLED TAOS_DEF_ERROR_CODE(0, 0x4102) #define TSDB_CODE_STREAM_INVALID_STATETRANS TAOS_DEF_ERROR_CODE(0, 0x4103) #define TSDB_CODE_STREAM_TASK_IVLD_STATUS TAOS_DEF_ERROR_CODE(0, 0x4104) +#define TSDB_CODE_STREAM_NOT_LEADER TAOS_DEF_ERROR_CODE(0, 0x4105) // TDLite #define TSDB_CODE_TDLITE_IVLD_OPEN_FLAGS TAOS_DEF_ERROR_CODE(0, 0x5100) diff --git a/source/common/src/rsync.c b/source/common/src/rsync.c index e448aec5e0..2ed21616dc 100644 --- a/source/common/src/rsync.c +++ b/source/common/src/rsync.c @@ -114,29 +114,34 @@ static int32_t execCommand(char* command){ void stopRsync() { int32_t code = #ifdef WINDOWS - system("taskkill /f /im rsync.exe"); + system("taskkill /f /im rsync.exe"); #else - system("pkill rsync"); + system("pkill rsync"); #endif - if(code != 0){ - uError("[rsync] stop rsync server failed,"ERRNO_ERR_FORMAT, ERRNO_ERR_DATA); - return; + + if (code != 0) { + uError("[rsync] stop rsync server failed," ERRNO_ERR_FORMAT, ERRNO_ERR_DATA); + } else { + uDebug("[rsync] stop rsync server successful"); } - uDebug("[rsync] stop rsync server successful"); + + taosMsleep(500); // sleep 500 ms to wait for the completion of kill operation. } void startRsync() { - if(taosMulMkDir(tsCheckpointBackupDir) != 0){ - uError("[rsync] build checkpoint backup dir failed, dir:%s,"ERRNO_ERR_FORMAT, tsCheckpointBackupDir, ERRNO_ERR_DATA); + if (taosMulMkDir(tsCheckpointBackupDir) != 0) { + uError("[rsync] build checkpoint backup dir failed, path:%s," ERRNO_ERR_FORMAT, tsCheckpointBackupDir, + ERRNO_ERR_DATA); return; } + removeEmptyDir(); char confDir[PATH_MAX] = {0}; snprintf(confDir, PATH_MAX, "%srsync.conf", tsCheckpointBackupDir); int32_t code = generateConfigFile(confDir); - if(code != 0){ + if (code != 0) { return; } @@ -144,25 +149,25 @@ void startRsync() { snprintf(cmd, PATH_MAX, "rsync --daemon --port=%d --config=%s", tsRsyncPort, confDir); // start rsync service to backup checkpoint code = system(cmd); - if(code != 0){ - uError("[rsync] start server failed, code:%d,"ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); - return; + if (code != 0) { + uError("[rsync] start server failed, code:%d," ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); + } else { + uDebug("[rsync] start server successful"); } - uDebug("[rsync] start server successful"); } -int32_t uploadRsync(const char* id, const char* path) { +int32_t uploadByRsync(const char* id, const char* path) { + int64_t st = taosGetTimestampMs(); + char command[PATH_MAX] = {0}; + #ifdef WINDOWS char pathTransform[PATH_MAX] = {0}; changeDirFromWindowsToLinux(path, pathTransform); -#endif - char command[PATH_MAX] = {0}; -#ifdef WINDOWS - if(pathTransform[strlen(pathTransform) - 1] != '/'){ + if(pathTransform[strlen(pathTransform) - 1] != '/') { #else - if(path[strlen(path) - 1] != '/'){ + if (path[strlen(path) - 1] != '/') { #endif snprintf(command, PATH_MAX, "rsync -av --delete --timeout=10 --bwlimit=100000 %s/ rsync://%s/checkpoint/%s/", #ifdef WINDOWS @@ -178,26 +183,37 @@ int32_t uploadRsync(const char* id, const char* path) { #else path #endif - , tsSnodeAddress, id); + , + tsSnodeAddress, id); } int32_t code = execCommand(command); - if(code != 0){ - uError("[rsync] send failed code:%d," ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); - return -1; + if (code != 0) { + uError("[rsync] s-task:%s upload checkpoint data in %s to %s failed, code:%d," ERRNO_ERR_FORMAT, id, path, + tsSnodeAddress, code, ERRNO_ERR_DATA); + } else { + int64_t el = (taosGetTimestampMs() - st); + uDebug("[rsync] s-task:%s upload checkpoint data in:%s to %s successfully, elapsed time:%" PRId64 "ms", id, path, + tsSnodeAddress, el); } - uDebug("[rsync] upload data:%s successful", id); - return 0; + return code; } +// abort from retry if quit int32_t downloadRsync(const char* id, const char* path) { + int64_t st = taosGetTimestampMs(); + int32_t MAX_RETRY = 60; + int32_t times = 0; + int32_t code = 0; + #ifdef WINDOWS char pathTransform[PATH_MAX] = {0}; changeDirFromWindowsToLinux(path, pathTransform); #endif + char command[PATH_MAX] = {0}; - snprintf(command, PATH_MAX, "rsync -av --timeout=10 --bwlimit=100000 rsync://%s/checkpoint/%s/ %s", + snprintf(command, PATH_MAX, "rsync -av --debug=all --timeout=10 --bwlimit=100000 rsync://%s/checkpoint/%s/ %s", tsSnodeAddress, id, #ifdef WINDOWS pathTransform @@ -206,14 +222,22 @@ int32_t downloadRsync(const char* id, const char* path) { #endif ); - int32_t code = execCommand(command); - if (code != 0) { - uError("[rsync] get failed code:%d," ERRNO_ERR_FORMAT, code, ERRNO_ERR_DATA); - return -1; + uDebug("[rsync] %s start to sync data from remote to:%s, %s", id, path, command); + + while(times++ < MAX_RETRY) { + code = execCommand(command); + if (code != TSDB_CODE_SUCCESS) { + uError("[rsync] %s download checkpoint data:%s failed, retry after 1sec, times:%d, code:%d," ERRNO_ERR_FORMAT, id, + path, times, code, ERRNO_ERR_DATA); + taosSsleep(1); + } else { + int32_t el = taosGetTimestampMs() - st; + uDebug("[rsync] %s download checkpoint data:%s successfully, elapsed time:%dms", id, path, el); + break; + } } - uDebug("[rsync] down data:%s successful", id); - return 0; + return code; } int32_t deleteRsync(const char* id) { diff --git a/source/common/src/systable.c b/source/common/src/systable.c index cac9ee0e8b..6558df1fc1 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -188,16 +188,23 @@ static const SSysDbTableSchema streamTaskSchema[] = { {.name = "status", .bytes = 12 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "stage", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "in_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "process_total", .bytes = 14, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "process_throughput", .bytes = 14, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "out_total", .bytes = 14, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "out_throughput", .bytes = 14, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, +// {.name = "dispatch_throughput", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, +// {.name = "dispatch_total", .bytes = 12, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "info", .bytes = 35, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "info", .bytes = 40+ VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "start_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, {.name = "start_id", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "start_ver", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "checkpoint_time", .bytes = 8, .type = TSDB_DATA_TYPE_TIMESTAMP, .sysInfo = false}, {.name = "checkpoint_id", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, - {.name = "checkpoint_version", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, - {.name = "checkpoint_backup", .bytes = 15, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "ds_err_info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "checkpoint_ver", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, + {.name = "checkpoint_size", .bytes = 14 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "checkpoint_backup", .bytes = 14 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, + {.name = "extra_info", .bytes = 25 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "history_task_id", .bytes = 16 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "history_task_status", .bytes = 12 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, }; diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index fb97c011fa..0e80d431d1 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -1318,6 +1318,7 @@ int32_t tRowKeyAssign(SRowKey *pDst, SRowKey *pSrc) { pVal->val = pSrc->pks[i].val; } else { pVal->nData = pSrc->pks[i].nData; + ASSERT(pSrc->pks[i].pData != NULL); memcpy(pVal->pData, pSrc->pks[i].pData, pVal->nData); } } diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 691eccd174..f034244c69 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -273,7 +273,8 @@ int32_t tsCompactPullupInterval = 10; int32_t tsMqRebalanceInterval = 2; int32_t tsStreamCheckpointInterval = 60; float tsSinkDataRate = 2.0; -int32_t tsStreamNodeCheckInterval = 16; +int32_t tsStreamNodeCheckInterval = 20; +int32_t tsMaxConcurrentCheckpoint = 1; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups @@ -621,114 +622,80 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) { } static int32_t taosAddServerCfg(SConfig *pCfg) { - if (cfgAddDir(pCfg, "dataDir", tsDataDir, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddFloat(pCfg, "minimalDataDirGB", 2.0f, 0.001f, 10000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + tsNumOfCommitThreads = tsNumOfCores / 2; + tsNumOfCommitThreads = TRANGE(tsNumOfCommitThreads, 2, 4); tsNumOfSupportVnodes = tsNumOfCores * 2 + 5; tsNumOfSupportVnodes = TMAX(tsNumOfSupportVnodes, 2); - if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 4096, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - - if (cfgAddString(pCfg, "encryptAlgorithm", tsEncryptAlgorithm, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddString(pCfg, "encryptScope", tsEncryptScope, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - // if (cfgAddString(pCfg, "authCode", tsAuthCode, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - - if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) - return -1; - if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) - return -1; - - if (cfgAddInt32(pCfg, "queryBufferSize", tsQueryBufferSize, -1, 500000000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; - - tsNumOfCommitThreads = tsNumOfCores / 2; - tsNumOfCommitThreads = TRANGE(tsNumOfCommitThreads, 2, 4); - if (cfgAddInt32(pCfg, "numOfCommitThreads", tsNumOfCommitThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; tsNumOfMnodeReadThreads = tsNumOfCores / 8; tsNumOfMnodeReadThreads = TRANGE(tsNumOfMnodeReadThreads, 1, 4); - if (cfgAddInt32(pCfg, "numOfMnodeReadThreads", tsNumOfMnodeReadThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; tsNumOfVnodeQueryThreads = tsNumOfCores * 2; tsNumOfVnodeQueryThreads = TMAX(tsNumOfVnodeQueryThreads, 16); - if (cfgAddInt32(pCfg, "numOfVnodeQueryThreads", tsNumOfVnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; - - if (cfgAddFloat(pCfg, "ratioOfVnodeStreamThreads", tsRatioOfVnodeStreamThreads, 0.01, 4, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; tsNumOfVnodeFetchThreads = tsNumOfCores / 4; tsNumOfVnodeFetchThreads = TMAX(tsNumOfVnodeFetchThreads, 4); - if (cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; tsNumOfVnodeRsmaThreads = tsNumOfCores / 4; tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); - if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; tsNumOfQnodeQueryThreads = tsNumOfCores * 2; tsNumOfQnodeQueryThreads = TMAX(tsNumOfQnodeQueryThreads, 16); - if (cfgAddInt32(pCfg, "numOfQnodeQueryThreads", tsNumOfQnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; + + tsNumOfSnodeStreamThreads = tsNumOfCores / 4; + tsNumOfSnodeStreamThreads = TRANGE(tsNumOfSnodeStreamThreads, 2, 4); + + tsNumOfSnodeWriteThreads = tsNumOfCores / 4; + tsNumOfSnodeWriteThreads = TRANGE(tsNumOfSnodeWriteThreads, 2, 4); + + tsRpcQueueMemoryAllowed = tsTotalMemoryKB * 1024 * 0.1; + tsRpcQueueMemoryAllowed = TRANGE(tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10LL, TSDB_MAX_MSG_SIZE * 10000LL); + + // clang-format off + if (cfgAddDir(pCfg, "dataDir", tsDataDir, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddFloat(pCfg, "minimalDataDirGB", 2.0f, 0.001f, 10000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 4096, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + + if (cfgAddString(pCfg, "encryptAlgorithm", tsEncryptAlgorithm, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddString(pCfg, "encryptScope", tsEncryptScope, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + + if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; + if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; + + if (cfgAddInt32(pCfg, "queryBufferSize", tsQueryBufferSize, -1, 500000000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfCommitThreads", tsNumOfCommitThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + + if (cfgAddInt32(pCfg, "numOfMnodeReadThreads", tsNumOfMnodeReadThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfVnodeQueryThreads", tsNumOfVnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddFloat(pCfg, "ratioOfVnodeStreamThreads", tsRatioOfVnodeStreamThreads, 0.01, 4, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfVnodeFetchThreads", tsNumOfVnodeFetchThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + + if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfQnodeQueryThreads", tsNumOfQnodeQueryThreads, 4, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; // tsNumOfQnodeFetchThreads = tsNumOfCores / 2; // tsNumOfQnodeFetchThreads = TMAX(tsNumOfQnodeFetchThreads, 4); // if (cfgAddInt32(pCfg, "numOfQnodeFetchThreads", tsNumOfQnodeFetchThreads, 1, 1024, 0) != 0) return -1; - tsNumOfSnodeStreamThreads = tsNumOfCores / 4; - tsNumOfSnodeStreamThreads = TRANGE(tsNumOfSnodeStreamThreads, 2, 4); - if (cfgAddInt32(pCfg, "numOfSnodeSharedThreads", tsNumOfSnodeStreamThreads, 2, 1024, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "numOfSnodeSharedThreads", tsNumOfSnodeStreamThreads, 2, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "numOfSnodeUniqueThreads", tsNumOfSnodeWriteThreads, 2, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - tsNumOfSnodeWriteThreads = tsNumOfCores / 4; - tsNumOfSnodeWriteThreads = TRANGE(tsNumOfSnodeWriteThreads, 2, 4); - if (cfgAddInt32(pCfg, "numOfSnodeUniqueThreads", tsNumOfSnodeWriteThreads, 2, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; + if (cfgAddInt64(pCfg, "rpcQueueMemoryAllowed", tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10L, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - tsRpcQueueMemoryAllowed = tsTotalMemoryKB * 1024 * 0.1; - tsRpcQueueMemoryAllowed = TRANGE(tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10LL, TSDB_MAX_MSG_SIZE * 10000LL); - if (cfgAddInt64(pCfg, "rpcQueueMemoryAllowed", tsRpcQueueMemoryAllowed, TSDB_MAX_MSG_SIZE * 10L, INT64_MAX, - CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "syncElectInterval", tsElectInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "syncHeartbeatInterval", tsHeartbeatInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "syncSnapReplMaxWaitN", tsSnapReplMaxWaitN, 16, (TSDB_SYNC_SNAP_BUFFER_SIZE >> 2), CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "syncElectInterval", tsElectInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != - 0) - return -1; - if (cfgAddInt32(pCfg, "syncHeartbeatInterval", tsHeartbeatInterval, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "syncSnapReplMaxWaitN", tsSnapReplMaxWaitN, 16, (TSDB_SYNC_SNAP_BUFFER_SIZE >> 2), - CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "arbHeartBeatIntervalSec", tsArbHeartBeatIntervalSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "arbCheckSyncIntervalSec", tsArbCheckSyncIntervalSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "arbSetAssignedTimeoutSec", tsArbSetAssignedTimeoutSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "arbHeartBeatIntervalSec", tsArbHeartBeatIntervalSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "arbCheckSyncIntervalSec", tsArbCheckSyncIntervalSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "arbSetAssignedTimeoutSec", tsArbSetAssignedTimeoutSec, 1, 60 * 24 * 2, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; - - if (cfgAddInt64(pCfg, "mndSdbWriteDelta", tsMndSdbWriteDelta, 20, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt64(pCfg, "mndLogRetention", tsMndLogRetention, 500, 10000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt64(pCfg, "mndSdbWriteDelta", tsMndSdbWriteDelta, 20, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt64(pCfg, "mndLogRetention", tsMndLogRetention, 500, 10000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "skipGrant", tsMndSkipGrant, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "monitorFqdn", tsMonitorFqdn, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; @@ -736,9 +703,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "monitorMaxLogs", tsMonitorMaxLogs, 1, 1000000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "monitorComp", tsMonitorComp, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "monitorLogProtocol", tsMonitorLogProtocol, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "monitorIntervalForBasic", tsMonitorIntervalForBasic, 1, 200000, CFG_SCOPE_SERVER, - CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "monitorIntervalForBasic", tsMonitorIntervalForBasic, 1, 200000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "monitorForceV2", tsMonitorForceV2, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "audit", tsEnableAudit, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; @@ -752,68 +717,43 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "rsyncPort", tsRsyncPort, 1, 65535, CFG_SCOPE_BOTH, CFG_DYN_SERVER) != 0) return -1; if (cfgAddString(pCfg, "snodeAddress", tsSnodeAddress, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) - return -1; + if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "tmqRowSize", tmqRowSize, 1, 1000000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "maxTsmaNum", tsMaxTsmaNum, 0, 3, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "transPullupInterval", tsTransPullupInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; - if (cfgAddInt32(pCfg, "compactPullupInterval", tsCompactPullupInterval, 1, 10000, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt32(pCfg, "mqRebalanceInterval", tsMqRebalanceInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; + if (cfgAddInt32(pCfg, "transPullupInterval", tsTransPullupInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "compactPullupInterval", tsCompactPullupInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "mqRebalanceInterval", tsMqRebalanceInterval, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "ttlUnit", tsTtlUnit, 1, 86400 * 365, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "ttlPushInterval", tsTtlPushIntervalSec, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt32(pCfg, "ttlBatchDropNum", tsTtlBatchDropNum, 0, INT32_MAX, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddBool(pCfg, "ttlChangeOnWrite", tsTtlChangeOnWrite, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "ttlFlushThreshold", tsTtlFlushThreshold, -1, 1000000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; - if (cfgAddInt32(pCfg, "trimVDbIntervalSec", tsTrimVDbIntervalSec, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; - if (cfgAddInt32(pCfg, "s3MigrateIntervalSec", tsS3MigrateIntervalSec, 600, 100000, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt32(pCfg, "ttlPushInterval", tsTtlPushIntervalSec, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "ttlBatchDropNum", tsTtlBatchDropNum, 0, INT32_MAX, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddBool(pCfg, "ttlChangeOnWrite", tsTtlChangeOnWrite, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt32(pCfg, "ttlFlushThreshold", tsTtlFlushThreshold, -1, 1000000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "trimVDbIntervalSec", tsTrimVDbIntervalSec, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "s3MigrateIntervalSec", tsS3MigrateIntervalSec, 600, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddBool(pCfg, "s3MigrateEnabled", tsS3MigrateEnabled, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "uptimeInterval", tsUptimeInterval, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; + if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; - if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX, - CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "udf", tsStartUdfd, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "udfdResFuncs", tsUdfdResFuncs, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "udfdLdLibPath", tsUdfdLdLibPath, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "disableStream", tsDisableStream, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; - if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt64(pCfg, "streamBufferSize", tsStreamBufferSize, 0, INT64_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt64(pCfg, "streamAggCnt", tsStreamAggCnt, 2, INT32_MAX, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "checkpointInterval", tsStreamCheckpointInterval, 60, 1200, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt32(pCfg, "checkpointInterval", tsStreamCheckpointInterval, 60, 1200, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddFloat(pCfg, "streamSinkDataRate", tsSinkDataRate, 0.1, 5, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "concurrentCheckpoint", tsMaxConcurrentCheckpoint, 1, 10, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; - if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt32(pCfg, "cacheLazyLoadThreshold", tsCacheLazyLoadThreshold, 0, 100000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddFloat(pCfg, "fPrecision", tsFPrecision, 0.0f, 100000.0f, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddFloat(pCfg, "dPrecision", tsDPrecision, 0.0f, 1000000.0f, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; @@ -823,41 +763,23 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddString(pCfg, "compressor", tsCompressor, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "filterScalarMode", tsFilterScalarMode, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - if (cfgAddInt32(pCfg, "maxStreamBackendCache", tsMaxStreamBackendCache, 16, 1024, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt32(pCfg, "pqSortMemThreshold", tsPQSortMemThreshold, 1, 10240, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; - if (cfgAddInt32(pCfg, "resolveFQDNRetryTime", tsResolveFQDNRetryTime, 1, 10240, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) - return -1; + if (cfgAddInt32(pCfg, "maxStreamBackendCache", tsMaxStreamBackendCache, 16, 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "pqSortMemThreshold", tsPQSortMemThreshold, 1, 10240, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; + if (cfgAddInt32(pCfg, "resolveFQDNRetryTime", tsResolveFQDNRetryTime, 1, 10240, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; - /* - if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -1, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; - if (tsS3BlockSize > -1 && tsS3BlockSize < 1024) { - uError("failed to config s3blocksize since value:%d. Valid range: -1 or [1024, 1024 * 1024]", tsS3BlockSize); - return -1; - } - if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != - 0) - return -1; - */ - if (cfgAddInt32(pCfg, "s3PageCacheSize", tsS3PageCacheSize, 4, 1024 * 1024 * 1024, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; - if (cfgAddInt32(pCfg, "s3UploadDelaySec", tsS3UploadDelaySec, 1, 60 * 60 * 24 * 30, CFG_SCOPE_SERVER, - CFG_DYN_ENT_SERVER) != 0) - return -1; + + if (cfgAddInt32(pCfg, "s3PageCacheSize", tsS3PageCacheSize, 4, 1024 * 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "s3UploadDelaySec", tsS3UploadDelaySec, 1, 60 * 60 * 24 * 30, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; // min free disk space used to check if the disk is full [50MB, 1GB] - if (cfgAddInt64(pCfg, "minDiskFreeSize", tsMinDiskFreeSize, TFS_MIN_DISK_FREE_SIZE, 1024 * 1024 * 1024, - CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) - return -1; + if (cfgAddInt64(pCfg, "minDiskFreeSize", tsMinDiskFreeSize, TFS_MIN_DISK_FREE_SIZE, 1024 * 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; if (cfgAddBool(pCfg, "enableWhiteList", tsEnableWhiteList, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) return -1; + // clang-format on + // GRANT_CFG_ADD; return 0; } diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index 76b0565402..6fdd1ba33d 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -403,13 +403,6 @@ int mainWindows(int argc, char **argv) { return -1; } - if(dmGetEncryptKey() != 0){ - dError("failed to start since failed to get encrypt key"); - taosCloseLog(); - taosCleanupArgs(); - return -1; - }; - if (taosConvInit() != 0) { dError("failed to init conv"); taosCloseLog(); @@ -447,6 +440,13 @@ int mainWindows(int argc, char **argv) { osSetProcPath(argc, (char **)argv); taosCleanupArgs(); + if(dmGetEncryptKey() != 0){ + dError("failed to start since failed to get encrypt key"); + taosCloseLog(); + taosCleanupArgs(); + return -1; + }; + if (dmInit() != 0) { if (terrno == TSDB_CODE_NOT_FOUND) { dError("failed to init dnode since unsupported platform, please visit https://www.taosdata.com for support"); diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index 880e96adfb..9b07b6a3d8 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -78,6 +78,7 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_UPDATE_CHKPT, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; @@ -88,6 +89,8 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 0295e88106..bfc9e92293 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -961,12 +961,16 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_TRIGGER_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_RESET, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_HEARTBEAT_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_STREAM_REQ_CHKPT_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_GET_STREAM_PROGRESS, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_UPDATE_CHKPT, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_REPLICA, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIG, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_CONFIRM, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 5a4caf3348..6d2a89ddc9 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -46,18 +46,8 @@ typedef struct SVgroupChangeInfo { SArray *pUpdateNodeList; // SArray } SVgroupChangeInfo; -// time to generated the checkpoint, if now() - checkpointTs >= tsCheckpointInterval, this checkpoint will be discard -// to avoid too many checkpoints for a taskk in the waiting list -typedef struct SCheckpointCandEntry { - char *pName; - int64_t streamId; - int64_t checkpointTs; - int64_t checkpointId; -} SCheckpointCandEntry; - typedef struct SStreamTransMgmt { SHashObj *pDBTrans; - SHashObj *pWaitingList; // stream id list, of which timed checkpoint failed to be issued due to the trans conflict. } SStreamTransMgmt; typedef struct SStreamExecInfo { @@ -97,7 +87,7 @@ void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); int32_t mndPersistStream(STrans *pTrans, SStreamObj *pStream); int32_t mndStreamRegisterTrans(STrans *pTrans, const char *pTransName, int64_t streamId); -int32_t mndAddtoCheckpointWaitingList(SStreamObj *pStream, int64_t checkpointId); +int32_t mndStreamClearFinishedTrans(SMnode *pMnode, int32_t *pNumOfActiveChkpt); bool mndStreamTransConflictCheck(SMnode *pMnode, int64_t streamId, const char *pTransName, bool lock); int32_t mndStreamGetRelTrans(SMnode *pMnode, int64_t streamId); @@ -130,6 +120,7 @@ void destroyStreamTaskIter(SStreamTaskIter *pIter); bool streamTaskIterNextTask(SStreamTaskIter *pIter); SStreamTask *streamTaskIterGetCurrent(SStreamTaskIter *pIter); void mndInitExecInfo(); +void removeExpiredNodeInfo(const SArray *pNodeSnapshot); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 850c527a14..cad8c6d745 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -90,21 +90,6 @@ static void *mndBuildTimerMsg(int32_t *pContLen) { return pReq; } -static void *mndBuildCheckpointTickMsg(int32_t *pContLen, int64_t sec) { - SMStreamTickReq timerReq = { - .tick = sec, - }; - - int32_t contLen = tSerializeSMStreamTickMsg(NULL, 0, &timerReq); - if (contLen <= 0) return NULL; - void *pReq = rpcMallocCont(contLen); - if (pReq == NULL) return NULL; - - tSerializeSMStreamTickMsg(pReq, contLen, &timerReq); - *pContLen = contLen; - return pReq; -} - static void mndPullupTrans(SMnode *pMnode) { mTrace("pullup trans msg"); int32_t contLen = 0; @@ -174,21 +159,12 @@ static void mndCalMqRebalance(SMnode *pMnode) { } } -static void mndStreamCheckpointTick(SMnode *pMnode, int64_t sec) { - int32_t contLen = 0; - void *pReq = mndBuildCheckpointTickMsg(&contLen, sec); - if (pReq != NULL) { - SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_TIMER, .pCont = pReq, .contLen = contLen}; - tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); - } -} - -static void mndStreamCheckpointRemain(SMnode *pMnode) { - int32_t contLen = 0; - void *pReq = mndBuildCheckpointTickMsg(&contLen, 0); - if (pReq != NULL) { - SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, .pCont = pReq, .contLen = contLen}; - tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); +static void mndStreamCheckpointTimer(SMnode *pMnode) { + SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); + if (pMsg != NULL) { + int32_t size = sizeof(SMStreamDoCheckpointMsg); + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size}; + tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); } } @@ -369,12 +345,8 @@ void mndDoTimerPullupTask(SMnode *pMnode, int64_t sec) { mndCalMqRebalance(pMnode); } - if (sec % tsStreamCheckpointInterval == 0) { - mndStreamCheckpointTick(pMnode, sec); - } - - if (sec % 5 == 0) { - mndStreamCheckpointRemain(pMnode); + if (sec % 30 == 0) { // send the checkpoint info every 30 sec + mndStreamCheckpointTimer(pMnode); } if (sec % tsStreamNodeCheckInterval == 0) { @@ -413,6 +385,7 @@ void mndDoTimerCheckTask(SMnode *pMnode, int64_t sec) { mndSyncCheckTimeout(pMnode); } } + static void *mndThreadFp(void *param) { SMnode *pMnode = param; int64_t lastTime = 0; @@ -834,10 +807,9 @@ _OVER: pMsg->msgType == TDMT_MND_TRANS_TIMER || pMsg->msgType == TDMT_MND_TTL_TIMER || pMsg->msgType == TDMT_MND_TRIM_DB_TIMER || pMsg->msgType == TDMT_MND_UPTIME_TIMER || pMsg->msgType == TDMT_MND_COMPACT_TIMER || pMsg->msgType == TDMT_MND_NODECHECK_TIMER || - pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE || - pMsg->msgType == TDMT_MND_STREAM_CHECKPOINT_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT || - pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER || - pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER || pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER) { + pMsg->msgType == TDMT_MND_GRANT_HB_TIMER || pMsg->msgType == TDMT_MND_STREAM_REQ_CHKPT || + pMsg->msgType == TDMT_MND_S3MIGRATE_DB_TIMER || pMsg->msgType == TDMT_MND_ARB_HEARTBEAT_TIMER || + pMsg->msgType == TDMT_MND_ARB_CHECK_SYNC_TIMER) { mTrace("timer not process since mnode restored:%d stopped:%d, sync restored:%d role:%s ", pMnode->restored, pMnode->stopped, state.restored, syncStr(state.state)); return -1; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 79a04a5348..9c8f3f26ff 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -45,9 +45,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessCreateStreamReqFromMNode(SRpcMsg *pReq); static int32_t mndProcessDropStreamReqFromMNode(SRpcMsg *pReq); -static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); -static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); -static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq); +static int32_t mndProcessStreamCheckpoint(SRpcMsg *pReq); static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); static void mndCancelGetNextStream(SMnode *pMnode, void *pIter); static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); @@ -64,7 +62,7 @@ static int32_t mndProcessStreamReqCheckpoint(SRpcMsg *pReq); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); -static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); +static int32_t removeExpiredNodeEntryAndTask(SArray *pNodeSnapshot); static int32_t doKillCheckpointTrans(SMnode *pMnode, const char *pDbName, size_t len); static SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); @@ -114,10 +112,8 @@ int32_t mndInitStream(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_STREAM_DROP_RSP, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_VND_STREAM_CHECK_POINT_SOURCE_RSP, mndTransProcessRsp); - mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_TIMER, mndProcessStreamCheckpointTmr); - mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamDoCheckpoint); + mndSetMsgHandle(pMnode, TDMT_MND_STREAM_BEGIN_CHECKPOINT, mndProcessStreamCheckpoint); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_REQ_CHKPT, mndProcessStreamReqCheckpoint); - mndSetMsgHandle(pMnode, TDMT_MND_STREAM_CHECKPOINT_CANDIDITATE, mndProcessStreamCheckpointInCandid); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_HEARTBEAT, mndProcessStreamHb); mndSetMsgHandle(pMnode, TDMT_STREAM_TASK_REPORT_CHECKPOINT, mndTransProcessRsp); mndSetMsgHandle(pMnode, TDMT_MND_STREAM_NODECHANGE_CHECK, mndProcessNodeCheckReq); @@ -143,9 +139,9 @@ int32_t mndInitStream(SMnode *pMnode) { void mndCleanupStream(SMnode *pMnode) { taosArrayDestroy(execInfo.pTaskList); + taosArrayDestroy(execInfo.pNodeList); taosHashCleanup(execInfo.pTaskMap); taosHashCleanup(execInfo.transMgmt.pDBTrans); - taosHashCleanup(execInfo.transMgmt.pWaitingList); taosHashCleanup(execInfo.pTransferStateStreams); taosThreadMutexDestroy(&execInfo.lock); mDebug("mnd stream exec info cleanup"); @@ -697,6 +693,8 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { SStreamObj streamObj = {0}; char *sql = NULL; int32_t sqlLen = 0; + const char* pMsg = "create stream tasks on dnodes"; + terrno = TSDB_CODE_SUCCESS; SCMCreateStreamReq createReq = {0}; @@ -709,8 +707,8 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { terrno = TSDB_CODE_MND_INVALID_PLATFORM; goto _OVER; #endif - mInfo("stream:%s, start to create stream, sql:%s", createReq.name, createReq.sql); + mInfo("stream:%s, start to create stream, sql:%s", createReq.name, createReq.sql); if (mndCheckCreateStreamReq(&createReq) != 0) { mError("stream:%s, failed to create since %s", createReq.name, terrstr()); goto _OVER; @@ -750,8 +748,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - STrans *pTrans = - doCreateTrans(pMnode, &streamObj, pReq, TRN_CONFLICT_DB, MND_STREAM_CREATE_NAME, "create stream tasks on dnodes"); + STrans *pTrans = doCreateTrans(pMnode, &streamObj, pReq, TRN_CONFLICT_DB, MND_STREAM_CREATE_NAME, pMsg); if (pTrans == NULL) { goto _OVER; } @@ -794,7 +791,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { // add into buffer firstly // to make sure when the hb from vnode arrived, the newly created tasks have been in the task map already. taosThreadMutexLock(&execInfo.lock); - mDebug("stream stream:%s start to register tasks into task_node_list", createReq.name); + mDebug("stream stream:%s start to register tasks into task nodeList", createReq.name); saveStreamTasksInfo(&streamObj, &execInfo); taosThreadMutexUnlock(&execInfo.lock); @@ -886,26 +883,10 @@ int64_t mndStreamGenChkptId(SMnode *pMnode, bool lock) { } } - mDebug("generated checkpoint %" PRId64 "", maxChkptId + 1); + mDebug("generate new checkpointId:%" PRId64, maxChkptId + 1); return maxChkptId + 1; } -static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - SSdb *pSdb = pMnode->pSdb; - if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { - return 0; - } - - SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); - pMsg->checkpointId = mndStreamGenChkptId(pMnode, true); - - int32_t size = sizeof(SMStreamDoCheckpointMsg); - SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size}; - tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); - return 0; -} - static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, int32_t nodeId, int64_t checkpointId, int64_t streamId, int32_t taskId, int32_t transId, int8_t mndTrigger) { SStreamCheckpointSourceReq req = {0}; @@ -987,7 +968,6 @@ static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStre bool conflict = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_CHECKPOINT_NAME, lock); if (conflict) { - mndAddtoCheckpointWaitingList(pStream, checkpointId); mWarn("checkpoint conflict with other trans in %s, ignore the checkpoint for stream:%s %" PRIx64, pStream->sourceDb, pStream->name, pStream->uid); return -1; @@ -1052,7 +1032,7 @@ _ERR: } int32_t initStreamNodeList(SMnode *pMnode) { - if (execInfo.pNodeList == NULL || (taosArrayGetSize(execInfo.pNodeList) == 0)) { + if (taosArrayGetSize(execInfo.pNodeList) == 0) { execInfo.pNodeList = taosArrayDestroy(execInfo.pNodeList); execInfo.pNodeList = extractNodeListFromStream(pMnode); } @@ -1084,7 +1064,7 @@ static bool taskNodeIsUpdated(SMnode *pMnode) { bool allReady = true; SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); if (!allReady) { - mWarn("not all vnodes ready"); + mWarn("not all vnodes ready, quit from vnodes status check"); taosArrayDestroy(pNodeSnapshot); taosThreadMutexUnlock(&execInfo.lock); return 0; @@ -1145,73 +1125,101 @@ static int32_t mndCheckNodeStatus(SMnode *pMnode) { return ready ? 0 : -1; } -static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { +typedef struct { + int64_t streamId; + int64_t duration; +} SCheckpointInterval; + +static int32_t streamWaitComparFn(const void* p1, const void* p2) { + const SCheckpointInterval* pInt1 = p1; + const SCheckpointInterval* pInt2 = p2; + if (pInt1->duration == pInt2->duration) { + return 0; + } + + return pInt1->duration > pInt2->duration? -1:1; +} + +static int32_t mndProcessStreamCheckpoint(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; SStreamObj *pStream = NULL; int32_t code = 0; + int32_t numOfCheckpointTrans = 0; if ((code = mndCheckNodeStatus(pMnode)) != 0) { return code; } - // make sure the time interval between two consecutive checkpoint trans is long enough - SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; + SArray* pList = taosArrayInit(4, sizeof(SCheckpointInterval)); + int64_t now = taosGetTimestampMs(); while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { - code = mndProcessStreamCheckpointTrans(pMnode, pStream, pMsg->checkpointId, 1, true); - sdbRelease(pSdb, pStream); - if (code == -1) { - break; - } - } - - return code; -} - -static int32_t mndProcessStreamCheckpointInCandid(SRpcMsg *pReq) { - SMnode *pMnode = pReq->info.node; - void *pIter = NULL; - int32_t code = 0; - - taosThreadMutexLock(&execInfo.lock); - int32_t num = taosHashGetSize(execInfo.transMgmt.pWaitingList); - taosThreadMutexUnlock(&execInfo.lock); - if (num == 0) { - return code; - } - - if ((code = mndCheckNodeStatus(pMnode)) != 0) { - return code; - } - - SArray *pList = taosArrayInit(4, sizeof(int64_t)); - while ((pIter = taosHashIterate(execInfo.transMgmt.pWaitingList, pIter)) != NULL) { - SCheckpointCandEntry *pEntry = pIter; - - SStreamObj *ps = mndAcquireStream(pMnode, pEntry->pName); - if (ps == NULL) { + int64_t duration = now - pStream->checkpointFreq; + if (duration < tsStreamCheckpointInterval * 1000) { + sdbRelease(pSdb, pStream); continue; } - mDebug("start to launch checkpoint for stream:%s %" PRIx64 " in candidate list", pEntry->pName, pEntry->streamId); + SCheckpointInterval in = {.streamId = pStream->uid, .duration = duration}; + taosArrayPush(pList, &in); - code = mndProcessStreamCheckpointTrans(pMnode, ps, pEntry->checkpointId, 1, true); - mndReleaseStream(pMnode, ps); + int32_t currentSize = taosArrayGetSize(pList); + mDebug("stream:%s (uid:0x%" PRIx64 ") checkpoint interval beyond threshold: %ds(%" PRId64 + "s) beyond threshold:%d", + pStream->name, pStream->uid, tsStreamCheckpointInterval, duration / 1000, currentSize); - if (code == TSDB_CODE_SUCCESS) { - taosArrayPush(pList, &pEntry->streamId); + sdbRelease(pSdb, pStream); + } + + int32_t size = taosArrayGetSize(pList); + if (size == 0) { + taosArrayDestroy(pList); + return code; + } + + taosArraySort(pList, streamWaitComparFn); + mndStreamClearFinishedTrans(pMnode, &numOfCheckpointTrans); + int32_t numOfQual = taosArrayGetSize(pList); + + if (numOfCheckpointTrans > tsMaxConcurrentCheckpoint) { + mDebug( + "%d stream(s) checkpoint interval longer than %ds, ongoing checkpoint trans:%d reach maximum allowed:%d, new " + "checkpoint trans are not allowed, wait for 30s", + numOfQual, tsStreamCheckpointInterval, numOfCheckpointTrans, tsMaxConcurrentCheckpoint); + taosArrayDestroy(pList); + return code; + } + + int32_t capacity = tsMaxConcurrentCheckpoint - numOfCheckpointTrans; + mDebug( + "%d stream(s) checkpoint interval longer than %ds, %d ongoing checkpoint trans, %d new checkpoint trans allowed, " + "concurrent trans threshold:%d", + numOfQual, tsStreamCheckpointInterval, numOfCheckpointTrans, capacity, tsMaxConcurrentCheckpoint); + + int32_t started = 0; + int64_t checkpointId = mndStreamGenChkptId(pMnode, true); + + for (int32_t i = 0; i < numOfQual; ++i) { + SCheckpointInterval *pCheckpointInfo = taosArrayGet(pList, i); + + SStreamObj *p = mndGetStreamObj(pMnode, pCheckpointInfo->streamId); + if (p != NULL) { + code = mndProcessStreamCheckpointTrans(pMnode, p, checkpointId, 1, true); + sdbRelease(pSdb, p); + + if (code != -1) { + started += 1; + + if (started >= capacity) { + mDebug("already start %d new checkpoint trans, current active checkpoint trans:%d", started, + (started + numOfCheckpointTrans)); + break; + } + } } } - for (int32_t i = 0; i < taosArrayGetSize(pList); ++i) { - int64_t *pId = taosArrayGet(pList, i); - - taosHashRemove(execInfo.transMgmt.pWaitingList, pId, sizeof(*pId)); - } - - int32_t remain = taosHashGetSize(execInfo.transMgmt.pWaitingList); - mDebug("%d in candidate list generated checkpoint, remaining:%d", (int32_t)taosArrayGetSize(pList), remain); taosArrayDestroy(pList); return code; } @@ -1629,15 +1637,76 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); // input queue - char vbuf[30] = {0}; - char buf[25] = {0}; - const char *queueInfoStr = "%4.2fMiB (%5.2f%)"; - sprintf(buf, queueInfoStr, pe->inputQUsed, pe->inputRate); + char vbuf[40] = {0}; + char buf[38] = {0}; + const char *queueInfoStr = "%4.2f MiB (%6.2f%)"; + snprintf(buf, tListLen(buf), queueInfoStr, pe->inputQUsed, pe->inputRate); STR_TO_VARSTR(vbuf, buf); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + // input total + const char* formatTotalMb = "%7.2f MiB"; + const char* formatTotalGb = "%7.2f GiB"; + if (pe->procsTotal < 1024) { + snprintf(buf, tListLen(buf), formatTotalMb, pe->procsTotal); + } else { + snprintf(buf, tListLen(buf), formatTotalGb, pe->procsTotal / 1024); + } + + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + + // process throughput + const char* formatKb = "%7.2f KiB/s"; + const char* formatMb = "%7.2f MiB/s"; + if (pe->procsThroughput < 1024) { + snprintf(buf, tListLen(buf), formatKb, pe->procsThroughput); + } else { + snprintf(buf, tListLen(buf), formatMb, pe->procsThroughput / 1024); + } + + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + + // output total + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + colDataSetNULL(pColInfo, numOfRows); + } else { + sprintf(buf, formatTotalMb, pe->outputTotal); + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + } + + // output throughput + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { + colDataSetNULL(pColInfo, numOfRows); + } else { + if (pe->outputThroughput < 1024) { + snprintf(buf, tListLen(buf), formatKb, pe->outputThroughput); + } else { + snprintf(buf, tListLen(buf), formatMb, pe->outputThroughput / 1024); + } + + memset(vbuf, 0, tListLen(vbuf)); + STR_TO_VARSTR(vbuf, buf); + + colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false); + } + // output queue // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); // STR_TO_VARSTR(vbuf, buf); @@ -1647,12 +1716,14 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS // info if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - const char *sinkStr = "%.2fMiB"; + const char *sinkStr = "%.2f MiB"; snprintf(buf, tListLen(buf), sinkStr, pe->sinkDataSize); } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { // offset info const char *offsetStr = "%" PRId64 " [%" PRId64 ", %" PRId64 "]"; snprintf(buf, tListLen(buf), offsetStr, pe->processedVer, pe->verRange.minVer, pe->verRange.maxVer); + } else { + memset(buf, 0, tListLen(buf)); } STR_TO_VARSTR(vbuf, buf); @@ -1688,6 +1759,10 @@ static int32_t setTaskAttrInResBlock(SStreamObj *pStream, SStreamTask *pTask, SS pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, (const char*)&pe->checkpointInfo.latestVer, false); + // checkpoint size + pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); + colDataSetNULL(pColInfo, numOfRows); + // checkpoint backup status pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); colDataSetVal(pColInfo, numOfRows, 0, true); @@ -2130,8 +2205,8 @@ static SArray *extractNodeListFromStream(SMnode *pMnode) { epsetToStr(&pEntry->epset, buf, tListLen(buf)); mDebug("extract nodeInfo from stream obj, nodeId:%d, %s", pEntry->nodeId, buf); } - taosHashCleanup(pHash); + taosHashCleanup(pHash); return plist; } @@ -2169,15 +2244,17 @@ static bool taskNodeExists(SArray *pList, int32_t nodeId) { return false; } -int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { +int32_t removeExpiredNodeEntryAndTask(SArray *pNodeSnapshot) { SArray *pRemovedTasks = taosArrayInit(4, sizeof(STaskId)); int32_t numOfTask = taosArrayGetSize(execInfo.pTaskList); for (int32_t i = 0; i < numOfTask; ++i) { - STaskId *pId = taosArrayGet(execInfo.pTaskList, i); - STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); + STaskId *pId = taosArrayGet(execInfo.pTaskList, i); - if (pEntry->nodeId == SNODE_HANDLE) continue; + STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); + if (pEntry->nodeId == SNODE_HANDLE) { + continue; + } bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId); if (!existed) { @@ -2193,24 +2270,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { mDebug("remove invalid stream tasks:%d, remain:%d", (int32_t)taosArrayGetSize(pRemovedTasks), (int32_t)taosArrayGetSize(execInfo.pTaskList)); - int32_t size = taosArrayGetSize(pNodeSnapshot); - SArray *pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); - for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { - SNodeEntry *p = taosArrayGet(execInfo.pNodeList, i); + removeExpiredNodeInfo(pNodeSnapshot); - for (int32_t j = 0; j < size; ++j) { - SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j); - if (pEntry->nodeId == p->nodeId) { - taosArrayPush(pValidNodeEntryList, p); - break; - } - } - } - - taosArrayDestroy(execInfo.pNodeList); - execInfo.pNodeList = pValidNodeEntryList; - - mDebug("remain %d valid node entries", (int32_t)taosArrayGetSize(pValidNodeEntryList)); taosArrayDestroy(pRemovedTasks); return 0; } @@ -2241,9 +2302,9 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { return 0; } - bool allVgroupsReady = true; - SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVgroupsReady); - if (!allVgroupsReady) { + bool allReady = true; + SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); + if (!allReady) { taosArrayDestroy(pNodeSnapshot); atomic_store_32(&mndNodeCheckSentinel, 0); mWarn("not all vnodes are ready, ignore the exec nodeUpdate check"); @@ -2251,31 +2312,31 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { } taosThreadMutexLock(&execInfo.lock); - removeExpirednodeEntryAndTask(pNodeSnapshot); + removeExpiredNodeEntryAndTask(pNodeSnapshot); SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot); if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { // kill current active checkpoint transaction, since the transaction is vnode wide. killAllCheckpointTrans(pMnode, &changeInfo); - code = mndProcessVgroupChange(pMnode, &changeInfo); // keep the new vnode snapshot if success if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { - mDebug("create trans successfully, update cached node list"); taosArrayDestroy(execInfo.pNodeList); - execInfo.pNodeList = pNodeSnapshot; + execInfo.pNodeList = extractNodeListFromStream(pMnode); execInfo.ts = ts; + mDebug("create trans successfully, update cached node list, numOfNodes:%d", + (int)taosArrayGetSize(execInfo.pNodeList)); } else { mError("unexpected code during create nodeUpdate trans, code:%s", tstrerror(code)); - taosArrayDestroy(pNodeSnapshot); } } else { mDebug("no update found in nodeList"); - taosArrayDestroy(pNodeSnapshot); } + taosArrayDestroy(pNodeSnapshot); taosThreadMutexUnlock(&execInfo.lock); + taosArrayDestroy(changeInfo.pUpdateNodeList); taosHashCleanup(changeInfo.pDBMap); @@ -2291,10 +2352,10 @@ static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { return 0; } - SMStreamNodeCheckMsg *pMsg = rpcMallocCont(sizeof(SMStreamNodeCheckMsg)); + int32_t size = sizeof(SMStreamNodeCheckMsg); + SMStreamNodeCheckMsg *pMsg = rpcMallocCont(size); - SRpcMsg rpcMsg = { - .msgType = TDMT_MND_STREAM_NODECHANGE_CHECK, .pCont = pMsg, .contLen = sizeof(SMStreamNodeCheckMsg)}; + SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_NODECHANGE_CHECK, .pCont = pMsg, .contLen = size}; tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); return 0; } @@ -2312,8 +2373,27 @@ void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode) { taosHashPut(pExecNode->pTaskMap, &id, sizeof(id), &entry, sizeof(entry)); taosArrayPush(pExecNode->pTaskList, &id); - mInfo("s-task:0x%x add into task buffer, total:%d", (int32_t)entry.id.taskId, - (int32_t)taosArrayGetSize(pExecNode->pTaskList)); + + int32_t num = (int32_t)taosArrayGetSize(pExecNode->pTaskList); + mInfo("s-task:0x%x add into task buffer, total:%d", (int32_t)entry.id.taskId, num); + + // add the new vgroups if not added yet + bool exist = false; + for(int32_t j = 0; j < taosArrayGetSize(pExecNode->pNodeList); ++j) { + SNodeEntry* pEntry = taosArrayGet(pExecNode->pNodeList, j); + if (pEntry->nodeId == pTask->info.nodeId) { + exist = true; + break; + } + } + + if (!exist) { + SNodeEntry nodeEntry = {.hbTimestamp = -1, .nodeId = pTask->info.nodeId}; + epsetAssign(&nodeEntry.epset, &pTask->info.epSet); + + taosArrayPush(pExecNode->pNodeList, &nodeEntry); + mInfo("vgId:%d added into nodeList, total:%d", nodeEntry.nodeId, (int)taosArrayGetSize(pExecNode->pNodeList)); + } } } @@ -2321,6 +2401,8 @@ void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode) { } void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { + taosThreadMutexLock(&pExecNode->lock); + SStreamTaskIter *pIter = createStreamTaskIter(pStream); while (streamTaskIterNextTask(pIter)) { SStreamTask *pTask = streamTaskIterGetCurrent(pIter); @@ -2343,8 +2425,10 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { } } - destroyStreamTaskIter(pIter); ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList)); + taosThreadMutexUnlock(&pExecNode->lock); + + destroyStreamTaskIter(pIter); } static void doAddTaskId(SArray *pList, int32_t taskId, int64_t uid, int32_t numOfTotal) { diff --git a/source/dnode/mnode/impl/src/mndStreamHb.c b/source/dnode/mnode/impl/src/mndStreamHb.c index 9bd7b3b18f..778fd295f7 100644 --- a/source/dnode/mnode/impl/src/mndStreamHb.c +++ b/source/dnode/mnode/impl/src/mndStreamHb.c @@ -131,18 +131,26 @@ static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { int32_t *pVgId = taosArrayGet(pNodeList, k); mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num); + bool setFlag = false; int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); + for (int i = 0; i < numOfNodes; ++i) { SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, i); if (pNodeEntry->nodeId == *pVgId) { mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId); pNodeEntry->stageUpdated = true; + setFlag = true; break; } } - } + if (!setFlag) { + mError("failed to set nodeUpdate flag, nodeId:%d not exists in nodelist, update it", *pVgId); + ASSERT(0); + return TSDB_CODE_FAILED; + } + } return TSDB_CODE_SUCCESS; } @@ -361,7 +369,6 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { pHead->vgId = htonl(req.vgId); tmsgSendRsp(&rsp); - pReq->info.handle = NULL; // disable auto rsp } diff --git a/source/dnode/mnode/impl/src/mndStreamTrans.c b/source/dnode/mnode/impl/src/mndStreamTrans.c index 74ad09c752..ff31aa0f7d 100644 --- a/source/dnode/mnode/impl/src/mndStreamTrans.c +++ b/source/dnode/mnode/impl/src/mndStreamTrans.c @@ -21,8 +21,6 @@ typedef struct SKeyInfo { int32_t keyLen; } SKeyInfo; -static int32_t clearFinishedTrans(SMnode* pMnode); - int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pTransName, int64_t streamId) { SStreamTransInfo info = { .transId = pTrans->id, .startTime = taosGetTimestampMs(), .name = pTransName, .streamId = streamId}; @@ -30,41 +28,54 @@ int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pTransName, int64_t s return 0; } -int32_t clearFinishedTrans(SMnode* pMnode) { +int32_t mndStreamClearFinishedTrans(SMnode* pMnode, int32_t* pNumOfActiveChkpt) { size_t keyLen = 0; void* pIter = NULL; SArray* pList = taosArrayInit(4, sizeof(SKeyInfo)); + int32_t num = 0; while ((pIter = taosHashIterate(execInfo.transMgmt.pDBTrans, pIter)) != NULL) { - SStreamTransInfo* pEntry = (SStreamTransInfo*)pIter; + SStreamTransInfo *pEntry = (SStreamTransInfo *)pIter; // let's clear the finished trans - STrans* pTrans = mndAcquireTrans(pMnode, pEntry->transId); + STrans *pTrans = mndAcquireTrans(pMnode, pEntry->transId); if (pTrans == NULL) { - void* pKey = taosHashGetKey(pEntry, &keyLen); + void *pKey = taosHashGetKey(pEntry, &keyLen); // key is the name of src/dst db name SKeyInfo info = {.pKey = pKey, .keyLen = keyLen}; - mDebug("transId:%d %s startTs:%" PRId64 " cleared since finished", pEntry->transId, pEntry->name, - pEntry->startTime); + mDebug("transId:%d %s startTs:%" PRId64 " cleared since finished", pEntry->transId, pEntry->name, pEntry->startTime); taosArrayPush(pList, &info); } else { + if (strcmp(pEntry->name, MND_STREAM_CHECKPOINT_NAME) == 0) { + num++; + } mndReleaseTrans(pMnode, pTrans); } } - size_t num = taosArrayGetSize(pList); - for (int32_t i = 0; i < num; ++i) { + int32_t size = taosArrayGetSize(pList); + for (int32_t i = 0; i < size; ++i) { SKeyInfo* pKey = taosArrayGet(pList, i); taosHashRemove(execInfo.transMgmt.pDBTrans, pKey->pKey, pKey->keyLen); } - mDebug("clear %d finished stream-trans, remained:%d", (int32_t)num, taosHashGetSize(execInfo.transMgmt.pDBTrans)); + mDebug("clear %d finished stream-trans, remained:%d, active checkpoint trans:%d", size, + taosHashGetSize(execInfo.transMgmt.pDBTrans), num); terrno = TSDB_CODE_SUCCESS; taosArrayDestroy(pList); + + if (pNumOfActiveChkpt != NULL) { + *pNumOfActiveChkpt = num; + } + return 0; } +// * Transactions of different streams are not related. Here only check the conflict of transaction for a given stream. +// For a given stream: +// 1. checkpoint trans is conflict with any other trans except for the drop and reset trans. +// 2. create/drop/reset/update trans are conflict with any other trans. bool mndStreamTransConflictCheck(SMnode* pMnode, int64_t streamId, const char* pTransName, bool lock) { if (lock) { taosThreadMutexLock(&execInfo.lock); @@ -78,7 +89,7 @@ bool mndStreamTransConflictCheck(SMnode* pMnode, int64_t streamId, const char* p return false; } - clearFinishedTrans(pMnode); + mndStreamClearFinishedTrans(pMnode, NULL); SStreamTransInfo *pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, &streamId, sizeof(streamId)); if (pEntry != NULL) { @@ -95,7 +106,7 @@ bool mndStreamTransConflictCheck(SMnode* pMnode, int64_t streamId, const char* p terrno = TSDB_CODE_MND_TRANS_CONFLICT; return true; } else { - mDebug("not conflict with checkpoint trans, name:%s, continue create trans", pTransName); + mDebug("not conflict with checkpoint trans, name:%s, continue creating trans", pTransName); } } else if ((strcmp(tInfo.name, MND_STREAM_CREATE_NAME) == 0) || (strcmp(tInfo.name, MND_STREAM_DROP_NAME) == 0) || (strcmp(tInfo.name, MND_STREAM_TASK_RESET_NAME) == 0) || @@ -106,7 +117,7 @@ bool mndStreamTransConflictCheck(SMnode* pMnode, int64_t streamId, const char* p return true; } } else { - mDebug("stream:0x%"PRIx64" no conflict trans existed, continue create trans", streamId); + mDebug("stream:0x%" PRIx64 " no conflict trans existed, continue create trans", streamId); } if (lock) { @@ -124,7 +135,7 @@ int32_t mndStreamGetRelTrans(SMnode* pMnode, int64_t streamUid) { return 0; } - clearFinishedTrans(pMnode); + mndStreamClearFinishedTrans(pMnode, NULL); SStreamTransInfo* pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, &streamUid, sizeof(streamUid)); if (pEntry != NULL) { SStreamTransInfo tInfo = *pEntry; @@ -140,28 +151,6 @@ int32_t mndStreamGetRelTrans(SMnode* pMnode, int64_t streamUid) { return 0; } -int32_t mndAddtoCheckpointWaitingList(SStreamObj* pStream, int64_t checkpointId) { - SCheckpointCandEntry* pEntry = taosHashGet(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid)); - if (pEntry == NULL) { - SCheckpointCandEntry entry = {.streamId = pStream->uid, - .checkpointTs = taosGetTimestampMs(), - .checkpointId = checkpointId, - .pName = taosStrdup(pStream->name)}; - - taosHashPut(execInfo.transMgmt.pWaitingList, &pStream->uid, sizeof(pStream->uid), &entry, sizeof(entry)); - int32_t size = taosHashGetSize(execInfo.transMgmt.pWaitingList); - - mDebug("stream:%" PRIx64 " add into waiting list due to conflict, ts:%" PRId64 " , checkpointId: %" PRId64 - ", total in waitingList:%d", - pStream->uid, entry.checkpointTs, checkpointId, size); - } else { - mDebug("stream:%" PRIx64 " ts:%" PRId64 ", checkpointId:%" PRId64 " already in waiting list, no need to add into", - pStream->uid, pEntry->checkpointTs, checkpointId); - } - - return TSDB_CODE_SUCCESS; -} - STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, ETrnConflct conflict, const char *name, const char *pMsg) { STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, conflict, pReq, name); if (pTrans == NULL) { diff --git a/source/dnode/mnode/impl/src/mndStreamUtil.c b/source/dnode/mnode/impl/src/mndStreamUtil.c index d5bc12f9df..54279161ab 100644 --- a/source/dnode/mnode/impl/src/mndStreamUtil.c +++ b/source/dnode/mnode/impl/src/mndStreamUtil.c @@ -135,6 +135,7 @@ SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { char buf[256] = {0}; epsetToStr(&entry.epset, buf, tListLen(buf)); mDebug("take snode snapshot, nodeId:%d %s", entry.nodeId, buf); + taosArrayPush(pVgroupListSnapshot, &entry); sdbRelease(pSdb, pObj); } @@ -558,11 +559,6 @@ int32_t mndStreamSetResetTaskAction(SMnode *pMnode, STrans *pTrans, SStreamObj * return 0; } -static void freeCheckpointCandEntry(void *param) { - SCheckpointCandEntry *pEntry = param; - taosMemoryFreeClear(pEntry->pName); -} - static void freeTaskList(void* param) { SArray** pList = (SArray **)param; taosArrayDestroy(*pList); @@ -575,9 +571,30 @@ void mndInitExecInfo() { execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId)); execInfo.pTaskMap = taosHashInit(64, fn, true, HASH_NO_LOCK); execInfo.transMgmt.pDBTrans = taosHashInit(32, fn, true, HASH_NO_LOCK); - execInfo.transMgmt.pWaitingList = taosHashInit(32, fn, true, HASH_NO_LOCK); execInfo.pTransferStateStreams = taosHashInit(32, fn, true, HASH_NO_LOCK); + execInfo.pNodeList = taosArrayInit(4, sizeof(SNodeEntry)); - taosHashSetFreeFp(execInfo.transMgmt.pWaitingList, freeCheckpointCandEntry); taosHashSetFreeFp(execInfo.pTransferStateStreams, freeTaskList); } + +void removeExpiredNodeInfo(const SArray *pNodeSnapshot) { + SArray *pValidList = taosArrayInit(4, sizeof(SNodeEntry)); + int32_t size = taosArrayGetSize(pNodeSnapshot); + + for (int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { + SNodeEntry *p = taosArrayGet(execInfo.pNodeList, i); + + for (int32_t j = 0; j < size; ++j) { + SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j); + if (pEntry->nodeId == p->nodeId) { + taosArrayPush(pValidList, p); + break; + } + } + } + + taosArrayDestroy(execInfo.pNodeList); + execInfo.pNodeList = pValidList; + + mDebug("remain %d valid node entries after clean expired nodes info", (int32_t)taosArrayGetSize(pValidList)); +} \ No newline at end of file diff --git a/source/dnode/snode/inc/sndInt.h b/source/dnode/snode/inc/sndInt.h index 8c5d056893..2ac66fa1cd 100644 --- a/source/dnode/snode/inc/sndInt.h +++ b/source/dnode/snode/inc/sndInt.h @@ -35,23 +35,6 @@ struct SSnode { SMsgCb msgCb; }; -#if 0 -typedef struct { - SHashObj* pHash; // taskId -> SStreamTask -} SStreamMeta; - -SStreamMeta* sndMetaNew(); -void sndMetaDelete(SStreamMeta* pMeta); - -int32_t sndMetaDeployTask(SStreamMeta* pMeta, SStreamTask* pTask); -SStreamTask* sndMetaGetTask(SStreamMeta* pMeta, int32_t taskId); -int32_t sndMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); - -int32_t sndDropTaskOfStream(SStreamMeta* pMeta, int64_t streamId); -int32_t sndStopTaskOfStream(SStreamMeta* pMeta, int64_t streamId); -int32_t sndResumeTaskOfStream(SStreamMeta* pMeta, int64_t streamId); -#endif - void initStreamStateAPI(SStorageAPI* pAPI); #ifdef __cplusplus diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 87f0681780..c61988574c 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -45,15 +45,10 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer if (code != TSDB_CODE_SUCCESS) { return code; } + pTask->pBackend = NULL; - streamTaskOpenAllUpstreamInput(pTask); - code = tqExpandStreamTask(pTask, pSnode->pMeta, NULL); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); @@ -67,14 +62,14 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer " child id:%d, level:%d, status:%s fill-history:%d, related stream task:0x%x trigger:%" PRId64 " ms", SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, - (int32_t)pTask->streamTaskId.taskId, pTask->info.triggerParam); + (int32_t)pTask->streamTaskId.taskId, pTask->info.delaySchedParam); } else { sndInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, status:%s fill-history:%d, related fill-task:0x%x trigger:%" PRId64 " ms", SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, - (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.triggerParam); + (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.delaySchedParam); } return 0; } @@ -86,6 +81,9 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { return NULL; } + stopRsync(); + startRsync(); + pSnode->msgCb = pOption->msgCb; pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, taosGetTimestampMs(), tqStartTaskCompleteCallback); if (pSnode->pMeta == NULL) { @@ -94,10 +92,6 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { } streamMetaLoadAllTasks(pSnode->pMeta); - - stopRsync(); - startRsync(); - return pSnode; FAIL: @@ -106,8 +100,7 @@ FAIL: } int32_t sndInit(SSnode *pSnode) { - streamMetaResetTaskStatus(pSnode->pMeta); - streamMetaStartAllTasks(pSnode->pMeta); + streamTaskSchedTask(&pSnode->msgCb, pSnode->pMeta->vgId, 0, 0, STREAM_EXEC_T_START_ALL_TASKS); return 0; } @@ -143,6 +136,10 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { return tqStreamProcessReqCheckpointRsp(pSnode->pMeta, pMsg); case TDMT_STREAM_TASK_CHECKPOINT_READY_RSP: return tqStreamProcessCheckpointReadyRsp(pSnode->pMeta, pMsg); + case TDMT_STREAM_RETRIEVE_TRIGGER: + return tqStreamTaskProcessRetrieveTriggerReq(pSnode->pMeta, pMsg); + case TDMT_STREAM_RETRIEVE_TRIGGER_RSP: + return tqStreamTaskProcessRetrieveTriggerRsp(pSnode->pMeta, pMsg); default: sndError("invalid snode msg:%d", pMsg->msgType); ASSERT(0); @@ -168,6 +165,8 @@ int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { return tqStreamTaskProcessTaskPauseReq(pSnode->pMeta, pMsg->pCont); case TDMT_STREAM_TASK_RESUME: return tqStreamTaskProcessTaskResumeReq(pSnode->pMeta, pMsg->info.conn.applyIndex, pMsg->pCont, false); + case TDMT_STREAM_TASK_UPDATE_CHKPT: + return tqStreamTaskProcessUpdateCheckpointReq(pSnode->pMeta, pMsg->pCont, pMsg->contLen); default: ASSERT(0); } diff --git a/source/dnode/snode/src/snodeInitApi.c b/source/dnode/snode/src/snodeInitApi.c index 3b60ef3427..196fa56c99 100644 --- a/source/dnode/snode/src/snodeInitApi.c +++ b/source/dnode/snode/src/snodeInitApi.c @@ -75,8 +75,10 @@ void initStateStoreAPI(SStateStore* pStore) { pStore->streamStateCountGetKeyByRange = streamStateCountGetKeyByRange; pStore->streamStateSessionAllocWinBuffByNextPosition = streamStateSessionAllocWinBuffByNextPosition; - pStore->streamStateCountWinAddIfNotExist = streamStateCountWinAddIfNotExist; - pStore->streamStateCountWinAdd = streamStateCountWinAdd; +//void initStreamStateAPI(SStorageAPI* pAPI) { +// initStateStoreAPI(&pAPI->stateStore); +// initFunctionStateStore(&pAPI->functionStore); +//} pStore->updateInfoInit = updateInfoInit; pStore->updateInfoFillBlockData = updateInfoFillBlockData; diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 08d32b2b81..651fe2cda4 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -160,7 +160,7 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const void* pRsp, int32_t tqInitDataRsp(SMqDataRspCommon* pRsp, STqOffsetVal pOffset); void tqUpdateNodeStage(STQ* pTq, bool isLeader); int32_t tqSetDstTableDataPayload(uint64_t suid, const STSchema* pTSchema, int32_t blockIndex, SSDataBlock* pDataBlock, - SSubmitTbData* pTableData, const char* id); + SSubmitTbData* pTableData, int64_t earlyTs, const char* id); int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, const char* id); SVCreateTbReq* buildAutoCreateTableReq(const char* stbFullName, int64_t suid, int32_t numOfCols, diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 06b58213f7..8222af4d60 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -242,6 +242,7 @@ int tsdbInsertData(STsdb* pTsdb, int64_t version, SSubmitReq2* pMsg, SSubmit int32_t tsdbInsertTableData(STsdb* pTsdb, int64_t version, SSubmitTbData* pSubmitTbData, int32_t* affectedRows); int32_t tsdbDeleteTableData(STsdb* pTsdb, int64_t version, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKEY eKey); int32_t tsdbSetKeepCfg(STsdb* pTsdb, STsdbCfg* pCfg); +int64_t tsdbGetEarliestTs(STsdb* pTsdb); // tq STQ* tqOpen(const char* path, SVnode* pVnode); @@ -254,6 +255,8 @@ int tqScanWalAsync(STQ* pTq, bool ckPause); int32_t tqStopStreamTasksAsync(STQ* pTq); int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp); int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskRetrieveTriggerRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg); @@ -292,6 +295,7 @@ int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg); int32_t tqStreamProgressRetrieveReq(STQ* pTq, SRpcMsg* pMsg); +int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen); // sma int32_t smaInit(); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 7d6eda9cf1..3cc7c6ec66 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -244,7 +244,7 @@ static void tdRSmaTaskInit(SStreamMeta *pMeta, SRSmaInfoItem *pItem, SStreamTask SStreamTask **ppTask = (SStreamTask **)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask && *ppTask) { pItem->submitReqVer = (*ppTask)->chkInfo.checkpointVer; - pItem->fetchResultVer = (*ppTask)->info.triggerParam; + pItem->fetchResultVer = (*ppTask)->info.delaySchedParam; } streamMetaRUnLock(pMeta); } @@ -298,7 +298,7 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id); pStreamTask->status.pSM = streamCreateStateMachine(pStreamTask); - + pStreamTask->chkInfo.pActiveInfo = streamTaskCreateActiveChkptInfo(); pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, -1); if (!pStreamState) { terrno = TSDB_CODE_RSMA_STREAM_STATE_OPEN; @@ -1285,10 +1285,11 @@ _checkpoint: if (pItem && pItem->pStreamTask) { SStreamTask *pTask = pItem->pStreamTask; // atomic_store_32(&pTask->pMeta->chkptNotReadyTasks, 1); - pTask->chkInfo.checkpointingId = checkpointId; + streamTaskSetActiveCheckpointInfo(pTask, checkpointId); + pTask->chkInfo.checkpointId = checkpointId; // 1pTask->checkpointingId; pTask->chkInfo.checkpointVer = pItem->submitReqVer; - pTask->info.triggerParam = pItem->fetchResultVer; + pTask->info.delaySchedParam = pItem->fetchResultVer; pTask->info.taskLevel = TASK_LEVEL_SMA; if (!checkpointBuilt) { diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index fb898c02f8..d0913081ac 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -203,7 +203,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * int32_t *index = taosHashGet(pTableIndexMap, &groupId, sizeof(groupId)); if (index == NULL) { // no data yet, append it - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, ""); + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, INT64_MIN, ""); if (code != TSDB_CODE_SUCCESS) { continue; } @@ -213,7 +213,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * int32_t size = (int32_t)taosArrayGetSize(pReq->aSubmitTbData) - 1; taosHashPut(pTableIndexMap, &groupId, sizeof(groupId), &size, sizeof(size)); } else { - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, ""); + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, INT64_MIN, ""); if (code != TSDB_CODE_SUCCESS) { continue; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 79f53e6dec..712cfbaa55 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -717,17 +717,14 @@ static void freePtr(void* ptr) { taosMemoryFree(*(void**)ptr); } int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { int32_t vgId = TD_VID(pTq->pVnode); - tqDebug("s-task:0x%x start to expand task", pTask->id.taskId); + tqDebug("s-task:0x%x start to build task", pTask->id.taskId); int32_t code = streamTaskInit(pTask, pTq->pStreamMeta, &pTq->pVnode->msgCb, nextProcessVer); if (code != TSDB_CODE_SUCCESS) { return code; } - code = tqExpandStreamTask(pTask, pTq->pStreamMeta, pTq->pVnode); - if (code != TSDB_CODE_SUCCESS) { - return code; - } + pTask->pBackend = NULL; // sink STaskOutputInfo* pOutputInfo = &pTask->outputInfo; @@ -770,22 +767,22 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { const char* pNext = streamTaskGetStatusStr(pTask->status.taskStatus); if (pTask->info.fillHistory) { - tqInfo("vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + tqInfo("vgId:%d build stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, cur-status:%s, next-status:%s fill-history:%d, related stream task:0x%x " - "trigger:%" PRId64 " ms, inputVer:%" PRId64, + "delaySched:%" PRId64 " ms, inputVer:%" PRId64, vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, p, pNext, pTask->info.fillHistory, - (int32_t)pTask->streamTaskId.taskId, pTask->info.triggerParam, nextProcessVer); + (int32_t)pTask->streamTaskId.taskId, pTask->info.delaySchedParam, nextProcessVer); } else { tqInfo( - "vgId:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + "vgId:%d build stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 - " child id:%d, level:%d, cur-status:%s next-status:%s fill-history:%d, related fill-task:0x%x trigger:%" PRId64 + " child id:%d, level:%d, cur-status:%s next-status:%s fill-history:%d, related fill-task:0x%x delaySched:%" PRId64 " ms, inputVer:%" PRId64, vgId, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, p, pNext, pTask->info.fillHistory, - (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.triggerParam, nextProcessVer); + (int32_t)pTask->hTaskInfo.id.taskId, pTask->info.delaySchedParam, nextProcessVer); ASSERT(pChkInfo->checkpointVer <= pChkInfo->nextProcessVer); } @@ -800,6 +797,11 @@ int32_t tqProcessTaskCheckRsp(STQ* pTq, SRpcMsg* pMsg) { } int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { +// if (!pTq->pVnode->restored) { +// tqDebug("vgId:%d not restored, ignore the stream task deploy msg", TD_VID(pTq->pVnode)); +// return TSDB_CODE_SUCCESS; +// } + return tqStreamTaskProcessDeployReq(pTq->pStreamMeta, &pTq->pVnode->msgCb, sversion, msg, msgLen, vnodeIsRoleLeader(pTq->pVnode), pTq->pVnode->restored); } @@ -828,7 +830,7 @@ static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask streamSetParamForStreamScannerStep2(pTask, pStep2Range, pWindow); - int64_t dstVer =pStep2Range->minVer; + int64_t dstVer = pStep2Range->minVer; pTask->chkInfo.nextProcessVer = dstVer; walReaderSetSkipToVersion(pTask->exec.pWalReader, dstVer); @@ -1009,6 +1011,20 @@ int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) { return tqStreamTaskProcessDropReq(pTq->pStreamMeta, msg, msgLen); } +int32_t tqProcessTaskUpdateCheckpointReq(STQ* pTq, char* msg, int32_t msgLen) { + int32_t vgId = TD_VID(pTq->pVnode); + SVUpdateCheckpointInfoReq* pReq = (SVUpdateCheckpointInfoReq*)msg; + +// if (!pTq->pVnode->restored) { +// tqDebug("vgId:%d update-checkpoint-info msg received during restoring, checkpointId:%" PRId64 +// ", transId:%d s-task:0x%x ignore it", +// vgId, pReq->checkpointId, pReq->transId, pReq->taskId); +// return TSDB_CODE_SUCCESS; +// } + + return tqStreamTaskProcessUpdateCheckpointReq(pTq->pStreamMeta, msg, msgLen); +} + int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { return tqStreamTaskProcessTaskPauseReq(pTq->pStreamMeta, msg); } @@ -1120,10 +1136,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) } if (pTask->status.downstreamReady != 1) { - pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id - pTask->chkInfo.checkpointingId = req.checkpointId; - pTask->chkInfo.transId = req.transId; - + streamTaskSetFailedChkptInfo(pTask, req.transId, req.checkpointId); // record the latest failed checkpoint id tqError("s-task:%s not ready for checkpoint, since downstream not ready, ignore this checkpointId:%" PRId64 ", transId:%d set it failed", pTask->id.idStr, req.checkpointId, req.transId); @@ -1162,9 +1175,12 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) // check if the checkpoint msg already sent or not. if (status == TASK_STATUS__CK) { + int64_t checkpointId = 0; + streamTaskGetActiveCheckpointInfo(pTask, NULL, &checkpointId); + tqWarn("s-task:%s repeatly recv checkpoint-source msg checkpointId:%" PRId64 " transId:%d already handled, ignore msg and continue process checkpoint", - pTask->id.idStr, pTask->chkInfo.checkpointingId, req.transId); + pTask->id.idStr, checkpointId, req.transId); taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); @@ -1213,6 +1229,15 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) // downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + + SRetrieveChkptTriggerReq* pReq = (SRetrieveChkptTriggerReq*) pMsg->pCont; + if (!vnodeIsRoleLeader(pTq->pVnode)) { + tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from 0x%x", vgId, + (int32_t)pReq->downstreamTaskId); + return TSDB_CODE_STREAM_NOT_LEADER; + } + return tqStreamTaskProcessCheckpointReadyMsg(pTq->pStreamMeta, pMsg); } @@ -1224,6 +1249,23 @@ int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { return tqStreamTaskProcessTaskResetReq(pTq->pStreamMeta, pMsg); } +int32_t tqProcessTaskRetrieveTriggerReq(STQ* pTq, SRpcMsg* pMsg) { + int32_t vgId = TD_VID(pTq->pVnode); + + SRetrieveChkptTriggerReq* pReq = (SRetrieveChkptTriggerReq*) pMsg->pCont; + if (!vnodeIsRoleLeader(pTq->pVnode)) { + tqError("vgId:%d not leader, ignore the retrieve checkpoint-trigger msg from 0x%x", vgId, + (int32_t)pReq->downstreamTaskId); + return TSDB_CODE_STREAM_NOT_LEADER; + } + + return tqStreamTaskProcessRetrieveTriggerReq(pTq->pStreamMeta, pMsg); +} + +int32_t tqProcessTaskRetrieveTriggerRsp(STQ* pTq, SRpcMsg* pMsg) { + return tqStreamTaskProcessRetrieveTriggerRsp(pTq->pStreamMeta, pMsg); +} + // this function is needed, do not try to remove it. int32_t tqProcessStreamHbRsp(STQ* pTq, SRpcMsg* pMsg) { return tqStreamProcessStreamHbRsp(pTq->pStreamMeta, pMsg); diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index 76322c527f..404cbf26dd 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -178,6 +178,7 @@ int32_t tqMetaRestoreCheckInfo(STQ* pTq) { goto END; } } + END: tdbFree(pKey); tdbFree(pVal); @@ -514,35 +515,6 @@ int32_t tqMetaTransform(STQ* pTq) { return code; } -//int32_t tqMetaRestoreHandle(STQ* pTq) { -// int code = 0; -// TBC* pCur = NULL; -// if (tdbTbcOpen(pTq->pExecStore, &pCur, NULL) < 0) { -// return -1; -// } -// -// void* pKey = NULL; -// int kLen = 0; -// void* pVal = NULL; -// int vLen = 0; -// -// tdbTbcMoveToFirst(pCur); -// -// while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { -// STqHandle handle = {0}; -// code = restoreHandle(pTq, pVal, vLen, &handle); -// if (code < 0) { -// tqDestroyTqHandle(&handle); -// break; -// } -// } -// -// tdbFree(pKey); -// tdbFree(pVal); -// tdbTbcClose(pCur); -// return code; -//} - int32_t tqMetaGetHandle(STQ* pTq, const char* key) { void* pVal = NULL; int vLen = 0; diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 40b817accd..d3be8fa666 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -1083,7 +1083,7 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { } SStreamTask* pTask = *(SStreamTask**)pIter; - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && (pTask->exec.pExecutor != NULL)) { int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd); if (code != 0) { tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr); diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 5522d06263..5f3e1e3d14 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -34,11 +34,12 @@ static int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSData static int32_t doBuildAndSendSubmitMsg(SVnode* pVnode, SStreamTask* pTask, SSubmitReq2* pReq, int32_t numOfBlocks); static int32_t buildSubmitMsgImpl(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen); static int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDataBlock* pDataBlock, - const char* id); + int64_t earlyTs, const char* id); static int32_t doWaitForDstTableCreated(SVnode* pVnode, SStreamTask* pTask, STableSinkInfo* pTableSinkInfo, const char* dstTableName, int64_t* uid); static int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, const char* id); +static int32_t doRemoveFromCache(SSHashObj* pSinkTableMap, uint64_t groupId, const char* id); static bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, const char* ctbName, int64_t suid); static int32_t initCreateTableMsg(SVCreateTbReq* pCreateTableReq, uint64_t suid, const char* stbFullName, int32_t numOfTags); @@ -396,46 +397,6 @@ int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, c return TSDB_CODE_SUCCESS; } -int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, - int64_t suid) { - SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; - - int32_t code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, &deleteReq, pTask->id.idStr, - pTask->ver >= SSTREAM_TASK_SUBTABLE_CHANGED_VER && pTask->subtableWithoutMd5 != 1); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - if (taosArrayGetSize(deleteReq.deleteReqs) == 0) { - taosArrayDestroy(deleteReq.deleteReqs); - return TSDB_CODE_SUCCESS; - } - - int32_t len; - tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); - if (code != TSDB_CODE_SUCCESS) { - qError("s-task:%s failed to encode delete request", pTask->id.idStr); - return code; - } - - SEncoder encoder; - void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); - void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); - tEncoderInit(&encoder, abuf, len); - tEncodeSBatchDeleteReq(&encoder, &deleteReq); - tEncoderClear(&encoder); - taosArrayDestroy(deleteReq.deleteReqs); - - ((SMsgHead*)serializedDeleteReq)->vgId = TD_VID(pVnode); - - SRpcMsg msg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead)}; - if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { - tqDebug("failed to put delete req into write-queue since %s", terrstr()); - } - - return TSDB_CODE_SUCCESS; -} - bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, const char* ctbName, int64_t suid) { if (pReader->me.type != TSDB_CHILD_TABLE) { tqError("vgId:%d, failed to write into %s, since table type:%d incorrect", vgId, ctbName, pReader->me.type); @@ -484,23 +445,6 @@ SVCreateTbReq* buildAutoCreateTableReq(const char* stbFullName, int64_t suid, in return pCreateTbReq; } -int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, const char* id) { - if (tSimpleHashGetSize(pSinkTableMap) > MAX_CACHE_TABLE_INFO_NUM) { - taosMemoryFreeClear(pTableSinkInfo); // too many items, failed to cache it - return TSDB_CODE_FAILED; - } - - int32_t code = tSimpleHashPut(pSinkTableMap, &groupId, sizeof(uint64_t), &pTableSinkInfo, POINTER_BYTES); - if (code != TSDB_CODE_SUCCESS) { - taosMemoryFreeClear(pTableSinkInfo); - } else { - tqDebug("s-task:%s new dst table:%s(uid:%" PRIu64 ") added into cache, total:%d", id, pTableSinkInfo->name.data, - pTableSinkInfo->uid, tSimpleHashGetSize(pSinkTableMap)); - } - - return code; -} - int32_t buildSubmitMsgImpl(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen) { int32_t code = 0; void* pBuf = NULL; @@ -552,7 +496,8 @@ int32_t tsAscendingSortFn(const void* p1, const void* p2) { } } -int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDataBlock* pDataBlock, const char* id) { +int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDataBlock* pDataBlock, int64_t earlyTs, + const char* id) { int32_t numOfRows = pDataBlock->info.rows; int32_t code = TSDB_CODE_SUCCESS; @@ -581,6 +526,14 @@ int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDat SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); ts = *(int64_t*)colDataGetData(pColData, j); tqTrace("s-task:%s sink row %d, col %d ts %" PRId64, id, j, k, ts); + + if (ts < earlyTs) { + tqError("s-task:%s ts:%" PRId64 " of generated results out of valid time range %" PRId64 " , discarded", id, + ts, earlyTs); + pTableData->aRowP = taosArrayDestroy(pTableData->aRowP); + taosArrayDestroy(pVals); + return TSDB_CODE_SUCCESS; + } } if (IS_SET_NULL(pCol)) { @@ -605,8 +558,7 @@ int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDat dataIndex++; } else { void* colData = colDataGetData(pColData, j); - if (IS_VAR_DATA_TYPE(pCol->type)) { - // address copy, no value + if (IS_VAR_DATA_TYPE(pCol->type)) { // address copy, no value SValue sv = (SValue){.type = pCol->type, .nData = varDataLen(colData), .pData = (uint8_t*)varDataVal(colData)}; SColVal cv = COL_VAL_VALUE(pCol->colId, sv); @@ -744,7 +696,7 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat tqTrace("s-task:%s set the dstTable uid from cache:%" PRId64, id, pTableData->uid); } } else { - // The auto-create option will always set to be open for those submit messages, which arrive during the period + // The auto-create option will always set to be open for those submit messages, which arrives during the period // the creating of the destination table, due to the absence of the user-specified table in TSDB. When scanning // data from WAL, those submit messages, with auto-created table option, will be discarded expect the first, for // those mismatched table uids. Only the FIRST table has the correct table uid, and those remain all have @@ -752,7 +704,7 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat SMetaReader mr = {0}; metaReaderDoInit(&mr, pVnode->pMeta, META_READER_LOCK); - // table not in cache, let's try the extract it from tsdb meta + // table not in cache, let's try to extract it from tsdb meta if (metaGetTableEntryByName(&mr, dstTableName) < 0) { metaReaderClear(&mr); @@ -796,34 +748,26 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat } int32_t tqSetDstTableDataPayload(uint64_t suid, const STSchema *pTSchema, int32_t blockIndex, SSDataBlock* pDataBlock, - SSubmitTbData* pTableData, const char* id) { + SSubmitTbData* pTableData, int64_t earlyTs, const char* id) { int32_t numOfRows = pDataBlock->info.rows; + char* dstTableName = pDataBlock->info.parTbName; tqDebug("s-task:%s sink data pipeline, build submit msg from %dth resBlock, including %d rows, dst suid:%" PRId64, id, blockIndex + 1, numOfRows, suid); - char* dstTableName = pDataBlock->info.parTbName; // convert all rows - int32_t code = doConvertRows(pTableData, pTSchema, pDataBlock, id); + int32_t code = doConvertRows(pTableData, pTSchema, pDataBlock, earlyTs, id); if (code != TSDB_CODE_SUCCESS) { tqError("s-task:%s failed to convert rows from result block, code:%s", id, tstrerror(terrno)); return code; } - taosArraySort(pTableData->aRowP, tsAscendingSortFn); - tqTrace("s-task:%s build submit msg for dstTable:%s, numOfRows:%d", id, dstTableName, numOfRows); - return code; -} - -bool hasOnlySubmitData(const SArray* pBlocks, int32_t numOfBlocks) { - for (int32_t i = 0; i < numOfBlocks; ++i) { - SSDataBlock* p = taosArrayGet(pBlocks, i); - if (p->info.type == STREAM_DELETE_RESULT || p->info.type == STREAM_CREATE_CHILD_TABLE) { - return false; - } + if (pTableData->aRowP != NULL) { + taosArraySort(pTableData->aRowP, tsAscendingSortFn); + tqTrace("s-task:%s build submit msg for dstTable:%s, numOfRows:%d", id, dstTableName, numOfRows); } - return true; + return code; } void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { @@ -836,6 +780,7 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { int32_t numOfBlocks = taosArrayGetSize(pBlocks); int32_t code = TSDB_CODE_SUCCESS; const char* id = pTask->id.idStr; + int64_t earlyTs = tsdbGetEarliestTs(pVnode->pTsdb); bool onlySubmitData = hasOnlySubmitData(pBlocks, numOfBlocks); if (!onlySubmitData) { @@ -870,8 +815,13 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { continue; } - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); - if (code != TSDB_CODE_SUCCESS) { + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, earlyTs, id); + if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { + if (tbData.pCreateTbReq != NULL) { + tdDestroySVCreateTbReq(tbData.pCreateTbReq); + doRemoveFromCache(pTask->outputInfo.tbSink.pTblInfo, pDataBlock->info.id.groupId, id); + tbData.pCreateTbReq = NULL; + } continue; } @@ -918,8 +868,13 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { continue; } - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); - if (code != TSDB_CODE_SUCCESS) { + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, earlyTs, id); + if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { + if (tbData.pCreateTbReq != NULL) { + tdDestroySVCreateTbReq(tbData.pCreateTbReq); + doRemoveFromCache(pTask->outputInfo.tbSink.pTblInfo, groupId, id); + tbData.pCreateTbReq = NULL; + } continue; } @@ -928,8 +883,12 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { int32_t size = (int32_t)taosArrayGetSize(submitReq.aSubmitTbData) - 1; taosHashPut(pTableIndexMap, &groupId, sizeof(groupId), &size, sizeof(size)); } else { - code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); - if (code != TSDB_CODE_SUCCESS) { + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, earlyTs, id); + if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { + if (tbData.pCreateTbReq != NULL) { + tdDestroySVCreateTbReq(tbData.pCreateTbReq); + tbData.pCreateTbReq = NULL; + } continue; } @@ -953,3 +912,82 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { } } } + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool hasOnlySubmitData(const SArray* pBlocks, int32_t numOfBlocks) { + for (int32_t i = 0; i < numOfBlocks; ++i) { + SSDataBlock* p = taosArrayGet(pBlocks, i); + if (p->info.type == STREAM_DELETE_RESULT || p->info.type == STREAM_CREATE_CHILD_TABLE) { + return false; + } + } + + return true; +} + +int32_t doPutIntoCache(SSHashObj* pSinkTableMap, STableSinkInfo* pTableSinkInfo, uint64_t groupId, const char* id) { + if (tSimpleHashGetSize(pSinkTableMap) > MAX_CACHE_TABLE_INFO_NUM) { + taosMemoryFreeClear(pTableSinkInfo); // too many items, failed to cache it + return TSDB_CODE_FAILED; + } + + int32_t code = tSimpleHashPut(pSinkTableMap, &groupId, sizeof(uint64_t), &pTableSinkInfo, POINTER_BYTES); + if (code != TSDB_CODE_SUCCESS) { + taosMemoryFreeClear(pTableSinkInfo); + } else { + tqDebug("s-task:%s new dst table:%s(uid:%" PRIu64 ") added into cache, total:%d", id, pTableSinkInfo->name.data, + pTableSinkInfo->uid, tSimpleHashGetSize(pSinkTableMap)); + } + + return code; +} + +int32_t doRemoveFromCache(SSHashObj* pSinkTableMap, uint64_t groupId, const char* id) { + if (tSimpleHashGetSize(pSinkTableMap) == 0) { + return TSDB_CODE_SUCCESS; + } + + int32_t code = tSimpleHashRemove(pSinkTableMap, &groupId, sizeof(groupId)); + tqDebug("s-task:%s remove cached table meta for groupId:%" PRId64, id, groupId); + return code; +} + +int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, + int64_t suid) { + SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; + + int32_t code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, &deleteReq, pTask->id.idStr, + pTask->ver >= SSTREAM_TASK_SUBTABLE_CHANGED_VER && pTask->subtableWithoutMd5 != 1); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (taosArrayGetSize(deleteReq.deleteReqs) == 0) { + taosArrayDestroy(deleteReq.deleteReqs); + return TSDB_CODE_SUCCESS; + } + + int32_t len; + tEncodeSize(tEncodeSBatchDeleteReq, &deleteReq, len, code); + if (code != TSDB_CODE_SUCCESS) { + qError("s-task:%s failed to encode delete request", pTask->id.idStr); + return code; + } + + SEncoder encoder; + void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); + void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); + tEncoderInit(&encoder, abuf, len); + tEncodeSBatchDeleteReq(&encoder, &deleteReq); + tEncoderClear(&encoder); + taosArrayDestroy(deleteReq.deleteReqs); + + ((SMsgHead*)serializedDeleteReq)->vgId = TD_VID(pVnode); + + SRpcMsg msg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead)}; + if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { + tqDebug("failed to put delete req into write-queue since %s", terrstr()); + } + + return TSDB_CODE_SUCCESS; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 5a29f67ae3..bcf17bf1e1 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -291,8 +291,7 @@ bool doPutDataIntoInputQ(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems } } else { walReaderSeekVer(pTask->exec.pWalReader, pTask->chkInfo.nextProcessVer); - tqError("s-task:%s append input queue failed, code:too many items, ver:%" PRId64, id, - pTask->chkInfo.nextProcessVer); + tqTrace("s-task:%s append input queue failed, code:too many items, ver:%" PRId64, id, pTask->chkInfo.nextProcessVer); break; } } diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index 62c3b06b65..c55745e5c5 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -19,6 +19,13 @@ typedef struct SMStreamCheckpointReadyRspMsg { SMsgHead head; + int64_t streamId; + int32_t upstreamTaskId; + int32_t upstreamNodeId; + int32_t downstreamTaskId; + int32_t downstreamNodeId; + int64_t checkpointId; + int32_t transId; } SMStreamCheckpointReadyRspMsg; static int32_t doProcessDummyRspMsg(SStreamMeta* pMeta, SRpcMsg* pMsg); @@ -39,20 +46,27 @@ static void restoreStreamTaskId(SStreamTask* pTask, STaskId* pId) { pTask->id.streamId = pId->streamId; } -int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode) { - int32_t vgId = pMeta->vgId; - STaskId taskId = {0}; +int32_t tqExpandStreamTask(SStreamTask* pTask) { + SStreamMeta* pMeta = pTask->pMeta; + int32_t vgId = pMeta->vgId; + STaskId taskId = {0}; + int64_t st = taosGetTimestampMs(); + + tqDebug("s-task:%s vgId:%d start to expand stream task", pTask->id.idStr, vgId); if (pTask->info.fillHistory) { taskId = replaceStreamTaskId(pTask); } - pTask->pState = streamStateOpen(pMeta->path, pTask, false, -1, -1); - if (pTask->pState == NULL) { - tqError("s-task:%s (vgId:%d) failed to open state for task, expand task failed", pTask->id.idStr, vgId); - return -1; - } else { - tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); + // sink task does not need the pState + if (pTask->info.taskLevel != TASK_LEVEL__SINK) { + pTask->pState = streamStateOpen(pMeta->path, pTask, false, -1, -1); + if (pTask->pState == NULL) { + tqError("s-task:%s (vgId:%d) failed to open state for task, expand task failed", pTask->id.idStr, vgId); + return -1; + } else { + tqDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); + } } if (pTask->info.fillHistory) { @@ -65,8 +79,9 @@ int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode) .fillHistory = pTask->info.fillHistory, .winRange = pTask->dataRange.window, }; + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - handle.vnode = pVnode; + handle.vnode = ((STQ*)pMeta->ahandle)->pVnode; handle.initTqReader = 1; } else if (pTask->info.taskLevel == TASK_LEVEL__AGG) { handle.numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList); @@ -83,6 +98,9 @@ int32_t tqExpandStreamTask(SStreamTask* pTask, SStreamMeta* pMeta, void* pVnode) qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); } + double el = (taosGetTimestampMs() - st) / 1000.0; + tqDebug("s-task:%s vgId:%d expand stream task completed, elapsed time:%.2fsec", pTask->id.idStr, vgId, el); + return TSDB_CODE_SUCCESS; } @@ -157,10 +175,9 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask == NULL || *ppTask == NULL) { - tqError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped", vgId, req.taskId); + tqError("vgId:%d failed to acquire task:0x%x when handling update task epset, it may have been dropped", vgId, req.taskId); rsp.code = TSDB_CODE_SUCCESS; streamMetaWUnLock(pMeta); - taosArrayDestroy(req.pNodeList); return rsp.code; } @@ -476,21 +493,27 @@ int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg) SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId); if (pTask == NULL) { tqError("vgId:%d failed to find s-task:0x%x, it may have been destroyed already", vgId, req.downstreamTaskId); - return code; + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } - tqDebug("vgId:%d s-task:%s received the checkpoint ready msg from task:0x%x (vgId:%d), handle it", vgId, + tqDebug("vgId:%d s-task:%s received the checkpoint-ready msg from task:0x%x (vgId:%d), handle it", vgId, pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId); - streamProcessCheckpointReadyMsg(pTask); + streamProcessCheckpointReadyMsg(pTask, req.checkpointId, req.downstreamTaskId, req.downstreamNodeId); streamMetaReleaseTask(pMeta, pTask); { // send checkpoint ready rsp - SRpcMsg rsp = {.code = 0, .info = pMsg->info, .contLen = sizeof(SMStreamCheckpointReadyRspMsg)}; - rsp.pCont = rpcMallocCont(rsp.contLen); - SMsgHead* pHead = rsp.pCont; - pHead->vgId = htonl(req.downstreamNodeId); + SMStreamCheckpointReadyRspMsg* pReadyRsp = rpcMallocCont(sizeof(SMStreamCheckpointReadyRspMsg)); + pReadyRsp->upstreamTaskId = req.upstreamTaskId; + pReadyRsp->upstreamNodeId = req.upstreamNodeId; + pReadyRsp->downstreamTaskId = req.downstreamTaskId; + pReadyRsp->downstreamNodeId = req.downstreamNodeId; + pReadyRsp->checkpointId = req.checkpointId; + pReadyRsp->streamId = req.streamId; + pReadyRsp->head.vgId = htonl(req.downstreamNodeId); + + SRpcMsg rsp = {.code = 0, .info = pMsg->info, .pCont = pReadyRsp, .contLen = sizeof(SMStreamCheckpointReadyRspMsg)}; tmsgSendRsp(&rsp); pMsg->info.handle = NULL; // disable auto rsp @@ -577,22 +600,11 @@ int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, SMsgCb* cb, int64_t sve return code; } -void tqStreamRmTaskBackend(SStreamMeta* pMeta, STaskId* id) { - char taskKey[128] = {0}; - sprintf(taskKey, "0x%" PRIx64 "-0x%x", id->streamId, (int32_t)id->taskId); - - char* path = taosMemoryCalloc(1, strlen(pMeta->path) + 128); - sprintf(path, "%s%s%s", pMeta->path, TD_DIRSEP, taskKey); - taosRemoveDir(path); - taosMemoryFree(path); - // do nothing -} - int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; - int32_t vgId = pMeta->vgId; - STaskId hTaskId = {0}; + int32_t vgId = pMeta->vgId; + STaskId hTaskId = {0}; tqDebug("vgId:%d receive msg to drop s-task:0x%x", vgId, pReq->taskId); streamMetaWLock(pMeta); @@ -608,6 +620,7 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen hTaskId.taskId = pTask->hTaskInfo.id.taskId; } + streamTaskSetRemoveBackendFiles(pTask); streamTaskClearHTaskAttr(pTask, pReq->resetRelHalt); streamMetaReleaseTask(pMeta, pTask); } @@ -634,10 +647,32 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen streamMetaWUnLock(pMeta); - tqStreamRmTaskBackend(pMeta, &id); +// tqStreamRemoveTaskBackend(pMeta, &id); return 0; } +int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, char* msg, int32_t msgLen) { + SVUpdateCheckpointInfoReq* pReq = (SVUpdateCheckpointInfoReq*)msg; + + int32_t vgId = pMeta->vgId; + tqDebug("vgId:%d receive msg to update-checkpoint-info for s-task:0x%x", vgId, pReq->taskId); + + streamMetaWLock(pMeta); + + STaskId id = {.streamId = pReq->streamId, .taskId = pReq->taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + + if (ppTask != NULL && (*ppTask) != NULL) { + streamTaskUpdateTaskCheckpointInfo(*ppTask, pReq); + } else { // failed to get the task. + tqError("vgId:%d failed to locate the s-task:0x%x to update the checkpoint info, it may have been dropped already", + vgId, pReq->taskId); + } + + streamMetaWUnLock(pMeta); + return TSDB_CODE_SUCCESS; +} + static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { int32_t vgId = pMeta->vgId; int32_t code = 0; @@ -680,10 +715,8 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { } if (isLeader && !tsDisableStream) { - streamMetaResetTaskStatus(pMeta); streamMetaWUnLock(pMeta); - - streamMetaStartAllTasks(pMeta); + streamMetaStartAllTasks(pMeta, tqExpandStreamTask); } else { streamMetaResetStartInfo(&pMeta->startInfo); streamMetaWUnLock(pMeta); @@ -701,10 +734,10 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead int32_t vgId = pMeta->vgId; if (type == STREAM_EXEC_T_START_ONE_TASK) { - streamMetaStartOneTask(pMeta, pReq->streamId, pReq->taskId); + streamMetaStartOneTask(pMeta, pReq->streamId, pReq->taskId, tqExpandStreamTask); return 0; } else if (type == STREAM_EXEC_T_START_ALL_TASKS) { - streamMetaStartAllTasks(pMeta); + streamMetaStartAllTasks(pMeta, tqExpandStreamTask); return 0; } else if (type == STREAM_EXEC_T_RESTART_ALL_TASKS) { restartStreamTasks(pMeta, isLeader); @@ -820,13 +853,19 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { // clear flag set during do checkpoint, and open inputQ for all upstream tasks SStreamTaskState *pState = streamTaskGetStatus(pTask); if (pState->state == TASK_STATUS__CK) { + int32_t tranId = 0; + int64_t activeChkId = 0; + streamTaskGetActiveCheckpointInfo(pTask, &tranId, &activeChkId); + tqDebug("s-task:%s reset task status from checkpoint, current checkpointingId:%" PRId64 ", transId:%d", - pTask->id.idStr, pTask->chkInfo.checkpointingId, pTask->chkInfo.transId); + pTask->id.idStr, activeChkId, tranId); + streamTaskSetStatusReady(pTask); } else if (pState->state == TASK_STATUS__UNINIT) { tqDebug("s-task:%s start task by checking downstream tasks", pTask->id.idStr); ASSERT(pTask->status.downstreamReady == 0); - /*int32_t ret = */ streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_INIT); +// /*int32_t ret = */ streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_INIT); + tqStreamStartOneTaskAsync(pMeta, pTask->pMsgCb, pTask->id.streamId, pTask->id.taskId); } else { tqDebug("s-task:%s status:%s do nothing after receiving reset-task from mnode", pTask->id.idStr, pState->name); } @@ -837,6 +876,89 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { return TSDB_CODE_SUCCESS; } +int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { + SRetrieveChkptTriggerReq* pReq = (SRetrieveChkptTriggerReq*) pMsg->pCont; + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->upstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d process retrieve checkpoint trigger, checkpointId:%" PRId64 + " from s-task:0x%x, failed to acquire task:0x%x, it may have been dropped already", + pMeta->vgId, pReq->checkpointId, (int32_t)pReq->downstreamTaskId, pReq->upstreamTaskId); + return TSDB_CODE_STREAM_TASK_NOT_EXIST; + } + + tqDebug("s-task:0x%x recv retrieve checkpoint-trigger msg from downstream s-task:0x%x, checkpointId:%" PRId64, + pReq->upstreamTaskId, (int32_t)pReq->downstreamTaskId, pReq->checkpointId); + + if (pTask->status.downstreamReady != 1) { + tqError("s-task:%s not ready for checkpoint-trigger retrieve from 0x%x, since downstream not ready", + pTask->id.idStr, (int32_t)pReq->downstreamTaskId); + + streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info, TSDB_CODE_STREAM_TASK_IVLD_STATUS); + streamMetaReleaseTask(pMeta, pTask); + + return TSDB_CODE_SUCCESS; + } + + SStreamTaskState* pState = streamTaskGetStatus(pTask); + if (pState->state == TASK_STATUS__CK) { // recv the checkpoint-source/trigger already + int32_t transId = 0; + int64_t checkpointId = 0; + + streamTaskGetActiveCheckpointInfo(pTask, &transId, &checkpointId); + ASSERT (checkpointId == pReq->checkpointId); + + if (streamTaskAlreadySendTrigger(pTask, pReq->downstreamNodeId)) { + // re-send the lost checkpoint-trigger msg to downstream task + tqDebug("s-task:%s re-send checkpoint-trigger to:0x%x, checkpointId:%" PRId64 ", transId:%d", pTask->id.idStr, + (int32_t)pReq->downstreamTaskId, checkpointId, transId); + streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info, TSDB_CODE_SUCCESS); + } else { // not send checkpoint-trigger yet, wait + int32_t recv = 0, total = 0; + streamTaskGetTriggerRecvStatus(pTask, &recv, &total); + + if (recv == total) { // add the ts info + tqWarn("s-task:%s all upstream send checkpoint-source/trigger, but not processed yet, wait", pTask->id.idStr); + } else { + tqWarn( + "s-task:%s not all upstream send checkpoint-source/trigger, total recv:%d/%d, wait for all upstream " + "sending checkpoint-source/trigger", + pTask->id.idStr, recv, total); + } + streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info, TSDB_CODE_ACTION_IN_PROGRESS); + } + } else { // upstream not recv the checkpoint-source/trigger till now + ASSERT(pState->state == TASK_STATUS__READY || pState->state == TASK_STATUS__HALT); + tqWarn( + "s-task:%s not recv checkpoint-source from mnode or checkpoint-trigger from upstream yet, wait for all " + "upstream sending checkpoint-source/trigger", + pTask->id.idStr); + streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, &pMsg->info, TSDB_CODE_ACTION_IN_PROGRESS); + } + + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; +} + +int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { + SCheckpointTriggerRsp* pRsp = pMsg->pCont; + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pRsp->streamId, pRsp->taskId); + if (pTask == NULL) { + tqError( + "vgId:%d process retrieve checkpoint-trigger, failed to acquire task:0x%x, it may have been dropped already", + pMeta->vgId, pRsp->taskId); + return TSDB_CODE_STREAM_TASK_NOT_EXIST; + } + + tqDebug("s-task:%s recv re-send checkpoint-trigger msg from upstream:0x%x, checkpointId:%"PRId64", transId:%d", + pTask->id.idStr, pRsp->upstreamTaskId, pRsp->checkpointId, pRsp->transId); + + streamTaskProcessCheckpointTriggerRsp(pTask, pRsp); + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; +} + int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg) { SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)pMsg; @@ -927,9 +1049,10 @@ static int32_t tqProcessTaskResumeImpl(void* handle, SStreamTask* pTask, int64_t int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* msg, bool fromVnode) { SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg; - SStreamMeta* pMeta = fromVnode ? ((STQ*)handle)->pStreamMeta : handle; - SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); - int32_t code = tqProcessTaskResumeImpl(handle, pTask, sversion, pReq->igUntreated, fromVnode); + + SStreamMeta* pMeta = fromVnode ? ((STQ*)handle)->pStreamMeta : handle; + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId); + int32_t code = tqProcessTaskResumeImpl(handle, pTask, sversion, pReq->igUntreated, fromVnode); if (code != 0) { return code; } @@ -956,5 +1079,16 @@ int32_t tqStreamProcessStreamHbRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { return d int32_t tqStreamProcessReqCheckpointRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { return doProcessDummyRspMsg(pMeta, pMsg); } int32_t tqStreamProcessCheckpointReadyRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { - return doProcessDummyRspMsg(pMeta, pMsg); + SMStreamCheckpointReadyRspMsg* pRsp = pMsg->pCont; + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pRsp->streamId, pRsp->downstreamTaskId); + if (pTask == NULL) { + tqError("vgId:%d failed to acquire task:0x%x when handling checkpoint-ready msg, it may have been dropped", + pRsp->downstreamNodeId, pRsp->downstreamTaskId); + return TSDB_CODE_STREAM_TASK_NOT_EXIST; + } + + streamTaskProcessCheckpointReadyRsp(pTask, pRsp->upstreamTaskId, pRsp->checkpointId); + streamMetaReleaseTask(pMeta, pTask); + return TSDB_CODE_SUCCESS; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index ea3d285880..f9fede1d9b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -30,6 +30,14 @@ int32_t tsdbSetKeepCfg(STsdb *pTsdb, STsdbCfg *pCfg) { return 0; } +int64_t tsdbGetEarliestTs(STsdb *pTsdb) { + STsdbKeepCfg *pCfg = &pTsdb->keepCfg; + + int64_t now = taosGetTimestamp(pCfg->precision); + int64_t ts = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1; // needs to add one tick + return ts; +} + /** * @brief * diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 99520f7c92..d7c3eff571 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -226,14 +226,10 @@ static bool isEmptyQueryTimeWindow(STimeWindow* pWindow) { return pWindow->skey // Update the query time window according to the data time to live(TTL) information, in order to avoid to return // the expired data to client, even it is queried already. static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) { - STsdbKeepCfg* pCfg = &pTsdb->keepCfg; - - int64_t now = taosGetTimestamp(pCfg->precision); - int64_t earilyTs = now - (tsTickPerMin[pCfg->precision] * pCfg->keep2) + 1; // needs to add one tick - + int64_t earlyTs = tsdbGetEarliestTs(pTsdb); STimeWindow win = *pWindow; - if (win.skey < earilyTs) { - win.skey = earilyTs; + if (win.skey < earlyTs) { + win.skey = earlyTs; } return win; diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index f071775990..5acaf2bce4 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -160,6 +160,12 @@ int vnodeShouldCommit(SVnode *pVnode, bool atExit) { (atExit && (pVnode->inUse->size > 0 || pVnode->pMeta->changed || pVnode->state.applied - pVnode->state.committed > 4096)); } + vTrace("vgId:%d, should commit:%d, disk available:%d, buffer size:%" PRId64 ", node size:%" PRId64 + ", meta changed:%d" + ", state:[%" PRId64 ",%" PRId64 "]", + TD_VID(pVnode), needCommit, diskAvail, pVnode->inUse ? pVnode->inUse->size : 0, + pVnode->inUse ? pVnode->inUse->node.size : 0, pVnode->pMeta->changed, pVnode->state.applied, + pVnode->state.committed); taosThreadMutexUnlock(&pVnode->mutex); return needCommit; } diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 4bb5fbc822..611a603c63 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -422,7 +422,7 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) } // STREAM ============ - vInfo("vgId:%d stream task start", vgId); + vInfo("vgId:%d stream task start to take snapshot", vgId); if (!pReader->streamTaskDone) { if (pReader->pStreamTaskReader == NULL) { code = streamTaskSnapReaderOpen(pReader->pVnode->pTq, pReader->sver, pReader->sver, &pReader->pStreamTaskReader); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 58faa59e81..002f04b8a7 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -625,6 +625,11 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg goto _err; } } break; + case TDMT_STREAM_TASK_UPDATE_CHKPT: { + if (tqProcessTaskUpdateCheckpointReq(pVnode->pTq, pMsg->pCont, pMsg->contLen) < 0) { + goto _err; + } + } break; case TDMT_STREAM_TASK_PAUSE: { if (pVnode->restored && vnodeIsLeader(pVnode) && tqProcessTaskPauseReq(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) { @@ -837,12 +842,16 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) return tqProcessTaskScanHistory(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_CHECKPOINT_READY: return tqProcessTaskCheckpointReadyMsg(pVnode->pTq, pMsg); + case TDMT_STREAM_TASK_CHECKPOINT_READY_RSP: + return tqProcessTaskCheckpointReadyRsp(pVnode->pTq, pMsg); + case TDMT_STREAM_RETRIEVE_TRIGGER: + return tqProcessTaskRetrieveTriggerReq(pVnode->pTq, pMsg); + case TDMT_STREAM_RETRIEVE_TRIGGER_RSP: + return tqProcessTaskRetrieveTriggerRsp(pVnode->pTq, pMsg); case TDMT_MND_STREAM_HEARTBEAT_RSP: return tqProcessStreamHbRsp(pVnode->pTq, pMsg); case TDMT_MND_STREAM_REQ_CHKPT_RSP: return tqProcessStreamReqCheckpointRsp(pVnode->pTq, pMsg); - case TDMT_STREAM_TASK_CHECKPOINT_READY_RSP: - return tqProcessTaskCheckpointReadyRsp(pVnode->pTq, pMsg); case TDMT_VND_GET_STREAM_PROGRESS: return tqStreamProgressRetrieveReq(pVnode->pTq, pMsg); default: diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 166a230c76..8f28871e3b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -576,20 +576,18 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) if (tsDisableStream) { vInfo("vgId:%d, sync restore finished, not launch stream tasks, since stream tasks are disabled", vgId); } else { - vInfo("vgId:%d sync restore finished, start to launch stream task(s)", pVnode->config.vgId); - int32_t numOfTasks = tqStreamTasksGetTotalNum(pMeta); - if (numOfTasks > 0) { - if (pMeta->startInfo.startAllTasks == 1) { - pMeta->startInfo.restartCount += 1; - tqDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId, - pMeta->startInfo.restartCount); - } else { - pMeta->startInfo.startAllTasks = 1; + vInfo("vgId:%d sync restore finished, start to launch stream task(s)", vgId); + if (pMeta->startInfo.startAllTasks == 1) { + pMeta->startInfo.restartCount += 1; + vDebug("vgId:%d in start tasks procedure, inc restartCounter by 1, remaining restart:%d", vgId, + pMeta->startInfo.restartCount); + } else { + pMeta->startInfo.startAllTasks = 1; + streamMetaWUnLock(pMeta); - streamMetaWUnLock(pMeta); - tqStreamTaskStartAsync(pMeta, &pVnode->msgCb, false); - return; - } + tqInfo("vgId:%d stream task already loaded, start them", vgId); + streamTaskSchedTask(&pVnode->msgCb, TD_VID(pVnode), 0, 0, STREAM_EXEC_T_START_ALL_TASKS); + return; } } } else { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index aabf410242..eef8b06ac5 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1314,6 +1314,7 @@ void resetTableScanInfo(STableScanInfo* pTableScanInfo, STimeWindow* pWin, uint6 pTableScanInfo->tableEndIndex = -1; pTableScanInfo->base.readerAPI.tsdReaderClose(pTableScanInfo->base.dataReader); pTableScanInfo->base.dataReader = NULL; + pTableScanInfo->scanMode = TABLE_SCAN__BLOCK_ORDER; } static SSDataBlock* readPreVersionData(SOperatorInfo* pTableScanOp, uint64_t tbUid, TSKEY startTs, TSKEY endTs, diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 46844a2470..2224942893 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -855,7 +855,7 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDat int32_t pkLen = 0; SColumnInfoData* pPkColDataInfo = NULL; if (hasSrcPrimaryKeyCol(&pInfo->basic)) { - pPkColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); + pPkColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->basic.primaryPkIndex); } if (pSDataBlock->info.window.skey != tsCols[0] || pSDataBlock->info.window.ekey != tsCols[endRowId]) { @@ -2144,7 +2144,7 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData int32_t pkLen = 0; SColumnInfoData* pPkColDataInfo = NULL; if (hasSrcPrimaryKeyCol(&pInfo->basic)) { - pPkColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); + pPkColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->basic.primaryPkIndex); } for (int32_t i = 0; i < rows;) { diff --git a/source/libs/executor/src/timesliceoperator.c b/source/libs/executor/src/timesliceoperator.c index 9f421d6de3..cda22fa320 100644 --- a/source/libs/executor/src/timesliceoperator.c +++ b/source/libs/executor/src/timesliceoperator.c @@ -194,6 +194,7 @@ static void tRowGetKeyFromColData(int64_t ts, SColumnInfoData* pPkCol, int32_t r } } +// only the timestamp is needed to complete the duplicated timestamp check. static bool checkDuplicateTimestamps(STimeSliceOperatorInfo* pSliceInfo, SColumnInfoData* pTsCol, SColumnInfoData* pPkCol, int32_t curIndex, int32_t rows) { int64_t currentTs = *(int64_t*)colDataGetData(pTsCol, curIndex); diff --git a/source/libs/scalar/src/scalar.c b/source/libs/scalar/src/scalar.c index 5f43ae9f3c..50de5e760d 100644 --- a/source/libs/scalar/src/scalar.c +++ b/source/libs/scalar/src/scalar.c @@ -1213,6 +1213,7 @@ EDealRes sclRewriteFunction(SNode **pNode, SScalarCtx *ctx) { res->translate = true; + strcpy(res->node.aliasName, node->node.aliasName); res->node.resType.type = output.columnData->info.type; res->node.resType.bytes = output.columnData->info.bytes; res->node.resType.scale = output.columnData->info.scale; @@ -1268,6 +1269,7 @@ EDealRes sclRewriteLogic(SNode **pNode, SScalarCtx *ctx) { res->node.resType = node->node.resType; res->translate = true; + strcpy(res->node.aliasName, node->node.aliasName); int32_t type = output.columnData->info.type; if (IS_VAR_DATA_TYPE(type)) { res->datum.p = output.columnData->pData; @@ -1309,6 +1311,7 @@ EDealRes sclRewriteOperator(SNode **pNode, SScalarCtx *ctx) { res->translate = true; + strcpy(res->node.aliasName, node->node.aliasName); res->node.resType = node->node.resType; if (colDataIsNull_s(output.columnData, 0)) { res->isNull = true; @@ -1364,6 +1367,7 @@ EDealRes sclRewriteCaseWhen(SNode **pNode, SScalarCtx *ctx) { res->translate = true; + strcpy(res->node.aliasName, node->node.aliasName); res->node.resType = node->node.resType; if (colDataIsNull_s(output.columnData, 0)) { res->isNull = true; diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index bb6362ff73..35114a1be6 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -141,7 +141,7 @@ SListNode* streamBackendAddCompare(void* backend, void* arg); void streamBackendDelCompare(void* backend, void* arg); int32_t streamStateCvtDataFormat(char* path, char* key, void* cfInst); -STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId); +STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId); void taskDbDestroy(void* pBackend, bool flush); void taskDbDestroy2(void* pBackend); int32_t taskDbDoCheckpoint(void* arg, int64_t chkpId); diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 3ccb25a62a..154f623b9d 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -52,6 +52,23 @@ extern "C" { #define stTrace(...) do { if (stDebugFlag & DEBUG_TRACE) { taosPrintLog("STM ", DEBUG_TRACE, stDebugFlag, __VA_ARGS__); }} while(0) // clang-format on +struct SActiveCheckpointInfo { + TdThreadMutex lock; + int32_t transId; + int64_t firstRecvTs; // first time to recv checkpoint trigger info + int64_t activeId; // current active checkpoint id + int64_t failedId; + bool dispatchTrigger; + SArray* pDispatchTriggerList; // SArray + SArray* pReadyMsgList; // SArray + int8_t allUpstreamTriggerRecv; + SArray* pCheckpointReadyRecvList; // SArray + int32_t checkCounter; + tmr_h pChkptTriggerTmr; + int32_t sendReadyCheckCounter; + tmr_h pSendReadyMsgTmr; +}; + typedef struct { int8_t type; SSDataBlock* pBlock; @@ -81,6 +98,35 @@ struct STokenBucket { int64_t quotaFillTimestamp; // fill timestamp }; +typedef struct { + int32_t upstreamTaskId; + SEpSet upstreamNodeEpset; + int32_t upstreamNodeId; + int32_t transId; + int32_t childId; + SRpcMsg msg; // for mnode checkpoint-source rsp + int64_t checkpointId; + int64_t recvTs; + int32_t sendCompleted; +} STaskCheckpointReadyInfo; + +typedef struct { + int64_t sendTs; + int64_t recvTs; + bool recved; + int32_t nodeId; + int32_t taskId; +} STaskTriggerSendInfo; + +typedef struct { + int64_t streamId; + int64_t recvTs; + int32_t downstreamNodeId; + int32_t downstreamTaskId; + int64_t checkpointId; + int32_t transId; +} STaskDownstreamReadyInfo; + struct SStreamQueue { STaosQueue* pQueue; STaosQall* qall; @@ -113,7 +159,7 @@ void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups); int32_t getNumOfDispatchBranch(SStreamTask* pTask); void clearBufferedDispatchMsg(SStreamTask* pTask); -int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); +int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); SStreamDataBlock* createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg); SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamTask* pTask, int64_t resultSize, SArray* pRes); @@ -122,7 +168,6 @@ void destroyStreamDataBlock(SStreamDataBlock* pBlock); int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock* pData); int32_t streamBroadcastToUpTasks(SStreamTask* pTask, const SSDataBlock* pBlock); -int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId); int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId); @@ -132,11 +177,13 @@ int32_t streamTaskSendCheckpointReq(SStreamTask* pTask); void streamTaskSetFailedCheckpointId(SStreamTask* pTask); int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask); +int32_t streamTaskGetNumOfUpstream(const SStreamTask* pTask); int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const char*); STaskId streamTaskGetTaskId(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); int32_t streamTaskResetTimewindowFilter(SStreamTask* pTask); +void streamTaskClearActiveInfo(SActiveCheckpointInfo* pInfo); void streamClearChkptReadyMsg(SStreamTask* pTask); EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, @@ -165,9 +212,14 @@ typedef enum ECHECKPOINT_BACKUP_TYPE { ECHECKPOINT_BACKUP_TYPE streamGetCheckpointBackupType(); -int32_t streamTaskDownloadCheckpointData(char* id, char* path); +int32_t streamTaskDownloadCheckpointData(const char* id, char* path); int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask); -int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask); +int32_t streamTaskOnScanHistoryTaskReady(SStreamTask* pTask); + +int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t upstreamNodeId, int32_t upstreamTaskId, + int32_t childId, SEpSet* pEpset, int64_t checkpointId); +int32_t initCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamNodeId, int32_t upstreamTaskId, int32_t childId, + int64_t checkpointId, SRpcMsg* pMsg); typedef int32_t (*__stream_async_exec_fn_t)(void* param); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index b157597e60..d42a3b545a 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -40,9 +40,9 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t void destroyRocksdbCfInst(RocksdbCfInst* inst); int32_t getCfIdx(const char* cfName); -STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath); +STaskDbWrapper* taskDbOpenImpl(const char* key, char* statePath, char* dbPath); -int32_t backendCopyFiles(char* src, char* dst); +static int32_t backendCopyFiles(const char* src, const char* dst); void destroyCompactFilteFactory(void* arg); void destroyCompactFilte(void* arg); @@ -237,12 +237,14 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { tstrerror(TAOS_SYSTEM_ERROR(errno)), state); taosMkDir(state); } + taosMemoryFree(chkp); } - *dst = state; + *dst = state; return 0; } + int32_t remoteChkp_readMetaData(char* path, SArray* list) { char* metaPath = taosMemoryCalloc(1, strlen(path)); sprintf(metaPath, "%s%s%s", path, TD_DIRSEP, "META"); @@ -326,27 +328,30 @@ int32_t remoteChkp_validAndCvtMeta(char* path, SArray* list, int64_t chkpId) { return complete == 1 ? 0 : -1; } -int32_t rebuildFromRemoteChkp_rsync(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { - // impl later +int32_t rebuildFromRemoteChkp_rsync(const char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { int32_t code = 0; - if (taosIsDir(chkpPath)) { - taosRemoveDir(chkpPath); + if (taosIsDir(chkptPath)) { + taosRemoveDir(chkptPath); + stDebug("remove local checkpoint data dir:%s succ", chkptPath); } if (taosIsDir(defaultPath)) { taosRemoveDir(defaultPath); + taosMulMkDir(defaultPath); + stDebug("clear local default dir before downloading checkpoint data:%s succ", defaultPath); } - code = streamTaskDownloadCheckpointData(key, chkpPath); + code = streamTaskDownloadCheckpointData(key, chkptPath); if (code != 0) { + stError("failed to download checkpoint data:%s", key); return code; } - code = backendCopyFiles(chkpPath, defaultPath); - return code; + stDebug("download remote checkpoint data for checkpointId:%" PRId64 ", %s", checkpointId, key); + return backendCopyFiles(chkptPath, defaultPath); } -int32_t rebuildFromRemoteChkp_s3(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { +int32_t rebuildFromRemoteChkp_s3(const char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { int32_t code = streamTaskDownloadCheckpointData(key, chkpPath); if (code != 0) { return code; @@ -381,13 +386,16 @@ int32_t rebuildFromRemoteChkp_s3(char* key, char* chkpPath, int64_t chkpId, char return code; } -int32_t rebuildFromRemoteChkp(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { +int32_t rebuildFromRemoteCheckpoint(const char* key, char* chkptPath, int64_t checkpointId, char* defaultPath) { ECHECKPOINT_BACKUP_TYPE type = streamGetCheckpointBackupType(); if (type == DATA_UPLOAD_S3) { - return rebuildFromRemoteChkp_s3(key, chkpPath, chkpId, defaultPath); + return rebuildFromRemoteChkp_s3(key, chkptPath, checkpointId, defaultPath); } else if (type == DATA_UPLOAD_RSYNC) { - return rebuildFromRemoteChkp_rsync(key, chkpPath, chkpId, defaultPath); + return rebuildFromRemoteChkp_rsync(key, chkptPath, checkpointId, defaultPath); + } else { + stError("%s no remote backup checkpoint data for:%" PRId64, key, checkpointId); } + return -1; } @@ -406,7 +414,7 @@ int32_t copyFiles_hardlink(char* src, char* dst, int8_t type) { return taosLinkFile(src, dst); } -int32_t backendFileCopyFilesImpl(char* src, char* dst) { +int32_t backendFileCopyFilesImpl(const char* src, const char* dst) { const char* current = "CURRENT"; size_t currLen = strlen(current); @@ -415,20 +423,26 @@ int32_t backendFileCopyFilesImpl(char* src, char* dst) { int32_t dLen = strlen(dst); char* srcName = taosMemoryCalloc(1, sLen + 64); char* dstName = taosMemoryCalloc(1, dLen + 64); - // copy file to dst + // copy file to dst TdDirPtr pDir = taosOpenDir(src); if (pDir == NULL) { taosMemoryFree(srcName); taosMemoryFree(dstName); + code = TAOS_SYSTEM_ERROR(errno); + errno = 0; - return -1; + return code; } + errno = 0; TdDirEntryPtr de = NULL; + while ((de = taosReadDir(pDir)) != NULL) { char* name = taosGetDirEntryName(de); - if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; + if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { + continue; + } sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); @@ -436,18 +450,21 @@ int32_t backendFileCopyFilesImpl(char* src, char* dst) { if (strncmp(name, current, strlen(name) <= currLen ? strlen(name) : currLen) == 0) { code = copyFiles_create(srcName, dstName, 0); if (code != 0) { - stError("failed to copy file, detail: %s to %s reason: %s", srcName, dstName, - tstrerror(TAOS_SYSTEM_ERROR(code))); + code = TAOS_SYSTEM_ERROR(code); + stError("failed to copy file, detail: %s to %s reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; } } else { code = copyFiles_hardlink(srcName, dstName, 0); if (code != 0) { - stError("failed to hard line file, detail: %s to %s, reason: %s", srcName, dstName, - tstrerror(TAOS_SYSTEM_ERROR(code))); + code = TAOS_SYSTEM_ERROR(code); + stError("failed to hard link file, detail:%s to %s, reason:%s", srcName, dstName, tstrerror(code)); goto _ERROR; + } else { + stDebug("succ hard link file:%s to %s", srcName, dstName); } } + memset(srcName, 0, sLen + 64); memset(dstName, 0, dLen + 64); } @@ -456,88 +473,54 @@ int32_t backendFileCopyFilesImpl(char* src, char* dst) { taosMemoryFreeClear(dstName); taosCloseDir(&pDir); errno = 0; - return 0; + return code; + _ERROR: taosMemoryFreeClear(srcName); taosMemoryFreeClear(dstName); taosCloseDir(&pDir); errno = 0; - return -1; + return code; } -int32_t backendCopyFiles(char* src, char* dst) { + +int32_t backendCopyFiles(const char* src, const char* dst) { return backendFileCopyFilesImpl(src, dst); - // // opt later, just hard link - // int32_t sLen = strlen(src); - // int32_t dLen = strlen(dst); - // char* srcName = taosMemoryCalloc(1, sLen + 64); - // char* dstName = taosMemoryCalloc(1, dLen + 64); - - // TdDirPtr pDir = taosOpenDir(src); - // if (pDir == NULL) { - // taosMemoryFree(srcName); - // taosMemoryFree(dstName); - // return -1; - // } - - // TdDirEntryPtr de = NULL; - // while ((de = taosReadDir(pDir)) != NULL) { - // char* name = taosGetDirEntryName(de); - // if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; - - // sprintf(srcName, "%s%s%s", src, TD_DIRSEP, name); - // sprintf(dstName, "%s%s%s", dst, TD_DIRSEP, name); - // // if (!taosDirEntryIsDir(de)) { - // // // code = taosCopyFile(srcName, dstName); - // // if (code == -1) { - // // goto _err; - // // } - // // } - // return backendFileCopyFilesImpl(src, dst); - - // memset(srcName, 0, sLen + 64); - // memset(dstName, 0, dLen + 64); - // } - - // _err: - // taosMemoryFreeClear(srcName); - // taosMemoryFreeClear(dstName); - // taosCloseDir(&pDir); - // return code >= 0 ? 0 : -1; - - // return 0; } -int32_t rebuildFromLocalChkp(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { + +static int32_t rebuildFromLocalCheckpoint(const char* pTaskIdStr, const char* checkpointPath, int64_t checkpointId, + const char* defaultPath) { int32_t code = 0; + if (taosIsDir(defaultPath)) { taosRemoveDir(defaultPath); taosMkDir(defaultPath); - - stInfo("succ to clear stream backend %s", defaultPath); + stInfo("%s clear local backend dir:%s, succ", pTaskIdStr, defaultPath); } - if (taosIsDir(chkpPath) && isValidCheckpoint(chkpPath)) { - code = backendCopyFiles(chkpPath, defaultPath); - if (code != 0) { + + if (taosIsDir(checkpointPath) && isValidCheckpoint(checkpointPath)) { + stDebug("%s local checkpoint data existed, checkpointId:%" PRId64 " copy to backend dir", pTaskIdStr, checkpointId); + + code = backendCopyFiles(checkpointPath, defaultPath); + if (code != TSDB_CODE_SUCCESS) { taosRemoveDir(defaultPath); taosMkDir(defaultPath); - stError("failed to restart stream backend from %s, reason: %s, start to restart from empty path: %s", chkpPath, - tstrerror(TAOS_SYSTEM_ERROR(errno)), defaultPath); - code = 0; + stError("%s failed to start stream backend from local %s, reason:%s, try download checkpoint from remote", + pTaskIdStr, checkpointPath, tstrerror(TAOS_SYSTEM_ERROR(errno))); + code = TSDB_CODE_SUCCESS; } else { - stInfo("start to restart stream backend at checkpoint path: %s", chkpPath); + stInfo("%s copy checkpoint data from:%s to:%s succ, try to start stream backend", pTaskIdStr, checkpointPath, + defaultPath); } + } else { + code = TSDB_CODE_FAILED; + stError("%s no valid data for checkpointId:%" PRId64 " in %s", pTaskIdStr, checkpointId, checkpointPath); } return code; } -int32_t rebuildFromlocalDefault(char* key, char* chkpPath, int64_t chkpId, char* defaultPath) { - int32_t code = 0; - return code; -} - -int32_t rebuildDirFromChkp2(const char* path, char* key, int64_t chkpId, char** dbPrefixPath, char** dbPath) { - // impl later +int32_t restoreCheckpointData(const char* path, const char* key, int64_t chkptId, char** dbPrefixPath, char** dbPath) { int32_t code = 0; char* prefixPath = taosMemoryCalloc(1, strlen(path) + 128); @@ -550,33 +533,42 @@ int32_t rebuildDirFromChkp2(const char* path, char* key, int64_t chkpId, char** char* defaultPath = taosMemoryCalloc(1, strlen(path) + 256); sprintf(defaultPath, "%s%s%s", prefixPath, TD_DIRSEP, "state"); + if (!taosIsDir(defaultPath)) { taosMulMkDir(defaultPath); } - char* chkpPath = taosMemoryCalloc(1, strlen(path) + 256); - if (chkpId != 0) { - sprintf(chkpPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkpId); - code = rebuildFromLocalChkp(key, chkpPath, chkpId, defaultPath); - if (code != 0) { - code = rebuildFromRemoteChkp(key, chkpPath, chkpId, defaultPath); - } + int32_t pathLen = strlen(path) + 256; - if (code != 0) { - stInfo("failed to start stream backend at %s, reason: %s, restart from default defaultPath dir:%s", chkpPath, - tstrerror(TAOS_SYSTEM_ERROR(errno)), defaultPath); - code = taosMkDir(defaultPath); - } - } else { - sprintf(chkpPath, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", - (int64_t)-1); + char* checkpointRoot = taosMemoryCalloc(1, pathLen); + sprintf(checkpointRoot, "%s%s%s", prefixPath, TD_DIRSEP, "checkpoints"); - code = rebuildFromLocalChkp(key, chkpPath, -1, defaultPath); - if (code != 0) { - code = taosMkDir(defaultPath); - } + if (!taosIsDir(checkpointRoot)) { + taosMulMkDir(checkpointRoot); } - taosMemoryFree(chkpPath); + taosMemoryFree(checkpointRoot); + + stDebug("%s check local backend dir:%s, checkpointId:%" PRId64 " succ", key, defaultPath, chkptId); + + char* chkptPath = taosMemoryCalloc(1, pathLen); + if (chkptId > 0) { + snprintf(chkptPath, pathLen, "%s%s%s%s%s%" PRId64 "", prefixPath, TD_DIRSEP, "checkpoints", TD_DIRSEP, "checkpoint", chkptId); + + code = rebuildFromLocalCheckpoint(key, chkptPath, chkptId, defaultPath); + if (code != 0) { + code = rebuildFromRemoteCheckpoint(key, chkptPath, chkptId, defaultPath); + } + + if (code != 0) { + stError("failed to start stream backend at %s, reason: %s, restart from default defaultPath:%s", chkptPath, + tstrerror(code), defaultPath); + code = 0; // reset the error code + } + } else { // no valid checkpoint id + stInfo("%s no valid checkpoint ever generated, no need to copy checkpoint data", key); + } + + taosMemoryFree(chkptPath); *dbPath = defaultPath; *dbPrefixPath = prefixPath; @@ -594,11 +586,12 @@ bool streamBackendDataIsExist(const char* path, int64_t chkpId, int32_t vgId) { taosMemoryFree(state); return exist; } + void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { char* backendPath = NULL; int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); - stDebug("start to init stream backend at %s, checkpointid: %" PRId64 " vgId:%d", backendPath, chkpId, vgId); + stDebug("start to init stream backend:%s, checkpointId:%" PRId64 " vgId:%d", backendPath, chkpId, vgId); uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper)); @@ -657,10 +650,12 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { goto _EXIT; } } + if (cfs != NULL) { rocksdb_list_column_families_destroy(cfs, nCf); } - stDebug("succ to init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId); + + stDebug("init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId); taosMemoryFreeClear(backendPath); return (void*)pHandle; @@ -1101,6 +1096,7 @@ _ERROR: rocksdb_checkpoint_object_destroy(cp); return code; } + int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32_t nCf) { int code = 0; char* err = NULL; @@ -1143,6 +1139,7 @@ int32_t chkpPreBuildDir(char* path, int64_t chkpId, char** chkpDir, char** chkpI return 0; } + int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { SStreamMeta* pMeta = arg; @@ -1292,6 +1289,7 @@ _EXIT: taosMemoryFree(ppCf); return code; } + int32_t streamBackendDoCheckpoint(void* arg, int64_t chkpId) { return taskDbDoCheckpoint(arg, chkpId); } SListNode* streamBackendAddCompare(void* backend, void* arg) { @@ -2040,6 +2038,7 @@ int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** sta *stateFullPath = statePath; return 0; } + void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId) { STaskDbWrapper* p = pTaskDb; taosThreadMutexLock(&p->mutex); @@ -2047,7 +2046,7 @@ void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId) { taosThreadMutexUnlock(&p->mutex); } -STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) { +STaskDbWrapper* taskDbOpenImpl(const char* key, char* statePath, char* dbPath) { char* err = NULL; char** cfNames = NULL; size_t nCf = 0; @@ -2062,7 +2061,7 @@ STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) { cfNames = rocksdb_list_column_families(pTaskDb->dbOpt, dbPath, &nCf, &err); if (nCf == 0) { - stInfo("newly create db, need to restart"); + stInfo("%s newly create db, need to restart", key); // pre create db pTaskDb->db = rocksdb_open(pTaskDb->pCfOpts[0], dbPath, &err); if (pTaskDb->db == NULL) goto _EXIT; @@ -2071,11 +2070,15 @@ STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) { if (cfNames != NULL) { rocksdb_list_column_families_destroy(cfNames, nCf); } + taosMemoryFree(err); err = NULL; cfNames = rocksdb_list_column_families(pTaskDb->dbOpt, dbPath, &nCf, &err); - ASSERT(err == NULL); + if (err != NULL) { + stError("%s failed to create column-family, %s, %" PRIzu ", reason:%s", key, dbPath, nCf, err); + goto _EXIT; + } } if (taskDbOpenCfs(pTaskDb, dbPath, cfNames, nCf) != 0) { @@ -2087,21 +2090,21 @@ STaskDbWrapper* taskDbOpenImpl(char* key, char* statePath, char* dbPath) { cfNames = NULL; } - stDebug("succ to init stream backend at %s, backend:%p", dbPath, pTaskDb); + stDebug("init s-task backend in:%s, backend:%p, %s", dbPath, pTaskDb, key); return pTaskDb; -_EXIT: +_EXIT: taskDbDestroy(pTaskDb, false); if (err) taosMemoryFree(err); if (cfNames) rocksdb_list_column_families_destroy(cfNames, nCf); return NULL; } -STaskDbWrapper* taskDbOpen(char* path, char* key, int64_t chkpId) { +STaskDbWrapper* taskDbOpen(const char* path, const char* key, int64_t chkptId) { char* statePath = NULL; char* dbPath = NULL; - if (rebuildDirFromChkp2(path, key, chkpId, &statePath, &dbPath) != 0) { + if (restoreCheckpointData(path, key, chkptId, &statePath, &dbPath) != 0) { return NULL; } diff --git a/source/libs/stream/src/streamCheckStatus.c b/source/libs/stream/src/streamCheckStatus.c index 4a8ca69ba5..11fecf7683 100644 --- a/source/libs/stream/src/streamCheckStatus.c +++ b/source/libs/stream/src/streamCheckStatus.c @@ -40,7 +40,7 @@ static SDownstreamStatusInfo* findCheckRspStatus(STaskCheckInfo* pInfo, int32_t int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage, int64_t* oldStage) { - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); ASSERT(pInfo != NULL); *oldStage = pInfo->stage; @@ -246,7 +246,7 @@ int32_t streamTaskProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* } int32_t streamTaskSendCheckRsp(const SStreamMeta* pMeta, int32_t vgId, SStreamTaskCheckRsp* pRsp, - SRpcHandleInfo* pRpcInfo, int32_t taskId) { + SRpcHandleInfo* pRpcInfo, int32_t taskId) { SEncoder encoder; int32_t code; int32_t len; @@ -285,7 +285,7 @@ int32_t streamTaskStartMonitorCheckRsp(SStreamTask* pTask) { streamTaskInitTaskCheckInfo(pInfo, &pTask->outputInfo, taosGetTimestampMs()); int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s start check-rsp monit, ref:%d ", pTask->id.idStr, ref); + stDebug("s-task:%s start check-rsp monitor, ref:%d ", pTask->id.idStr, ref); if (pInfo->checkRspTmr == NULL) { pInfo->checkRspTmr = taosTmrStart(rspMonitorFn, CHECK_RSP_CHECK_INTERVAL, pTask, streamTimer); diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 5a4e3a5439..94d2198e31 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -20,9 +20,9 @@ typedef struct { ECHECKPOINT_BACKUP_TYPE type; - char* taskId; - int64_t chkpId; + char* taskId; + int64_t chkpId; SStreamTask* pTask; int64_t dbRefId; void* pMeta; @@ -30,24 +30,40 @@ typedef struct { static int32_t downloadCheckpointDataByName(const char* id, const char* fname, const char* dstName); static int32_t deleteCheckpointFile(const char* id, const char* name); -static int32_t streamTaskBackupCheckpoint(const char* id, const char* path); +static int32_t streamTaskUploadCheckpoint(const char* id, const char* path); static int32_t deleteCheckpoint(const char* id); static int32_t downloadCheckpointByNameS3(const char* id, const char* fname, const char* dstName); +static int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask); +static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId); +static int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList); +static void checkpointTriggerMonitorFn(void* param, void* tmrId); -static int32_t streamAlignCheckpoint(SStreamTask* pTask) { - int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); - int64_t old = atomic_val_compare_exchange_32(&pTask->chkInfo.downstreamAlignNum, 0, num); - if (old == 0) { - stDebug("s-task:%s set initial align upstream num:%d", pTask->id.idStr, num); +static SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId); + +bool streamTaskIsAllUpstreamSendTrigger(SStreamTask* pTask) { + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + int32_t numOfUpstreams = taosArrayGetSize(pTask->upstreamInfo.pList); + bool allSend = true; + + taosThreadMutexLock(&pActiveInfo->lock); + int32_t numOfRecv = taosArrayGetSize(pActiveInfo->pReadyMsgList); + + if (numOfRecv < numOfUpstreams) { + stDebug("s-task:%s received checkpoint-trigger block, idx:%d, %d upstream tasks not send yet, total:%d", + pTask->id.idStr, pTask->info.selfChildId, (numOfUpstreams - numOfRecv), numOfUpstreams); + allSend = false; } - return atomic_sub_fetch_32(&pTask->chkInfo.downstreamAlignNum, 1); + taosThreadMutexUnlock(&pActiveInfo->lock); + return allSend; } -static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType) { +SStreamDataBlock* createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, + int32_t transId) { SStreamDataBlock* pChkpoint = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); if (pChkpoint == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; } pChkpoint->type = checkpointType; @@ -55,12 +71,13 @@ static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpoint SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); if (pBlock == NULL) { taosFreeQitem(pChkpoint); - return TSDB_CODE_OUT_OF_MEMORY; + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; } pBlock->info.type = STREAM_CHECKPOINT; - pBlock->info.version = pTask->chkInfo.checkpointingId; - pBlock->info.window.ekey = pBlock->info.window.skey = pTask->chkInfo.transId; // NOTE: set the transId + pBlock->info.version = checkpointId; + pBlock->info.window.ekey = pBlock->info.window.skey = transId; // NOTE: set the transId pBlock->info.rows = 1; pBlock->info.childId = pTask->info.selfChildId; @@ -68,7 +85,15 @@ static int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpoint taosArrayPush(pChkpoint->blocks, pBlock); taosMemoryFree(pBlock); - if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pChkpoint) < 0) { + terrno = 0; + + return pChkpoint; +} + +int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId) { + SStreamDataBlock* pCheckpoint = createChkptTriggerBlock(pTask, checkpointType, checkpointId, transId); + + if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pCheckpoint) < 0) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -84,23 +109,57 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo int32_t code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); ASSERT(code == TSDB_CODE_SUCCESS); - pTask->chkInfo.transId = pReq->transId; - pTask->chkInfo.checkpointingId = pReq->checkpointId; - pTask->chkInfo.numOfNotReady = streamTaskGetNumOfDownstream(pTask); + pTask->chkInfo.pActiveInfo->transId = pReq->transId; + pTask->chkInfo.pActiveInfo->activeId = pReq->checkpointId; pTask->chkInfo.startTs = taosGetTimestampMs(); pTask->execInfo.checkpoint += 1; // 2. Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task - return appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER); + // and this is the last item in the inputQ. + return appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER, pReq->checkpointId, pReq->transId); } -static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStreamTask* pTask) { +int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTriggerRsp* pRsp) { + ASSERT(pTask->info.taskLevel != TASK_LEVEL__SOURCE); + + if (pRsp->rspCode != TSDB_CODE_SUCCESS) { + stDebug("s-task:%s retrieve checkpoint-trgger rsp from upstream:0x%x invalid, code:%s", pTask->id.idStr, + pRsp->upstreamTaskId, tstrerror(pRsp->rspCode)); + return TSDB_CODE_SUCCESS; + } + + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER, pRsp->checkpointId, pRsp->transId); + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskSendCheckpointTriggerMsg(SStreamTask* pTask, int32_t dstTaskId, SRpcHandleInfo* pRpcInfo, int32_t code) { + SCheckpointTriggerRsp* pRsp = rpcMallocCont(sizeof(SCheckpointTriggerRsp)); + pRsp->streamId = pTask->id.streamId; + pRsp->upstreamTaskId = pTask->id.taskId; + pRsp->taskId = dstTaskId; + + if (code == TSDB_CODE_SUCCESS) { + pRsp->checkpointId = pTask->chkInfo.pActiveInfo->activeId; + pRsp->transId = pTask->chkInfo.pActiveInfo->transId; + } else { + pRsp->checkpointId = -1; + pRsp->transId = -1; + } + + pRsp->rspCode = code; + + SRpcMsg rspMsg = {.code = 0, .pCont = pRsp, .contLen = sizeof(SCheckpointTriggerRsp), .info = *pRpcInfo}; + tmsgSendRsp(&rspMsg); + return 0; +} + +int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStreamTask* pTask) { pBlock->srcTaskId = pTask->id.taskId; pBlock->srcVgId = pTask->pMeta->vgId; int32_t code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock); if (code == 0) { - ASSERT(pTask->chkInfo.dispatchCheckpointTrigger == false); + ASSERT(pTask->chkInfo.pActiveInfo->dispatchTrigger == false); streamDispatchStreamBlock(pTask); } else { stError("s-task:%s failed to put checkpoint into outputQ, code:%s", pTask->id.idStr, tstrerror(code)); @@ -110,22 +169,90 @@ static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStream return code; } -int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { +int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { SSDataBlock* pDataBlock = taosArrayGet(pBlock->blocks, 0); int64_t checkpointId = pDataBlock->info.version; int32_t transId = pDataBlock->info.window.skey; const char* id = pTask->id.idStr; int32_t code = TSDB_CODE_SUCCESS; int32_t vgId = pTask->pMeta->vgId; + int32_t taskLevel = pTask->info.taskLevel; - stDebug("s-task:%s vgId:%d start to handle the checkpoint block, checkpointId:%" PRId64 " ver:%" PRId64 - ", transId:%d current checkpointingId:%" PRId64, + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + + taosThreadMutexLock(&pTask->lock); + if (pTask->chkInfo.checkpointId > checkpointId) { + stError("s-task:%s vgId:%d current checkpointId:%" PRId64 + " recv expired checkpoint-trigger block, checkpointId:%" PRId64 " transId:%d, discard", + id, vgId, pTask->chkInfo.checkpointId, checkpointId, transId); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } + + if (pTask->chkInfo.checkpointId == checkpointId) { + { // send checkpoint-ready msg to upstream + SRpcMsg msg ={0}; + + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pBlock->srcTaskId); + initCheckpointReadyMsg(pTask, pInfo->nodeId, pBlock->srcTaskId, pInfo->childId, checkpointId, &msg); + tmsgSendReq(&pInfo->epSet, &msg); + } + + stWarn( + "s-task:%s vgId:%d recv already finished checkpoint msg, send checkpoint-ready to upstream:0x%x to resume the " + "interrupted checkpoint", + id, vgId, pBlock->srcTaskId); + + streamTaskOpenUpstreamInput(pTask, pBlock->srcTaskId); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } + + if (streamTaskGetStatus(pTask)->state == TASK_STATUS__CK) { + if (pActiveInfo->activeId != checkpointId) { + stError("s-task:%s vgId:%d active checkpointId:%" PRId64 ", recv invalid checkpoint-trigger checkpointId:%" PRId64 + " discard", + id, vgId, pActiveInfo->activeId, checkpointId); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } else { // checkpointId == pActiveInfo->activeId + if (pActiveInfo->allUpstreamTriggerRecv == 1) { + stDebug( + "s-task:%s vgId:%d all upstream checkpoint-trigger recv, discard this checkpoint-trigger, " + "checkpointId:%" PRId64 " transId:%d", + id, vgId, checkpointId, transId); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } + + if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { + // check if already recv or not, and duplicated checkpoint-trigger msg recv, discard it + for (int32_t i = 0; i < taosArrayGetSize(pActiveInfo->pReadyMsgList); ++i) { + STaskCheckpointReadyInfo* p = taosArrayGet(pActiveInfo->pReadyMsgList, i); + if (p->upstreamTaskId == pBlock->srcTaskId) { + ASSERT(p->checkpointId == checkpointId); + stWarn("s-task:%s repeatly recv checkpoint-source msg from task:0x%x vgId:%d, checkpointId:%" PRId64 + ", prev recvTs:%" PRId64 " discard", + pTask->id.idStr, p->upstreamTaskId, p->upstreamNodeId, p->checkpointId, p->recvTs); + + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; + } + } + } + } + } + + taosThreadMutexUnlock(&pTask->lock); + + stDebug("s-task:%s vgId:%d start to handle the checkpoint-trigger block, checkpointId:%" PRId64 " ver:%" PRId64 + ", transId:%d current active checkpointId:%" PRId64, id, vgId, pTask->chkInfo.checkpointId, pTask->chkInfo.checkpointVer, transId, checkpointId); // set task status if (streamTaskGetStatus(pTask)->state != TASK_STATUS__CK) { - pTask->chkInfo.checkpointingId = checkpointId; - pTask->chkInfo.transId = transId; + pActiveInfo->activeId = checkpointId; + pActiveInfo->transId = transId; code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_GEN_CHECKPOINT); if (code != TSDB_CODE_SUCCESS) { @@ -133,18 +260,27 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc streamFreeQitem((SStreamQueueItem*)pBlock); return code; } + + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s start checkpoint-trigger monitor in 10s, ref:%d ", pTask->id.idStr, ref); + streamMetaAcquireOneTask(pTask); + + if (pActiveInfo->pChkptTriggerTmr == NULL) { + pActiveInfo->pChkptTriggerTmr = taosTmrStart(checkpointTriggerMonitorFn, 100, pTask, streamTimer); + } else { + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr); + } } - // todo fix race condition: set the status and append checkpoint block - int32_t taskLevel = pTask->info.taskLevel; if (taskLevel == TASK_LEVEL__SOURCE) { int8_t type = pTask->outputInfo.type; + pActiveInfo->allUpstreamTriggerRecv = 1; + if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) { stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); - continueDispatchCheckpointBlock(pBlock, pTask); + continueDispatchCheckpointTriggerBlock(pBlock, pTask); } else { // only one task exists, no need to dispatch downstream info - atomic_add_fetch_32(&pTask->chkInfo.numOfNotReady, 1); - streamProcessCheckpointReadyMsg(pTask); + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pActiveInfo->activeId, pActiveInfo->transId); streamFreeQitem((SStreamQueueItem*)pBlock); } } else if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { @@ -158,31 +294,21 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc streamAddCheckpointReadyMsg(pTask, pBlock->srcTaskId, pTask->info.selfChildId, checkpointId); // there are still some upstream tasks not send checkpoint request, do nothing and wait for then - int32_t notReady = streamAlignCheckpoint(pTask); - int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); - if (notReady > 0) { - stDebug("s-task:%s received checkpoint block, idx:%d, %d upstream tasks not send checkpoint info yet, total:%d", - id, pTask->info.selfChildId, notReady, num); + if (pActiveInfo->allUpstreamTriggerRecv != 1) { streamFreeQitem((SStreamQueueItem*)pBlock); return code; } + int32_t num = streamTaskGetNumOfUpstream(pTask); if (taskLevel == TASK_LEVEL__SINK) { - stDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, send ready msg to upstream", - id, num); + stDebug("s-task:%s process checkpoint-trigger block, all %d upstreams sent, send ready msg to upstream", id, num); streamFreeQitem((SStreamQueueItem*)pBlock); streamTaskBuildCheckpoint(pTask); } else { // source & agg tasks need to forward the checkpoint msg downwards - stDebug("s-task:%s process checkpoint block, all %d upstreams sent checkpoint msgs, continue forwards msg", id, - num); - - // set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task - // can start local checkpoint procedure - pTask->chkInfo.numOfNotReady = streamTaskGetNumOfDownstream(pTask); - + stDebug("s-task:%s process checkpoint-trigger block, all %d upstreams sent, forwards to downstream", id, num); // Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task // already. And then, dispatch check point msg to all downstream tasks - code = continueDispatchCheckpointBlock(pBlock, pTask); + code = continueDispatchCheckpointTriggerBlock(pBlock, pTask); } } @@ -193,115 +319,210 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc * All down stream tasks have successfully completed the check point task. * Current stream task is allowed to start to do checkpoint things in ASYNC model. */ -int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { +int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId, int32_t downstreamNodeId, + int32_t downstreamTaskId) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG); + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + + const char* id = pTask->id.idStr; + bool received = false; + int32_t total = streamTaskGetNumOfDownstream(pTask); + ASSERT(total > 0); + + // 1. not in checkpoint status now + SStreamTaskState* pStat = streamTaskGetStatus(pTask); + if (pStat->state != TASK_STATUS__CK) { + stError("s-task:%s status:%s discard checkpoint-ready msg from task:0x%x", id, pStat->name, downstreamTaskId); + return TSDB_CODE_STREAM_TASK_IVLD_STATUS; + } + + // 2. expired checkpoint-ready msg, invalid checkpoint-ready msg + if (pTask->chkInfo.checkpointId > checkpointId || pInfo->activeId != checkpointId) { + stError("s-task:%s status:%s checkpointId:%" PRId64 " new arrival checkpoint-ready msg (checkpointId:%" PRId64 + ") from task:0x%x, expired and discard ", + id, pStat->name, pTask->chkInfo.checkpointId, checkpointId, downstreamTaskId); + return -1; + } + + taosThreadMutexLock(&pInfo->lock); // only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task - int32_t notReady = atomic_sub_fetch_32(&pTask->chkInfo.numOfNotReady, 1); - ASSERT(notReady >= 0); + int32_t size = taosArrayGetSize(pInfo->pCheckpointReadyRecvList); + for (int32_t i = 0; i < size; ++i) { + STaskDownstreamReadyInfo* p = taosArrayGet(pInfo->pCheckpointReadyRecvList, i); + if (p->downstreamTaskId == downstreamTaskId) { + received = true; + break; + } + } + + if (received) { + stDebug("s-task:%s already recv checkpoint-ready msg from downstream:0x%x, ignore. %d/%d downstream not ready", id, + downstreamTaskId, (int32_t)(total - taosArrayGetSize(pInfo->pCheckpointReadyRecvList)), total); + } else { + STaskDownstreamReadyInfo info = {.recvTs = taosGetTimestampMs(), + .downstreamTaskId = downstreamTaskId, + .checkpointId = pInfo->activeId, + .transId = pInfo->transId, + .streamId = pTask->id.streamId, + .downstreamNodeId = downstreamNodeId}; + taosArrayPush(pInfo->pCheckpointReadyRecvList, &info); + } + + int32_t notReady = total - taosArrayGetSize(pInfo->pCheckpointReadyRecvList); + int32_t transId = pInfo->transId; + taosThreadMutexUnlock(&pInfo->lock); if (notReady == 0) { - stDebug("s-task:%s all downstream tasks have completed the checkpoint, start to do checkpoint for current task", - pTask->id.idStr); - appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT); - } else { - int32_t total = streamTaskGetNumOfDownstream(pTask); - stDebug("s-task:%s %d/%d downstream tasks are not ready, wait", pTask->id.idStr, notReady, total); + stDebug("s-task:%s all downstream task(s) have completed build checkpoint, start to do checkpoint for current task", id); + appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, checkpointId, transId); } return 0; } -void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { - pTask->chkInfo.checkpointingId = 0; // clear the checkpoint id - pTask->chkInfo.failedId = 0; - pTask->chkInfo.startTs = 0; // clear the recorded start time - pTask->chkInfo.numOfNotReady = 0; - pTask->chkInfo.transId = 0; - pTask->chkInfo.dispatchCheckpointTrigger = false; - pTask->chkInfo.downstreamAlignNum = 0; +int32_t streamTaskProcessCheckpointReadyRsp(SStreamTask* pTask, int32_t upstreamTaskId, int64_t checkpointId) { + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + int64_t now = taosGetTimestampMs(); + int32_t numOfConfirmed = 0; + taosThreadMutexLock(&pInfo->lock); + for(int32_t i = 0; i < taosArrayGetSize(pInfo->pReadyMsgList); ++i) { + STaskCheckpointReadyInfo* pReadyInfo = taosArrayGet(pInfo->pReadyMsgList, i); + if (pReadyInfo->upstreamTaskId == upstreamTaskId && pReadyInfo->checkpointId == checkpointId) { + pReadyInfo->sendCompleted = 1; + stDebug("s-task:%s send checkpoint-ready msg to upstream:0x%x confirmed, checkpointId:%" PRId64 " ts:%" PRId64, + pTask->id.idStr, upstreamTaskId, checkpointId, now); + break; + } + } + + for(int32_t i = 0; i < taosArrayGetSize(pInfo->pReadyMsgList); ++i) { + STaskCheckpointReadyInfo* pReadyInfo = taosArrayGet(pInfo->pReadyMsgList, i); + if (pReadyInfo->sendCompleted == 1) { + numOfConfirmed += 1; + } + } + + stDebug("s-task:%s send checkpoint-ready msg to %d upstream confirmed, checkpointId:%" PRId64, pTask->id.idStr, + numOfConfirmed, checkpointId); + + taosThreadMutexUnlock(&pInfo->lock); + return TSDB_CODE_SUCCESS; +} + +void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { + pTask->chkInfo.startTs = 0; // clear the recorded start time + + streamTaskClearActiveInfo(pTask->chkInfo.pActiveInfo); streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks if (clearChkpReadyMsg) { streamClearChkptReadyMsg(pTask); } } -int32_t streamSaveTaskCheckpointInfo(SStreamTask* p, int64_t checkpointId) { - SStreamMeta* pMeta = p->pMeta; +int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, SVUpdateCheckpointInfoReq* pReq) { + SStreamMeta* pMeta = pTask->pMeta; int32_t vgId = pMeta->vgId; - const char* id = p->id.idStr; int32_t code = 0; - SCheckpointInfo* pCKInfo = &p->chkInfo; + const char* id = pTask->id.idStr; + SCheckpointInfo* pInfo = &pTask->chkInfo; - // fill-history task, rsma task, and sink task will not generate the checkpoint - if ((p->info.fillHistory == 1) || (p->info.taskLevel > TASK_LEVEL__SINK)) { - return code; + taosThreadMutexLock(&pTask->lock); + + if (pReq->checkpointId <= pInfo->checkpointId) { + stDebug("s-task:%s vgId:%d latest checkpointId:%" PRId64 " checkpointVer:%" PRId64 + " no need to update the checkpoint info, updated checkpointId:%" PRId64 " checkpointVer:%" PRId64 " ignored", + id, vgId, pInfo->checkpointId, pInfo->checkpointVer, pReq->checkpointId, pReq->checkpointVer); + taosThreadMutexUnlock(&pTask->lock); + + { // destroy the related fill-history tasks + // drop task should not in the meta-lock, and drop the related fill-history task now + streamMetaWUnLock(pMeta); + if (pReq->dropRelHTask) { + streamMetaUnregisterTask(pMeta, pReq->hStreamId, pReq->hTaskId); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + stDebug("s-task:%s vgId:%d related fill-history task:0x%x dropped in update checkpointInfo, remain tasks:%d", + id, vgId, pReq->taskId, numOfTasks); + } + + streamMetaWLock(pMeta); + } + + return TSDB_CODE_SUCCESS; } - taosThreadMutexLock(&p->lock); + SStreamTaskState* pStatus = streamTaskGetStatus(pTask); - SStreamTaskState* pStatus = streamTaskGetStatus(p); - ETaskStatus prevStatus = pStatus->state; + stDebug("s-task:%s vgId:%d status:%s start to update the checkpoint info, checkpointId:%" PRId64 "->%" PRId64 + " checkpointVer:%" PRId64 "->%" PRId64 " checkpointTs:%" PRId64 "->%" PRId64, + id, vgId, pStatus->name, pInfo->checkpointId, pReq->checkpointId, pInfo->checkpointVer, pReq->checkpointVer, + pInfo->checkpointTime, pReq->checkpointTs); - if (pStatus->state == TASK_STATUS__CK) { - ASSERT(pCKInfo->checkpointId <= pCKInfo->checkpointingId && pCKInfo->checkpointingId == checkpointId && - pCKInfo->checkpointVer <= pCKInfo->processedVer); + if (pStatus->state != TASK_STATUS__DROPPING) { + ASSERT(pInfo->checkpointId <= pReq->checkpointId && pInfo->checkpointVer <= pReq->checkpointVer); - pCKInfo->checkpointId = pCKInfo->checkpointingId; - pCKInfo->checkpointVer = pCKInfo->processedVer; - pCKInfo->checkpointTime = pCKInfo->startTs; + pInfo->checkpointId = pReq->checkpointId; + pInfo->checkpointVer = pReq->checkpointVer; + pInfo->checkpointTime = pReq->checkpointTs; - streamTaskClearCheckInfo(p, false); - taosThreadMutexUnlock(&p->lock); + streamTaskClearCheckInfo(pTask, false); - code = streamTaskHandleEvent(p->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + // todo handle error + if (pStatus->state == TASK_STATUS__CK) { + code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_CHECKPOINT_DONE); + } else { + stDebug("s-task:0x%x vgId:%d not handle checkpoint-done event, status:%s", pReq->taskId, vgId, pStatus->name); + } } else { - stDebug("s-task:%s vgId:%d status:%s not keep the checkpoint metaInfo, checkpoint:%" PRId64 " failed", id, vgId, - pStatus->name, pCKInfo->checkpointingId); - taosThreadMutexUnlock(&p->lock); + stDebug("s-task:0x%x vgId:%d status:%s not update checkpoint info, checkpointId:%" PRId64 "->%" PRId64 " failed", + pReq->taskId, vgId, pStatus->name, pInfo->checkpointId, pReq->checkpointId); + taosThreadMutexUnlock(&pTask->lock); return TSDB_CODE_STREAM_TASK_IVLD_STATUS; } + if (pReq->dropRelHTask) { + stDebug("s-task:0x%x vgId:%d drop the related fill-history task:0x%" PRIx64 " after update checkpoint", + pReq->taskId, vgId, pReq->hTaskId); + CLEAR_RELATED_FILLHISTORY_TASK(pTask); + } + + stDebug("s-task:0x%x set the persistent status attr to be ready, prev:%s, status in sm:%s", pReq->taskId, + streamTaskGetStatusStr(pTask->status.taskStatus), streamTaskGetStatus(pTask)->name); + + pTask->status.taskStatus = TASK_STATUS__READY; + + code = streamMetaSaveTask(pMeta, pTask); if (code != TSDB_CODE_SUCCESS) { - stDebug("s-task:%s vgId:%d handle event:checkpoint-done failed", id, vgId); + stError("s-task:%s vgId:%d failed to save task info after do checkpoint, checkpointId:%" PRId64 ", since %s", id, + vgId, pReq->checkpointId, terrstr()); return code; } - stDebug("vgId:%d s-task:%s level:%d open upstream inputQ, save status after checkpoint, checkpointId:%" PRId64 - ", Ver(saved):%" PRId64 " currentVer:%" PRId64 ", status: ready, prev:%s", - vgId, id, p->info.taskLevel, checkpointId, pCKInfo->checkpointVer, pCKInfo->nextProcessVer, - streamTaskGetStatusStr(prevStatus)); + taosThreadMutexUnlock(&pTask->lock); + streamMetaWUnLock(pMeta); - // save the task if not sink task - if (p->info.taskLevel <= TASK_LEVEL__SINK) { - streamMetaWLock(pMeta); - - code = streamMetaSaveTask(pMeta, p); - if (code != TSDB_CODE_SUCCESS) { - streamMetaWUnLock(pMeta); - stError("s-task:%s vgId:%d failed to save task info after do checkpoint, checkpointId:%" PRId64 ", since %s", id, - vgId, checkpointId, terrstr()); - return code; - } - - code = streamMetaCommit(pMeta); - if (code != TSDB_CODE_SUCCESS) { - stError("s-task:%s vgId:%d failed to commit stream meta after do checkpoint, checkpointId:%" PRId64 ", since %s", - id, vgId, checkpointId, terrstr()); - } - - streamMetaWUnLock(pMeta); + // drop task should not in the meta-lock, and drop the related fill-history task now + if (pReq->dropRelHTask) { + streamMetaUnregisterTask(pMeta, pReq->hStreamId, pReq->hTaskId); + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + stDebug("s-task:%s vgId:%d related fill-history task:0x%x dropped, remain tasks:%d", id, vgId, pReq->taskId, numOfTasks); } - return code; + streamMetaWLock(pMeta); + + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + + return TSDB_CODE_SUCCESS; } void streamTaskSetFailedCheckpointId(SStreamTask* pTask) { - pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId; + pTask->chkInfo.pActiveInfo->failedId = pTask->chkInfo.pActiveInfo->activeId; stDebug("s-task:%s mark the checkpointId:%" PRId64 " (transId:%d) failed", pTask->id.idStr, - pTask->chkInfo.checkpointingId, pTask->chkInfo.transId); + pTask->chkInfo.pActiveInfo->activeId, pTask->chkInfo.pActiveInfo->transId); } static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* list) { @@ -310,7 +531,7 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l int32_t code = downloadCheckpointDataByName(id, "META", file); if (code != 0) { - stDebug("chkp failed to download meta file:%s", file); + stDebug("%s chkp failed to download meta file:%s", id, file); taosMemoryFree(file); return code; } @@ -334,6 +555,7 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l } } } + taosCloseFile(&pFile); taosRemoveFile(file); taosMemoryFree(file); @@ -341,58 +563,75 @@ static int32_t getCheckpointDataMeta(const char* id, const char* path, SArray* l } int32_t uploadCheckpointData(void* param) { - SAsyncUploadArg* arg = param; + SAsyncUploadArg* pParam = param; char* path = NULL; int32_t code = 0; - SArray* toDelFiles = taosArrayInit(4, sizeof(void*)); - char* taskStr = arg->taskId ? arg->taskId : "NULL"; + SArray* toDelFiles = taosArrayInit(4, POINTER_BYTES); + char* taskStr = pParam->taskId ? pParam->taskId : "NULL"; - void* pBackend = taskAcquireDb(arg->dbRefId); + void* pBackend = taskAcquireDb(pParam->dbRefId); if (pBackend == NULL) { stError("s-task:%s failed to acquire db", taskStr); - taosMemoryFree(arg->taskId); - taosMemoryFree(arg); + taosMemoryFree(pParam->taskId); + taosMemoryFree(pParam); return -1; } - if ((code = taskDbGenChkpUploadData(arg->pTask->pBackend, ((SStreamMeta*)arg->pMeta)->bkdChkptMgt, arg->chkpId, - (int8_t)(arg->type), &path, toDelFiles)) != 0) { - stError("s-task:%s failed to gen upload checkpoint:%" PRId64 "", taskStr, arg->chkpId); + if ((code = taskDbGenChkpUploadData(pParam->pTask->pBackend, ((SStreamMeta*)pParam->pMeta)->bkdChkptMgt, + pParam->chkpId, (int8_t)(pParam->type), &path, toDelFiles)) != 0) { + stError("s-task:%s failed to gen upload checkpoint:%" PRId64, taskStr, pParam->chkpId); } - if (arg->type == DATA_UPLOAD_S3) { - if (code == 0 && (code = getCheckpointDataMeta(arg->taskId, path, toDelFiles)) != 0) { - stError("s-task:%s failed to get checkpointId:%" PRId64 " meta", taskStr, arg->chkpId); + if (pParam->type == DATA_UPLOAD_S3) { + if (code == 0 && (code = getCheckpointDataMeta(pParam->taskId, path, toDelFiles)) != 0) { + stError("s-task:%s failed to get checkpointData for checkpointId:%" PRId64 " meta", taskStr, pParam->chkpId); } } - if (code == 0 && (code = streamTaskBackupCheckpoint(arg->taskId, path)) != 0) { - stError("s-task:%s failed to upload checkpointId:%" PRId64, taskStr, arg->chkpId); + if (code == TSDB_CODE_SUCCESS) { + code = streamTaskUploadCheckpoint(pParam->taskId, path); + if (code == TSDB_CODE_SUCCESS) { + stDebug("s-task:%s upload checkpointId:%" PRId64 " to remote succ", taskStr, pParam->chkpId); + } else { + stError("s-task:%s failed to upload checkpointId:%" PRId64 " data:%s", taskStr, pParam->chkpId, path); + } } - taskReleaseDb(arg->dbRefId); + taskReleaseDb(pParam->dbRefId); if (code == 0) { - for (int i = 0; i < taosArrayGetSize(toDelFiles); i++) { - char* p = taosArrayGetP(toDelFiles, i); - code = deleteCheckpointFile(arg->taskId, p); - stDebug("s-task:%s try to del file: %s", taskStr, p); + int32_t size = taosArrayGetSize(toDelFiles); + stDebug("s-task:%s remove redundant %d files", taskStr, size); + + for (int i = 0; i < size; i++) { + char* pName = taosArrayGetP(toDelFiles, i); + code = deleteCheckpointFile(pParam->taskId, pName); if (code != 0) { + stDebug("s-task:%s failed to del file: %s", taskStr, pName); break; } } + + stDebug("s-task:%s remove redundant files done", taskStr); } taosArrayDestroyP(toDelFiles, taosMemoryFree); - taosRemoveDir(path); + + if (code == TSDB_CODE_SUCCESS) { + stDebug("s-task:%s remove local checkpointId:%" PRId64 " data %s", taskStr, pParam->chkpId, path); + taosRemoveDir(path); + } else { + stDebug("s-task:%s update checkpointId:%" PRId64 " keep local checkpoint data", taskStr, pParam->chkpId); + } + taosMemoryFree(path); - taosMemoryFree(arg->taskId); - taosMemoryFree(arg); + taosMemoryFree(pParam->taskId); + taosMemoryFree(pParam); return code; } -int32_t streamTaskRemoteBackupCheckpoint(SStreamTask* pTask, int64_t chkpId, char* taskId) { +int32_t streamTaskRemoteBackupCheckpoint(SStreamTask* pTask, int64_t checkpointId, char* taskId) { ECHECKPOINT_BACKUP_TYPE type = streamGetCheckpointBackupType(); if (type == DATA_UPLOAD_DISABLE) { return 0; @@ -405,7 +644,7 @@ int32_t streamTaskRemoteBackupCheckpoint(SStreamTask* pTask, int64_t chkpId, cha SAsyncUploadArg* arg = taosMemoryCalloc(1, sizeof(SAsyncUploadArg)); arg->type = type; arg->taskId = taosStrdup(taskId); - arg->chkpId = chkpId; + arg->chkpId = checkpointId; arg->pTask = pTask; arg->dbRefId = taskGetDBRef(pTask->pBackend); arg->pMeta = pTask->pMeta; @@ -416,7 +655,7 @@ int32_t streamTaskRemoteBackupCheckpoint(SStreamTask* pTask, int64_t chkpId, cha int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { int32_t code = TSDB_CODE_SUCCESS; int64_t startTs = pTask->chkInfo.startTs; - int64_t ckId = pTask->chkInfo.checkpointingId; + int64_t ckId = pTask->chkInfo.pActiveInfo->activeId; const char* id = pTask->id.idStr; bool dropRelHTask = (streamTaskGetPrevStatus(pTask) == TASK_STATUS__HALT); SStreamMeta* pMeta = pTask->pMeta; @@ -446,9 +685,11 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { } } - // clear the checkpoint info, and commit the newest checkpoint info if all works are done successfully - if (code == TSDB_CODE_SUCCESS) { - code = streamSaveTaskCheckpointInfo(pTask, ckId); + // update the latest checkpoint info if all works are done successfully, for rsma, the pMsgCb is null. + if (code == TSDB_CODE_SUCCESS && (pTask->pMsgCb != NULL)) { + STaskId* pHTaskId = &pTask->hTaskInfo.id; + code = streamBuildAndSendCheckpointUpdateMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id, pHTaskId, &pTask->chkInfo, + dropRelHTask); if (code == TSDB_CODE_SUCCESS) { code = streamTaskRemoteBackupCheckpoint(pTask, ckId, (char*)id); if (code != TSDB_CODE_SUCCESS) { @@ -459,22 +700,6 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { } } - if ((code == TSDB_CODE_SUCCESS) && dropRelHTask) { - // transferred from the halt status, it is done the fill-history procedure and finish with the checkpoint - // free it and remove fill-history task from disk meta-store - taosThreadMutexLock(&pTask->lock); - if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { - SStreamTaskId hTaskId = {.streamId = pTask->hTaskInfo.id.streamId, .taskId = pTask->hTaskInfo.id.taskId}; - - stDebug("s-task:%s fill-history finish checkpoint done, drop related fill-history task:0x%x", id, hTaskId.taskId); - streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &hTaskId, 1); - } else { - stWarn("s-task:%s related fill-history task:0x%x is erased", id, (int32_t)pTask->hTaskInfo.id.taskId); - } - - taosThreadMutexUnlock(&pTask->lock); - } - // clear the checkpoint info if failed if (code != TSDB_CODE_SUCCESS) { taosThreadMutexLock(&pTask->lock); @@ -488,12 +713,260 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { double el = (taosGetTimestampMs() - startTs) / 1000.0; stInfo("s-task:%s vgId:%d level:%d, checkpointId:%" PRId64 " ver:%" PRId64 " elapsed time:%.2f Sec, %s ", id, - pTask->pMeta->vgId, pTask->info.taskLevel, ckId, pTask->chkInfo.checkpointVer, el, + pMeta->vgId, pTask->info.taskLevel, ckId, pTask->chkInfo.checkpointVer, el, (code == TSDB_CODE_SUCCESS) ? "succ" : "failed"); return code; } +void checkpointTriggerMonitorFn(void* param, void* tmrId) { + SStreamTask* pTask = param; + int32_t vgId = pTask->pMeta->vgId; + int64_t now = taosGetTimestampMs(); + const char* id = pTask->id.idStr; + + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + + // check the status every 100ms + if (streamTaskShouldStop(pTask)) { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s vgId:%d quit from monitor checkpoint-trigger, ref:%d", id, vgId, ref); + streamMetaReleaseTask(pTask->pMeta, pTask); + return; + } + + if (++pActiveInfo->checkCounter < 100) { + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr); + return; + } + + pActiveInfo->checkCounter = 0; + stDebug("s-task:%s vgId:%d checkpoint-trigger monitor in tmr, ts:%" PRId64, pTask->id.idStr, vgId, now); + + taosThreadMutexLock(&pTask->lock); + SStreamTaskState* pState = streamTaskGetStatus(pTask); + if (pState->state != TASK_STATUS__CK) { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s vgId:%d not in checkpoint status, quit from monitor checkpoint-trigger, ref:%d", pTask->id.idStr, + vgId, ref); + + taosThreadMutexUnlock(&pTask->lock); + streamMetaReleaseTask(pTask->pMeta, pTask); + return; + } + taosThreadMutexUnlock(&pTask->lock); + + taosThreadMutexLock(&pActiveInfo->lock); + + // send msg to retrieve checkpoint trigger msg + SArray* pList = pTask->upstreamInfo.pList; + ASSERT(pTask->info.taskLevel > TASK_LEVEL__SOURCE); + SArray* pNotSendList = taosArrayInit(4, sizeof(SStreamUpstreamEpInfo)); + + for(int32_t i = 0; i < taosArrayGetSize(pList); ++i) { + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pList, i); + + bool recved = false; + for(int32_t j = 0; j < taosArrayGetSize(pActiveInfo->pReadyMsgList); ++j) { + STaskCheckpointReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, j); + if (pInfo->nodeId == pReady->upstreamNodeId) { + recved = true; + break; + } + } + + if (!recved) { // make sure the inputQ is opened for not recv upstream checkpoint-trigger message + streamTaskOpenUpstreamInput(pTask, pInfo->taskId); + taosArrayPush(pNotSendList, pInfo); + } + } + + // do send retrieve checkpoint trigger msg to upstream + int32_t size = taosArrayGetSize(pNotSendList); + doSendRetrieveTriggerMsg(pTask, pNotSendList); + taosThreadMutexUnlock(&pActiveInfo->lock); + + // check every 100ms + if (size > 0) { + stDebug("s-task:%s start to monitor checkpoint-trigger in 10s", id); + taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr); + } else { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s all checkpoint-trigger recved, quit from monitor checkpoint-trigger tmr, ref:%d", id, ref); + streamMetaReleaseTask(pTask->pMeta, pTask); + } + + taosArrayDestroy(pNotSendList); +} + +int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList) { + int32_t code = 0; + int32_t vgId = pTask->pMeta->vgId; + const char* pId = pTask->id.idStr; + int32_t size = taosArrayGetSize(pNotSendList); + int32_t numOfUpstream = streamTaskGetNumOfUpstream(pTask); + + if (size <= 0) { + stDebug("s-task:%s all upstream checkpoint trigger recved, no need to send retrieve", pId); + return code; + } + + stDebug("s-task:%s %d/%d not recv checkpoint-trigger from upstream(s), start to send trigger-retrieve", pId, size, + numOfUpstream); + + for (int32_t i = 0; i < size; i++) { + SStreamUpstreamEpInfo* pUpstreamTask = taosArrayGet(pNotSendList, i); + + SRetrieveChkptTriggerReq* pReq = rpcMallocCont(sizeof(SRetrieveChkptTriggerReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + stError("vgId:%d failed to create msg to retrieve trigger msg for task:%s exec, code:out of memory", vgId, pId); + continue; + } + + pReq->head.vgId = htonl(pUpstreamTask->nodeId); + pReq->streamId = pTask->id.streamId; + pReq->downstreamTaskId = pTask->id.taskId; + pReq->downstreamNodeId = vgId; + pReq->upstreamTaskId = pUpstreamTask->taskId; + pReq->upstreamNodeId = pUpstreamTask->nodeId; + pReq->checkpointId = pTask->chkInfo.pActiveInfo->activeId; + + + SRpcMsg rpcMsg = {0}; + initRpcMsg(&rpcMsg, TDMT_STREAM_RETRIEVE_TRIGGER, pReq, sizeof(SRetrieveChkptTriggerReq)); + + code = tmsgSendReq(&pUpstreamTask->epSet, &rpcMsg); + stDebug("s-task:%s vgId:%d send checkpoint-trigger retrieve msg to 0x%x(vgId:%d) checkpointId:%" PRId64, pId, vgId, + pUpstreamTask->taskId, pUpstreamTask->nodeId, pReq->checkpointId); + } + + return TSDB_CODE_SUCCESS; +} + +bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId) { + int64_t now = taosGetTimestampMs(); + const char* id = pTask->id.idStr; + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + SStreamTaskState* pStatus = streamTaskGetStatus(pTask); + + if (pStatus->state != TASK_STATUS__CK) { + return false; + } + + taosThreadMutexLock(&pInfo->lock); + if (!pInfo->dispatchTrigger) { + taosThreadMutexUnlock(&pInfo->lock); + return false; + } + + for(int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { + STaskTriggerSendInfo* pSendInfo = taosArrayGet(pInfo->pDispatchTriggerList, i); + if (pSendInfo->nodeId != downstreamNodeId) { + continue; + } + + // has send trigger msg to downstream node, + double before = (now - pSendInfo->sendTs) / 1000.0; + if (pSendInfo->recved) { + stWarn("s-task:%s checkpoint-trigger msg already send at:%" PRId64 + "(%.2fs before) and recv confirmed by downstream:0x%x, checkpointId:%" PRId64 ", transId:%d", + id, pSendInfo->sendTs, before, pSendInfo->taskId, pInfo->activeId, pInfo->transId); + } else { + stWarn("s-task:%s checkpoint-trigger already send at:%" PRId64 "(%.2fs before), checkpointId:%" PRId64 + ", transId:%d", + id, pSendInfo->sendTs, before, pInfo->activeId, pInfo->transId); + } + + taosThreadMutexUnlock(&pInfo->lock); + return true; + } + + ASSERT(0); + return false; +} + +void streamTaskGetTriggerRecvStatus(SStreamTask* pTask, int32_t* pRecved, int32_t* pTotal) { + *pRecved = taosArrayGetSize(pTask->chkInfo.pActiveInfo->pReadyMsgList); + + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + *pTotal = 1; + } else { + *pTotal = streamTaskGetNumOfUpstream(pTask); + } +} + +// record the dispatch checkpoint trigger info in the list +void streamTaskInitTriggerDispatchInfo(SStreamTask* pTask) { + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + + int64_t now = taosGetTimestampMs(); + taosThreadMutexLock(&pInfo->lock); + + // outputQ should be empty here + ASSERT(streamQueueGetNumOfUnAccessedItems(pTask->outputq.queue) == 0); + + pInfo->dispatchTrigger = true; + if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { + STaskDispatcherFixed* pDispatch = &pTask->outputInfo.fixedDispatcher; + + STaskTriggerSendInfo p = {.sendTs = now, .recved = false, .nodeId = pDispatch->nodeId, .taskId = pDispatch->taskId}; + taosArrayPush(pInfo->pDispatchTriggerList, &p); + } else { + for (int32_t i = 0; i < streamTaskGetNumOfDownstream(pTask); ++i) { + SVgroupInfo* pVgInfo = taosArrayGet(pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos, i); + + STaskTriggerSendInfo p = {.sendTs = now, .recved = false, .nodeId = pVgInfo->vgId, .taskId = pVgInfo->taskId}; + taosArrayPush(pInfo->pDispatchTriggerList, &p); + } + } + + taosThreadMutexUnlock(&pInfo->lock); +} + +int32_t streamTaskGetNumOfConfirmed(SStreamTask* pTask) { + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + + int32_t num = 0; + taosThreadMutexLock(&pInfo->lock); + for(int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { + STaskTriggerSendInfo* p = taosArrayGet(pInfo->pDispatchTriggerList, i); + if (p->recved) { + num ++; + } + } + taosThreadMutexUnlock(&pInfo->lock); + return num; +} + +void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId) { + SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; + + int32_t taskId = 0; + taosThreadMutexLock(&pInfo->lock); + + for (int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { + STaskTriggerSendInfo* p = taosArrayGet(pInfo->pDispatchTriggerList, i); + if (p->nodeId == vgId) { + ASSERT(p->recved == false); + + p->recved = true; + p->recvTs = taosGetTimestampMs(); + taskId = p->taskId; + break; + } + } + + taosThreadMutexUnlock(&pInfo->lock); + + int32_t numOfConfirmed = streamTaskGetNumOfConfirmed(pTask); + int32_t total = streamTaskGetNumOfDownstream(pTask); + stDebug("s-task:%s set downstream:0x%x(vgId:%d) checkpoint-trigger dispatch confirmed, total confirmed:%d/%d", + pTask->id.idStr, taskId, vgId, numOfConfirmed, total); + + ASSERT(taskId != 0); +} + static int32_t uploadCheckpointToS3(const char* id, const char* path) { TdDirPtr pDir = taosOpenDir(path); if (pDir == NULL) return -1; @@ -553,14 +1026,14 @@ ECHECKPOINT_BACKUP_TYPE streamGetCheckpointBackupType() { } } -int32_t streamTaskBackupCheckpoint(const char* id, const char* path) { +int32_t streamTaskUploadCheckpoint(const char* id, const char* path) { if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) { - stError("streamTaskBackupCheckpoint parameters invalid"); + stError("invalid parameters in upload checkpoint, %s", id); return -1; } if (strlen(tsSnodeAddress) != 0) { - return uploadRsync(id, path); + return uploadByRsync(id, path); } else if (tsS3StreamEnabled) { return uploadCheckpointToS3(id, path); } @@ -571,7 +1044,7 @@ int32_t streamTaskBackupCheckpoint(const char* id, const char* path) { // fileName: CURRENT int32_t downloadCheckpointDataByName(const char* id, const char* fname, const char* dstName) { if (id == NULL || fname == NULL || strlen(id) == 0 || strlen(fname) == 0 || strlen(fname) >= PATH_MAX) { - stError("uploadCheckpointByName parameters invalid"); + stError("down load checkpoint data parameters invalid"); return -1; } @@ -584,9 +1057,9 @@ int32_t downloadCheckpointDataByName(const char* id, const char* fname, const ch return 0; } -int32_t streamTaskDownloadCheckpointData(char* id, char* path) { +int32_t streamTaskDownloadCheckpointData(const char* id, char* path) { if (id == NULL || path == NULL || strlen(id) == 0 || strlen(path) == 0 || strlen(path) >= PATH_MAX) { - stError("streamTaskDownloadCheckpointData parameters invalid"); + stError("down checkpoint data parameters invalid"); return -1; } @@ -615,6 +1088,7 @@ int32_t deleteCheckpoint(const char* id) { int32_t deleteCheckpointFile(const char* id, const char* name) { char object[128] = {0}; snprintf(object, sizeof(object), "%s/%s", id, name); + char* tmp = object; s3DeleteObjects((const char**)&tmp, 1); return 0; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index cc9105ffdc..2e776313e0 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -23,12 +23,6 @@ typedef struct SBlockName { char parTbName[TSDB_TABLE_NAME_LEN]; } SBlockName; -typedef struct { - int32_t upStreamTaskId; - SEpSet upstreamNodeEpset; - SRpcMsg msg; -} SStreamChkptReadyInfo; - static void doRetryDispatchData(void* param, void* tmrId); static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet); static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq); @@ -85,12 +79,14 @@ int32_t streamTaskBroadcastRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* r void* buf = NULL; int32_t sz = taosArrayGetSize(pTask->upstreamInfo.pList); ASSERT(sz > 0); + for (int32_t i = 0; i < sz; i++) { req->reqId = tGenIdPI64(); - SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pEpInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); req->dstNodeId = pEpInfo->nodeId; req->dstTaskId = pEpInfo->taskId; int32_t len; + tEncodeSize(tEncodeStreamRetrieveReq, req, len, code); if (code != 0) { ASSERT(0); @@ -115,7 +111,6 @@ int32_t streamTaskBroadcastRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* r code = tmsgSendReq(&pEpInfo->epSet, &rpcMsg); if (code != 0) { - ASSERT(0); rpcFreeCont(buf); return code; } @@ -124,15 +119,16 @@ int32_t streamTaskBroadcastRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* r stDebug("s-task:%s (child %d) send retrieve req to task:0x%x (vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req->reqId); } + return code; } static int32_t buildStreamRetrieveReq(SStreamTask* pTask, const SSDataBlock* pBlock, SStreamRetrieveReq* req){ - SRetrieveTableRsp* pRetrieve = NULL; int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock); - - pRetrieve = taosMemoryCalloc(1, dataStrLen); - if (pRetrieve == NULL) return TSDB_CODE_OUT_OF_MEMORY; + SRetrieveTableRsp* pRetrieve = taosMemoryCalloc(1, dataStrLen); + if (pRetrieve == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); pRetrieve->useconds = 0; @@ -341,7 +337,7 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch SEpSet* pEpSet = &pTask->outputInfo.fixedDispatcher.epSet; int32_t downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; - stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), id:%d", id, + stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), msgId:%d", id, pTask->info.selfChildId, 1, downstreamTaskId, vgId, msgId); code = doSendDispatchMsg(pTask, pDispatchMsg, vgId, pEpSet); @@ -357,8 +353,8 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch for (int32_t i = 0; i < numOfVgroups; i++) { if (pDispatchMsg[i].blockNum > 0) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - stDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, - pTask->info.selfChildId, pDispatchMsg[i].blockNum, pVgInfo->vgId); + stDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", id, pTask->info.selfChildId, + pDispatchMsg[i].blockNum, pVgInfo->vgId); code = doSendDispatchMsg(pTask, &pDispatchMsg[i], pVgInfo->vgId, &pVgInfo->epSet); if (code < 0) { @@ -372,8 +368,7 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch } } - stDebug("s-task:%s complete shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, numOfVgroups, - msgId); + stDebug("s-task:%s complete shuffle-dispatch blocks to all %d vnodes, msgId:%d", id, numOfVgroups, msgId); } return code; @@ -425,14 +420,14 @@ static void doRetryDispatchData(void* param, void* tmrId) { } } - stDebug("s-task:%s complete re-try shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, + stDebug("s-task:%s complete retry shuffle-dispatch blocks to all %d vnodes, msgId:%d", pTask->id.idStr, numOfFailed, msgId); } else { int32_t vgId = pTask->outputInfo.fixedDispatcher.nodeId; SEpSet* pEpSet = &pTask->outputInfo.fixedDispatcher.epSet; int32_t downstreamTaskId = pTask->outputInfo.fixedDispatcher.taskId; - stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), id:%d", id, + stDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d), msgId:%d", id, pTask->info.selfChildId, 1, downstreamTaskId, vgId, msgId); code = doSendDispatchMsg(pTask, pReq, vgId, pEpSet); @@ -463,13 +458,13 @@ static void doRetryDispatchData(void* param, void* tmrId) { void streamRetryDispatchData(SStreamTask* pTask, int64_t waitDuration) { pTask->msgInfo.retryCount++; - stWarn("s-task:%s retry send dispatch data in %" PRId64 "ms, in timer msgId:%d, retryTimes:%d", pTask->id.idStr, + stTrace("s-task:%s retry send dispatch data in %" PRId64 "ms, in timer msgId:%d, retryTimes:%d", pTask->id.idStr, waitDuration, pTask->execInfo.dispatch, pTask->msgInfo.retryCount); - if (pTask->msgInfo.pTimer != NULL) { - taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamTimer, &pTask->msgInfo.pTimer); + if (pTask->msgInfo.pRetryTmr != NULL) { + taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamTimer, &pTask->msgInfo.pRetryTmr); } else { - pTask->msgInfo.pTimer = taosTmrStart(doRetryDispatchData, waitDuration, pTask, streamTimer); + pTask->msgInfo.pRetryTmr = taosTmrStart(doRetryDispatchData, waitDuration, pTask, streamTimer); } } @@ -549,9 +544,10 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { const char* id = pTask->id.idStr; int32_t numOfElems = streamQueueGetNumOfItems(pTask->outputq.queue); if (numOfElems > 0) { - double size = SIZE_IN_MiB(taosQueueMemorySize(pTask->outputq.queue->pQueue)); - stDebug("s-task:%s start to dispatch intermediate block to downstream, elem in outputQ:%d, size:%.2fMiB", id, - numOfElems, size); + double size = SIZE_IN_MiB(taosQueueMemorySize(pTask->outputq.queue->pQueue)); + int32_t numOfUnAccessed = streamQueueGetNumOfUnAccessedItems(pTask->outputq.queue); + stDebug("s-task:%s start to dispatch intermediate block to downstream, elem in outputQ:%d/%d, size:%.2fMiB", id, + numOfUnAccessed, numOfElems, size); } // to make sure only one dispatch is running @@ -562,8 +558,8 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return 0; } - if (pTask->chkInfo.dispatchCheckpointTrigger) { - stDebug("s-task:%s already send checkpoint trigger, not dispatch anymore", id); + if (pTask->chkInfo.pActiveInfo->dispatchTrigger) { + stDebug("s-task:%s already send checkpoint-trigger, no longer dispatch any other data", id); atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); return 0; } @@ -578,8 +574,9 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return 0; } - ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK || pBlock->type == STREAM_INPUT__CHECKPOINT_TRIGGER || - pBlock->type == STREAM_INPUT__TRANS_STATE); + int32_t type = pBlock->type; + ASSERT(type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__CHECKPOINT_TRIGGER || + type == STREAM_INPUT__TRANS_STATE); pTask->execInfo.dispatch += 1; pTask->msgInfo.startTs = taosGetTimestampMs(); @@ -590,6 +587,10 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { } else { // todo handle build dispatch msg failed } + if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + streamTaskInitTriggerDispatchInfo(pTask); + } + int32_t retryCount = 0; while (1) { code = sendDispatchMsg(pTask, pTask->msgInfo.pData); @@ -622,43 +623,185 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } -// this function is usually invoked by sink/agg task -int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { - int32_t num = taosArrayGetSize(pTask->pReadyMsgList); +int32_t initCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamNodeId, int32_t upstreamTaskId, int32_t childId, + int64_t checkpointId, SRpcMsg* pMsg) { + int32_t code = 0; + int32_t tlen = 0; + void* buf = NULL; + + SStreamCheckpointReadyMsg req = {0}; + req.downstreamNodeId = pTask->pMeta->vgId; + req.downstreamTaskId = pTask->id.taskId; + req.streamId = pTask->id.streamId; + req.checkpointId = checkpointId; + req.childId = childId; + req.upstreamNodeId = upstreamNodeId; + req.upstreamTaskId = upstreamTaskId; + + tEncodeSize(tEncodeStreamCheckpointReadyMsg, &req, tlen, code); + if (code < 0) { + return -1; + } + + buf = rpcMallocCont(sizeof(SMsgHead) + tlen); + if (buf == NULL) { + return -1; + } + + ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); + void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + + SEncoder encoder; + tEncoderInit(&encoder, abuf, tlen); + if ((code = tEncodeStreamCheckpointReadyMsg(&encoder, &req)) < 0) { + rpcFreeCont(buf); + return code; + } + tEncoderClear(&encoder); + + initRpcMsg(pMsg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); + return TSDB_CODE_SUCCESS; +} + +static void checkpointReadyMsgSendMonitorFn(void* param, void* tmrId) { + SStreamTask* pTask = param; + int32_t vgId = pTask->pMeta->vgId; + const char* id = pTask->id.idStr; + + // check the status every 100ms + if (streamTaskShouldStop(pTask)) { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s vgId:%d quit from monitor checkpoint-trigger, ref:%d", id, vgId, ref); + streamMetaReleaseTask(pTask->pMeta, pTask); + return; + } + + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + if (++pActiveInfo->sendReadyCheckCounter < 100) { + taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + return; + } + + pActiveInfo->sendReadyCheckCounter = 0; + stDebug("s-task:%s in sending checkpoint-ready msg monitor timer", id); + + taosThreadMutexLock(&pActiveInfo->lock); + + SArray* pList = pActiveInfo->pReadyMsgList; + SArray* pNotRspList = taosArrayInit(4, sizeof(int32_t)); + + int32_t num = taosArrayGetSize(pList); ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) == num); for (int32_t i = 0; i < num; ++i) { - SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, i); - tmsgSendReq(&pInfo->upstreamNodeEpset, &pInfo->msg); + STaskCheckpointReadyInfo* pInfo = taosArrayGet(pList, i); + if (pInfo->sendCompleted == 1) { + continue; + } - stDebug("s-task:%s level:%d checkpoint ready msg sent to upstream:0x%x", pTask->id.idStr, pTask->info.taskLevel, - pInfo->upStreamTaskId); + taosArrayPush(pNotRspList, &pInfo->upstreamTaskId); + stDebug("s-task:%s vgId:%d level:%d checkpoint-ready rsp from upstream:0x%x not confirmed yet", id, vgId, + pTask->info.taskLevel, pInfo->upstreamTaskId); } - taosArrayClear(pTask->pReadyMsgList); - stDebug("s-task:%s level:%d checkpoint ready msg sent to all %d upstreams", pTask->id.idStr, pTask->info.taskLevel, - num); + int32_t checkpointId = pActiveInfo->activeId; + + int32_t notRsp = taosArrayGetSize(pNotRspList); + if (notRsp > 0) { // send checkpoint-ready msg again + for (int32_t i = 0; i < taosArrayGetSize(pNotRspList); ++i) { + int32_t taskId = *(int32_t*)taosArrayGet(pNotRspList, i); + + for (int32_t j = 0; j < num; ++j) { + STaskCheckpointReadyInfo* pReadyInfo = taosArrayGet(pList, j); + if (taskId == pReadyInfo->upstreamTaskId) { // send msg again + + SRpcMsg msg = {0}; + initCheckpointReadyMsg(pTask, pReadyInfo->upstreamNodeId, pReadyInfo->upstreamTaskId, pReadyInfo->childId, + checkpointId, &msg); + tmsgSendReq(&pReadyInfo->upstreamNodeEpset, &msg); + stDebug("s-task:%s level:%d checkpoint-ready msg sent to upstream:0x%x again", id, pTask->info.taskLevel, + pReadyInfo->upstreamTaskId); + } + } + } + + taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + taosThreadMutexUnlock(&pActiveInfo->lock); + } else { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug( + "s-task:%s vgId:%d recv of checkpoint-ready msg confirmed by all upstream task(s), quit from timer and clear " + "checkpoint-ready msg, ref:%d", + id, vgId, ref); + + streamClearChkptReadyMsg(pTask); + taosThreadMutexUnlock(&pActiveInfo->lock); + streamMetaReleaseTask(pTask->pMeta, pTask); + } + + taosArrayDestroy(pNotRspList); +} + +// this function is usually invoked by sink/agg task +int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + + const char* id = pTask->id.idStr; + SArray* pList = pActiveInfo->pReadyMsgList; + + taosThreadMutexLock(&pActiveInfo->lock); + + int32_t num = taosArrayGetSize(pList); + ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) == num); + + for (int32_t i = 0; i < num; ++i) { + STaskCheckpointReadyInfo* pInfo = taosArrayGet(pList, i); + + SRpcMsg msg = {0}; + initCheckpointReadyMsg(pTask, pInfo->upstreamNodeId, pInfo->upstreamTaskId, pInfo->childId, pInfo->checkpointId, &msg); + tmsgSendReq(&pInfo->upstreamNodeEpset, &msg); + + stDebug("s-task:%s level:%d checkpoint-ready msg sent to upstream:0x%x", id, pTask->info.taskLevel, + pInfo->upstreamTaskId); + } + + taosThreadMutexUnlock(&pActiveInfo->lock); + stDebug("s-task:%s level:%d checkpoint-ready msg sent to all %d upstreams", id, pTask->info.taskLevel, num); + + // start to check if checkpoint ready msg has successfully received by upstream tasks. + if (pTask->info.taskLevel == TASK_LEVEL__SINK || pTask->info.taskLevel == TASK_LEVEL__AGG) { + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s start checkpoint-ready monitor in 10s, ref:%d ", pTask->id.idStr, ref); + streamMetaAcquireOneTask(pTask); + + if (pActiveInfo->pSendReadyMsgTmr == NULL) { + pActiveInfo->pSendReadyMsgTmr = taosTmrStart(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer); + } else { + taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + } + } return TSDB_CODE_SUCCESS; } // this function is only invoked by source task, and send rsp to mnode int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) { - taosThreadMutexLock(&pTask->lock); + SArray* pList = pTask->chkInfo.pActiveInfo->pReadyMsgList; + taosThreadMutexLock(&pTask->chkInfo.pActiveInfo->lock); ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); - if (taosArrayGetSize(pTask->pReadyMsgList) == 1) { - SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, 0); + if (taosArrayGetSize(pList) == 1) { + STaskCheckpointReadyInfo* pInfo = taosArrayGet(pList, 0); tmsgSendRsp(&pInfo->msg); - taosArrayClear(pTask->pReadyMsgList); + taosArrayClear(pList); stDebug("s-task:%s level:%d source checkpoint completed msg sent to mnode", pTask->id.idStr, pTask->info.taskLevel); } else { stDebug("s-task:%s level:%d already send rsp checkpoint success to mnode", pTask->id.idStr, pTask->info.taskLevel); } - taosThreadMutexUnlock(&pTask->lock); + taosThreadMutexUnlock(&pTask->chkInfo.pActiveInfo->lock); return TSDB_CODE_SUCCESS; } @@ -777,103 +920,113 @@ int32_t streamTaskBuildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRp } int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHandleInfo* pRpcInfo, SStreamTask* pTask) { - SStreamChkptReadyInfo info = {0}; + STaskCheckpointReadyInfo info = { + .recvTs = taosGetTimestampMs(), .transId = pReq->transId, .checkpointId = pReq->checkpointId}; + streamTaskBuildCheckpointSourceRsp(pReq, pRpcInfo, &info.msg, TSDB_CODE_SUCCESS); - if (pTask->pReadyMsgList == NULL) { - pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + taosThreadMutexLock(&pActiveInfo->lock); + + int32_t size = taosArrayGetSize(pActiveInfo->pReadyMsgList); + if (size > 0) { + ASSERT(size == 1); + + STaskCheckpointReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, 0); + if (pReady->transId == pReq->transId) { + stWarn("s-task:%s repeatly recv checkpoint source msg from mnode, checkpointId:%" PRId64 ", ignore", + pTask->id.idStr, pReq->checkpointId); + } else { + stError("s-task:%s checkpointId:%" PRId64 " transId:%d not completed, new transId:%d checkpointId:%" PRId64 + " recv from mnode", + pTask->id.idStr, pReady->checkpointId, pReady->transId, pReq->transId, pReq->checkpointId); + ASSERT(0); // failed to handle it + } + } else { + taosArrayPush(pActiveInfo->pReadyMsgList, &info); + stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, size + 1); } - taosArrayPush(pTask->pReadyMsgList, &info); + taosThreadMutexUnlock(&pActiveInfo->lock); + return TSDB_CODE_SUCCESS; +} + +int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t upstreamNodeId, int32_t upstreamTaskId, + int32_t childId, SEpSet* pEpset, int64_t checkpointId) { + ASSERT(upstreamTaskId != 0); + + pReadyInfo->upstreamTaskId = upstreamTaskId; + pReadyInfo->upstreamNodeEpset = *pEpset; + pReadyInfo->upstreamNodeId = upstreamNodeId; + pReadyInfo->recvTs = taosGetTimestampMs(); + pReadyInfo->checkpointId = checkpointId; + pReadyInfo->childId = childId; - int32_t size = taosArrayGetSize(pTask->pReadyMsgList); - stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, size); return TSDB_CODE_SUCCESS; } int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, int32_t index, int64_t checkpointId) { - int32_t code = 0; - int32_t tlen = 0; - void* buf = NULL; if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { return TSDB_CODE_SUCCESS; } - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); - SStreamCheckpointReadyMsg req = {0}; - req.downstreamNodeId = pTask->pMeta->vgId; - req.downstreamTaskId = pTask->id.taskId; - req.streamId = pTask->id.streamId; - req.checkpointId = checkpointId; - req.childId = pInfo->childId; - req.upstreamNodeId = pInfo->nodeId; - req.upstreamTaskId = pInfo->taskId; + STaskCheckpointReadyInfo info = {0}; + initCheckpointReadyInfo(&info, pInfo->nodeId, pInfo->taskId, pInfo->childId, &pInfo->epSet, checkpointId); - tEncodeSize(tEncodeStreamCheckpointReadyMsg, &req, tlen, code); - if (code < 0) { - return -1; + stDebug("s-task:%s (level:%d) prepare checkpoint-ready msg to upstream s-task:0x%" PRIx64 + "-0x%x (vgId:%d) idx:%d", + pTask->id.idStr, pTask->info.taskLevel, pTask->id.streamId, pInfo->taskId, pInfo->nodeId, index); + + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + + taosThreadMutexLock(&pActiveInfo->lock); + taosArrayPush(pActiveInfo->pReadyMsgList, &info); + + int32_t numOfRecv = taosArrayGetSize(pActiveInfo->pReadyMsgList); + int32_t total = streamTaskGetNumOfUpstream(pTask); + if (numOfRecv == total) { + stDebug("s-task:%s recv checkpoint-trigger from all upstream, continue", pTask->id.idStr); + pActiveInfo->allUpstreamTriggerRecv = 1; + } else { + ASSERT(numOfRecv <= total); + stDebug("s-task:%s %d/%d checkpoint-trigger recv", pTask->id.idStr, numOfRecv, total); } - buf = rpcMallocCont(sizeof(SMsgHead) + tlen); - if (buf == NULL) { - return -1; - } - - ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); - void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); - - SEncoder encoder; - tEncoderInit(&encoder, abuf, tlen); - if ((code = tEncodeStreamCheckpointReadyMsg(&encoder, &req)) < 0) { - rpcFreeCont(buf); - return code; - } - tEncoderClear(&encoder); - - ASSERT(req.upstreamTaskId != 0); - - SStreamChkptReadyInfo info = {.upStreamTaskId = pInfo->taskId, .upstreamNodeEpset = pInfo->epSet}; - initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); - - stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 - ":0x%x (vgId:%d) idx:%d, vgId:%d", - pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index, - req.upstreamNodeId); - - if (pTask->pReadyMsgList == NULL) { - pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); - } - - taosArrayPush(pTask->pReadyMsgList, &info); + taosThreadMutexUnlock(&pActiveInfo->lock); return 0; } void streamClearChkptReadyMsg(SStreamTask* pTask) { - if (pTask->pReadyMsgList == NULL) { + SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; + if (pActiveInfo == NULL) { return; } - for (int i = 0; i < taosArrayGetSize(pTask->pReadyMsgList); i++) { - SStreamChkptReadyInfo* pInfo = taosArrayGet(pTask->pReadyMsgList, i); + for (int i = 0; i < taosArrayGetSize(pActiveInfo->pReadyMsgList); i++) { + STaskCheckpointReadyInfo* pInfo = taosArrayGet(pActiveInfo->pReadyMsgList, i); rpcFreeCont(pInfo->msg.pCont); } - taosArrayClear(pTask->pReadyMsgList); + + taosArrayClear(pActiveInfo->pReadyMsgList); } // this message has been sent successfully, let's try next one. -static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId) { +static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId, int32_t downstreamNodeId) { stDebug("s-task:%s destroy dispatch msg:%p", pTask->id.idStr, pTask->msgInfo.pData); bool delayDispatch = (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER); if (delayDispatch) { taosThreadMutexLock(&pTask->lock); // we only set the dispatch msg info for current checkpoint trans - if (streamTaskGetStatus(pTask)->state == TASK_STATUS__CK && pTask->chkInfo.checkpointingId == pTask->msgInfo.checkpointId) { - ASSERT(pTask->chkInfo.transId == pTask->msgInfo.transId); - pTask->chkInfo.dispatchCheckpointTrigger = true; - stDebug("s-task:%s checkpoint-trigger msg rsp for checkpointId:%" PRId64 " transId:%d confirmed", - pTask->id.idStr, pTask->msgInfo.checkpointId, pTask->msgInfo.transId); + if (streamTaskGetStatus(pTask)->state == TASK_STATUS__CK && + pTask->chkInfo.pActiveInfo->activeId == pTask->msgInfo.checkpointId) { + ASSERT(pTask->chkInfo.pActiveInfo->transId == pTask->msgInfo.transId); + stDebug("s-task:%s checkpoint-trigger msg to 0x%x rsp for checkpointId:%" PRId64 " transId:%d confirmed", + pTask->id.idStr, downstreamId, pTask->msgInfo.checkpointId, pTask->msgInfo.transId); + + streamTaskSetTriggerDispatchConfirmed(pTask, downstreamNodeId); } else { stWarn("s-task:%s checkpoint-trigger msg rsp for checkpointId:%" PRId64 " transId:%d discard, since expired", pTask->id.idStr, pTask->msgInfo.checkpointId, pTask->msgInfo.transId); @@ -958,7 +1111,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i taosArrayPush(pTask->msgInfo.pRetryList, &pRsp->downstreamNodeId); taosThreadMutexUnlock(&pTask->lock); - stWarn("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch", id, + stTrace("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch", id, pRsp->downstreamTaskId, pRsp->downstreamNodeId, DISPATCH_RETRY_INTERVAL_MS); } else if (pRsp->inputStatus == TASK_INPUT_STATUS__REFUSED) { // todo handle the agg task failure, add test case @@ -966,10 +1119,10 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i pTask->info.taskLevel == TASK_LEVEL__SOURCE) { stError("s-task:%s failed to dispatch checkpoint-trigger msg, checkpointId:%" PRId64 ", set the current checkpoint failed, and send rsp to mnode", - id, pTask->chkInfo.checkpointingId); + id, pTask->chkInfo.pActiveInfo->activeId); { // send checkpoint failure msg to mnode directly - pTask->chkInfo.failedId = pTask->chkInfo.checkpointingId; // record the latest failed checkpoint id - pTask->chkInfo.checkpointingId = pTask->chkInfo.checkpointingId; + pTask->chkInfo.pActiveInfo->failedId = pTask->chkInfo.pActiveInfo->activeId; // record the latest failed checkpoint id + pTask->chkInfo.pActiveInfo->activeId = pTask->chkInfo.pActiveInfo->activeId; streamTaskSendCheckpointSourceRsp(pTask); } } else { @@ -1023,7 +1176,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i // trans-state msg has been sent to downstream successfully. let's transfer the fill-history task state if (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__TRANS_STATE) { - stDebug("s-task:%s dispatch transtate msgId:%d to downstream successfully, start to prepare transfer state", id, msgId); + stDebug("s-task:%s dispatch trans-state msgId:%d to downstream successfully, start to prepare transfer state", id, msgId); ASSERT(pTask->info.fillHistory == 1); code = streamTransferStatePrepare(pTask); @@ -1035,7 +1188,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i // now ready for next data output atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); } else { - handleDispatchSuccessRsp(pTask, pRsp->downstreamTaskId); + handleDispatchSuccessRsp(pTask, pRsp->downstreamTaskId, pRsp->downstreamNodeId); } } } @@ -1096,7 +1249,7 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S stDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64 ", msgId:%d", id, pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen, pReq->msgId); - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); ASSERT(pInfo != NULL); if (pMeta->role == NODE_ROLE_FOLLOWER) { @@ -1117,25 +1270,10 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S // This task has received the checkpoint req from the upstream task, from which all the messages should be // blocked. Note that there is no race condition here. if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { - atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); stDebug("s-task:%s close inputQ for upstream:0x%x, msgId:%d", id, pReq->upstreamTaskId, pReq->msgId); } else if (pReq->type == STREAM_INPUT__TRANS_STATE) { - atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); - streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); - - // disable the related stream task here to avoid it to receive the newly arrived data after the transfer-state - STaskId* pRelTaskId = &pTask->streamTaskId; - SStreamTask* pStreamTask = streamMetaAcquireTask(pMeta, pRelTaskId->streamId, pRelTaskId->taskId); - if (pStreamTask != NULL) { - atomic_add_fetch_32(&pStreamTask->upstreamInfo.numOfClosed, 1); - streamTaskCloseUpstreamInput(pStreamTask, pReq->upstreamRelTaskId); - streamMetaReleaseTask(pMeta, pStreamTask); - } - - stDebug("s-task:%s close inputQ for upstream:0x%x since trans-state msgId:%d recv, rel stream-task:0x%" PRIx64 - " close inputQ for upstream:0x%x", - id, pReq->upstreamTaskId, pReq->msgId, pTask->streamTaskId.taskId, pReq->upstreamRelTaskId); + stDebug("s-task:%s recv trans-state msgId:%d from upstream:0x%x", id, pReq->msgId, pReq->upstreamTaskId); } status = streamTaskAppendInputBlocks(pTask, pReq); @@ -1143,11 +1281,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S } } - // disable the data from upstream tasks -// if (streamTaskGetStatus(pTask)->state == TASK_STATUS__HALT) { -// status = TASK_INPUT_STATUS__BLOCKED; -// } - { // do send response with the input status int32_t code = buildDispatchRsp(pTask, pReq, status, &pRsp->pCont); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 934ff898a9..95634b2ff3 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -96,7 +96,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i *totalSize = 0; int32_t size = 0; - int32_t numOfBlocks = 0; + int32_t numOfBlocks= 0; SArray* pRes = NULL; while (1) { @@ -426,7 +426,7 @@ int32_t streamTransferStatePrepare(SStreamTask* pTask) { streamMetaReleaseTask(pMeta, pStreamTask); return code; } else { - stDebug("s-task:%s halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr); + stDebug("s-task:%s sink task halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr); } streamMetaReleaseTask(pMeta, pStreamTask); } @@ -562,13 +562,13 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { } if (streamQueueIsFull(pTask->outputq.queue)) { - stWarn("s-task:%s outputQ is full, idle for 500ms and retry", id); + stTrace("s-task:%s outputQ is full, idle for 500ms and retry", id); streamTaskSetIdleInfo(pTask, 1000); return 0; } if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { - stWarn("s-task:%s downstream task inputQ blocked, idle for 1sec and retry", id); + stTrace("s-task:%s downstream task inputQ blocked, idle for 1sec and retry", id); streamTaskSetIdleInfo(pTask, 1000); return 0; } @@ -591,10 +591,13 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { } } + pTask->execInfo.inputDataBlocks += numOfBlocks; + pTask->execInfo.inputDataSize += blockSize; + // dispatch checkpoint msg to all downstream tasks int32_t type = pInput->type; if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { - streamProcessCheckpointBlock(pTask, (SStreamDataBlock*)pInput); + streamProcessCheckpointTriggerBlock(pTask, (SStreamDataBlock*)pInput); continue; } @@ -606,11 +609,21 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { if (pTask->info.taskLevel == TASK_LEVEL__SINK) { ASSERT(type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__CHECKPOINT); + int64_t st = taosGetTimestampMs(); + // here only handle the data block sink operation if (type == STREAM_INPUT__DATA_BLOCK) { pTask->execInfo.sink.dataSize += blockSize; stDebug("s-task:%s sink task start to sink %d blocks, size:%.2fKiB", id, numOfBlocks, SIZE_IN_KiB(blockSize)); doOutputResultBlockImpl(pTask, (SStreamDataBlock*)pInput); + + double el = (taosGetTimestampMs() - st) / 1000.0; + if (fabs(el - 0.0) <= DBL_EPSILON) { + pTask->execInfo.procsThroughput = 0; + } else { + pTask->execInfo.procsThroughput = (blockSize / el); + } + continue; } } @@ -644,13 +657,23 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { int64_t ver = pTask->chkInfo.processedVer; doSetStreamInputBlock(pTask, pInput, &ver, id); - int64_t resSize = 0; + int64_t totalSize = 0; int32_t totalBlocks = 0; - streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks); + streamTaskExecImpl(pTask, pInput, &totalSize, &totalBlocks); double el = (taosGetTimestampMs() - st) / 1000.0; stDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el, - SIZE_IN_MiB(resSize), totalBlocks); + SIZE_IN_MiB(totalSize), totalBlocks); + + pTask->execInfo.outputDataBlocks += totalBlocks; + pTask->execInfo.outputDataSize += totalSize; + if (fabs(el - 0.0) <= DBL_EPSILON) { + pTask->execInfo.procsThroughput = 0; + pTask->execInfo.outputThroughput = 0; + } else { + pTask->execInfo.outputThroughput = (totalSize / el); + pTask->execInfo.procsThroughput = (blockSize / el); + } SCheckpointInfo* pInfo = &pTask->chkInfo; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 59d49a8231..f6449829a3 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -151,10 +151,10 @@ int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { int8_t ret = STREAM_STATA_COMPATIBLE; TBC* pCur = NULL; - if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { - // no task info, no stream + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { // no task info, no stream return ret; } + void* pKey = NULL; int32_t kLen = 0; void* pVal = NULL; @@ -165,20 +165,24 @@ int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { if (pVal == NULL || vLen == 0) { break; } + SDecoder decoder; SCheckpointInfo info; tDecoderInit(&decoder, (uint8_t*)pVal, vLen); if (tDecodeStreamTaskChkInfo(&decoder, &info) < 0) { continue; } + if (info.msgVer <= SSTREAM_TASK_INCOMPATIBLE_VER) { ret = STREAM_STATA_NO_COMPATIBLE; } else if (info.msgVer >= SSTREAM_TASK_NEED_CONVERT_VER) { ret = STREAM_STATA_NEED_CONVERT; } + tDecoderClear(&decoder); break; } + tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); @@ -193,6 +197,7 @@ int32_t streamMetaCvtDbFormat(SStreamMeta* pMeta) { if (exist == false) { return code; } + SBackendWrapper* pBackend = streamBackendInit(pMeta->path, chkpId, pMeta->vgId); void* pIter = taosHashIterate(pBackend->cfInst, NULL); @@ -219,6 +224,7 @@ _EXIT: return code; } + int32_t streamMetaMayCvtDbFormat(SStreamMeta* pMeta) { int8_t compatible = streamMetaCheckBackendCompatible(pMeta); if (compatible == STREAM_STATA_COMPATIBLE) { @@ -239,14 +245,12 @@ int32_t streamMetaMayCvtDbFormat(SStreamMeta* pMeta) { return 0; } -int32_t streamTaskSetDb(SStreamMeta* pMeta, void* arg, char* key) { - SStreamTask* pTask = arg; - +int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) { int64_t chkpId = pTask->chkInfo.checkpointId; taosThreadMutexLock(&pMeta->backendMutex); void** ppBackend = taosHashGet(pMeta->pTaskDbUnique, key, strlen(key)); - if (ppBackend != NULL && *ppBackend != NULL) { + if ((ppBackend != NULL) && (*ppBackend != NULL)) { taskDbAddRef(*ppBackend); STaskDbWrapper* pBackend = *ppBackend; @@ -254,24 +258,22 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, void* arg, char* key) { pTask->pBackend = pBackend; taosThreadMutexUnlock(&pMeta->backendMutex); - stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pBackend); return 0; } - STaskDbWrapper* pBackend = taskDbOpen(pMeta->path, key, chkpId); + STaskDbWrapper* pBackend = NULL; while (1) { - if (pBackend == NULL) { - taosThreadMutexUnlock(&pMeta->backendMutex); - taosMsleep(1000); - stDebug("backend held by other task, restart later, path:%s, key:%s", pMeta->path, key); - } else { - taosThreadMutexUnlock(&pMeta->backendMutex); + pBackend = taskDbOpen(pMeta->path, key, chkpId); + if (pBackend != NULL) { break; } + taosThreadMutexUnlock(&pMeta->backendMutex); + taosMsleep(1000); + + stDebug("backend held by other task, restart later, path:%s, key:%s", pMeta->path, key); taosThreadMutexLock(&pMeta->backendMutex); - pBackend = taskDbOpen(pMeta->path, key, chkpId); } int64_t tref = taosAddRef(taskDbWrapperId, pBackend); @@ -456,10 +458,10 @@ void streamMetaClear(SStreamMeta* pMeta) { SStreamTask* p = *(SStreamTask**)pIter; // release the ref by timer - if (p->info.triggerParam != 0 && p->info.fillHistory == 0) { // one more ref in timer + if (p->info.delaySchedParam != 0 && p->info.fillHistory == 0) { // one more ref in timer stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", p->id.idStr, p->refCnt); taosTmrStop(p->schedInfo.pDelayTimer); - p->info.triggerParam = 0; + p->info.delaySchedParam = 0; streamMetaReleaseTask(pMeta, p); } @@ -541,6 +543,7 @@ void streamMetaCloseImpl(void* arg) { // todo let's check the status for each task int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { + int32_t vgId = pTask->pMeta->vgId; void* buf = NULL; int32_t len; int32_t code; @@ -556,6 +559,7 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { if (pTask->ver < SSTREAM_TASK_SUBTABLE_CHANGED_VER){ pTask->ver = SSTREAM_TASK_VER; } + SEncoder encoder = {0}; tEncoderInit(&encoder, buf, len); tEncodeStreamTask(&encoder, pTask); @@ -565,9 +569,9 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { code = tdbTbUpsert(pMeta->pTaskDb, id, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn); if (code != TSDB_CODE_SUCCESS) { - stError("s-task:%s task meta save to disk failed, code:%s", pTask->id.idStr, tstrerror(terrno)); + stError("s-task:%s vgId:%d task meta save to disk failed, code:%s", pTask->id.idStr, vgId, tstrerror(terrno)); } else { - stDebug("s-task:%s task meta save to disk", pTask->id.idStr); + stDebug("s-task:%s vgId:%d task meta save to disk", pTask->id.idStr, vgId); } taosMemoryFree(buf); @@ -748,10 +752,10 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t ASSERT(pTask->status.timerActive == 0); - if (pTask->info.triggerParam != 0 && pTask->info.fillHistory == 0) { + if (pTask->info.delaySchedParam != 0 && pTask->info.fillHistory == 0) { stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", pTask->id.idStr, pTask->refCnt); taosTmrStop(pTask->schedInfo.pDelayTimer); - pTask->info.triggerParam = 0; + pTask->info.delaySchedParam = 0; streamMetaReleaseTask(pMeta, pTask); } @@ -794,11 +798,11 @@ int32_t streamMetaCommit(SStreamMeta* pMeta) { } int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) { - int64_t chkpId = 0; + int64_t checkpointId = 0; TBC* pCur = NULL; if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { - return chkpId; + return checkpointId; } void* pKey = NULL; @@ -819,16 +823,16 @@ int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) { } tDecoderClear(&decoder); - chkpId = TMAX(chkpId, info.checkpointId); + checkpointId = TMAX(checkpointId, info.checkpointId); } - stDebug("get max chkp id: %" PRId64 "", chkpId); + stDebug("vgId:%d get max checkpointId:%" PRId64, pMeta->vgId, checkpointId); tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); - return chkpId; + return checkpointId; } // not allowed to return error code @@ -857,6 +861,10 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { tdbTbcMoveToFirst(pCur); while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { + if (pVal == NULL || vLen == 0) { + break; + } + SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1020,8 +1028,13 @@ static int32_t metaHeartbeatToMnodeImpl(SStreamMeta* pMeta) { .checkpointInfo.latestId = (*pTask)->chkInfo.checkpointId, .checkpointInfo.latestVer = (*pTask)->chkInfo.checkpointVer, .checkpointInfo.latestTime = (*pTask)->chkInfo.checkpointTime, + .checkpointInfo.latestSize = 0, + .checkpointInfo.remoteBackup = 0, .hTaskId = (*pTask)->hTaskInfo.id.taskId, - + .procsTotal = SIZE_IN_MiB((*pTask)->execInfo.inputDataSize), + .outputTotal = SIZE_IN_MiB((*pTask)->execInfo.outputDataSize), + .procsThroughput = SIZE_IN_KiB((*pTask)->execInfo.procsThroughput), + .outputThroughput = SIZE_IN_KiB((*pTask)->execInfo.outputThroughput), .startCheckpointId = (*pTask)->execInfo.startCheckpointId, .startCheckpointVer = (*pTask)->execInfo.startCheckpointVer, }; @@ -1032,13 +1045,13 @@ static int32_t metaHeartbeatToMnodeImpl(SStreamMeta* pMeta) { entry.sinkDataSize = SIZE_IN_MiB((*pTask)->execInfo.sink.dataSize); } - if ((*pTask)->chkInfo.checkpointingId != 0) { - entry.checkpointInfo.failed = ((*pTask)->chkInfo.failedId >= (*pTask)->chkInfo.checkpointingId) ? 1 : 0; - entry.checkpointInfo.activeId = (*pTask)->chkInfo.checkpointingId; - entry.checkpointInfo.activeTransId = (*pTask)->chkInfo.transId; + if ((*pTask)->chkInfo.pActiveInfo->activeId != 0) { + entry.checkpointInfo.failed = ((*pTask)->chkInfo.pActiveInfo->failedId >= (*pTask)->chkInfo.pActiveInfo->activeId) ? 1 : 0; + entry.checkpointInfo.activeId = (*pTask)->chkInfo.pActiveInfo->activeId; + entry.checkpointInfo.activeTransId = (*pTask)->chkInfo.pActiveInfo->transId; if (entry.checkpointInfo.failed) { - stInfo("s-task:%s set kill checkpoint trans in hb, transId:%d", (*pTask)->id.idStr, (*pTask)->chkInfo.transId); + stInfo("s-task:%s set kill checkpoint trans in hb, transId:%d", (*pTask)->id.idStr, (*pTask)->chkInfo.pActiveInfo->transId); } } @@ -1108,6 +1121,7 @@ void metaHbToMnode(void* param, void* tmrId) { SStreamMeta* pMeta = taosAcquireRef(streamMetaId, rid); if (pMeta == NULL) { + stError("invalid rid:%" PRId64 " failed to acquired stream-meta", rid); return; } @@ -1328,8 +1342,8 @@ void streamMetaUpdateStageRole(SStreamMeta* pMeta, int64_t stage, bool isLeader) streamMetaWUnLock(pMeta); if (isLeader) { - stInfo("vgId:%d update meta stage:%" PRId64 ", prev:%" PRId64 " leader:%d, start to send Hb", pMeta->vgId, - prevStage, stage, isLeader); + stInfo("vgId:%d update meta stage:%" PRId64 ", prev:%" PRId64 " leader:%d, start to send Hb, rid:%" PRId64, + pMeta->vgId, prevStage, stage, isLeader, pMeta->rid); streamMetaStartHb(pMeta); } else { stInfo("vgId:%d update meta stage:%" PRId64 " prev:%" PRId64 " leader:%d sendMsg beforeClosing:%d", pMeta->vgId, @@ -1358,7 +1372,7 @@ static int32_t prepareBeforeStartTasks(SStreamMeta* pMeta, SArray** pList, int64 return TSDB_CODE_SUCCESS; } -int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { +int32_t streamMetaStartAllTasks(SStreamMeta* pMeta, __stream_task_expand_fn expandFn) { int32_t code = TSDB_CODE_SUCCESS; int32_t vgId = pMeta->vgId; int64_t now = taosGetTimestampMs(); @@ -1378,13 +1392,36 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { return TSDB_CODE_SUCCESS; } + // broadcast the check downstream tasks msg numOfTasks = taosArrayGetSize(pTaskList); - // broadcast the check downstream tasks msg + // prepare the fill-history task before starting all stream tasks, to avoid fill-history tasks are started without + // initialization , when the operation of check downstream tasks status is executed far quickly. + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); + if (pTask == NULL) { + stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, pTaskId->taskId); + streamMetaAddFailedTask(pMeta, pTaskId->streamId, pTaskId->taskId); + continue; + } + + if (pTask->pBackend == NULL) { // TODO: add test cases for this + code = expandFn(pTask); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:0x%x vgId:%d failed to expand stream backend", pTaskId->taskId, vgId); + streamMetaAddFailedTaskSelf(pTask, pTask->execInfo.readyTs); + } + } else { + stDebug("s-task:0x%x vgId:%d fill-history task backend has initialized already", pTaskId->taskId, vgId); + } + + streamMetaReleaseTask(pMeta, pTask); + } + for (int32_t i = 0; i < numOfTasks; ++i) { SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); - // todo: use hashTable instead SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); if (pTask == NULL) { stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, pTaskId->taskId); @@ -1392,14 +1429,16 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { continue; } - // fill-history task can only be launched by related stream tasks. STaskExecStatisInfo* pInfo = &pTask->execInfo; + + // fill-history task can only be launched by related stream tasks. if (pTask->info.fillHistory == 1) { stDebug("s-task:%s fill-history task wait related stream task start", pTask->id.idStr); streamMetaReleaseTask(pMeta, pTask); continue; } + // ready now, start the related fill-history task if (pTask->status.downstreamReady == 1) { if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { stDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task", @@ -1422,7 +1461,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { streamMetaReleaseTask(pMeta, pTask); } - stInfo("vgId:%d start tasks completed", pMeta->vgId); + stInfo("vgId:%d start all task(s) completed", pMeta->vgId); taosArrayDestroy(pTaskList); return code; } @@ -1481,13 +1520,13 @@ bool streamMetaAllTasksReady(const SStreamMeta* pMeta) { return true; } -int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { +int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId, __stream_task_expand_fn expandFn) { int32_t vgId = pMeta->vgId; - stInfo("vgId:%d start to task:0x%x by checking downstream status", vgId, taskId); + stInfo("vgId:%d start task:0x%x by checking it's downstream status", vgId, taskId); SStreamTask* pTask = streamMetaAcquireTask(pMeta, streamId, taskId); if (pTask == NULL) { - stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, taskId); + stError("vgId:%d failed to acquire task:0x%x when starting task", pMeta->vgId, taskId); streamMetaAddFailedTask(pMeta, streamId, taskId); return TSDB_CODE_STREAM_TASK_IVLD_STATUS; } @@ -1500,6 +1539,32 @@ int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t tas } ASSERT(pTask->status.downstreamReady == 0); + if (pTask->pBackend == NULL) { + int32_t code = expandFn(pTask); + if (code != TSDB_CODE_SUCCESS) { + streamMetaAddFailedTaskSelf(pTask, pInfo->readyTs); + streamMetaReleaseTask(pMeta, pTask); + return code; + } + + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + SStreamTask* pHTask = streamMetaAcquireTask(pMeta, pTask->hTaskInfo.id.streamId, pTask->hTaskInfo.id.taskId); + if (pHTask != NULL) { + if (pHTask->pBackend == NULL) { + code = expandFn(pHTask); + if (code != TSDB_CODE_SUCCESS) { + streamMetaAddFailedTaskSelf(pHTask, pInfo->readyTs); + + streamMetaReleaseTask(pMeta, pHTask); + streamMetaReleaseTask(pMeta, pTask); + return code; + } + } + + streamMetaReleaseTask(pMeta, pHTask); + } + } + } int32_t ret = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_INIT); if (ret != TSDB_CODE_SUCCESS) { diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 247baea16f..9c5c230a3d 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -117,6 +117,13 @@ int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue) { return numOfItems1 + numOfItems2; } +int32_t streamQueueGetNumOfUnAccessedItems(const SStreamQueue* pQueue) { + int32_t numOfItems1 = taosQueueItemSize(pQueue->pQueue); + int32_t numOfItems2 = taosQallUnAccessedItemSize(pQueue->qall); + + return numOfItems1 + numOfItems2; +} + int32_t streamQueueGetItemSize(const SStreamQueue* pQueue) { return taosQueueMemorySize(pQueue->pQueue) + taosQallUnAccessedMemSize(pQueue->qall); } @@ -322,9 +329,10 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) ASSERT(0); } - if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->info.triggerParam != 0) { + if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && type != STREAM_INPUT__CHECKPOINT_TRIGGER && + (pTask->info.delaySchedParam != 0)) { atomic_val_compare_exchange_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); - stDebug("s-task:%s new data arrived, active the trigger, triggerStatus:%d", pTask->id.idStr, pTask->schedInfo.status); + stDebug("s-task:%s new data arrived, active the sched-trigger, triggerStatus:%d", pTask->id.idStr, pTask->schedInfo.status); } return 0; diff --git a/source/libs/stream/src/streamSched.c b/source/libs/stream/src/streamSched.c index 52e7431e70..9c817d565b 100644 --- a/source/libs/stream/src/streamSched.c +++ b/source/libs/stream/src/streamSched.c @@ -20,13 +20,13 @@ static void streamTaskResumeHelper(void* param, void* tmrId); static void streamTaskSchedHelper(void* param, void* tmrId); int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { - if (pTask->info.triggerParam != 0 && pTask->info.fillHistory == 0) { + if (pTask->info.delaySchedParam != 0 && pTask->info.fillHistory == 0) { int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1); ASSERT(ref == 2 && pTask->schedInfo.pDelayTimer == NULL); - stDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", pTask->id.idStr, pTask->info.triggerParam); + stDebug("s-task:%s setup scheduler trigger, delay:%" PRId64 " ms", pTask->id.idStr, pTask->info.delaySchedParam); - pTask->schedInfo.pDelayTimer = taosTmrStart(streamTaskSchedHelper, (int32_t)pTask->info.triggerParam, pTask, streamTimer); + pTask->schedInfo.pDelayTimer = taosTmrStart(streamTaskSchedHelper, (int32_t)pTask->info.delaySchedParam, pTask, streamTimer); pTask->schedInfo.status = TASK_TRIGGER_STATUS__INACTIVE; } @@ -52,7 +52,11 @@ int32_t streamTaskSchedTask(SMsgCb* pMsgCb, int32_t vgId, int64_t streamId, int3 return -1; } - stDebug("vgId:%d create msg to start stream task:0x%x, exec type:%d", vgId, taskId, execType); + if (streamId != 0) { + stDebug("vgId:%d create msg to start stream task:0x%x, exec type:%d", vgId, taskId, execType); + } else { + stDebug("vgId:%d create msg to exec, type:%d", vgId, execType); + } pRunReq->head.vgId = vgId; pRunReq->streamId = streamId; @@ -115,7 +119,7 @@ void streamTaskResumeHelper(void* param, void* tmrId) { void streamTaskSchedHelper(void* param, void* tmrId) { SStreamTask* pTask = (void*)param; const char* id = pTask->id.idStr; - int32_t nextTrigger = (int32_t)pTask->info.triggerParam; + int32_t nextTrigger = (int32_t)pTask->info.delaySchedParam; int8_t status = atomic_load_8(&pTask->schedInfo.status); stTrace("s-task:%s in scheduler, trigger status:%d, next:%dms", id, status, nextTrigger); diff --git a/source/libs/stream/src/streamStartHistory.c b/source/libs/stream/src/streamStartHistory.c index 6882f6617d..7a864a60d2 100644 --- a/source/libs/stream/src/streamStartHistory.c +++ b/source/libs/stream/src/streamStartHistory.c @@ -155,7 +155,7 @@ int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } -int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask) { +int32_t streamTaskOnScanHistoryTaskReady(SStreamTask* pTask) { // set the state to be ready streamTaskSetReady(pTask); streamTaskSetRangeStreamCalc(pTask); diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 52002b7ea8..47324bd8c9 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -98,7 +98,7 @@ int stateKeyCmpr(const void* pKey1, int kLen1, const void* pKey2, int kLen2) { return winKeyCmprImpl(&pWin1->key, &pWin2->key); } -SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages) { +SStreamState* streamStateOpen(const char* path, void* pTask, bool specPath, int32_t szPage, int32_t pages) { SStreamState* pState = taosMemoryCalloc(1, sizeof(SStreamState)); stDebug("open stream state %p, %s", pState, path); if (pState == NULL) { @@ -127,7 +127,7 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT); pState->parNameMap = tSimpleHashInit(1024, hashFn); - stInfo("succ to open state %p on backend %p 0x%" PRIx64 "-%d", pState, pMeta->streamBackend, pState->streamId, + stInfo("open state %p on backend %p 0x%" PRIx64 "-%d succ", pState, pMeta->streamBackend, pState->streamId, pState->taskId); return pState; diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 1e622f615d..834daf15d0 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -24,6 +24,7 @@ static void streamTaskDestroyUpstreamInfo(SUpstreamInfo* pUpstreamInfo); static void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated); static void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdate); +static void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo); static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) { int32_t childId = taosArrayGetSize(pArray); @@ -70,12 +71,12 @@ static void freeItem(void* p) { } static void freeUpstreamItem(void* p) { - SStreamChildEpInfo** pInfo = p; + SStreamUpstreamEpInfo** pInfo = p; taosMemoryFree(*pInfo); } -static SStreamChildEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) { - SStreamChildEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamChildEpInfo)); +static SStreamUpstreamEpInfo* createStreamTaskEpInfo(const SStreamTask* pTask) { + SStreamUpstreamEpInfo* pEpInfo = taosMemoryMalloc(sizeof(SStreamUpstreamEpInfo)); if (pEpInfo == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; @@ -106,7 +107,7 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, SEpSet* pEpset, pTask->info.taskLevel = taskLevel; pTask->info.fillHistory = fillHistory; - pTask->info.triggerParam = triggerParam; + pTask->info.delaySchedParam = triggerParam; pTask->subtableWithoutMd5 = subtableWithoutMd5; pTask->status.pSM = streamCreateStateMachine(pTask); @@ -187,8 +188,9 @@ int32_t tDecodeStreamTaskId(SDecoder* pDecoder, STaskId* pTaskId) { } void tFreeStreamTask(SStreamTask* pTask) { - char* p = NULL; - int32_t taskId = pTask->id.taskId; + char* p = NULL; + int32_t taskId = pTask->id.taskId; + STaskExecStatisInfo* pStatis = &pTask->execInfo; ETaskStatus status1 = TASK_STATUS__UNINIT; @@ -200,7 +202,7 @@ void tFreeStreamTask(SStreamTask* pTask) { } taosThreadMutexUnlock(&pTask->lock); - stDebug("start to free s-task:0x%x, %p, state:%s", taskId, pTask, p); + stDebug("start to free s-task:0x%x %p, state:%s", taskId, pTask, p); SCheckpointInfo* pCkInfo = &pTask->chkInfo; stDebug("s-task:0x%x task exec summary: create:%" PRId64 ", init:%" PRId64 ", start:%" PRId64 @@ -226,9 +228,9 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->hTaskInfo.pTimer = NULL; } - if (pTask->msgInfo.pTimer != NULL) { - /*bool ret = */taosTmrStop(pTask->msgInfo.pTimer); - pTask->msgInfo.pTimer = NULL; + if (pTask->msgInfo.pRetryTmr != NULL) { + /*bool ret = */taosTmrStop(pTask->msgInfo.pRetryTmr); + pTask->msgInfo.pRetryTmr = NULL; } if (pTask->inputq.queue) { @@ -253,7 +255,6 @@ void tFreeStreamTask(SStreamTask* pTask) { } streamClearChkptReadyMsg(pTask); - pTask->pReadyMsgList = taosArrayDestroy(pTask->pReadyMsgList); if (pTask->msgInfo.pData != NULL) { clearBufferedDispatchMsg(pTask); @@ -275,10 +276,6 @@ void tFreeStreamTask(SStreamTask* pTask) { taskDbRemoveRef(pTask->pBackend); } - if (pTask->id.idStr != NULL) { - taosMemoryFree((void*)pTask->id.idStr); - } - if (pTask->pNameMap) { tSimpleHashCleanup(pTask->pNameMap); } @@ -292,6 +289,22 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->outputInfo.pNodeEpsetUpdateList = taosArrayDestroy(pTask->outputInfo.pNodeEpsetUpdateList); + if ((pTask->status.removeBackendFiles) && (pTask->pMeta != NULL)) { + char* path = taosMemoryCalloc(1, strlen(pTask->pMeta->path) + 128); + sprintf(path, "%s%s%s", pTask->pMeta->path, TD_DIRSEP, pTask->id.idStr); + taosRemoveDir(path); + + stInfo("s-task:0x%x vgId:%d remove all backend files:%s", taskId, pTask->pMeta->vgId, path); + taosMemoryFree(path); + } + + if (pTask->id.idStr != NULL) { + taosMemoryFree((void*)pTask->id.idStr); + } + + streamTaskDestroyActiveChkptInfo(pTask->chkInfo.pActiveInfo); + pTask->chkInfo.pActiveInfo = NULL; + taosMemoryFree(pTask); stDebug("s-task:0x%x free task completed", taskId); } @@ -404,6 +417,10 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i return TSDB_CODE_OUT_OF_MEMORY; } + if (pTask->chkInfo.pActiveInfo == NULL) { + pTask->chkInfo.pActiveInfo = streamTaskCreateActiveChkptInfo(); + } + return TSDB_CODE_SUCCESS; } @@ -423,8 +440,12 @@ int32_t streamTaskGetNumOfDownstream(const SStreamTask* pTask) { } } +int32_t streamTaskGetNumOfUpstream(const SStreamTask* pTask) { + return taosArrayGetSize(pTask->upstreamInfo.pList); +} + int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask) { - SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pUpstreamTask); + SStreamUpstreamEpInfo* pEpInfo = createStreamTaskEpInfo(pUpstreamTask); if (pEpInfo == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -443,7 +464,7 @@ void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpS int32_t numOfUpstream = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < numOfUpstream; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (pInfo->nodeId == nodeId) { bool equal = isEpsetEqual(&pInfo->epSet, pEpSet); if (!equal) { @@ -579,7 +600,7 @@ void streamTaskResetUpstreamStageInfo(SStreamTask* pTask) { int32_t size = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < size; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); pInfo->stage = -1; } @@ -593,7 +614,7 @@ void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { } for (int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); pInfo->dataAllowed = true; } @@ -602,9 +623,19 @@ void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { } void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { - SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); if (pInfo != NULL) { pInfo->dataAllowed = false; + int32_t t = atomic_add_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); + } +} + +void streamTaskOpenUpstreamInput(SStreamTask* pTask, int32_t taskId) { + SStreamUpstreamEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); + if (pInfo != NULL) { + pInfo->dataAllowed = true; + int32_t t = atomic_sub_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); + ASSERT(t >= 0); } } @@ -686,16 +717,47 @@ int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskI pReq->head.vgId = vgId; pReq->taskId = pTaskId->taskId; pReq->streamId = pTaskId->streamId; - pReq->resetRelHalt = resetRelHalt; + pReq->resetRelHalt = resetRelHalt; // todo: remove this attribute SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_DROP, .pCont = pReq, .contLen = sizeof(SVDropStreamTaskReq)}; int32_t code = tmsgPutToQueue(pMsgCb, WRITE_QUEUE, &msg); if (code != TSDB_CODE_SUCCESS) { stError("vgId:%d failed to send drop task:0x%x msg, code:%s", vgId, pTaskId->taskId, tstrerror(code)); - return code; + } else { + stDebug("vgId:%d build and send drop task:0x%x msg", vgId, pTaskId->taskId); } - stDebug("vgId:%d build and send drop task:0x%x msg", vgId, pTaskId->taskId); + return code; +} + +int32_t streamBuildAndSendCheckpointUpdateMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId, STaskId* pHTaskId, + SCheckpointInfo* pCheckpointInfo, int8_t dropRelHTask) { + SVUpdateCheckpointInfoReq* pReq = rpcMallocCont(sizeof(SVUpdateCheckpointInfoReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pReq->head.vgId = vgId; + pReq->taskId = pTaskId->taskId; + pReq->streamId = pTaskId->streamId; + pReq->dropRelHTask = dropRelHTask; + pReq->hStreamId = pHTaskId->streamId; + pReq->hTaskId = pHTaskId->taskId; + pReq->transId = pCheckpointInfo->pActiveInfo->transId; + + pReq->checkpointId = pCheckpointInfo->pActiveInfo->activeId; + pReq->checkpointVer = pCheckpointInfo->processedVer; + pReq->checkpointTs = pCheckpointInfo->startTs; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_UPDATE_CHKPT, .pCont = pReq, .contLen = sizeof(SVUpdateCheckpointInfoReq)}; + int32_t code = tmsgPutToQueue(pMsgCb, WRITE_QUEUE, &msg); + + if (code != TSDB_CODE_SUCCESS) { + stError("vgId:%d task:0x%x failed to send update checkpoint info msg, code:%s", vgId, pTaskId->taskId, tstrerror(code)); + } else { + stDebug("vgId:%d task:0x%x build and send update checkpoint info msg msg", vgId, pTaskId->taskId); + } return code; } @@ -730,6 +792,10 @@ void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc) pDst->stage = pSrc->stage; pDst->inputQUsed = pSrc->inputQUsed; pDst->inputRate = pSrc->inputRate; + pDst->procsTotal = pSrc->procsTotal; + pDst->procsThroughput = pSrc->procsThroughput; + pDst->outputTotal = pSrc->outputTotal; + pDst->outputThroughput = pSrc->outputThroughput; pDst->processedVer = pSrc->processedVer; pDst->verRange = pSrc->verRange; pDst->sinkQuota = pSrc->sinkQuota; @@ -815,10 +881,10 @@ int32_t streamTaskSendCheckpointReq(SStreamTask* pTask) { return 0; } -SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { +SStreamUpstreamEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId) { int32_t num = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < num; ++i) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (pInfo->taskId == taskId) { return pInfo; } @@ -828,6 +894,24 @@ SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t return NULL; } +SEpSet* streamTaskGetDownstreamEpInfo(SStreamTask* pTask, int32_t taskId) { + if (pTask->info.taskLevel == TASK_OUTPUT__FIXED_DISPATCH) { + if (pTask->outputInfo.fixedDispatcher.taskId == taskId) { + return &pTask->outputInfo.fixedDispatcher.epSet; + } + } else if (pTask->info.taskLevel == TASK_OUTPUT__SHUFFLE_DISPATCH) { + SArray* pList = pTask->outputInfo.shuffleDispatcher.dbInfo.pVgroupInfos; + for(int32_t i = 0; i < taosArrayGetSize(pList); ++i) { + SVgroupInfo* pVgInfo = taosArrayGet(pList, i); + if (pVgInfo->taskId == taskId) { + return &pVgInfo->epSet; + } + } + } + + return NULL; +} + char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) { char buf[128] = {0}; sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId); @@ -865,4 +949,76 @@ int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq) { return code; } return streamTrySchedExec(pTask); +} + +void streamTaskSetRemoveBackendFiles(SStreamTask* pTask) { + pTask->status.removeBackendFiles = true; +} + +int32_t streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTransId, int64_t* pCheckpointId) { + if (pTransId != NULL) { + *pTransId = pTask->chkInfo.pActiveInfo->transId; + } + + if (pCheckpointId != NULL) { + *pCheckpointId = pTask->chkInfo.pActiveInfo->activeId; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskSetActiveCheckpointInfo(SStreamTask* pTask, int64_t activeCheckpointId) { + pTask->chkInfo.pActiveInfo->activeId = activeCheckpointId; + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskSetFailedChkptInfo(SStreamTask* pTask, int32_t transId, int64_t checkpointId) { + pTask->chkInfo.pActiveInfo->transId = transId; + pTask->chkInfo.pActiveInfo->activeId = checkpointId; + pTask->chkInfo.pActiveInfo->failedId = checkpointId; + return TSDB_CODE_SUCCESS; +} + +SActiveCheckpointInfo* streamTaskCreateActiveChkptInfo() { + SActiveCheckpointInfo* pInfo = taosMemoryCalloc(1, sizeof(SActiveCheckpointInfo)); + taosThreadMutexInit(&pInfo->lock, NULL); + + pInfo->pDispatchTriggerList = taosArrayInit(4, sizeof(STaskTriggerSendInfo)); + pInfo->pReadyMsgList = taosArrayInit(4, sizeof(STaskCheckpointReadyInfo)); + pInfo->pCheckpointReadyRecvList = taosArrayInit(4, sizeof(STaskDownstreamReadyInfo)); + return pInfo; +} + +void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) { + if (pInfo == NULL) { + return; + } + + taosThreadMutexDestroy(&pInfo->lock); + pInfo->pDispatchTriggerList = taosArrayDestroy(pInfo->pDispatchTriggerList); + pInfo->pReadyMsgList = taosArrayDestroy(pInfo->pReadyMsgList); + pInfo->pCheckpointReadyRecvList = taosArrayDestroy(pInfo->pCheckpointReadyRecvList); + + if (pInfo->pChkptTriggerTmr != NULL) { + taosTmrStop(pInfo->pChkptTriggerTmr); + pInfo->pChkptTriggerTmr = NULL; + } + + if (pInfo->pSendReadyMsgTmr != NULL) { + taosTmrStop(pInfo->pSendReadyMsgTmr); + pInfo->pSendReadyMsgTmr = NULL; + } + + taosMemoryFree(pInfo); +} + +void streamTaskClearActiveInfo(SActiveCheckpointInfo* pInfo) { + pInfo->activeId = 0; // clear the checkpoint id + pInfo->failedId = 0; + pInfo->transId = 0; + pInfo->allUpstreamTriggerRecv = 0; + pInfo->dispatchTrigger = false; + + taosArrayClear(pInfo->pDispatchTriggerList); + taosArrayClear(pInfo->pCheckpointReadyRecvList); } \ No newline at end of file diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index cced6a6b84..82ea2f88ef 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -584,7 +584,7 @@ void doInitStateTransferTable(void) { // initialization event handle STaskStateTrans trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, streamTaskInitStatus, streamTaskOnNormalTaskReady, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, streamTaskOnScanhistoryTaskReady, NULL); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, streamTaskOnScanHistoryTaskReady, NULL); taosArrayPush(streamTaskSMTrans, &trans); // scan-history related event diff --git a/source/libs/stream/src/streammsg.c b/source/libs/stream/src/streammsg.c index 5e52b927c6..705406f044 100644 --- a/source/libs/stream/src/streammsg.c +++ b/source/libs/stream/src/streammsg.c @@ -17,7 +17,7 @@ #include "streammsg.h" #include "tstream.h" -int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo) { +int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamUpstreamEpInfo* pInfo) { if (tEncodeI32(pEncoder, pInfo->taskId) < 0) return -1; if (tEncodeI32(pEncoder, pInfo->nodeId) < 0) return -1; if (tEncodeI32(pEncoder, pInfo->childId) < 0) return -1; @@ -26,7 +26,7 @@ int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo) return 0; } -int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo) { +int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamUpstreamEpInfo* pInfo) { if (tDecodeI32(pDecoder, &pInfo->taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pInfo->nodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pInfo->childId) < 0) return -1; @@ -335,6 +335,10 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { if (tEncodeI32(pEncoder, ps->nodeId) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputQUsed) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputRate) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->procsTotal) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->procsThroughput) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->outputTotal) < 0) return -1; + if (tEncodeDouble(pEncoder, ps->outputThroughput) < 0) return -1; if (tEncodeDouble(pEncoder, ps->sinkQuota) < 0) return -1; if (tEncodeDouble(pEncoder, ps->sinkDataSize) < 0) return -1; if (tEncodeI64(pEncoder, ps->processedVer) < 0) return -1; @@ -346,6 +350,8 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { if (tEncodeI64(pEncoder, ps->checkpointInfo.latestId) < 0) return -1; if (tEncodeI64(pEncoder, ps->checkpointInfo.latestVer) < 0) return -1; if (tEncodeI64(pEncoder, ps->checkpointInfo.latestTime) < 0) return -1; + if (tEncodeI64(pEncoder, ps->checkpointInfo.latestSize) < 0) return -1; + if (tEncodeI8(pEncoder, ps->checkpointInfo.remoteBackup) < 0) return -1; if (tEncodeI64(pEncoder, ps->startTime) < 0) return -1; if (tEncodeI64(pEncoder, ps->startCheckpointId) < 0) return -1; if (tEncodeI64(pEncoder, ps->startCheckpointVer) < 0) return -1; @@ -381,6 +387,10 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { if (tDecodeI32(pDecoder, &entry.nodeId) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputQUsed) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputRate) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.procsTotal) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.procsThroughput) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.outputTotal) < 0) return -1; + if (tDecodeDouble(pDecoder, &entry.outputThroughput) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.sinkQuota) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.sinkDataSize) < 0) return -1; if (tDecodeI64(pDecoder, &entry.processedVer) < 0) return -1; @@ -393,6 +403,8 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestId) < 0) return -1; if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestVer) < 0) return -1; if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestTime) < 0) return -1; + if (tDecodeI64(pDecoder, &entry.checkpointInfo.latestSize) < 0) return -1; + if (tDecodeI8(pDecoder, &entry.checkpointInfo.remoteBackup) < 0) return -1; if (tDecodeI64(pDecoder, &entry.startTime) < 0) return -1; if (tDecodeI64(pDecoder, &entry.startCheckpointId) < 0) return -1; if (tDecodeI64(pDecoder, &entry.startCheckpointVer) < 0) return -1; @@ -469,7 +481,7 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { int32_t epSz = taosArrayGetSize(pTask->upstreamInfo.pList); if (tEncodeI32(pEncoder, epSz) < 0) return -1; for (int32_t i = 0; i < epSz; i++) { - SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); + SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1; } @@ -493,7 +505,7 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tSerializeSUseDbRspImp(pEncoder, &pTask->outputInfo.shuffleDispatcher.dbInfo) < 0) return -1; if (tEncodeCStr(pEncoder, pTask->outputInfo.shuffleDispatcher.stbFullName) < 0) return -1; } - if (tEncodeI64(pEncoder, pTask->info.triggerParam) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->info.delaySchedParam) < 0) return -1; if (tEncodeI8(pEncoder, pTask->subtableWithoutMd5) < 0) return -1; if (tEncodeCStrWithLen(pEncoder, pTask->reserve, sizeof(pTask->reserve) - 1) < 0) return -1; @@ -545,7 +557,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { pTask->upstreamInfo.pList = taosArrayInit(epSz, POINTER_BYTES); for (int32_t i = 0; i < epSz; i++) { - SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo)); + SStreamUpstreamEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamUpstreamEpInfo)); if (pInfo == NULL) return -1; if (tDecodeStreamEpInfo(pDecoder, pInfo) < 0) { taosMemoryFreeClear(pInfo); @@ -576,7 +588,7 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDeserializeSUseDbRspImp(pDecoder, &pTask->outputInfo.shuffleDispatcher.dbInfo) < 0) return -1; if (tDecodeCStrTo(pDecoder, pTask->outputInfo.shuffleDispatcher.stbFullName) < 0) return -1; } - if (tDecodeI64(pDecoder, &pTask->info.triggerParam) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->info.delaySchedParam) < 0) return -1; if (pTask->ver >= SSTREAM_TASK_SUBTABLE_CHANGED_VER){ if (tDecodeI8(pDecoder, &pTask->subtableWithoutMd5) < 0) return -1; } diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 3543ed574c..796a45d997 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -874,6 +874,17 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn sError("vgId:%d, failed to get prev log term since %s. index:%" PRId64, pNode->vgId, terrstr(), index + 1); return -1; } + + if (pMsg->matchIndex == -1) { + // first time to restore + sInfo("vgId:%d, first time to restore sync log repl. peer: dnode:%d (%" PRIx64 "), repl-mgr:[%" PRId64 " %" PRId64 + ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 "), index:%" PRId64 + ", firstVer:%" PRId64 ", term:%" PRId64 ", lastMatchTerm:%" PRId64, + pNode->vgId, DID(&destId), destId.addr, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, + pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex, index, firstVer, term, + pMsg->lastMatchTerm); + } + if ((index + 1 < firstVer) || (term < 0) || (term != pMsg->lastMatchTerm && (index + 1 == firstVer || index == firstVer))) { ASSERT(term >= 0 || terrno == TSDB_CODE_WAL_LOG_NOT_EXIST); diff --git a/source/util/test/cfgTest.cpp b/source/util/test/cfgTest.cpp index 92422b6a80..9f8645b14c 100644 --- a/source/util/test/cfgTest.cpp +++ b/source/util/test/cfgTest.cpp @@ -67,7 +67,7 @@ TEST_F(CfgTest, 02_Basic) { SConfigItem* pItem = NULL; SConfigIter* pIter = cfgCreateIter(pConfig); - while((pItem == cfgNextIter(pIter)) != NULL) { + while((pItem = cfgNextIter(pIter)) != NULL) { switch (pItem->dtype) { case CFG_DTYPE_BOOL: printf("index:%d, cfg:%s value:%d\n", size, pItem->name, pItem->bval); diff --git a/tests/army/community/insert/insert_basic.py b/tests/army/community/insert/insert_basic.py index 481db4eadd..1f2488a756 100644 --- a/tests/army/community/insert/insert_basic.py +++ b/tests/army/community/insert/insert_basic.py @@ -35,18 +35,18 @@ class TDTestCase(TBase): tdSql.execute("create database db_geometry;") tdSql.execute("use db_geometry;") tdSql.execute("create table t_ge (ts timestamp, id int, c1 GEOMETRY(512));") - tdSql.execute("insert into t_ge values(now, 1, 'MULTIPOINT ((0 0), (1 1))');") - tdSql.execute("insert into t_ge values(now, 1, 'MULTIPOINT (0 0, 1 1)');") - tdSql.execute("insert into t_ge values(now, 2, 'POINT (0 0)');") - tdSql.execute("insert into t_ge values(now, 2, 'POINT EMPTY');") - tdSql.execute("insert into t_ge values(now, 3, 'LINESTRING (0 0, 0 1, 1 2)');") - tdSql.execute("insert into t_ge values(now, 3, 'LINESTRING EMPTY');") - tdSql.execute("insert into t_ge values(now, 4, 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))');") - tdSql.execute("insert into t_ge values(now, 4, 'POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))');") - tdSql.execute("insert into t_ge values(now, 4, 'POLYGON EMPTY');") - tdSql.execute("insert into t_ge values(now, 5, 'MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))');") - tdSql.execute("insert into t_ge values(now, 6, 'MULTIPOLYGON (((1 1, 1 3, 3 3, 3 1, 1 1)), ((4 3, 6 3, 6 1, 4 1, 4 3)))');") - tdSql.execute("insert into t_ge values(now, 7, 'GEOMETRYCOLLECTION (MULTIPOINT((0 0), (1 1)), POINT(3 4), LINESTRING(2 3, 3 4))');") + tdSql.execute("insert into t_ge values(1717122943000, 1, 'MULTIPOINT ((0 0), (1 1))');") + tdSql.execute("insert into t_ge values(1717122944000, 1, 'MULTIPOINT (0 0, 1 1)');") + tdSql.execute("insert into t_ge values(1717122945000, 2, 'POINT (0 0)');") + tdSql.execute("insert into t_ge values(1717122946000, 2, 'POINT EMPTY');") + tdSql.execute("insert into t_ge values(1717122947000, 3, 'LINESTRING (0 0, 0 1, 1 2)');") + tdSql.execute("insert into t_ge values(1717122948000, 3, 'LINESTRING EMPTY');") + tdSql.execute("insert into t_ge values(1717122949000, 4, 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))');") + tdSql.execute("insert into t_ge values(1717122950000, 4, 'POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))');") + tdSql.execute("insert into t_ge values(1717122951000, 4, 'POLYGON EMPTY');") + tdSql.execute("insert into t_ge values(1717122952000, 5, 'MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))');") + tdSql.execute("insert into t_ge values(1717122953000, 6, 'MULTIPOLYGON (((1 1, 1 3, 3 3, 3 1, 1 1)), ((4 3, 6 3, 6 1, 4 1, 4 3)))');") + tdSql.execute("insert into t_ge values(1717122954000, 7, 'GEOMETRYCOLLECTION (MULTIPOINT((0 0), (1 1)), POINT(3 4), LINESTRING(2 3, 3 4))');") tdSql.query("select * from t_ge;") tdSql.checkRows(12) tdSql.query("select * from t_ge where id=1;") diff --git a/tests/script/tsim/insert/insert_select.sim b/tests/script/tsim/insert/insert_select.sim index 333964b1d6..888f349fbb 100644 --- a/tests/script/tsim/insert/insert_select.sim +++ b/tests/script/tsim/insert/insert_select.sim @@ -61,5 +61,23 @@ if $data02 != 1 then return -1 endi +sql insert into t2 (ts, b, a) select ts + 1, 11, 12 from t1; +sql select * from t2; +if $rows != 2 then + return -1 +endi +if $data01 != 2 then + return -1 +endi +if $data02 != 1 then + return -1 +endi +if $data11 != 12 then + return -1 +endi +if $data12 != 11 then + return -1 +endi + system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/system-test/0-others/information_schema.py b/tests/system-test/0-others/information_schema.py index 8578f22084..b4d1cb71b1 100644 --- a/tests/system-test/0-others/information_schema.py +++ b/tests/system-test/0-others/information_schema.py @@ -222,7 +222,7 @@ class TDTestCase: tdSql.query("select * from information_schema.ins_columns where db_name ='information_schema'") tdLog.info(len(tdSql.queryResult)) - tdSql.checkEqual(True, len(tdSql.queryResult) in range(256, 257)) + tdSql.checkEqual(True, len(tdSql.queryResult) in range(261, 262)) tdSql.query("select * from information_schema.ins_columns where db_name ='performance_schema'") tdSql.checkEqual(54, len(tdSql.queryResult)) diff --git a/tests/system-test/1-insert/drop.py b/tests/system-test/1-insert/drop.py index 21817ef20d..493e1491b8 100644 --- a/tests/system-test/1-insert/drop.py +++ b/tests/system-test/1-insert/drop.py @@ -20,6 +20,7 @@ from util.common import * from util.sqlset import * class TDTestCase: + updatecfgDict = {'stdebugflag':143} def init(self, conn, logSql, replicaVar=1): self.replicaVar = int(replicaVar) tdLog.debug("start to execute %s" % __file__) diff --git a/tests/system-test/1-insert/insert_double.py b/tests/system-test/1-insert/insert_double.py index b7af7237db..d6dada32cd 100644 --- a/tests/system-test/1-insert/insert_double.py +++ b/tests/system-test/1-insert/insert_double.py @@ -26,30 +26,30 @@ class TDTestCase: tdSql.execute(f"drop table if exists {table_name}") tdSql.execute(f"create table {table_name}(ts timestamp, i1 {dtype}, i2 {dtype} unsigned)") - tdSql.execute(f"insert into {table_name} values(now, -16, +6)") - tdSql.execute(f"insert into {table_name} values(now, 80.99, +0042)") - tdSql.execute(f"insert into {table_name} values(now, -0042, +80.99)") - tdSql.execute(f"insert into {table_name} values(now, 52.34354, 18.6)") - tdSql.execute(f"insert into {table_name} values(now, -12., +3.)") - tdSql.execute(f"insert into {table_name} values(now, -0.12, +3.0)") - tdSql.execute(f"insert into {table_name} values(now, -2.3e1, +2.324e2)") - tdSql.execute(f"insert into {table_name} values(now, -2e1, +2e2)") - tdSql.execute(f"insert into {table_name} values(now, -2.e1, +2.e2)") - tdSql.execute(f"insert into {table_name} values(now, -0x40, +0b10000)") - tdSql.execute(f"insert into {table_name} values(now, -0b10000, +0x40)") + tdSql.execute(f"insert into {table_name} values(1717122943000, -16, +6)") + tdSql.execute(f"insert into {table_name} values(1717122944000, 80.99, +0042)") + tdSql.execute(f"insert into {table_name} values(1717122945000, -0042, +80.99)") + tdSql.execute(f"insert into {table_name} values(1717122946000, 52.34354, 18.6)") + tdSql.execute(f"insert into {table_name} values(1717122947000, -12., +3.)") + tdSql.execute(f"insert into {table_name} values(1717122948000, -0.12, +3.0)") + tdSql.execute(f"insert into {table_name} values(1717122949000, -2.3e1, +2.324e2)") + tdSql.execute(f"insert into {table_name} values(1717122950000, -2e1, +2e2)") + tdSql.execute(f"insert into {table_name} values(1717122951000, -2.e1, +2.e2)") + tdSql.execute(f"insert into {table_name} values(1717122952000, -0x40, +0b10000)") + tdSql.execute(f"insert into {table_name} values(1717122953000, -0b10000, +0x40)") # str support - tdSql.execute(f"insert into {table_name} values(now, '-16', '+6')") - tdSql.execute(f"insert into {table_name} values(now, ' -80.99', ' +0042')") - tdSql.execute(f"insert into {table_name} values(now, ' -0042', ' +80.99')") - tdSql.execute(f"insert into {table_name} values(now, '52.34354', '18.6')") - tdSql.execute(f"insert into {table_name} values(now, '-12.', '+5.')") - tdSql.execute(f"insert into {table_name} values(now, '-.12', '+.5')") - tdSql.execute(f"insert into {table_name} values(now, '-2.e1', '+2.e2')") - tdSql.execute(f"insert into {table_name} values(now, '-2e1', '+2e2')") - tdSql.execute(f"insert into {table_name} values(now, '-2.3e1', '+2.324e2')") - tdSql.execute(f"insert into {table_name} values(now, '-0x40', '+0b10010')") - tdSql.execute(f"insert into {table_name} values(now, '-0b10010', '+0x40')") + tdSql.execute(f"insert into {table_name} values(1717122954000, '-16', '+6')") + tdSql.execute(f"insert into {table_name} values(1717122955000, ' -80.99', ' +0042')") + tdSql.execute(f"insert into {table_name} values(1717122956000, ' -0042', ' +80.99')") + tdSql.execute(f"insert into {table_name} values(1717122957000, '52.34354', '18.6')") + tdSql.execute(f"insert into {table_name} values(1717122958000, '-12.', '+5.')") + tdSql.execute(f"insert into {table_name} values(1717122959000, '-.12', '+.5')") + tdSql.execute(f"insert into {table_name} values(1717122960000, '-2.e1', '+2.e2')") + tdSql.execute(f"insert into {table_name} values(1717122961000, '-2e1', '+2e2')") + tdSql.execute(f"insert into {table_name} values(1717122962000, '-2.3e1', '+2.324e2')") + tdSql.execute(f"insert into {table_name} values(1717122963000, '-0x40', '+0b10010')") + tdSql.execute(f"insert into {table_name} values(1717122964000, '-0b10010', '+0x40')") tdSql.query(f"select * from {table_name}") tdSql.checkRows(22) @@ -64,22 +64,22 @@ class TDTestCase: min_u = 0 print("val:", baseval, negval, posval, max_i) - tdSql.execute(f"insert into {table_name} values(now, {negval}, {posval})") - tdSql.execute(f"insert into {table_name} values(now, -{baseval}, {baseval})") - tdSql.execute(f"insert into {table_name} values(now, {max_i}, {max_u})") - tdSql.execute(f"insert into {table_name} values(now, {min_i}, {min_u})") + tdSql.execute(f"insert into {table_name} values(1717122965000, {negval}, {posval})") + tdSql.execute(f"insert into {table_name} values(1717122966000, -{baseval}, {baseval})") + tdSql.execute(f"insert into {table_name} values(1717122967000, {max_i}, {max_u})") + tdSql.execute(f"insert into {table_name} values(1717122968000, {min_i}, {min_u})") tdSql.query(f"select * from {table_name}") tdSql.checkRows(26) # error case - tdSql.error(f"insert into {table_name} values(now, 0, {max_u+1})") - tdSql.error(f"insert into {table_name} values(now, 0, -1)") - tdSql.error(f"insert into {table_name} values(now, 0, -2.0)") - tdSql.error(f"insert into {table_name} values(now, 0, '-2.0')") - tdSql.error(f"insert into {table_name} values(now, {max_i+1}, 0)") - tdSql.error(f"insert into {table_name} values(now, {min_i-1}, 0)") - tdSql.error(f"insert into {table_name} values(now, '{min_i-1}', 0)") + tdSql.error(f"insert into {table_name} values(1717122969000, 0, {max_u+1})") + tdSql.error(f"insert into {table_name} values(1717122970000, 0, -1)") + tdSql.error(f"insert into {table_name} values(1717122971000, 0, -2.0)") + tdSql.error(f"insert into {table_name} values(1717122972000, 0, '-2.0')") + tdSql.error(f"insert into {table_name} values(1717122973000, {max_i+1}, 0)") + tdSql.error(f"insert into {table_name} values(1717122974000, {min_i-1}, 0)") + tdSql.error(f"insert into {table_name} values(1717122975000, '{min_i-1}', 0)") def test_tags(self, stable_name, dtype, bits): tdSql.execute(f"create stable {stable_name}(ts timestamp, i1 {dtype}, i2 {dtype} unsigned) tags(id {dtype})") @@ -93,20 +93,20 @@ class TDTestCase: max_u = 2*bigval - 1 min_u = 0 - tdSql.execute(f"insert into {stable_name}_1 using {stable_name} tags('{negval}') values(now, {negval}, {posval})") - tdSql.execute(f"insert into {stable_name}_2 using {stable_name} tags({posval}) values(now, -{baseval} , {baseval})") - tdSql.execute(f"insert into {stable_name}_3 using {stable_name} tags('0x40') values(now, {max_i}, {max_u})") - tdSql.execute(f"insert into {stable_name}_4 using {stable_name} tags(0b10000) values(now, {min_i}, {min_u})") + tdSql.execute(f"insert into {stable_name}_1 using {stable_name} tags('{negval}') values(1717122976000, {negval}, {posval})") + tdSql.execute(f"insert into {stable_name}_2 using {stable_name} tags({posval}) values(1717122977000, -{baseval} , {baseval})") + tdSql.execute(f"insert into {stable_name}_3 using {stable_name} tags('0x40') values(1717122978000, {max_i}, {max_u})") + tdSql.execute(f"insert into {stable_name}_4 using {stable_name} tags(0b10000) values(1717122979000, {min_i}, {min_u})") - tdSql.execute(f"insert into {stable_name}_5 using {stable_name} tags({max_i}) values(now, '{negval}', '{posval}')") - tdSql.execute(f"insert into {stable_name}_6 using {stable_name} tags('{min_i}') values(now, '-{baseval}' , '{baseval}')") - tdSql.execute(f"insert into {stable_name}_7 using {stable_name} tags(-0x40) values(now, '{max_i}', '{max_u}')") - tdSql.execute(f"insert into {stable_name}_8 using {stable_name} tags('-0b10000') values(now, '{min_i}', '{min_u}')") + tdSql.execute(f"insert into {stable_name}_5 using {stable_name} tags({max_i}) values(1717122980000, '{negval}', '{posval}')") + tdSql.execute(f"insert into {stable_name}_6 using {stable_name} tags('{min_i}') values(1717122981000, '-{baseval}' , '{baseval}')") + tdSql.execute(f"insert into {stable_name}_7 using {stable_name} tags(-0x40) values(1717122982000, '{max_i}', '{max_u}')") + tdSql.execute(f"insert into {stable_name}_8 using {stable_name} tags('-0b10000') values(1717122983000, '{min_i}', '{min_u}')") - tdSql.execute(f"insert into {stable_name}_9 using {stable_name} tags(12.) values(now, {negval}, {posval})") - tdSql.execute(f"insert into {stable_name}_10 using {stable_name} tags('-8.3') values(now, -{baseval} , {baseval})") - tdSql.execute(f"insert into {stable_name}_11 using {stable_name} tags(2.e1) values(now, {max_i}, {max_u})") - tdSql.execute(f"insert into {stable_name}_12 using {stable_name} tags('-2.3e1') values(now, {min_i}, {min_u})") + tdSql.execute(f"insert into {stable_name}_9 using {stable_name} tags(12.) values(1717122984000, {negval}, {posval})") + tdSql.execute(f"insert into {stable_name}_10 using {stable_name} tags('-8.3') values(1717122985000, -{baseval} , {baseval})") + tdSql.execute(f"insert into {stable_name}_11 using {stable_name} tags(2.e1) values(1717122986000, {max_i}, {max_u})") + tdSql.execute(f"insert into {stable_name}_12 using {stable_name} tags('-2.3e1') values(1717122987000, {min_i}, {min_u})") tdSql.query(f"select * from {stable_name}") tdSql.checkRows(12) diff --git a/tests/system-test/2-query/td-28068.py b/tests/system-test/2-query/td-28068.py index 0dfaf8e126..0a7e75fef2 100644 --- a/tests/system-test/2-query/td-28068.py +++ b/tests/system-test/2-query/td-28068.py @@ -10,14 +10,14 @@ class TDTestCase: tdSql.execute("create database td_28068;") tdSql.execute("create database if not exists td_28068;") tdSql.execute("create stable td_28068.st (ts timestamp, test_case nchar(10), time_cost float, num float) tags (branch nchar(10), scenario nchar(10));") - tdSql.execute("insert into td_28068.ct1 using td_28068.st (branch, scenario) tags ('3.0', 'scenario1') values (now(), 'query1', 1,2);") - tdSql.execute("insert into td_28068.ct1 using td_28068.st (branch, scenario) tags ('3.0', 'scenario1') values (now(), 'query1', 2,3);") - tdSql.execute("insert into td_28068.ct2 using td_28068.st (branch, scenario) tags ('3.0', 'scenario2') values (now(), 'query1', 10,1);") - tdSql.execute("insert into td_28068.ct2 using td_28068.st (branch, scenario) tags ('3.0', 'scenario2') values (now(), 'query1', 11,5);") - tdSql.execute("insert into td_28068.ct3 using td_28068.st (branch, scenario) tags ('3.1', 'scenario1') values (now(), 'query1', 20,4);") - tdSql.execute("insert into td_28068.ct3 using td_28068.st (branch, scenario) tags ('3.1', 'scenario1') values (now(), 'query1', 30,1);") - tdSql.execute("insert into td_28068.ct4 using td_28068.st (branch, scenario) tags ('3.1', 'scenario2') values (now(), 'query1', 8,8);") - tdSql.execute("insert into td_28068.ct4 using td_28068.st (branch, scenario) tags ('3.1', 'scenario2') values (now(), 'query1', 9,10);") + tdSql.execute("insert into td_28068.ct1 using td_28068.st (branch, scenario) tags ('3.0', 'scenario1') values (1717122943000, 'query1', 1,2);") + tdSql.execute("insert into td_28068.ct1 using td_28068.st (branch, scenario) tags ('3.0', 'scenario1') values (1717122944000, 'query1', 2,3);") + tdSql.execute("insert into td_28068.ct2 using td_28068.st (branch, scenario) tags ('3.0', 'scenario2') values (1717122945000, 'query1', 10,1);") + tdSql.execute("insert into td_28068.ct2 using td_28068.st (branch, scenario) tags ('3.0', 'scenario2') values (1717122946000, 'query1', 11,5);") + tdSql.execute("insert into td_28068.ct3 using td_28068.st (branch, scenario) tags ('3.1', 'scenario1') values (1717122947000, 'query1', 20,4);") + tdSql.execute("insert into td_28068.ct3 using td_28068.st (branch, scenario) tags ('3.1', 'scenario1') values (1717122948000, 'query1', 30,1);") + tdSql.execute("insert into td_28068.ct4 using td_28068.st (branch, scenario) tags ('3.1', 'scenario2') values (1717122949000, 'query1', 8,8);") + tdSql.execute("insert into td_28068.ct4 using td_28068.st (branch, scenario) tags ('3.1', 'scenario2') values (1717122950000, 'query1', 9,10);") def run(self): tdSql.query('select last(ts) as ts, last(branch) as branch, last(scenario) as scenario, last(test_case) as test_case from td_28068.st group by branch, scenario order by last(branch);')