From 680eb5674ebc9f29995b0c3c982408785d49eb0c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 5 Nov 2023 01:23:23 +0800 Subject: [PATCH 01/20] refactor: do some internal refactor. --- source/dnode/vnode/src/tq/tq.c | 40 +++++++++++++++++---------- source/libs/executor/src/executor.c | 2 +- source/libs/stream/src/streamMeta.c | 1 - source/libs/stream/src/streamQueue.c | 3 +- source/libs/stream/src/streamTask.c | 3 +- source/libs/stream/src/streamTaskSm.c | 1 - 6 files changed, 30 insertions(+), 20 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 4802988f18..2422841c3b 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1061,7 +1061,7 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms return code; } -static void doStartStep2(SStreamTask* pTask, SStreamTask* pStreamTask, STQ* pTq) { +static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask, STQ* pTq) { const char* id = pTask->id.idStr; int64_t nextProcessedVer = pStreamTask->hTaskInfo.haltVer; @@ -1102,7 +1102,7 @@ static void doStartStep2(SStreamTask* pTask, SStreamTask* pStreamTask, STQ* pTq) } } -// this function should be executed by only one thread +// this function should be executed by only one thread, so we set an sentinel to protect this function int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont; SStreamMeta* pMeta = pTq->pStreamMeta; @@ -1131,6 +1131,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { } } + // let's decide which step should be executed now if (pTask->execInfo.step1Start == 0) { ASSERT(pTask->status.pauseAllowed == false); int64_t ts = taosGetTimestampMs(); @@ -1164,14 +1165,28 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { return 0; } - streamScanHistoryData(pTask); + EScanHistoryRet ret = streamScanHistoryData(pTask); + // todo update the step1 exec elapsed time double el = (taosGetTimestampMs() - pTask->execInfo.step1Start) / 1000.0; - if (streamTaskGetStatus(pTask, NULL) == TASK_STATUS__PAUSE) { - int8_t status = streamTaskSetSchedStatusInactive(pTask); - tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs, sched-status:%d", pTask->id.idStr, el, status); + if (ret == TASK_SCANHISTORY_QUIT || ret == TASK_SCANHISTORY_REXEC) { + int8_t status = streamTaskSetSchedStatusInactive(pTask); atomic_store_32(&pTask->status.inScanHistorySentinel, 0); + + if (ret == TASK_SCANHISTORY_REXEC) { + streamStartScanHistoryAsync(pTask, 0); + } else { + char* p = NULL; + ETaskStatus s = streamTaskGetStatus(pTask, &p); + + if (s == TASK_STATUS__PAUSE) { + tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs, sched-status:%d", pTask->id.idStr, el, status); + } else if (s == TASK_STATUS__STOP || s == TASK_STATUS__DROPPING) { + tqDebug("s-task:%s status:%p not continue scan-history data", pTask->id.idStr, p); + } + } + streamMetaReleaseTask(pMeta, pTask); return 0; } @@ -1200,23 +1215,20 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { code = streamTaskHandleEvent(pStreamTask->status.pSM, TASK_EVENT_HALT); if (code == TSDB_CODE_SUCCESS) { - doStartStep2(pTask, pStreamTask, pTq); + doStartFillhistoryStep2(pTask, pStreamTask, pTq); } else { tqError("s-task:%s failed to halt s-task:%s, not launch step2", id, pStreamTask->id.idStr); } streamMetaReleaseTask(pMeta, pStreamTask); - } else { STimeWindow* pWindow = &pTask->dataRange.window; ASSERT(HAS_RELATED_FILLHISTORY_TASK(pTask)); - // Not update the fill-history time window until the state transfer is completed if the related fill-history task - // exists. - tqDebug( - "s-task:%s scan-history in stream time window completed, now start to handle data from WAL, startVer:%" PRId64 - ", window:%" PRId64 " - %" PRId64, - id, pTask->chkInfo.nextProcessVer, pWindow->skey, pWindow->ekey); + // Not update the fill-history time window until the state transfer is completed. + tqDebug("s-task:%s scan-history in stream time window completed, start to handle data from WAL, startVer:%" PRId64 + ", window:%" PRId64 " - %" PRId64, + id, pTask->chkInfo.nextProcessVer, pWindow->skey, pWindow->ekey); code = streamTaskScanHistoryDataComplete(pTask); } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 4452eff8c7..9ed7d6e033 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -1046,7 +1046,7 @@ int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo) { } } -bool qStreamRecoverScanFinished(qTaskInfo_t tinfo) { +bool qStreamScanhistoryFinished(qTaskInfo_t tinfo) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; return pTaskInfo->streamInfo.recoverScanFinished; } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 76945f17a9..7dfe88e6de 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1104,7 +1104,6 @@ void streamMetaRLock(SStreamMeta* pMeta) { void streamMetaRUnLock(SStreamMeta* pMeta) { stTrace("vgId:%d meta-runlock", pMeta->vgId); taosRUnLockLatch(&pMeta->lock); - } void streamMetaWLock(SStreamMeta* pMeta) { stTrace("vgId:%d meta-wlock", pMeta->vgId); diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index eae4605dbc..90e862005b 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -340,10 +340,11 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) return 0; } -// the result should be put into the outputQ in any cases, otherwise, the result may be lost +// the result should be put into the outputQ in any cases, the result may be lost otherwise. int32_t streamTaskPutDataIntoOutputQ(SStreamTask* pTask, SStreamDataBlock* pBlock) { STaosQueue* pQueue = pTask->outputq.queue->pQueue; + // wait for the output queue is available for new data to dispatch while (streamQueueIsFull(pTask->outputq.queue)) { if (streamTaskShouldStop(pTask)) { stInfo("s-task:%s discard result block due to task stop", pTask->id.idStr); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index f949d46315..a1245ba32c 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -400,8 +400,7 @@ void tFreeStreamTask(SStreamTask* pTask) { taosMemoryFree(pTask->outputInfo.pTokenBucket); taosThreadMutexDestroy(&pTask->lock); - taosArrayDestroy(pTask->outputInfo.pDownstreamUpdateList); - pTask->outputInfo.pDownstreamUpdateList = NULL; + pTask->outputInfo.pDownstreamUpdateList = taosArrayDestroy(pTask->outputInfo.pDownstreamUpdateList); taosMemoryFree(pTask); stDebug("s-task:0x%x free task completed", taskId); diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 09656cbe97..04b449aaaf 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -315,7 +315,6 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even GET_EVT_NAME(event), pSM->current.name, GET_EVT_NAME(pSM->prev.evt)); taosThreadMutexUnlock(&pTask->lock); - stDebug("s-task:%s unlockx", pTask->id.idStr); return TSDB_CODE_STREAM_INVALID_STATETRANS; } From 1f756f58f683f49578bd639d61233854e44fd500 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 5 Nov 2023 01:24:23 +0800 Subject: [PATCH 02/20] fix(stream): add need update node into update node list. --- source/libs/stream/src/streamStart.c | 60 +++++++++++++++------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 5ebc60dc13..4f757f09fb 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -318,6 +318,31 @@ void doProcessDownstreamReadyRsp(SStreamTask* pTask) { streamTaskOnHandleEventSuccess(pTask->status.pSM, event); } +static void addIntoNodeUpdateList(SStreamTask* pTask, int32_t nodeId) { + int32_t vgId = pTask->pMeta->vgId; + + taosThreadMutexLock(&pTask->lock); + int32_t num = taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList); + bool existed = false; + for (int i = 0; i < num; ++i) { + SDownstreamTaskEpset* p = taosArrayGet(pTask->outputInfo.pDownstreamUpdateList, i); + if (p->nodeId == nodeId) { + existed = true; + break; + } + } + + if (!existed) { + SDownstreamTaskEpset t = {.nodeId = nodeId}; + taosArrayPush(pTask->outputInfo.pDownstreamUpdateList, &t); + + stInfo("s-task:%s vgId:%d downstream nodeId:%d needs to be updated, total needs updated:%d", pTask->id.idStr, vgId, + t.nodeId, (int32_t)taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList)); + } + + taosThreadMutexUnlock(&pTask->lock); +} + int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) { ASSERT(pTask->id.taskId == pRsp->upstreamTaskId); const char* id = pTask->id.idStr; @@ -372,35 +397,16 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs "s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%d, current stage:%d, " "not check wait for downstream task nodeUpdate, and all tasks restart", id, pRsp->upstreamNodeId, pRsp->oldStage, (int32_t)pTask->pMeta->stage); - } else { - if (pRsp->status == TASK_DOWNSTREAM_NOT_LEADER) { - stError( - "s-task:%s downstream taskId:0x%x (vgId:%d) not leader, self dispatch epset needs to be updated, not check " - "downstream again, nodeUpdate needed", - id, pRsp->downstreamTaskId, pRsp->downstreamNodeId); - taosThreadMutexLock(&pTask->lock); - int32_t num = taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList); - bool existed = false; - for (int i = 0; i < num; ++i) { - SDownstreamTaskEpset* p = taosArrayGet(pTask->outputInfo.pDownstreamUpdateList, i); - if (p->nodeId == pRsp->downstreamNodeId) { - existed = true; - break; - } - } - - if (!existed) { - SDownstreamTaskEpset t = {.nodeId = pRsp->downstreamNodeId}; - taosArrayPush(pTask->outputInfo.pDownstreamUpdateList, &t); - stInfo("s-task:%s vgId:%d downstream nodeId:%d needs to be updated, total needs updated:%d", id, vgId, - t.nodeId, (int32_t)taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList)); - } - - taosThreadMutexUnlock(&pTask->lock); - return 0; - } + addIntoNodeUpdateList(pTask, pRsp->downstreamNodeId); + } else if (pRsp->status == TASK_DOWNSTREAM_NOT_LEADER) { + stError( + "s-task:%s downstream taskId:0x%x (vgId:%d) not leader, self dispatch epset needs to be updated, not check " + "downstream again, nodeUpdate needed", + id, pRsp->downstreamTaskId, pRsp->downstreamNodeId); + addIntoNodeUpdateList(pTask, pRsp->downstreamNodeId); + } else { // TASK_DOWNSTREAM_NOT_READY, let's retry in 100ms STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp); int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); From 0e44950a37020becf3d56b9b5a1f3ae6cd742a5b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 5 Nov 2023 01:26:53 +0800 Subject: [PATCH 03/20] enh(stream): execute scan history by using time slice 1s. --- include/libs/executor/executor.h | 2 +- include/libs/stream/tstream.h | 8 +++++- source/dnode/vnode/src/tq/tq.c | 1 + source/libs/stream/src/streamExec.c | 44 +++++++++++++++++++---------- 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index f549f23c7f..d0e2b45937 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -213,7 +213,7 @@ int32_t qStreamSourceScanParamForHistoryScanStep1(qTaskInfo_t tinfo, SVersionRan int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow); int32_t qStreamRecoverFinish(qTaskInfo_t tinfo); int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo); -bool qStreamRecoverScanFinished(qTaskInfo_t tinfo); +bool qStreamScanhistoryFinished(qTaskInfo_t tinfo); int32_t qStreamInfoResetTimewindowFilter(qTaskInfo_t tinfo); void resetTaskInfo(qTaskInfo_t tinfo); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 2e4204ab34..0605f4b2e5 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -775,10 +775,16 @@ void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask); void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc); +typedef enum { + TASK_SCANHISTORY_CONT = 0x1, + TASK_SCANHISTORY_QUIT = 0x2, + TASK_SCANHISTORY_REXEC = 0x3, +} EScanHistoryRet; + // source level int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); -int32_t streamScanHistoryData(SStreamTask* pTask); +EScanHistoryRet streamScanHistoryData(SStreamTask* pTask); int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); // agg level diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 2422841c3b..2e6844736c 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1175,6 +1175,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { atomic_store_32(&pTask->status.inScanHistorySentinel, 0); if (ret == TASK_SCANHISTORY_REXEC) { + // todo wait for 100ms and retry streamStartScanHistoryAsync(pTask, 0); } else { char* p = NULL; diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 05a62140f9..fa042b5687 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -18,7 +18,8 @@ // maximum allowed processed block batches. One block may include several submit blocks #define MAX_STREAM_EXEC_BATCH_NUM 32 #define STREAM_RESULT_DUMP_THRESHOLD 300 -#define STREAM_RESULT_DUMP_SIZE_THRESHOLD (1048576 * 1) +#define STREAM_RESULT_DUMP_SIZE_THRESHOLD (1048576 * 1) // 1MiB result data +#define STREAM_SCAN_HISTORY_TIMESLICE 1000 // 1000 ms static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask); @@ -48,10 +49,9 @@ static int32_t doOutputResultBlockImpl(SStreamTask* pTask, SStreamDataBlock* pBl } streamDispatchStreamBlock(pTask); - return code; } - return 0; + return code; } static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* pRes, int32_t size, int64_t* totalSize, @@ -187,12 +187,13 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i return code; } -int32_t streamScanHistoryData(SStreamTask* pTask) { +EScanHistoryRet streamScanHistoryData(SStreamTask* pTask) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); int32_t code = TSDB_CODE_SUCCESS; void* exec = pTask->exec.pExecutor; bool finished = false; + int64_t st = taosGetTimestampMs(); qSetStreamOpOpen(exec); @@ -200,13 +201,14 @@ int32_t streamScanHistoryData(SStreamTask* pTask) { if (streamTaskShouldPause(pTask)) { double el = (taosGetTimestampMs() - pTask->execInfo.step1Start) / 1000.0; stDebug("s-task:%s paused from the scan-history task, elapsed time:%.2fsec", pTask->id.idStr, el); - break; + return TASK_SCANHISTORY_QUIT; // quit from step1, not continue to handle the step2 } SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); if (pRes == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + stError("s-task:%s scan-history prepare result block failed, code:%s, retry later", pTask->id.idStr, tstrerror(terrno)); + continue; } int32_t size = 0; @@ -214,26 +216,26 @@ int32_t streamScanHistoryData(SStreamTask* pTask) { while (1) { if (streamTaskShouldStop(pTask)) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - return 0; + return TASK_SCANHISTORY_QUIT; } if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { - stDebug("s-task:%s inputQ is blocked, wait for 10sec and retry", pTask->id.idStr); - taosMsleep(10000); - continue; + stDebug("s-task:%s level:%d inputQ is blocked, retry later", pTask->id.idStr, pTask->info.taskLevel); + taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); + return TASK_SCANHISTORY_REXEC; } SSDataBlock* output = NULL; uint64_t ts = 0; code = qExecTask(exec, &output, &ts); if (code != TSDB_CODE_TSC_QUERY_KILLED && code != TSDB_CODE_SUCCESS) { - stError("%s scan-history data error occurred code:%s, continue scan", pTask->id.idStr, tstrerror(code)); + stError("s-task:%s scan-history data error occurred code:%s, continue scan", pTask->id.idStr, tstrerror(code)); continue; } // the generated results before fill-history task been paused, should be dispatched to sink node if (output == NULL) { - finished = qStreamRecoverScanFinished(exec); + finished = qStreamScanhistoryFinished(exec); break; } @@ -243,8 +245,9 @@ int32_t streamScanHistoryData(SStreamTask* pTask) { taosArrayPush(pRes, &block); size += blockDataGetSize(output) + sizeof(SSDataBlock) + sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block); + numOfBlocks += 1; - if ((++numOfBlocks) >= STREAM_RESULT_DUMP_THRESHOLD || size >= STREAM_RESULT_DUMP_SIZE_THRESHOLD) { + if (numOfBlocks >= STREAM_RESULT_DUMP_THRESHOLD || size >= STREAM_RESULT_DUMP_SIZE_THRESHOLD) { stDebug("s-task:%s scan exec numOfBlocks:%d, size:%.2fKiB output num-limit:%d, size-limit:%.2fKiB reached", pTask->id.idStr, numOfBlocks, SIZE_IN_KiB(size), STREAM_RESULT_DUMP_THRESHOLD, SIZE_IN_KiB(STREAM_RESULT_DUMP_SIZE_THRESHOLD)); @@ -256,14 +259,25 @@ int32_t streamScanHistoryData(SStreamTask* pTask) { SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(NULL, pTask, size, pRes); code = doOutputResultBlockImpl(pTask, pStreamBlocks); if (code != TSDB_CODE_SUCCESS) { - return code; + terrno = code; + stDebug("s-task:%s dump fill-history results failed, code:%s, retry in 100ms", pTask->id.idStr, tstrerror(code)); + return TASK_SCANHISTORY_REXEC; } } else { taosArrayDestroy(pRes); } + + int64_t el = taosGetTimestampMs() - st; + if (el >= STREAM_SCAN_HISTORY_TIMESLICE) { + stDebug("s-task:%s fill-history:%d level:%d timeslice for scan-history exhausted", pTask->id.idStr, + pTask->info.fillHistory, pTask->info.taskLevel); + + // todo exec scanhistory in 100ms + return TASK_SCANHISTORY_REXEC; + } } - return 0; + return TASK_SCANHISTORY_CONT; } // wait for the stream task to be idle From d41fd271893477cbea05d9b171859db58e6c4c6f Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Nov 2023 11:57:21 +0800 Subject: [PATCH 04/20] fix(stream): restart tasks in stream threads, instead of write thread. --- include/libs/stream/tstream.h | 3 +- source/dnode/vnode/src/inc/tq.h | 7 +-- source/dnode/vnode/src/inc/vnodeInt.h | 7 ++- source/dnode/vnode/src/tq/tq.c | 51 ++++-------------- source/dnode/vnode/src/tq/tqStreamTask.c | 68 ++++++++++++++++++++++-- source/dnode/vnode/src/vnd/vnodeSync.c | 4 +- source/libs/stream/src/streamMeta.c | 12 ++--- 7 files changed, 91 insertions(+), 61 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 0605f4b2e5..4dc1b9315e 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -432,7 +432,8 @@ struct SStreamTask { typedef struct STaskStartInfo { int64_t startTs; int64_t readyTs; - int32_t startAllTasksFlag; + int32_t tasksWillRestart; + int32_t taskRestarting; // restart flag, sentinel to guard the restart procedure. SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing int32_t elapsedTime; } STaskStartInfo; diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index cd285fe6a1..4e4431d4d9 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -43,9 +43,9 @@ extern "C" { typedef struct STqOffsetStore STqOffsetStore; -// tqPush #define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1) -#define STREAM_EXEC_TASK_STATUS_CHECK_ID (-2) +#define STREAM_EXEC_START_ALL_TASKS_ID (-2) +#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3) // tqExec typedef struct { @@ -155,9 +155,6 @@ char* tqOffsetBuildFName(const char* path, int32_t fVer); int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname); // tqStream -int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); -int32_t tqScanWal(STQ* pTq); -int32_t tqStartStreamTask(STQ* pTq); int32_t tqResetStreamTaskStatus(STQ* pTq); int32_t tqStopStreamTasks(STQ* pTq); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index f184702eda..724f3bc92e 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -231,7 +231,12 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) int32_t tqProcessTaskCheckpointReadyMsg(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg); -int32_t tqLaunchStreamTaskAsync(STQ* pTq); + +int32_t tqStartStreamTaskAsync(STQ* pTq, bool restart); +int32_t tqRestartStreamTasks(STQ* pTq); +int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); +int32_t tqScanWal(STQ* pTq); +int32_t tqStartStreamTasks(STQ* pTq); int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 2e6844736c..05fa029f32 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1307,14 +1307,15 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { int32_t taskId = pReq->taskId; int32_t vgId = TD_VID(pTq->pVnode); - if (taskId == STREAM_EXEC_TASK_STATUS_CHECK_ID) { - tqStartStreamTask(pTq); - return 0; - } - if (taskId == STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID) { // all tasks are extracted submit data from the wal tqScanWal(pTq); return 0; + } else if (taskId == STREAM_EXEC_START_ALL_TASKS_ID) { + tqStartStreamTasks(pTq); + return 0; + } else if (taskId == STREAM_EXEC_RESTART_ALL_TASKS_ID) { + tqRestartStreamTasks(pTq); + return 0; } SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->streamId, taskId); @@ -1900,7 +1901,7 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); int32_t updateTasks = taosHashGetSize(pMeta->updateInfo.pTasks); - pMeta->startInfo.startAllTasksFlag = 1; + pMeta->startInfo.tasksWillRestart = 1; if (updateTasks < numOfTasks) { tqDebug("vgId:%d closed tasks:%d, unclosed:%d, all tasks will be started when nodeEp update completed", vgId, @@ -1909,45 +1910,11 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { } else { if (!pTq->pVnode->restored) { tqDebug("vgId:%d vnode restore not completed, not restart the tasks, clear the start after nodeUpdate flag", vgId); - pMeta->startInfo.startAllTasksFlag = 0; + pMeta->startInfo.tasksWillRestart = 0; streamMetaWUnLock(pMeta); } else { - tqInfo("vgId:%d tasks are all updated and stopped, restart them", vgId); - terrno = 0; - - streamMetaWUnLock(pMeta); - - while (streamMetaTaskInTimer(pMeta)) { - tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); - taosMsleep(100); - } - - streamMetaWLock(pMeta); - - int32_t code = streamMetaReopen(pMeta); - if (code != 0) { - tqError("vgId:%d failed to reopen stream meta", vgId); - streamMetaWUnLock(pMeta); - taosArrayDestroy(req.pNodeList); - return -1; - } - - if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { - tqError("vgId:%d failed to load stream tasks", vgId); - streamMetaWUnLock(pMeta); - taosArrayDestroy(req.pNodeList); - return -1; - } - - if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { - tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); - tqResetStreamTaskStatus(pTq); - tqLaunchStreamTaskAsync(pTq); - } else { - tqInfo("vgId:%d, follower node not start stream tasks", vgId); - } - streamMetaWUnLock(pMeta); + tqStartStreamTaskAsync(pTq, true); } } diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 2d94f23009..f7607dabab 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -58,7 +58,7 @@ int32_t tqScanWal(STQ* pTq) { return 0; } -int32_t tqStartStreamTask(STQ* pTq) { +int32_t tqStartStreamTasks(STQ* pTq) { int32_t code = TSDB_CODE_SUCCESS; int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; @@ -115,7 +115,67 @@ int32_t tqStartStreamTask(STQ* pTq) { return code; } -int32_t tqLaunchStreamTaskAsync(STQ* pTq) { +int32_t tqRestartStreamTasks(STQ* pTq) { + SStreamMeta* pMeta = pTq->pStreamMeta; + int32_t vgId = pMeta->vgId; + int32_t code = 0; + int64_t st = taosGetTimestampMs(); + + while(1) { + int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskRestarting, 0, 1); + if (startVal == 0) { + break; + } + + tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); + taosMsleep(500); + } + + terrno = 0; + tqInfo("vgId:%d tasks are all updated and stopped, restart all tasks, triggered by transId:%d", vgId, + pMeta->updateInfo.transId); + + while (streamMetaTaskInTimer(pMeta)) { + tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + taosMsleep(100); + } + + streamMetaWLock(pMeta); + + code = streamMetaReopen(pMeta); + if (code != TSDB_CODE_SUCCESS) { + tqError("vgId:%d failed to reopen stream meta", vgId); + streamMetaWUnLock(pMeta); + code = terrno; + return code; + } + + int64_t el = taosGetTimestampMs() - st; + + tqInfo("vgId:%d close&reload state elapsed time:%.3fms", vgId, el/1000.); + + code = streamMetaLoadAllTasks(pTq->pStreamMeta); + if (code != TSDB_CODE_SUCCESS) { + tqError("vgId:%d failed to load stream tasks, code:%s", vgId, tstrerror(terrno)); + streamMetaWUnLock(pMeta); + code = terrno; + return code; + } + + if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { + tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); + tqResetStreamTaskStatus(pTq); + tqStartStreamTasks(pTq); + } else { + tqInfo("vgId:%d, follower node not start stream tasks", vgId); + } + + streamMetaWUnLock(pMeta); + code = terrno; + return code; +} + +int32_t tqStartStreamTaskAsync(STQ* pTq, bool restart) { SStreamMeta* pMeta = pTq->pStreamMeta; int32_t vgId = pMeta->vgId; @@ -132,10 +192,10 @@ int32_t tqLaunchStreamTaskAsync(STQ* pTq) { return -1; } - tqDebug("vgId:%d check %d stream task(s) status async", vgId, numOfTasks); + tqDebug("vgId:%d start all %d stream task(s) async", vgId, numOfTasks); pRunReq->head.vgId = vgId; pRunReq->streamId = 0; - pRunReq->taskId = STREAM_EXEC_TASK_STATUS_CHECK_ID; + pRunReq->taskId = restart? STREAM_EXEC_RESTART_ALL_TASKS_ID:STREAM_EXEC_START_ALL_TASKS_ID; SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index b4afd898fa..d4876caf15 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -554,7 +554,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) SStreamMeta* pMeta = pVnode->pTq->pStreamMeta; streamMetaWLock(pMeta); - if (pMeta->startInfo.startAllTasksFlag) { + if (pMeta->startInfo.tasksWillRestart) { vInfo("vgId:%d, sync restore finished, stream tasks will be launched by other thread", vgId); streamMetaWUnLock(pMeta); return; @@ -567,7 +567,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) } else { vInfo("vgId:%d sync restore finished, start to launch stream tasks", pVnode->config.vgId); tqResetStreamTaskStatus(pVnode->pTq); - tqLaunchStreamTaskAsync(pVnode->pTq); + tqStartStreamTaskAsync(pVnode->pTq, false); } } else { vInfo("vgId:%d, sync restore finished, not launch stream tasks since not leader", vgId); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 7dfe88e6de..ee02ccf8f1 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -228,12 +228,8 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF } int32_t streamMetaReopen(SStreamMeta* pMeta) { - // backup the restart flag - int32_t restartFlag = pMeta->startInfo.startAllTasksFlag; streamMetaClear(pMeta); - pMeta->startInfo.startAllTasksFlag = restartFlag; - // NOTE: role should not be changed during reopen meta pMeta->streamBackendRid = -1; pMeta->streamBackend = NULL; @@ -302,7 +298,9 @@ void streamMetaClear(SStreamMeta* pMeta) { pMeta->numOfPausedTasks = 0; pMeta->chkptNotReadyTasks = 0; - streamMetaResetStartInfo(&pMeta->startInfo); + // the willrestart/starting flag can NOT be cleared + taosHashClear(pMeta->startInfo.pReadyTaskSet); + pMeta->startInfo.readyTs = 0; } void streamMetaClose(SStreamMeta* pMeta) { @@ -1093,8 +1091,10 @@ void streamMetaInitForSnode(SStreamMeta* pMeta) { void streamMetaResetStartInfo(STaskStartInfo* pStartInfo) { taosHashClear(pStartInfo->pReadyTaskSet); - pStartInfo->startAllTasksFlag = 0; + pStartInfo->tasksWillRestart = 0; pStartInfo->readyTs = 0; + // reset the sentinel flag value to be 0 + atomic_store_32(&pStartInfo->taskRestarting, 0); } void streamMetaRLock(SStreamMeta* pMeta) { From bfd14076d7f54002ba757aa825d7dc46f79cabbb Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Nov 2023 12:05:47 +0800 Subject: [PATCH 05/20] fix(stream): fix dead-lock. --- source/dnode/vnode/src/tq/tqStreamTask.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index f7607dabab..6e0e63b67d 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -165,12 +165,15 @@ int32_t tqRestartStreamTasks(STQ* pTq) { if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); tqResetStreamTaskStatus(pTq); + streamMetaWUnLock(pMeta); + tqStartStreamTasks(pTq); } else { + streamMetaWUnLock(pMeta); + tqInfo("vgId:%d, follower node not start stream tasks", vgId); } - streamMetaWUnLock(pMeta); code = terrno; return code; } From 69863febe680c29c021cc8ead5d1fe7599ec7ad0 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Nov 2023 13:17:13 +0800 Subject: [PATCH 06/20] fix(stream):reset the starting flag for the followers. --- include/libs/stream/tstream.h | 3 ++- source/dnode/vnode/src/tq/tqStreamTask.c | 8 ++++---- source/libs/stream/inc/streamInt.h | 2 -- source/libs/stream/src/streamMeta.c | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 4dc1b9315e..a33a259ef7 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -433,7 +433,7 @@ typedef struct STaskStartInfo { int64_t startTs; int64_t readyTs; int32_t tasksWillRestart; - int32_t taskRestarting; // restart flag, sentinel to guard the restart procedure. + int32_t taskStarting; // restart flag, sentinel to guard the restart procedure. SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing int32_t elapsedTime; } STaskStartInfo; @@ -817,6 +817,7 @@ void streamMetaRLock(SStreamMeta* pMeta); void streamMetaRUnLock(SStreamMeta* pMeta); void streamMetaWLock(SStreamMeta* pMeta); void streamMetaWUnLock(SStreamMeta* pMeta); +void streamMetaResetStartInfo(STaskStartInfo* pMeta); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 6e0e63b67d..d7ab76bd82 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -122,7 +122,7 @@ int32_t tqRestartStreamTasks(STQ* pTq) { int64_t st = taosGetTimestampMs(); while(1) { - int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskRestarting, 0, 1); + int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); if (startVal == 0) { break; } @@ -152,7 +152,7 @@ int32_t tqRestartStreamTasks(STQ* pTq) { int64_t el = taosGetTimestampMs() - st; - tqInfo("vgId:%d close&reload state elapsed time:%.3fms", vgId, el/1000.); + tqInfo("vgId:%d close&reload state elapsed time:%.3fs", vgId, el/1000.); code = streamMetaLoadAllTasks(pTq->pStreamMeta); if (code != TSDB_CODE_SUCCESS) { @@ -165,12 +165,12 @@ int32_t tqRestartStreamTasks(STQ* pTq) { if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); tqResetStreamTaskStatus(pTq); - streamMetaWUnLock(pMeta); + streamMetaWUnLock(pMeta); tqStartStreamTasks(pTq); } else { + streamMetaResetStartInfo(&pMeta->startInfo); streamMetaWUnLock(pMeta); - tqInfo("vgId:%d, follower node not start stream tasks", vgId); } diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index c63b51d745..09d26119f1 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -132,8 +132,6 @@ STaskId streamTaskExtractKey(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); -void streamMetaResetStartInfo(STaskStartInfo* pMeta); - SStreamQueue* streamQueueOpen(int64_t cap); void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); void streamQueueProcessSuccess(SStreamQueue* queue); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index ee02ccf8f1..3ff7fbcf4f 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1094,7 +1094,7 @@ void streamMetaResetStartInfo(STaskStartInfo* pStartInfo) { pStartInfo->tasksWillRestart = 0; pStartInfo->readyTs = 0; // reset the sentinel flag value to be 0 - atomic_store_32(&pStartInfo->taskRestarting, 0); + atomic_store_32(&pStartInfo->taskStarting, 0); } void streamMetaRLock(SStreamMeta* pMeta) { From 6b1889284bbcdae357bfd82737fce27f18a7b362 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Nov 2023 15:45:44 +0800 Subject: [PATCH 07/20] fix(stream): record the start failure tasks. --- include/libs/stream/tstream.h | 5 +- source/dnode/vnode/src/tq/tqStreamTask.c | 3 +- source/libs/stream/src/streamMeta.c | 10 ++++ source/libs/stream/src/streamStart.c | 73 +++++++++++++++++------- 4 files changed, 68 insertions(+), 23 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index a33a259ef7..8eda5de7cb 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -434,7 +434,8 @@ typedef struct STaskStartInfo { int64_t readyTs; int32_t tasksWillRestart; int32_t taskStarting; // restart flag, sentinel to guard the restart procedure. - SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing + SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing + SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed int32_t elapsedTime; } STaskStartInfo; @@ -812,7 +813,7 @@ void streamMetaNotifyClose(SStreamMeta* pMeta); void streamMetaStartHb(SStreamMeta* pMeta); void streamMetaInitForSnode(SStreamMeta* pMeta); bool streamMetaTaskInTimer(SStreamMeta* pMeta); -int32_t streamMetaUpdateTaskReadyInfo(SStreamTask* pTask); +int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs, int64_t endTs, bool succ); void streamMetaRLock(SStreamMeta* pMeta); void streamMetaRUnLock(SStreamMeta* pMeta); void streamMetaWLock(SStreamMeta* pMeta); diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index d7ab76bd82..55771ca10b 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -73,6 +73,7 @@ int32_t tqStartStreamTasks(STQ* pTq) { streamMetaWLock(pMeta); pTaskList = taosArrayDup(pMeta->pTaskList, NULL); taosHashClear(pMeta->startInfo.pReadyTaskSet); + taosHashClear(pMeta->startInfo.pFailedTaskSet); pMeta->startInfo.startTs = taosGetTimestampMs(); streamMetaWUnLock(pMeta); @@ -97,7 +98,7 @@ int32_t tqStartStreamTasks(STQ* pTq) { streamLaunchFillHistoryTask(pTask); } - streamMetaUpdateTaskReadyInfo(pTask); + streamMetaUpdateTaskDownstreamStatus(pTask, pTask->execInfo.init, pTask->execInfo.start, true); streamMetaReleaseTask(pMeta, pTask); continue; } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 3ff7fbcf4f..17cd9fac57 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -150,6 +150,12 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF pMeta->startInfo.pReadyTaskSet = taosHashInit(64, fp, false, HASH_NO_LOCK); if (pMeta->startInfo.pReadyTaskSet == NULL) { + goto _err; + } + + pMeta->startInfo.pFailedTaskSet = taosHashInit(4, fp, false, HASH_NO_LOCK); + if (pMeta->startInfo.pFailedTaskSet == NULL) { + goto _err; } pMeta->pHbInfo = taosMemoryCalloc(1, sizeof(SMetaHbInfo)); @@ -221,6 +227,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF if (pMeta->pHbInfo) taosMemoryFreeClear(pMeta->pHbInfo); if (pMeta->updateInfo.pTasks) taosHashCleanup(pMeta->updateInfo.pTasks); if (pMeta->startInfo.pReadyTaskSet) taosHashCleanup(pMeta->startInfo.pReadyTaskSet); + if (pMeta->startInfo.pFailedTaskSet) taosHashCleanup(pMeta->startInfo.pFailedTaskSet); taosMemoryFree(pMeta); stError("failed to open stream meta"); @@ -300,6 +307,7 @@ void streamMetaClear(SStreamMeta* pMeta) { // the willrestart/starting flag can NOT be cleared taosHashClear(pMeta->startInfo.pReadyTaskSet); + taosHashClear(pMeta->startInfo.pFailedTaskSet); pMeta->startInfo.readyTs = 0; } @@ -340,6 +348,7 @@ void streamMetaCloseImpl(void* arg) { taosHashCleanup(pMeta->pTaskBackendUnique); taosHashCleanup(pMeta->updateInfo.pTasks); taosHashCleanup(pMeta->startInfo.pReadyTaskSet); + taosHashCleanup(pMeta->startInfo.pFailedTaskSet); taosMemoryFree(pMeta->pHbInfo); taosMemoryFree(pMeta->path); @@ -1091,6 +1100,7 @@ void streamMetaInitForSnode(SStreamMeta* pMeta) { void streamMetaResetStartInfo(STaskStartInfo* pStartInfo) { taosHashClear(pStartInfo->pReadyTaskSet); + taosHashClear(pStartInfo->pFailedTaskSet); pStartInfo->tasksWillRestart = 0; pStartInfo->readyTs = 0; // reset the sentinel flag value to be 0 diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 4f757f09fb..fdad5124ed 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -57,7 +57,7 @@ int32_t streamTaskSetReady(SStreamTask* pTask) { stDebug("s-task:%s all %d downstream ready, init completed, elapsed time:%" PRId64 "ms, task status:%s", pTask->id.idStr, numOfDowns, el, p); - streamMetaUpdateTaskReadyInfo(pTask); + streamMetaUpdateTaskDownstreamStatus(pTask, pTask->execInfo.init, pTask->execInfo.start, true); return TSDB_CODE_SUCCESS; } @@ -392,20 +392,22 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs doProcessDownstreamReadyRsp(pTask); } } else { // not ready, wait for 100ms and retry - if (pRsp->status == TASK_UPSTREAM_NEW_STAGE) { - stError( - "s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%d, current stage:%d, " - "not check wait for downstream task nodeUpdate, and all tasks restart", - id, pRsp->upstreamNodeId, pRsp->oldStage, (int32_t)pTask->pMeta->stage); + if (pRsp->status == TASK_UPSTREAM_NEW_STAGE || pRsp->status == TASK_DOWNSTREAM_NOT_LEADER) { + if (pRsp->status == TASK_UPSTREAM_NEW_STAGE) { + stError( + "s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%d, current stage:%d, " + "not check wait for downstream task nodeUpdate, and all tasks restart", + id, pRsp->upstreamNodeId, pRsp->oldStage, (int32_t)pTask->pMeta->stage); + } else { + stError( + "s-task:%s downstream taskId:0x%x (vgId:%d) not leader, self dispatch epset needs to be updated, not check " + "downstream again, nodeUpdate needed", + id, pRsp->downstreamTaskId, pRsp->downstreamNodeId); + } addIntoNodeUpdateList(pTask, pRsp->downstreamNodeId); - } else if (pRsp->status == TASK_DOWNSTREAM_NOT_LEADER) { - stError( - "s-task:%s downstream taskId:0x%x (vgId:%d) not leader, self dispatch epset needs to be updated, not check " - "downstream again, nodeUpdate needed", - id, pRsp->downstreamTaskId, pRsp->downstreamNodeId); + streamMetaUpdateTaskDownstreamStatus(pTask, pTask->execInfo.init, taosGetTimestampMs(), false); - addIntoNodeUpdateList(pTask, pRsp->downstreamNodeId); } else { // TASK_DOWNSTREAM_NOT_READY, let's retry in 100ms STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp); @@ -981,28 +983,59 @@ void streamTaskEnablePause(SStreamTask* pTask) { pTask->status.pauseAllowed = 1; } -int32_t streamMetaUpdateTaskReadyInfo(SStreamTask* pTask) { +typedef struct STaskInitTs { + int64_t start; + int64_t end; + bool success; +} STaskInitTs; + +static void displayStatusInfo(SStreamMeta* pMeta, SHashObj* pTaskSet, bool succ) { + int32_t vgId = pMeta->vgId; + void* pIter = NULL; + size_t keyLen = 0; + + stInfo("vgId:%d %d tasks check-downstream completed %s", vgId, taosHashGetSize(pTaskSet), + succ ? "success" : "failed"); + + while ((pIter = taosHashIterate(pTaskSet, pIter)) != NULL) { + STaskInitTs* pInfo = pIter; + void* key = taosHashGetKey(pIter, &keyLen); + + SStreamTask** pTask1 = taosHashGet(pMeta->pTasksMap, key, sizeof(STaskId)); + stInfo("s-task:%s level:%d vgId:%d, init:%" PRId64 ", initEnd:%" PRId64 ", %s", (*pTask1)->id.idStr, + (*pTask1)->info.taskLevel, vgId, pInfo->start, pInfo->end, pInfo->success ? "success" : "failed"); + } +} + +int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs, int64_t endTs, bool ready) { SStreamMeta* pMeta = pTask->pMeta; streamMetaWLock(pMeta); STaskId id = streamTaskExtractKey(pTask); - taosHashPut(pMeta->startInfo.pReadyTaskSet, &id, sizeof(id), NULL, 0); + STaskStartInfo* pStartInfo = &pMeta->startInfo; + + SHashObj* pDst = ready? pStartInfo->pReadyTaskSet:pStartInfo->pFailedTaskSet; + + STaskInitTs initTs = {.start = startTs, .end = endTs, .success = ready}; + taosHashPut(pDst, &id, sizeof(id), &initTs, sizeof(STaskInitTs)); int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta); - if (taosHashGetSize(pMeta->startInfo.pReadyTaskSet) == numOfTotal) { - STaskStartInfo* pStartInfo = &pMeta->startInfo; - + if (taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet) == numOfTotal) { pStartInfo->readyTs = pTask->execInfo.start; pStartInfo->elapsedTime = (pStartInfo->startTs != 0) ? pStartInfo->readyTs - pStartInfo->startTs : 0; - streamMetaResetStartInfo(pStartInfo); - - stDebug("vgId:%d all %d task(s) are started successfully, last ready task:%s level:%d, startTs:%" PRId64 + stDebug("vgId:%d all %d task(s) check downstream completed, last completed task:%s level:%d, startTs:%" PRId64 ", readyTs:%" PRId64 " total elapsed time:%.2fs", pMeta->vgId, numOfTotal, pTask->id.idStr, pTask->info.taskLevel, pStartInfo->startTs, pStartInfo->readyTs, pStartInfo->elapsedTime / 1000.0); + + // print the initialization elapsed time and info + displayStatusInfo(pMeta, pStartInfo->pReadyTaskSet, true); + displayStatusInfo(pMeta, pStartInfo->pFailedTaskSet, false); + + streamMetaResetStartInfo(pStartInfo); } streamMetaWUnLock(pMeta); From d9474016fc766c0c14bdc3fc21f0ab298f582cee Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Nov 2023 18:55:53 +0800 Subject: [PATCH 08/20] fix(stream): exec scan-history in timer. --- include/libs/stream/tstream.h | 43 ++++++++++++------- source/dnode/vnode/src/tq/tq.c | 13 +++--- source/dnode/vnode/src/tq/tqSink.c | 2 +- source/dnode/vnode/src/tq/tqStreamTask.c | 7 ++-- source/libs/stream/inc/streamInt.h | 3 ++ source/libs/stream/src/streamDispatch.c | 1 - source/libs/stream/src/streamExec.c | 16 ++++--- source/libs/stream/src/streamStart.c | 53 ++++++++++++++++++++++-- 8 files changed, 102 insertions(+), 36 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 8eda5de7cb..a063f39d92 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -241,6 +241,24 @@ typedef struct { SEpSet epset; } SDownstreamTaskEpset; +typedef enum { + TASK_SCANHISTORY_CONT = 0x1, + TASK_SCANHISTORY_QUIT = 0x2, + TASK_SCANHISTORY_REXEC = 0x3, +} EScanHistoryRet; + +typedef struct { + EScanHistoryRet ret; + int32_t idleTime; +} SScanhistoryDataInfo; + +typedef struct { + int32_t idleDuration; // idle time before use time slice the continue execute scan-history + int32_t numOfTicks; + tmr_h pTimer; + int32_t execCount; +} SScanhistorySchedInfo; + typedef struct { int64_t stbUid; char stbFullName[TSDB_TABLE_FNAME_LEN]; @@ -378,9 +396,10 @@ typedef struct STaskOutputInfo { union { STaskDispatcherFixed fixedDispatcher; STaskDispatcherShuffle shuffleDispatcher; - STaskSinkTb tbSink; - STaskSinkSma smaSink; - STaskSinkFetch fetchSink; + + STaskSinkTb tbSink; + STaskSinkSma smaSink; + STaskSinkFetch fetchSink; }; int8_t type; STokenBucket* pTokenBucket; @@ -414,7 +433,10 @@ struct SStreamTask { SStreamState* pState; // state backend SArray* pRspMsgList; SUpstreamInfo upstreamInfo; + // the followings attributes don't be serialized + SScanhistorySchedInfo schedHistoryInfo; + int32_t notReadyTasks; int32_t numOfWaitingUpstream; int64_t checkReqId; @@ -734,8 +756,6 @@ void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) // recover and fill history void streamTaskCheckDownstream(SStreamTask* pTask); -int32_t onNormalTaskReady(SStreamTask* pTask); -int32_t onScanhistoryTaskReady(SStreamTask* pTask); int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage); int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList); @@ -757,7 +777,9 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs int32_t streamLaunchFillHistoryTask(SStreamTask* pTask); int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask); int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated); +int32_t streamReExecScanHistoryFuture(SStreamTask* pTask, int32_t idleDuration); bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer); + int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); // common @@ -777,21 +799,14 @@ void streamTaskOpenAllUpstreamInput(SStreamTask* pTask); void streamTaskStatusInit(STaskStatusEntry* pEntry, const SStreamTask* pTask); void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc); -typedef enum { - TASK_SCANHISTORY_CONT = 0x1, - TASK_SCANHISTORY_QUIT = 0x2, - TASK_SCANHISTORY_REXEC = 0x3, -} EScanHistoryRet; - // source level int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); -EScanHistoryRet streamScanHistoryData(SStreamTask* pTask); +SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask); int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); // agg level -int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, - SRpcHandleInfo* pRpcInfo); +int32_t streamProcessScanHistoryFinishReq(SStreamTask* pTask, SStreamScanHistoryFinishReq* pReq, SRpcHandleInfo* pInfo); int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask); // stream task meta diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 05fa029f32..81defbe6d0 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1102,6 +1102,10 @@ static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask } } +static void ddxx() { + +} + // this function should be executed by only one thread, so we set an sentinel to protect this function int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont; @@ -1165,18 +1169,17 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { return 0; } - EScanHistoryRet ret = streamScanHistoryData(pTask); + SScanhistoryDataInfo retInfo = streamScanHistoryData(pTask); // todo update the step1 exec elapsed time double el = (taosGetTimestampMs() - pTask->execInfo.step1Start) / 1000.0; - if (ret == TASK_SCANHISTORY_QUIT || ret == TASK_SCANHISTORY_REXEC) { + if (retInfo.ret == TASK_SCANHISTORY_QUIT || retInfo.ret == TASK_SCANHISTORY_REXEC) { int8_t status = streamTaskSetSchedStatusInactive(pTask); atomic_store_32(&pTask->status.inScanHistorySentinel, 0); - if (ret == TASK_SCANHISTORY_REXEC) { - // todo wait for 100ms and retry - streamStartScanHistoryAsync(pTask, 0); + if (retInfo.ret == TASK_SCANHISTORY_REXEC) { + streamReExecScanHistoryFuture(pTask, retInfo.idleTime); } else { char* p = NULL; ETaskStatus s = streamTaskGetStatus(pTask, &p); diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 742b170a8c..3cd05841cd 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -25,7 +25,7 @@ typedef struct STableSinkInfo { tstr name; } STableSinkInfo; -static bool hasOnlySubmitData(const SArray* pBlocks, int32_t numOfBlocks); +static bool hasOnlySubmitData(const SArray* pBlocks, int32_t numOfBlocks); static int32_t tsAscendingSortFn(const void* p1, const void* p2); static int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDataBlock, char* stbFullName, SSubmitTbData* pTableData); diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 55771ca10b..44eb0351d7 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -22,6 +22,8 @@ static int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle); static int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId); static bool handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver); +static bool taskReadyForDataFromWal(SStreamTask* pTask); +static bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems); // extract data blocks(submit/delete) from WAL, and add them into the input queue for all the sources tasks. int32_t tqScanWal(STQ* pTq) { @@ -384,14 +386,13 @@ bool handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver) { return false; } -static bool taskReadyForDataFromWal(SStreamTask* pTask) { +bool taskReadyForDataFromWal(SStreamTask* pTask) { // non-source or fill-history tasks don't need to response the WAL scan action. if ((pTask->info.taskLevel != TASK_LEVEL__SOURCE) || (pTask->status.downstreamReady == 0)) { return false; } // not in ready state, do not handle the data from wal -// int32_t status = pTask->status.taskStatus; char* p = NULL; int32_t status = streamTaskGetStatus(pTask, &p); if (streamTaskGetStatus(pTask, &p) != TASK_STATUS__READY) { @@ -423,7 +424,7 @@ static bool taskReadyForDataFromWal(SStreamTask* pTask) { return true; } -static bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems) { +bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems) { const char* id = pTask->id.idStr; int32_t numOfNewItems = 0; diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 09d26119f1..aa9b26381e 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -140,6 +140,9 @@ void* streamQueueNextItem(SStreamQueue* pQueue); void streamFreeQitem(SStreamQueueItem* data); int32_t streamQueueGetItemSize(const SStreamQueue* pQueue); +int32_t onNormalTaskReady(SStreamTask* pTask); +int32_t onScanhistoryTaskReady(SStreamTask* pTask); + #ifdef __cplusplus } #endif diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index edfc66762d..5665e7a917 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -1007,7 +1007,6 @@ int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, info.msg.info = *pRpcInfo; taosThreadMutexLock(&pTask->lock); - stDebug("s-task:%s lock", pTask->id.idStr); if (pTask->pRspMsgList == NULL) { pTask->pRspMsgList = taosArrayInit(4, sizeof(SStreamContinueExecInfo)); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index fa042b5687..d81a5f0c2f 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -187,7 +187,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i return code; } -EScanHistoryRet streamScanHistoryData(SStreamTask* pTask) { +SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); int32_t code = TSDB_CODE_SUCCESS; @@ -201,7 +201,7 @@ EScanHistoryRet streamScanHistoryData(SStreamTask* pTask) { if (streamTaskShouldPause(pTask)) { double el = (taosGetTimestampMs() - pTask->execInfo.step1Start) / 1000.0; stDebug("s-task:%s paused from the scan-history task, elapsed time:%.2fsec", pTask->id.idStr, el); - return TASK_SCANHISTORY_QUIT; // quit from step1, not continue to handle the step2 + return (SScanhistoryDataInfo){TASK_SCANHISTORY_QUIT, 0}; // quit from step1, not continue to handle the step2 } SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); @@ -216,13 +216,13 @@ EScanHistoryRet streamScanHistoryData(SStreamTask* pTask) { while (1) { if (streamTaskShouldStop(pTask)) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - return TASK_SCANHISTORY_QUIT; + return (SScanhistoryDataInfo){TASK_SCANHISTORY_QUIT, 0}; } if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { stDebug("s-task:%s level:%d inputQ is blocked, retry later", pTask->id.idStr, pTask->info.taskLevel); taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - return TASK_SCANHISTORY_REXEC; + return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 5000}; } SSDataBlock* output = NULL; @@ -261,7 +261,7 @@ EScanHistoryRet streamScanHistoryData(SStreamTask* pTask) { if (code != TSDB_CODE_SUCCESS) { terrno = code; stDebug("s-task:%s dump fill-history results failed, code:%s, retry in 100ms", pTask->id.idStr, tstrerror(code)); - return TASK_SCANHISTORY_REXEC; + return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 100}; } } else { taosArrayDestroy(pRes); @@ -271,13 +271,11 @@ EScanHistoryRet streamScanHistoryData(SStreamTask* pTask) { if (el >= STREAM_SCAN_HISTORY_TIMESLICE) { stDebug("s-task:%s fill-history:%d level:%d timeslice for scan-history exhausted", pTask->id.idStr, pTask->info.fillHistory, pTask->info.taskLevel); - - // todo exec scanhistory in 100ms - return TASK_SCANHISTORY_REXEC; + return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 100}; } } - return TASK_SCANHISTORY_CONT; + return (SScanhistoryDataInfo){TASK_SCANHISTORY_CONT, 0};; } // wait for the stream task to be idle diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index fdad5124ed..65a14aff6b 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -19,6 +19,10 @@ #include "wal.h" #include "streamsm.h" +#define SCANHISTORY_IDLE_TIME_SLICE 100 // 100ms +#define SCANHISTORY_MAX_IDLE_TIME 10 // 10 sec +#define SCANHISTORY_IDLE_TICK ((SCANHISTORY_MAX_IDLE_TIME * 1000) / SCANHISTORY_IDLE_TIME_SLICE) + typedef struct SLaunchHTaskInfo { SStreamMeta* pMeta; STaskId id; @@ -81,6 +85,50 @@ int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated) { return 0; } +static void doReExecScanhistory(void* param, void* tmrId) { + SStreamTask* pTask = param; + pTask->schedHistoryInfo.numOfTicks -= 1; + + char* p = NULL; + ETaskStatus status = streamTaskGetStatus(pTask, &p); + if (status == TASK_STATUS__DROPPING || status == TASK_STATUS__STOP) { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s status:%s not start scan-history again, ref:%d", pTask->id.idStr, p, ref); + } + + if (pTask->schedHistoryInfo.numOfTicks <= 0) { + streamStartScanHistoryAsync(pTask, 0); + + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s fill-history:%d start scan-history data, out of tmr, ref:%d", pTask->id.idStr, + pTask->info.fillHistory, ref); + } else { + taosTmrReset(doReExecScanhistory, 100, pTask, NULL, pTask->schedHistoryInfo.pTimer); + } +} + +int32_t streamReExecScanHistoryFuture(SStreamTask* pTask, int32_t idleDuration) { + int32_t numOfTicks = idleDuration / SCANHISTORY_IDLE_TIME_SLICE; + if (numOfTicks <= 0) { + numOfTicks = 1; + } else if (numOfTicks > SCANHISTORY_IDLE_TICK) { + numOfTicks = SCANHISTORY_IDLE_TICK; + } + + pTask->schedHistoryInfo.numOfTicks = numOfTicks; + + int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); + stDebug("s-task:%s scan-history start in %.2fs, ref:%d", pTask->id.idStr, numOfTicks*0.1, ref); + + if (pTask->schedHistoryInfo.pTimer == NULL) { + pTask->schedHistoryInfo.pTimer = taosTmrStart(doReExecScanhistory, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamEnv.timer); + } else { + taosTmrReset(doReExecScanhistory, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamEnv.timer, pTask->schedHistoryInfo.pTimer); + } + + return TSDB_CODE_SUCCESS; +} + static int32_t doStartScanHistoryTask(SStreamTask* pTask) { SVersionRange* pRange = &pTask->dataRange.range; if (pTask->info.fillHistory) { @@ -684,9 +732,8 @@ static void tryLaunchHistoryTask(void* param, void* tmrId) { int32_t hTaskId = pHTaskInfo->id.taskId; streamTaskGetStatus(pTask, &p); - stDebug( - "s-task:%s status:%s failed to launch fill-history task:0x%x, retry launch:%dms, retryCount:%d", - pTask->id.idStr, p, hTaskId, pHTaskInfo->waitInterval, pHTaskInfo->retryTimes); + stDebug("s-task:%s status:%s failed to launch fill-history task:0x%x, retry launch:%dms, retryCount:%d", + pTask->id.idStr, p, hTaskId, pHTaskInfo->waitInterval, pHTaskInfo->retryTimes); taosTmrReset(tryLaunchHistoryTask, LAUNCH_HTASK_INTERVAL, pInfo, streamEnv.timer, &pHTaskInfo->pTimer); streamMetaReleaseTask(pMeta, pTask); From 1f09d3a46b539a8a0362d39584cf9123ffc24749 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Nov 2023 19:46:47 +0800 Subject: [PATCH 09/20] fix(stream): fix error. --- source/libs/stream/src/streamStart.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 65a14aff6b..e782ec84ba 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -103,7 +103,8 @@ static void doReExecScanhistory(void* param, void* tmrId) { stDebug("s-task:%s fill-history:%d start scan-history data, out of tmr, ref:%d", pTask->id.idStr, pTask->info.fillHistory, ref); } else { - taosTmrReset(doReExecScanhistory, 100, pTask, NULL, pTask->schedHistoryInfo.pTimer); + taosTmrReset(doReExecScanhistory, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamEnv.timer, + &pTask->schedHistoryInfo.pTimer); } } @@ -123,7 +124,7 @@ int32_t streamReExecScanHistoryFuture(SStreamTask* pTask, int32_t idleDuration) if (pTask->schedHistoryInfo.pTimer == NULL) { pTask->schedHistoryInfo.pTimer = taosTmrStart(doReExecScanhistory, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamEnv.timer); } else { - taosTmrReset(doReExecScanhistory, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamEnv.timer, pTask->schedHistoryInfo.pTimer); + taosTmrReset(doReExecScanhistory, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamEnv.timer, &pTask->schedHistoryInfo.pTimer); } return TSDB_CODE_SUCCESS; From 312dbc1caa4ab768e752a8466ef66cc803e87c82 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Nov 2023 19:49:25 +0800 Subject: [PATCH 10/20] refactor: do some internal refactor. --- source/libs/stream/src/streamStart.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index e782ec84ba..9044e65eb6 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -34,6 +34,12 @@ typedef struct STaskRecheckInfo { void* checkTimer; } STaskRecheckInfo; +typedef struct STaskInitTs { + int64_t start; + int64_t end; + bool success; +} STaskInitTs; + static int32_t streamSetParamForScanHistory(SStreamTask* pTask); static void streamTaskSetRangeStreamCalc(SStreamTask* pTask); static int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); @@ -1031,12 +1037,6 @@ void streamTaskEnablePause(SStreamTask* pTask) { pTask->status.pauseAllowed = 1; } -typedef struct STaskInitTs { - int64_t start; - int64_t end; - bool success; -} STaskInitTs; - static void displayStatusInfo(SStreamMeta* pMeta, SHashObj* pTaskSet, bool succ) { int32_t vgId = pMeta->vgId; void* pIter = NULL; From 4d8548e9387fa7c7dc7290012c867036dc216ba9 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Nov 2023 23:42:16 +0800 Subject: [PATCH 11/20] refactor: do some internal refactor. --- include/libs/stream/tstream.h | 2 ++ source/dnode/vnode/src/tq/tq.c | 11 +++++------ source/libs/stream/src/streamDispatch.c | 8 ++++---- source/libs/stream/src/streamExec.c | 19 +++++++++++++++---- source/libs/stream/src/streamStart.c | 2 +- 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index a063f39d92..4842b4b8e2 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -372,7 +372,9 @@ typedef struct STaskExecStatisInfo { int64_t init; int64_t start; int64_t step1Start; + double step1El; int64_t step2Start; + double step2El; int32_t updateCount; int64_t latestUpdateTs; int32_t processDataBlocks; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 81defbe6d0..118355383f 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1102,10 +1102,6 @@ static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask } } -static void ddxx() { - -} - // this function should be executed by only one thread, so we set an sentinel to protect this function int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont; @@ -1149,9 +1145,12 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { } } else { if (pTask->execInfo.step2Start == 0) { - tqDebug("s-task:%s resume from paused, original step1 startTs:%" PRId64, id, pTask->execInfo.step1Start); + tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64", already elapsed:%.2fs", id, + pTask->execInfo.step1Start, pTask->execInfo.step1El); } else { - tqDebug("s-task:%s already in step2, no need to scan-history data, step2 starTs:%"PRId64, id, pTask->execInfo.step2Start); + tqDebug("s-task:%s already in step2, no need to scan-history data, step2 startTs:%" PRId64, id, + pTask->execInfo.step2Start); + atomic_store_32(&pTask->status.inScanHistorySentinel, 0); streamMetaReleaseTask(pMeta, pTask); return 0; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 5665e7a917..6bb15dfd23 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -1106,8 +1106,8 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i taosArrayPush(pTask->msgInfo.pRetryList, &pRsp->downstreamNodeId); taosThreadMutexUnlock(&pTask->lock); - stError("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch data", id, - pRsp->downstreamTaskId, pRsp->downstreamNodeId, DISPATCH_RETRY_INTERVAL_MS); + stWarn("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch", id, + pRsp->downstreamTaskId, pRsp->downstreamNodeId, DISPATCH_RETRY_INTERVAL_MS); } else if (pRsp->inputStatus == TASK_INPUT_STATUS__REFUSED) { stError("s-task:%s downstream task:0x%x(vgId:%d) refused the dispatch msg, treat it as success", id, pRsp->downstreamTaskId, pRsp->downstreamNodeId); @@ -1147,8 +1147,8 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i } int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s failed to dispatch msg to downstream code:%s, add timer to retry in %dms, ref:%d", - pTask->id.idStr, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS, ref); + stDebug("s-task:%s failed to dispatch msg to downstream, add into timer to retry in %dms, ref:%d", + pTask->id.idStr, DISPATCH_RETRY_INTERVAL_MS, ref); streamRetryDispatchData(pTask, DISPATCH_RETRY_INTERVAL_MS); } else { // this message has been sent successfully, let's try next one. diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index d81a5f0c2f..a3e981658a 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -220,7 +220,12 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask) { } if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { - stDebug("s-task:%s level:%d inputQ is blocked, retry later", pTask->id.idStr, pTask->info.taskLevel); + int64_t el = taosGetTimestampMs() - st; + pTask->execInfo.step1El += el/1000.0; + + stDebug("s-task:%s level:%d inputQ is blocked, resume in 5sec, elapsed time:%.2fs", pTask->id.idStr, + pTask->info.taskLevel, pTask->execInfo.step1El); + taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 5000}; } @@ -268,14 +273,20 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask) { } int64_t el = taosGetTimestampMs() - st; + pTask->execInfo.step1El += el/1000.0; + if (el >= STREAM_SCAN_HISTORY_TIMESLICE) { - stDebug("s-task:%s fill-history:%d level:%d timeslice for scan-history exhausted", pTask->id.idStr, - pTask->info.fillHistory, pTask->info.taskLevel); + stDebug("s-task:%s fill-history:%d time slice for scan-history exhausted, elapse time:%.2fs, retry in 100ms", + pTask->id.idStr, pTask->info.fillHistory, el / 1000.0); return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 100}; } } - return (SScanhistoryDataInfo){TASK_SCANHISTORY_CONT, 0};; + // todo refactor + int64_t el = taosGetTimestampMs() - st; + pTask->execInfo.step1El += el/1000.0; + + return (SScanhistoryDataInfo){TASK_SCANHISTORY_CONT, 0}; } // wait for the stream task to be idle diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 9044e65eb6..0424dcd715 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -125,7 +125,7 @@ int32_t streamReExecScanHistoryFuture(SStreamTask* pTask, int32_t idleDuration) pTask->schedHistoryInfo.numOfTicks = numOfTicks; int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s scan-history start in %.2fs, ref:%d", pTask->id.idStr, numOfTicks*0.1, ref); + stDebug("s-task:%s scan-history resumed in %.2fs, ref:%d", pTask->id.idStr, numOfTicks*0.1, ref); if (pTask->schedHistoryInfo.pTimer == NULL) { pTask->schedHistoryInfo.pTimer = taosTmrStart(doReExecScanhistory, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamEnv.timer); From 1312b9ea36dd235e212f818b7a3a35ca19478737 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 6 Nov 2023 23:44:01 +0800 Subject: [PATCH 12/20] fix(stream): fix error. --- source/libs/stream/src/streamExec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index a3e981658a..f44bcfdfa7 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -19,7 +19,7 @@ #define MAX_STREAM_EXEC_BATCH_NUM 32 #define STREAM_RESULT_DUMP_THRESHOLD 300 #define STREAM_RESULT_DUMP_SIZE_THRESHOLD (1048576 * 1) // 1MiB result data -#define STREAM_SCAN_HISTORY_TIMESLICE 1000 // 1000 ms +#define STREAM_SCAN_HISTORY_TIMESLICE 10000000 // 1000 ms static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask); From 96f92649060cccba51248ae7f9a92acbabccb73b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 00:38:02 +0800 Subject: [PATCH 13/20] fix(stream): add null ptr check. --- source/libs/stream/src/streamExec.c | 2 +- source/libs/stream/src/streamStart.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index f44bcfdfa7..e32bb47cc0 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -19,7 +19,7 @@ #define MAX_STREAM_EXEC_BATCH_NUM 32 #define STREAM_RESULT_DUMP_THRESHOLD 300 #define STREAM_RESULT_DUMP_SIZE_THRESHOLD (1048576 * 1) // 1MiB result data -#define STREAM_SCAN_HISTORY_TIMESLICE 10000000 // 1000 ms +#define STREAM_SCAN_HISTORY_TIMESLICE 1000 // 1000 ms static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask); diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 0424dcd715..44b95fb62c 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -1050,8 +1050,12 @@ static void displayStatusInfo(SStreamMeta* pMeta, SHashObj* pTaskSet, bool succ) void* key = taosHashGetKey(pIter, &keyLen); SStreamTask** pTask1 = taosHashGet(pMeta->pTasksMap, key, sizeof(STaskId)); - stInfo("s-task:%s level:%d vgId:%d, init:%" PRId64 ", initEnd:%" PRId64 ", %s", (*pTask1)->id.idStr, - (*pTask1)->info.taskLevel, vgId, pInfo->start, pInfo->end, pInfo->success ? "success" : "failed"); + if (pTask1 == NULL) { + stInfo("s-task:0x%x is dropped already, %s", (int32_t)((STaskId*)key)->taskId, succ ? "success" : "failed"); + } else { + stInfo("s-task:%s level:%d vgId:%d, init:%" PRId64 ", initEnd:%" PRId64 ", %s", (*pTask1)->id.idStr, + (*pTask1)->info.taskLevel, vgId, pInfo->start, pInfo->end, pInfo->success ? "success" : "failed"); + } } } From 207b53f3f691c79071c095425c8705df4c803b31 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 16:19:09 +0800 Subject: [PATCH 14/20] fix(stream): fix errors in scan-history, introduced by refactor --- include/libs/stream/tstream.h | 2 +- source/dnode/vnode/src/tq/tq.c | 20 ++-- source/libs/stream/src/streamExec.c | 158 +++++++++++++++------------- 3 files changed, 98 insertions(+), 82 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 4842b4b8e2..eab3ecf04e 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -804,7 +804,7 @@ void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc); // source level int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow); -SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask); +SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st); int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); // agg level diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 118355383f..fd35f5f7eb 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1145,8 +1145,8 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { } } else { if (pTask->execInfo.step2Start == 0) { - tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64", already elapsed:%.2fs", id, - pTask->execInfo.step1Start, pTask->execInfo.step1El); + tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64 ", already elapsed:%.2fs", + id, pTask->execInfo.step1Start, pTask->execInfo.step1El); } else { tqDebug("s-task:%s already in step2, no need to scan-history data, step2 startTs:%" PRId64, id, pTask->execInfo.step2Start); @@ -1168,10 +1168,11 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { return 0; } - SScanhistoryDataInfo retInfo = streamScanHistoryData(pTask); + int64_t st = taosGetTimestampMs(); + SScanhistoryDataInfo retInfo = streamScanHistoryData(pTask, st); - // todo update the step1 exec elapsed time - double el = (taosGetTimestampMs() - pTask->execInfo.step1Start) / 1000.0; + double el = (taosGetTimestampMs() - st) / 1000.0; + pTask->execInfo.step1El += el; if (retInfo.ret == TASK_SCANHISTORY_QUIT || retInfo.ret == TASK_SCANHISTORY_REXEC) { int8_t status = streamTaskSetSchedStatusInactive(pTask); @@ -1184,9 +1185,11 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { ETaskStatus s = streamTaskGetStatus(pTask, &p); if (s == TASK_STATUS__PAUSE) { - tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs, sched-status:%d", pTask->id.idStr, el, status); + tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs total:%.2fs, sched-status:%d", pTask->id.idStr, + el, pTask->execInfo.step1El, status); } else if (s == TASK_STATUS__STOP || s == TASK_STATUS__DROPPING) { - tqDebug("s-task:%s status:%p not continue scan-history data", pTask->id.idStr, p); + tqDebug("s-task:%s status:%p not continue scan-history data, total elapsed time:%.2fs quit", pTask->id.idStr, p, + pTask->execInfo.step1El); } } @@ -1195,7 +1198,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { } // the following procedure should be executed, no matter status is stop/pause or not - tqDebug("s-task:%s scan-history(step 1) ended, elapsed time:%.2fs", id, el); + tqDebug("s-task:%s scan-history(step 1) ended, elapsed time:%.2fs", id, pTask->execInfo.step1El); if (pTask->info.fillHistory) { SStreamTask* pStreamTask = NULL; @@ -1505,6 +1508,7 @@ int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, streamSchedExec(pTask); } } else if (status == TASK_STATUS__UNINIT) { + // todo: fill-history task init ? if (pTask->info.fillHistory == 0) { EStreamTaskEvent event = HAS_RELATED_FILLHISTORY_TASK(pTask) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; streamTaskHandleEvent(pTask->status.pSM, event); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index e32bb47cc0..701cc76086 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -187,106 +187,118 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i return code; } -SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask) { - ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); +static int32_t handleResultBlocks(SStreamTask* pTask, SArray* pRes, int32_t size) { + int32_t code = TSDB_CODE_SUCCESS; + if (taosArrayGetSize(pRes) > 0) { + SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(NULL, pTask, size, pRes); + code = doOutputResultBlockImpl(pTask, pStreamBlocks); + if (code != TSDB_CODE_SUCCESS) { + stDebug("s-task:%s dump fill-history results failed, code:%s", pTask->id.idStr, tstrerror(code)); + } + } else { + taosArrayDestroy(pRes); + } + return code; +} +static void streamScanHistoryDataImpl(SStreamTask* pTask, SArray* pRes, int32_t* pSize, bool* pFinish) { int32_t code = TSDB_CODE_SUCCESS; + void* exec = pTask->exec.pExecutor; + int32_t numOfBlocks = 0; + + while (1) { + if (streamTaskShouldStop(pTask)) { + break; + } + + if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { + stDebug("s-task:%s level:%d inputQ is blocked, retry in 5s", pTask->id.idStr, pTask->info.taskLevel); + break; + } + + SSDataBlock* output = NULL; + uint64_t ts = 0; + code = qExecTask(exec, &output, &ts); + if (code != TSDB_CODE_TSC_QUERY_KILLED && code != TSDB_CODE_SUCCESS) { + stError("s-task:%s scan-history data error occurred code:%s, continue scan-history", pTask->id.idStr, + tstrerror(code)); + continue; + } + + // the generated results before fill-history task been paused, should be dispatched to sink node + if (output == NULL) { + (*pFinish) = qStreamScanhistoryFinished(exec); + break; + } + + SSDataBlock block = {0}; + assignOneDataBlock(&block, output); + block.info.childId = pTask->info.selfChildId; + taosArrayPush(pRes, &block); + + (*pSize) += blockDataGetSize(output) + sizeof(SSDataBlock) + sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block); + numOfBlocks += 1; + + if (numOfBlocks >= STREAM_RESULT_DUMP_THRESHOLD || (*pSize) >= STREAM_RESULT_DUMP_SIZE_THRESHOLD) { + stDebug("s-task:%s scan exec numOfBlocks:%d, size:%.2fKiB output num-limit:%d, size-limit:%.2fKiB reached", + pTask->id.idStr, numOfBlocks, SIZE_IN_KiB(*pSize), STREAM_RESULT_DUMP_THRESHOLD, + SIZE_IN_KiB(STREAM_RESULT_DUMP_SIZE_THRESHOLD)); + break; + } + } +} + +SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { + ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); + void* exec = pTask->exec.pExecutor; bool finished = false; - int64_t st = taosGetTimestampMs(); qSetStreamOpOpen(exec); - while (!finished) { + while (1) { if (streamTaskShouldPause(pTask)) { - double el = (taosGetTimestampMs() - pTask->execInfo.step1Start) / 1000.0; - stDebug("s-task:%s paused from the scan-history task, elapsed time:%.2fsec", pTask->id.idStr, el); - return (SScanhistoryDataInfo){TASK_SCANHISTORY_QUIT, 0}; // quit from step1, not continue to handle the step2 + stDebug("s-task:%s paused from the scan-history task", pTask->id.idStr); + // quit from step1, not continue to handle the step2 + return (SScanhistoryDataInfo){TASK_SCANHISTORY_QUIT, 0}; } SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); if (pRes == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - stError("s-task:%s scan-history prepare result block failed, code:%s, retry later", pTask->id.idStr, tstrerror(terrno)); + stError("s-task:%s scan-history prepare result block failed, code:%s, retry later", pTask->id.idStr, + tstrerror(terrno)); continue; } int32_t size = 0; - int32_t numOfBlocks = 0; - while (1) { - if (streamTaskShouldStop(pTask)) { - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - return (SScanhistoryDataInfo){TASK_SCANHISTORY_QUIT, 0}; - } + streamScanHistoryDataImpl(pTask, pRes, &size, &finished); - if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { - int64_t el = taosGetTimestampMs() - st; - pTask->execInfo.step1El += el/1000.0; - - stDebug("s-task:%s level:%d inputQ is blocked, resume in 5sec, elapsed time:%.2fs", pTask->id.idStr, - pTask->info.taskLevel, pTask->execInfo.step1El); - - taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 5000}; - } - - SSDataBlock* output = NULL; - uint64_t ts = 0; - code = qExecTask(exec, &output, &ts); - if (code != TSDB_CODE_TSC_QUERY_KILLED && code != TSDB_CODE_SUCCESS) { - stError("s-task:%s scan-history data error occurred code:%s, continue scan", pTask->id.idStr, tstrerror(code)); - continue; - } - - // the generated results before fill-history task been paused, should be dispatched to sink node - if (output == NULL) { - finished = qStreamScanhistoryFinished(exec); - break; - } - - SSDataBlock block = {0}; - assignOneDataBlock(&block, output); - block.info.childId = pTask->info.selfChildId; - taosArrayPush(pRes, &block); - - size += blockDataGetSize(output) + sizeof(SSDataBlock) + sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block); - numOfBlocks += 1; - - if (numOfBlocks >= STREAM_RESULT_DUMP_THRESHOLD || size >= STREAM_RESULT_DUMP_SIZE_THRESHOLD) { - stDebug("s-task:%s scan exec numOfBlocks:%d, size:%.2fKiB output num-limit:%d, size-limit:%.2fKiB reached", - pTask->id.idStr, numOfBlocks, SIZE_IN_KiB(size), STREAM_RESULT_DUMP_THRESHOLD, - SIZE_IN_KiB(STREAM_RESULT_DUMP_SIZE_THRESHOLD)); - break; - } + if(streamTaskShouldStop(pTask)) { + taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); + return (SScanhistoryDataInfo){TASK_SCANHISTORY_QUIT, 0}; } - if (taosArrayGetSize(pRes) > 0) { - SStreamDataBlock* pStreamBlocks = createStreamBlockFromResults(NULL, pTask, size, pRes); - code = doOutputResultBlockImpl(pTask, pStreamBlocks); - if (code != TSDB_CODE_SUCCESS) { - terrno = code; - stDebug("s-task:%s dump fill-history results failed, code:%s, retry in 100ms", pTask->id.idStr, tstrerror(code)); - return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 100}; - } - } else { - taosArrayDestroy(pRes); - } + // dispatch the generated results + int32_t code = handleResultBlocks(pTask, pRes, size); int64_t el = taosGetTimestampMs() - st; - pTask->execInfo.step1El += el/1000.0; + + // downstream task input queue is full, try in 5sec + if (pTask->inputq.status == TASK_INPUT_STATUS__BLOCKED) { + return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 5000}; + } + + if (finished) { + return (SScanhistoryDataInfo){TASK_SCANHISTORY_CONT, 0}; + } if (el >= STREAM_SCAN_HISTORY_TIMESLICE) { - stDebug("s-task:%s fill-history:%d time slice for scan-history exhausted, elapse time:%.2fs, retry in 100ms", + stDebug("s-task:%s fill-history:%d time slice exhausted, elapsed time:%.2fs, retry in 100ms", pTask->id.idStr, pTask->info.fillHistory, el / 1000.0); return (SScanhistoryDataInfo){TASK_SCANHISTORY_REXEC, 100}; } } - - // todo refactor - int64_t el = taosGetTimestampMs() - st; - pTask->execInfo.step1El += el/1000.0; - - return (SScanhistoryDataInfo){TASK_SCANHISTORY_CONT, 0}; } // wait for the stream task to be idle @@ -296,7 +308,7 @@ static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) { int64_t st = taosGetTimestampMs(); while (!streamTaskIsIdle(pStreamTask)) { stDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", id, pTask->info.taskLevel, - pStreamTask->id.idStr); + pStreamTask->id.idStr); taosMsleep(100); } From 9c49de69f8b2992bfc96da9f85769f294e57b6f1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 17:04:55 +0800 Subject: [PATCH 15/20] refactor: do some internal refactor. --- source/libs/stream/inc/streamInt.h | 2 +- source/libs/stream/src/streamQueue.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index aa9b26381e..4a7d3a2a05 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -127,7 +127,7 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); int32_t streamTransferStateToStreamTask(SStreamTask* pTask); -int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate); +int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, const char*); STaskId streamTaskExtractKey(const SStreamTask* pTask); void streamTaskInitForLaunchHTask(SHistoryTaskInfo* pInfo); void streamTaskSetRetryInfoForLaunch(SHistoryTaskInfo* pInfo); diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 90e862005b..e004c9a403 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -374,7 +374,8 @@ int32_t streamTaskPutDataIntoOutputQ(SStreamTask* pTask, SStreamDataBlock* pBloc return TSDB_CODE_SUCCESS; } -int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate) { +int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t numRate, float quotaRate, + const char* id) { if (numCap < 10 || numRate < 10 || pBucket == NULL) { stError("failed to init sink task bucket, cap:%d, rate:%d", numCap, numRate); return TSDB_CODE_INVALID_PARA; @@ -389,6 +390,7 @@ int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t pBucket->quotaRemain = pBucket->quotaCapacity; pBucket->fillTimestamp = taosGetTimestampMs(); + stDebug("s-task:%s sink quotaRate:%.2fMiB, numRate:%d", id, quotaRate, numRate); return TSDB_CODE_SUCCESS; } From 344c3c5b13e7cb26ae630ee6804b69d2f622e3bb Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 17:11:36 +0800 Subject: [PATCH 16/20] fix(stream): fix syntax error. --- source/libs/stream/src/streamTask.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index a1245ba32c..a7fb590d1b 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -446,7 +446,7 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i // 2MiB per second for sink task // 50 times sink operator per second - streamTaskInitTokenBucket(pTask->outputInfo.pTokenBucket, 50, 50, tsSinkDataRate); + streamTaskInitTokenBucket(pTask->outputInfo.pTokenBucket, 50, 50, tsSinkDataRate, pTask->id.idStr); TdThreadMutexAttr attr = {0}; int code = taosThreadMutexAttrInit(&attr); From 0b59681158fe99a01614ac3aa09a533102375e2b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 17:18:58 +0800 Subject: [PATCH 17/20] refactor: wait a while when no token available. --- source/libs/stream/src/streamQueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index e004c9a403..239ab883ae 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -159,7 +159,8 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu // no available token in bucket for sink task, let's wait for a little bit if (taskLevel == TASK_LEVEL__SINK && (!streamTaskExtractAvailableToken(pTask->outputInfo.pTokenBucket, pTask->id.idStr))) { - stDebug("s-task:%s no available token in bucket for sink data, wait for 50ms", id); + stDebug("s-task:%s no available token in bucket for sink data, wait for 10ms", id); + taosMsleep(10); return TSDB_CODE_SUCCESS; } From 06d02b862eb662e96fcc6ea4571f7cd180651efe Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 17:33:30 +0800 Subject: [PATCH 18/20] fix(stream): update the fill-time for quota limitation. --- source/libs/stream/src/streamQueue.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 239ab883ae..22a1c22be4 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -160,7 +160,7 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu // no available token in bucket for sink task, let's wait for a little bit if (taskLevel == TASK_LEVEL__SINK && (!streamTaskExtractAvailableToken(pTask->outputInfo.pTokenBucket, pTask->id.idStr))) { stDebug("s-task:%s no available token in bucket for sink data, wait for 10ms", id); - taosMsleep(10); +// taosMsleep(10); return TSDB_CODE_SUCCESS; } @@ -410,10 +410,11 @@ static void fillTokenBucket(STokenBucket* pBucket, const char* id) { double incSize = (delta / 1000.0) * pBucket->quotaRate; if (incSize > 0) { pBucket->quotaRemain = TMIN(pBucket->quotaRemain + incSize, pBucket->quotaCapacity); + pBucket->fillTimestamp = now; } if (incNum > 0 || incSize > 0) { - stDebug("new token and capacity available, current token:%d inc:%d, current quota:%.2fMiB inc:%.2fMiB, ts:%" PRId64 + stDebug("new token and capacity available, current token:%d inc:%d, current quota:%.2fMiB inc:%.3fMiB, ts:%" PRId64 " idle for %.2f Sec, %s", pBucket->numOfToken, incNum, pBucket->quotaRemain, incSize, now, delta / 1000.0, id); } From 43b14ba8b4e35602f3b8fe9d1d85a0a777a89be1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 17:49:49 +0800 Subject: [PATCH 19/20] refactor: wait for a while when no quota available. --- source/libs/stream/src/streamQueue.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 22a1c22be4..63ee702ada 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -160,7 +160,7 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu // no available token in bucket for sink task, let's wait for a little bit if (taskLevel == TASK_LEVEL__SINK && (!streamTaskExtractAvailableToken(pTask->outputInfo.pTokenBucket, pTask->id.idStr))) { stDebug("s-task:%s no available token in bucket for sink data, wait for 10ms", id); -// taosMsleep(10); + taosMsleep(10); return TSDB_CODE_SUCCESS; } @@ -414,9 +414,8 @@ static void fillTokenBucket(STokenBucket* pBucket, const char* id) { } if (incNum > 0 || incSize > 0) { - stDebug("new token and capacity available, current token:%d inc:%d, current quota:%.2fMiB inc:%.3fMiB, ts:%" PRId64 - " idle for %.2f Sec, %s", - pBucket->numOfToken, incNum, pBucket->quotaRemain, incSize, now, delta / 1000.0, id); + stTrace("token/quota available, token:%d inc:%d, quota:%.2fMiB inc:%.3fMiB, ts:%" PRId64 " idle:%" PRId64 "ms, %s", + pBucket->numOfToken, incNum, pBucket->quotaRemain, incSize, now, delta, id); } } From a0d81734f1dd0f8b6690eec45fefbcf132cd9ae5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 18:18:58 +0800 Subject: [PATCH 20/20] fix(stream): add ref for task. --- source/libs/stream/src/streamStart.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 44b95fb62c..ed96f2c4b0 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -98,8 +98,10 @@ static void doReExecScanhistory(void* param, void* tmrId) { char* p = NULL; ETaskStatus status = streamTaskGetStatus(pTask, &p); if (status == TASK_STATUS__DROPPING || status == TASK_STATUS__STOP) { + streamMetaReleaseTask(pTask->pMeta, pTask); int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s status:%s not start scan-history again, ref:%d", pTask->id.idStr, p, ref); + return; } if (pTask->schedHistoryInfo.numOfTicks <= 0) { @@ -108,6 +110,9 @@ static void doReExecScanhistory(void* param, void* tmrId) { int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s fill-history:%d start scan-history data, out of tmr, ref:%d", pTask->id.idStr, pTask->info.fillHistory, ref); + + // release the task. + streamMetaReleaseTask(pTask->pMeta, pTask); } else { taosTmrReset(doReExecScanhistory, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamEnv.timer, &pTask->schedHistoryInfo.pTimer); @@ -122,6 +127,10 @@ int32_t streamReExecScanHistoryFuture(SStreamTask* pTask, int32_t idleDuration) numOfTicks = SCANHISTORY_IDLE_TICK; } + // add ref for task + SStreamTask* p = streamMetaAcquireTask(pTask->pMeta, pTask->id.streamId, pTask->id.taskId); + ASSERT(p != NULL); + pTask->schedHistoryInfo.numOfTicks = numOfTicks; int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1);