fix(stream): wait for stream task completed.

This commit is contained in:
Haojun Liao 2023-06-15 01:14:41 +08:00
parent 9a3708e17b
commit 53377c2c1f
4 changed files with 55 additions and 19 deletions

View File

@ -44,9 +44,8 @@ enum {
TASK_STATUS__DROPPING, TASK_STATUS__DROPPING,
TASK_STATUS__FAIL, TASK_STATUS__FAIL,
TASK_STATUS__STOP, TASK_STATUS__STOP,
TASK_STATUS__WAIT_DOWNSTREAM, TASK_STATUS__SCAN_HISTORY, // stream task scan history data by using tsdbread in the stream scanner
TASK_STATUS__SCAN_HISTORY, TASK_STATUS__HALT, // stream task will handle all data in the input queue, and then paused
TASK_STATUS__HALT, // stream task halt to wait for the secondary scan history, this status is invisible for user
TASK_STATUS__PAUSE, TASK_STATUS__PAUSE,
}; };
@ -565,6 +564,7 @@ int32_t streamSchedExec(SStreamTask* pTask);
int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock); int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock);
bool streamTaskShouldStop(const SStreamStatus* pStatus); bool streamTaskShouldStop(const SStreamStatus* pStatus);
bool streamTaskShouldPause(const SStreamStatus* pStatus); bool streamTaskShouldPause(const SStreamStatus* pStatus);
bool streamTaskIsIdle(const SStreamTask* pTask);
int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz); int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz);

View File

@ -1121,8 +1121,6 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
taosMsleep(100); taosMsleep(100);
} }
taosMsleep(10000);
// now we can stop the stream task execution // now we can stop the stream task execution
pStreamTask->status.taskStatus = TASK_STATUS__HALT; pStreamTask->status.taskStatus = TASK_STATUS__HALT;
tqDebug("s-task:%s level:%d status is set to halt by history scan task:%s", pStreamTask->id.idStr, tqDebug("s-task:%s level:%d status is set to halt by history scan task:%s", pStreamTask->id.idStr,
@ -1378,12 +1376,13 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
if (pTask != NULL) { if (pTask != NULL) {
if (pTask->status.taskStatus == TASK_STATUS__NORMAL) { int8_t status = pTask->status.taskStatus;
tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId, pTask->id.idStr, if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__HALT) {
tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr,
pTask->chkInfo.version); pTask->chkInfo.version);
streamProcessRunReq(pTask); streamProcessRunReq(pTask);
} else { } else {
if (streamTaskShouldPause(&pTask->status) || (pTask->status.taskStatus == TASK_STATUS__HALT)) { if (streamTaskShouldPause(&pTask->status)) {
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
} }

View File

@ -189,6 +189,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
if (qExecTask(exec, &output, &ts) < 0) { if (qExecTask(exec, &output, &ts) < 0) {
continue; continue;
} }
if (output == NULL) { if (output == NULL) {
if (qStreamRecoverScanFinished(exec)) { if (qStreamRecoverScanFinished(exec)) {
finished = true; finished = true;
@ -396,16 +397,30 @@ int32_t streamExecForAll(SStreamTask* pTask) {
ASSERT(pStreamTask != NULL && pStreamTask->historyTaskId.taskId == pTask->id.taskId); ASSERT(pStreamTask != NULL && pStreamTask->historyTaskId.taskId == pTask->id.taskId);
STimeWindow* pTimeWindow = &pStreamTask->dataRange.window; STimeWindow* pTimeWindow = &pStreamTask->dataRange.window;
// here we need to wait for the stream task handle all data in the input queue.
if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) { if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) {
ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__HALT); ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__HALT);
} else {
ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__NORMAL);
pStreamTask->status.taskStatus = TASK_STATUS__HALT;
}
{// wait for the stream task to be idle
while(!streamTaskIsIdle(pStreamTask)) {
qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", pTask->id.idStr,
pTask->info.taskLevel, pStreamTask->id.idStr);
taosMsleep(100);
}
}
if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) {
// update the scan data range for source task. // update the scan data range for source task.
qDebug("s-task:%s level:%d stream task window %" PRId64 " - %" PRId64 " transfer to %" PRId64 " - %" PRId64 qDebug("s-task:%s level:%d stream task window %" PRId64 " - %" PRId64 " transfer to %" PRId64 " - %" PRId64
", status:%s, sched-status:%d", ", status:%s, sched-status:%d",
pStreamTask->id.idStr, TASK_LEVEL__SOURCE, pTimeWindow->skey, pTimeWindow->ekey, INT64_MIN, pStreamTask->id.idStr, TASK_LEVEL__SOURCE, pTimeWindow->skey, pTimeWindow->ekey, INT64_MIN,
pTimeWindow->ekey, streamGetTaskStatusStr(TASK_STATUS__NORMAL), pStreamTask->status.schedStatus); pTimeWindow->ekey, streamGetTaskStatusStr(TASK_STATUS__NORMAL), pStreamTask->status.schedStatus);
} else { } else {
// for agg task and sink task, they are continue to execute, no need to be halt. // for sink tasks, they are continue to execute, no need to be halt.
// the process should be stopped for a while, during the term of transfer task state. // the process should be stopped for a while, during the term of transfer task state.
// OR wait for the inputQ && outputQ of agg tasks are all consumed, and then start the state transfer // OR wait for the inputQ && outputQ of agg tasks are all consumed, and then start the state transfer
@ -413,12 +428,13 @@ int32_t streamExecForAll(SStreamTask* pTask) {
qDebug("s-task:%s no need to update time window, for non-source task", pStreamTask->id.idStr); qDebug("s-task:%s no need to update time window, for non-source task", pStreamTask->id.idStr);
} }
// expand the query time window for stream scanner
pTimeWindow->skey = INT64_MIN; pTimeWindow->skey = INT64_MIN;
streamSetStatusNormal(pStreamTask); streamSetStatusNormal(pStreamTask);
streamMetaSaveTask(pTask->pMeta, pStreamTask); streamMetaSaveTask(pTask->pMeta, pStreamTask);
if (streamMetaCommit(pTask->pMeta)) { if (streamMetaCommit(pTask->pMeta)) {
// persistent to disk for // persistent to disk
} }
streamSchedExec(pStreamTask); streamSchedExec(pStreamTask);
@ -481,12 +497,32 @@ int32_t streamExecForAll(SStreamTask* pTask) {
double el = (taosGetTimestampMs() - st) / 1000.0; double el = (taosGetTimestampMs() - st) / 1000.0;
qDebug("s-task:%s batch of (%d)input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", qDebug("s-task:%s batch of (%d)input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d",
id, batchSize, el, resSize / 1048576.0, totalBlocks); id, batchSize, el, resSize / 1048576.0, totalBlocks);
streamFreeQitem(pInput); streamFreeQitem(pInput);
} }
return 0; return 0;
} }
bool streamTaskIsIdle(const SStreamTask* pTask) {
int32_t numOfItems = taosQueueItemSize(pTask->inputQueue->queue);
if (numOfItems > 0) {
return false;
}
numOfItems = taosQallItemSize(pTask->inputQueue->qall);
if (numOfItems > 0) {
return false;
}
// blocked by downstream task
if (pTask->outputStatus == TASK_OUTPUT_STATUS__BLOCKED) {
return false;
}
return (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE);
}
int32_t streamTryExec(SStreamTask* pTask) { int32_t streamTryExec(SStreamTask* pTask) {
// this function may be executed by multi-threads, so status check is required. // this function may be executed by multi-threads, so status check is required.
int8_t schedStatus = int8_t schedStatus =

View File

@ -40,7 +40,6 @@ int32_t streamStartRecoverTask(SStreamTask* pTask, int8_t igUntreated) {
const char* streamGetTaskStatusStr(int32_t status) { const char* streamGetTaskStatusStr(int32_t status) {
switch(status) { switch(status) {
case TASK_STATUS__NORMAL: return "normal"; case TASK_STATUS__NORMAL: return "normal";
case TASK_STATUS__WAIT_DOWNSTREAM: return "wait-for-downstream";
case TASK_STATUS__SCAN_HISTORY: return "scan-history"; case TASK_STATUS__SCAN_HISTORY: return "scan-history";
case TASK_STATUS__HALT: return "halt"; case TASK_STATUS__HALT: return "halt";
case TASK_STATUS__PAUSE: return "paused"; case TASK_STATUS__PAUSE: return "paused";
@ -217,18 +216,17 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs
return -1; return -1;
} }
// set the downstream tasks have been checked flag
ASSERT(pTask->status.checkDownstream == 0); ASSERT(pTask->status.checkDownstream == 0);
pTask->status.checkDownstream = 1; pTask->status.checkDownstream = 1;
ASSERT(pTask->status.taskStatus != TASK_STATUS__HALT); ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY || pTask->status.taskStatus == TASK_STATUS__NORMAL);
if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) { if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) {
qDebug("s-task:%s fixed downstream task is ready, now enter into scan-history-data stage, status:%s", id, qDebug("s-task:%s fixed downstream task is ready, now enter into scan-history-data stage, status:%s", id,
streamGetTaskStatusStr(pTask->status.taskStatus)); streamGetTaskStatusStr(pTask->status.taskStatus));
streamTaskLaunchScanHistory(pTask); streamTaskLaunchScanHistory(pTask);
} else { } else {
ASSERT(pTask->status.taskStatus == TASK_STATUS__NORMAL); qDebug("s-task:%s fixed downstream task is ready, ready for data from inputQ, status:%s", id,
qDebug("s-task:%s fixed downstream task is ready, now ready for data from wal, status:%s", id,
streamGetTaskStatusStr(pTask->status.taskStatus)); streamGetTaskStatusStr(pTask->status.taskStatus));
} }
} else { } else {
@ -396,15 +394,17 @@ int32_t streamAggRecoverPrepare(SStreamTask* pTask) {
return 0; return 0;
} }
int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) { int32_t streamAggUpstreamScanHistoryFinish(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor; void* exec = pTask->exec.pExecutor;
if (qRestoreStreamOperatorOption(exec) < 0) { if (qRestoreStreamOperatorOption(exec) < 0) {
return -1; return -1;
} }
if (qStreamRecoverFinish(exec) < 0) { if (qStreamRecoverFinish(exec) < 0) {
return -1; return -1;
} }
streamSetStatusNormal(pTask);
// streamSetStatusNormal(pTask);
return 0; return 0;
} }
@ -414,8 +414,9 @@ int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId) {
ASSERT(left >= 0); ASSERT(left >= 0);
if (left == 0) { if (left == 0) {
qDebug("s-task:%s all %d upstream tasks finish scan-history data", pTask->id.idStr, left); int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamEpInfoList);
streamAggChildrenRecoverFinish(pTask); qDebug("s-task:%s all %d upstream tasks finish scan-history data", pTask->id.idStr, numOfTasks);
streamAggUpstreamScanHistoryFinish(pTask);
} else { } else {
qDebug("s-task:%s remain unfinished upstream tasks:%d", pTask->id.idStr, left); qDebug("s-task:%s remain unfinished upstream tasks:%d", pTask->id.idStr, left);
} }