fix(stream): scan wal in step2
This commit is contained in:
parent
a4e1901309
commit
e89f530db2
|
@ -45,6 +45,7 @@ enum {
|
|||
TASK_STATUS__FAIL,
|
||||
TASK_STATUS__STOP,
|
||||
TASK_STATUS__SCAN_HISTORY, // stream task scan history data by using tsdbread in the stream scanner
|
||||
TASK_STATUS__SCAN_HISTORY_WAL, // scan history data in wal
|
||||
TASK_STATUS__HALT, // pause, but not be manipulated by user command
|
||||
TASK_STATUS__PAUSE, // pause
|
||||
};
|
||||
|
@ -302,6 +303,12 @@ typedef struct {
|
|||
SStreamQueue* queue;
|
||||
} STaskOutputInfo;
|
||||
|
||||
typedef struct {
|
||||
int64_t init;
|
||||
int64_t step1Start;
|
||||
int64_t step2Start;
|
||||
} STaskTimestamp;
|
||||
|
||||
struct SStreamTask {
|
||||
SStreamId id;
|
||||
SSTaskBasicInfo info;
|
||||
|
@ -316,7 +323,7 @@ struct SStreamTask {
|
|||
SArray* pUpstreamEpInfoList; // SArray<SStreamChildEpInfo*>, // children info
|
||||
int32_t nextCheckId;
|
||||
SArray* checkpointInfo; // SArray<SStreamCheckpointInfo>
|
||||
int64_t initTs;
|
||||
STaskTimestamp tsInfo;
|
||||
// output
|
||||
union {
|
||||
STaskDispatcherFixedEp fixedEpDispatcher;
|
||||
|
|
|
@ -72,7 +72,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
pTask->initTs = taosGetTimestampMs();
|
||||
pTask->tsInfo.init = taosGetTimestampMs();
|
||||
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
|
||||
pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL;
|
||||
pTask->pMsgCb = &pSnode->msgCb;
|
||||
|
|
|
@ -818,7 +818,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
pTask->initTs = taosGetTimestampMs();
|
||||
pTask->tsInfo.init = taosGetTimestampMs();
|
||||
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
|
||||
pTask->outputInfo.status = TASK_OUTPUT_STATUS__NORMAL;
|
||||
pTask->pMsgCb = &pTq->pVnode->msgCb;
|
||||
|
@ -1115,7 +1115,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
|
|||
streamTaskDisablePause(pTask);
|
||||
}
|
||||
|
||||
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) {
|
||||
if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) {
|
||||
tqDebug("s-task:%s is dropped or paused, abort recover in step1", id);
|
||||
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
|
||||
streamMetaReleaseTask(pMeta, pTask);
|
||||
|
@ -1123,7 +1123,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
|
|||
}
|
||||
|
||||
double el = (taosGetTimestampMs() - st) / 1000.0;
|
||||
tqDebug("s-task:%s history data scan stage(step 1) ended, elapsed time:%.2fs", id, el);
|
||||
tqDebug("s-task:%s scan-history stage(step 1) ended, elapsed time:%.2fs", id, el);
|
||||
|
||||
if (pTask->info.fillHistory) {
|
||||
SVersionRange* pRange = NULL;
|
||||
|
@ -1173,34 +1173,35 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
|
|||
id, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pWindow->skey, pWindow->ekey, id);
|
||||
ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__WAITING);
|
||||
|
||||
st = taosGetTimestampMs();
|
||||
pTask->tsInfo.step2Start = taosGetTimestampMs();
|
||||
streamSetParamForStreamScannerStep2(pTask, pRange, pWindow);
|
||||
}
|
||||
|
||||
if (!streamTaskRecoverScanStep2Finished(pTask)) {
|
||||
streamSourceScanHistoryData(pTask);
|
||||
|
||||
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) {
|
||||
pTask->status.taskStatus = TASK_STATUS__SCAN_HISTORY_WAL;
|
||||
if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) {
|
||||
tqDebug("s-task:%s is dropped or paused, abort recover in step1", id);
|
||||
streamMetaReleaseTask(pMeta, pTask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
streamTaskRecoverSetAllStepFinished(pTask);
|
||||
int64_t dstVer = pTask->dataRange.range.minVer - 1;
|
||||
walReaderSetSkipToVersion(pTask->exec.pWalReader, dstVer);
|
||||
tqDebug("s-task:%s seek wal reader to ver:%"PRId64, id, dstVer);
|
||||
}
|
||||
|
||||
el = (taosGetTimestampMs() - st) / 1000.0;
|
||||
tqDebug("s-task:%s history data scan stage(step 2) ended, elapsed time:%.2fs", id, el);
|
||||
|
||||
// 3. notify downstream tasks to transfer executor state after handle all history blocks.
|
||||
if (!pTask->status.transferState) {
|
||||
code = streamDispatchTransferStateMsg(pTask);
|
||||
if (code != TSDB_CODE_SUCCESS) {
|
||||
// todo handle error
|
||||
}
|
||||
|
||||
pTask->status.transferState = true;
|
||||
}
|
||||
// int64_t el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0;
|
||||
// tqDebug("s-task:%s history data scan stage(step 2) ended, elapsed time:%.2fs", id, el);
|
||||
//
|
||||
// // 3. notify downstream tasks to transfer executor state after handle all history blocks.
|
||||
// if (!pTask->status.transferState) {
|
||||
// code = streamDispatchTransferStateMsg(pTask);
|
||||
// if (code != TSDB_CODE_SUCCESS) {
|
||||
// // todo handle error
|
||||
// }
|
||||
//
|
||||
// pTask->status.transferState = true;
|
||||
// }
|
||||
|
||||
// 4. 1) transfer the ownership of executor state, 2) update the scan data range for source task.
|
||||
// 5. resume the related stream task.
|
||||
|
@ -1409,8 +1410,8 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
|
|||
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
|
||||
if (pTask != NULL) {
|
||||
// even in halt status, the data in inputQ must be processed
|
||||
int8_t status = pTask->status.taskStatus;
|
||||
if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__SCAN_HISTORY) {
|
||||
int8_t st = pTask->status.taskStatus;
|
||||
if (st == TASK_STATUS__NORMAL || st == TASK_STATUS__SCAN_HISTORY || st == TASK_STATUS__SCAN_HISTORY_WAL) {
|
||||
tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr,
|
||||
pTask->chkInfo.version);
|
||||
streamProcessRunReq(pTask);
|
||||
|
|
|
@ -247,7 +247,7 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (status != TASK_STATUS__NORMAL) {
|
||||
if (status != TASK_STATUS__NORMAL && status != TASK_STATUS__SCAN_HISTORY_WAL) {
|
||||
tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status));
|
||||
streamMetaReleaseTask(pStreamMeta, pTask);
|
||||
continue;
|
||||
|
@ -261,6 +261,17 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
|
|||
|
||||
*pScanIdle = false;
|
||||
|
||||
if (pTask->info.fillHistory == 1) {
|
||||
ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY_WAL);
|
||||
// the maximum version of data in the WAL has reached already, the step2 is done
|
||||
if (pTask->chkInfo.currentVer > pTask->dataRange.range.maxVer) {
|
||||
qWarn("s-task:%s fill-history scan WAL, reach the maximum ver:%" PRId64 ", not scan wal anymore",
|
||||
pTask->id.idStr, pTask->chkInfo.currentVer);
|
||||
streamMetaReleaseTask(pStreamMeta, pTask);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// seek the stored version and extract data from WAL
|
||||
int32_t code = doSetOffsetForWalReader(pTask, vgId);
|
||||
if (code != TSDB_CODE_SUCCESS) {
|
||||
|
@ -283,9 +294,9 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
|
|||
noDataInWal = false;
|
||||
code = tAppendDataToInputQueue(pTask, pItem);
|
||||
if (code == TSDB_CODE_SUCCESS) {
|
||||
pTask->chkInfo.currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader);
|
||||
tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr,
|
||||
pTask->chkInfo.currentVer);
|
||||
int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader);
|
||||
pTask->chkInfo.currentVer = ver;
|
||||
tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, ver);
|
||||
} else {
|
||||
tqError("s-task:%s append input queue failed, too many in inputQ, ver:%" PRId64, pTask->id.idStr,
|
||||
pTask->chkInfo.currentVer);
|
||||
|
|
|
@ -324,9 +324,6 @@ int32_t streamProcessRunReq(SStreamTask* pTask) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
/*if (pTask->dispatchType == TASK_OUTPUT__FIXED_DISPATCH || pTask->dispatchType == TASK_OUTPUT__SHUFFLE_DISPATCH) {*/
|
||||
/*streamDispatchStreamBlock(pTask);*/
|
||||
/*}*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -589,6 +589,8 @@ int32_t streamTryExec(SStreamTask* pTask) {
|
|||
int8_t schedStatus =
|
||||
atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE);
|
||||
|
||||
const char* id = pTask->id.idStr;
|
||||
|
||||
if (schedStatus == TASK_SCHED_STATUS__WAITING) {
|
||||
int32_t code = streamExecForAll(pTask);
|
||||
if (code < 0) { // todo this status shoudl be removed
|
||||
|
@ -597,16 +599,43 @@ int32_t streamTryExec(SStreamTask* pTask) {
|
|||
}
|
||||
|
||||
// todo the task should be commit here
|
||||
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
|
||||
qDebug("s-task:%s exec completed, status:%s, sched-status:%d", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus),
|
||||
pTask->status.schedStatus);
|
||||
if (taosQueueEmpty(pTask->inputQueue->queue)) {
|
||||
if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY_WAL &&
|
||||
pTask->chkInfo.currentVer > pTask->dataRange.range.maxVer) {
|
||||
// fill-history WAL scan has completed
|
||||
streamTaskRecoverSetAllStepFinished(pTask);
|
||||
|
||||
if (!taosQueueEmpty(pTask->inputQueue->queue) && (!streamTaskShouldStop(&pTask->status)) &&
|
||||
(!streamTaskShouldPause(&pTask->status))) {
|
||||
streamSchedExec(pTask);
|
||||
double el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0;
|
||||
qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, el);
|
||||
|
||||
// 3. notify downstream tasks to transfer executor state after handle all history blocks.
|
||||
if (!pTask->status.transferState) {
|
||||
code = streamDispatchTransferStateMsg(pTask);
|
||||
if (code != TSDB_CODE_SUCCESS) {
|
||||
// todo handle error
|
||||
}
|
||||
|
||||
pTask->status.transferState = true;
|
||||
}
|
||||
|
||||
// the last execution of fill-history task, in order to transfer task operator states.
|
||||
code = streamExecForAll(pTask);
|
||||
|
||||
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
|
||||
qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus),
|
||||
pTask->status.schedStatus);
|
||||
}
|
||||
} else {
|
||||
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
|
||||
qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus),
|
||||
pTask->status.schedStatus);
|
||||
|
||||
if ((!streamTaskShouldStop(&pTask->status)) && (!streamTaskShouldPause(&pTask->status))) {
|
||||
streamSchedExec(pTask);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
qDebug("s-task:%s already started to exec by other thread, status:%s, sched-status:%d", pTask->id.idStr,
|
||||
qDebug("s-task:%s already started to exec by other thread, status:%s, sched-status:%d", id,
|
||||
streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus);
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ static void streamTaskSetRangeStreamCalc(SStreamTask* pTask);
|
|||
static void streamTaskSetForReady(SStreamTask* pTask, int32_t numOfReqs) {
|
||||
ASSERT(pTask->status.downstreamReady == 0);
|
||||
pTask->status.downstreamReady = 1;
|
||||
int64_t el = (taosGetTimestampMs() - pTask->initTs);
|
||||
int64_t el = (taosGetTimestampMs() - pTask->tsInfo.init);
|
||||
|
||||
qDebug("s-task:%s all %d downstream ready, init completed, elapsed time:%dms, task status:%s",
|
||||
pTask->id.idStr, numOfReqs, (int32_t) el, streamGetTaskStatusStr(pTask->status.taskStatus));
|
||||
|
@ -663,7 +663,7 @@ void streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer) {
|
|||
// no input data yet. no need to execute the secondardy scan while stream task halt
|
||||
streamTaskRecoverSetAllStepFinished(pTask);
|
||||
qDebug(
|
||||
"s-task:%s no need to perform secondary scan-history-data(step 2), since no data ingest during secondary scan",
|
||||
"s-task:%s no need to perform secondary scan-history data(step 2), since no data ingest during secondary scan",
|
||||
pTask->id.idStr);
|
||||
} else {
|
||||
// 2. do secondary scan of the history data, the time window remain, and the version range is updated to
|
||||
|
|
Loading…
Reference in New Issue