enh(stream): scan wal by using timer to trigger it.

This commit is contained in:
Haojun Liao 2024-01-05 14:49:33 +08:00
parent 0c8d5e3586
commit 1912de9b49
5 changed files with 99 additions and 35 deletions

View File

@ -482,6 +482,11 @@ typedef struct STaskUpdateInfo {
int32_t transId; int32_t transId;
} STaskUpdateInfo; } STaskUpdateInfo;
typedef struct SScanWalInfo {
int32_t scanCounter;
tmr_h scanTimer;
} SScanWalInfo;
// meta // meta
typedef struct SStreamMeta { typedef struct SStreamMeta {
char* path; char* path;
@ -499,7 +504,7 @@ typedef struct SStreamMeta {
bool sendMsgBeforeClosing; // send hb to mnode before close all tasks when switch to follower. bool sendMsgBeforeClosing; // send hb to mnode before close all tasks when switch to follower.
STaskStartInfo startInfo; STaskStartInfo startInfo;
TdThreadRwlock lock; TdThreadRwlock lock;
int32_t walScanCounter; SScanWalInfo scanInfo;
void* streamBackend; void* streamBackend;
int64_t streamBackendRid; int64_t streamBackendRid;
SHashObj* pTaskDbUnique; SHashObj* pTaskDbUnique;

View File

@ -42,6 +42,7 @@ extern "C" {
// clang-format on // clang-format on
typedef struct STqOffsetStore STqOffsetStore; typedef struct STqOffsetStore STqOffsetStore;
extern void* tqTimer;
#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) #define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0)

View File

@ -19,13 +19,25 @@
// 0: not init // 0: not init
// 1: already inited // 1: already inited
// 2: wait to be inited or cleaup // 2: wait to be inited or cleanup
static int32_t tqInitialize(STQ* pTq); static int32_t tqInitialize(STQ* pTq);
static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; } static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; }
static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) { pHandle->status = TMQ_HANDLE_STATUS_EXEC; } static FORCE_INLINE void tqSetHandleExec(STqHandle* pHandle) { pHandle->status = TMQ_HANDLE_STATUS_EXEC; }
static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) { pHandle->status = TMQ_HANDLE_STATUS_IDLE; } static FORCE_INLINE void tqSetHandleIdle(STqHandle* pHandle) { pHandle->status = TMQ_HANDLE_STATUS_IDLE; }
int32_t tqTimerInit() {
tqTimer = taosTmrInit(100, 100, 1000, "TQ");
if (tqTimer == NULL) {
return -1;
}
return 0;
}
void tqTimerCleanUp() {
taosTmrCleanUp(tqTimer);
}
void tqDestroyTqHandle(void* data) { void tqDestroyTqHandle(void* data) {
STqHandle* pData = (STqHandle*)data; STqHandle* pData = (STqHandle*)data;
qDestroyTask(pData->execHandle.task); qDestroyTask(pData->execHandle.task);
@ -106,6 +118,7 @@ int32_t tqInitialize(STQ* pTq) {
return -1; return -1;
} }
tqTimerInit();
return 0; return 0;
} }
@ -136,6 +149,8 @@ void tqClose(STQ* pTq) {
taosMemoryFree(pTq->path); taosMemoryFree(pTq->path);
tqMetaClose(pTq); tqMetaClose(pTq);
streamMetaClose(pTq->pStreamMeta); streamMetaClose(pTq->pStreamMeta);
tqTimerCleanUp();
qDebug("end to close tq"); qDebug("end to close tq");
taosMemoryFree(pTq); taosMemoryFree(pTq);
} }
@ -1055,7 +1070,8 @@ int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg) {
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
SStreamTaskRunReq* pReq = pMsg->pCont; SStreamTaskRunReq* pReq = pMsg->pCont;
if (pReq->reqType == STREAM_EXEC_T_EXTRACT_WAL_DATA) { // all tasks are extracted submit data from the wal // extracted submit data from wal files for all tasks
if (pReq->reqType == STREAM_EXEC_T_EXTRACT_WAL_DATA) {
tqScanWal(pTq); tqScanWal(pTq);
return 0; return 0;
} }
@ -1351,14 +1367,8 @@ int32_t tqProcessTaskDropHTask(STQ* pTq, SRpcMsg* pMsg) {
} }
taosThreadMutexLock(&pTask->lock); taosThreadMutexLock(&pTask->lock);
ETaskStatus status = streamTaskGetStatus(pTask)->state;
// if (status == TASK_STATUS__STREAM_SCAN_HISTORY) {
// streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_SCANHIST_DONE);
// }
SStreamTaskId id = {.streamId = pTask->hTaskInfo.id.streamId, .taskId = pTask->hTaskInfo.id.taskId}; SStreamTaskId id = {.streamId = pTask->hTaskInfo.id.streamId, .taskId = pTask->hTaskInfo.id.taskId};
streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &id); streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &id);
taosThreadMutexUnlock(&pTask->lock); taosThreadMutexUnlock(&pTask->lock);
// clear the scheduler status // clear the scheduler status

View File

@ -19,11 +19,14 @@
#define MAX_REPEAT_SCAN_THRESHOLD 3 #define MAX_REPEAT_SCAN_THRESHOLD 3
#define SCAN_WAL_IDLE_DURATION 100 #define SCAN_WAL_IDLE_DURATION 100
void* tqTimer = NULL;
static int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle); static int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle);
static int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId); static int32_t setWalReaderStartOffset(SStreamTask* pTask, int32_t vgId);
static bool handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver); static bool handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver);
static bool taskReadyForDataFromWal(SStreamTask* pTask); static bool taskReadyForDataFromWal(SStreamTask* pTask);
static bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems); static bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems);
static int32_t tqScanWalInFuture(STQ* pTq, int32_t numOfTasks, int32_t idleDuration);
// extract data blocks(submit/delete) from WAL, and add them into the input queue for all the sources tasks. // extract data blocks(submit/delete) from WAL, and add them into the input queue for all the sources tasks.
int32_t tqScanWal(STQ* pTq) { int32_t tqScanWal(STQ* pTq) {
@ -31,33 +34,78 @@ int32_t tqScanWal(STQ* pTq) {
SStreamMeta* pMeta = pTq->pStreamMeta; SStreamMeta* pMeta = pTq->pStreamMeta;
int64_t st = taosGetTimestampMs(); int64_t st = taosGetTimestampMs();
while (1) { tqDebug("vgId:%d continue to check if data in wal are available, scanCounter:%d", vgId, pMeta->scanInfo.scanCounter);
tqDebug("vgId:%d continue check if data in wal are available, walScanCounter:%d", vgId, pMeta->walScanCounter);
// check all tasks // check all tasks
bool shouldIdle = true; int32_t numOfTasks = 0;
doScanWalForAllTasks(pMeta, &shouldIdle); bool shouldIdle = true;
doScanWalForAllTasks(pMeta, &shouldIdle);
streamMetaWLock(pMeta); streamMetaWLock(pMeta);
int32_t times = (--pMeta->walScanCounter); int32_t times = (--pMeta->scanInfo.scanCounter);
ASSERT(pMeta->walScanCounter >= 0); ASSERT(pMeta->scanInfo.scanCounter >= 0);
streamMetaWUnLock(pMeta);
if (times > 0) { numOfTasks = taosArrayGetSize(pMeta->pTaskList);
tqDebug("vgId:%d scan wal for stream tasks for %d times in %dms", vgId, times, SCAN_WAL_IDLE_DURATION); streamMetaWUnLock(pMeta);
} else { // times <= 0
break;
}
// todo: remove the sleep
taosMsleep(SCAN_WAL_IDLE_DURATION);
}
int64_t el = (taosGetTimestampMs() - st); int64_t el = (taosGetTimestampMs() - st);
tqDebug("vgId:%d scan wal for stream tasks completed, elapsed time:%" PRId64 " ms", vgId, el); tqDebug("vgId:%d scan wal for stream tasks completed, elapsed time:%" PRId64 " ms", vgId, el);
if (times > 0) {
tqDebug("vgId:%d scan wal for stream tasks for %d times in %dms", vgId, times, SCAN_WAL_IDLE_DURATION);
tqScanWalInFuture(pTq, numOfTasks, SCAN_WAL_IDLE_DURATION);
}
return 0; return 0;
} }
typedef struct SBuildScanWalMsgParam {
STQ* pTq;
int32_t numOfTasks;
} SBuildScanWalMsgParam;
static void doStartScanWal(void* param, void* tmrId) {
SBuildScanWalMsgParam* pParam = (SBuildScanWalMsgParam*) param;
int32_t vgId = pParam->pTq->pStreamMeta->vgId;
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
taosMemoryFree(pParam);
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
return;
}
tqDebug("vgId:%d create msg to start wal scan, numOfTasks:%d, vnd restored:%d", vgId, pParam->numOfTasks,
pParam->pTq->pVnode->restored);
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
pRunReq->taskId = 0;
pRunReq->reqType = STREAM_EXEC_T_EXTRACT_WAL_DATA;
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(&pParam->pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
taosMemoryFree(pParam);
}
int32_t tqScanWalInFuture(STQ* pTq, int32_t numOfTasks, int32_t idleDuration) {
SStreamMeta* pMeta = pTq->pStreamMeta;
SBuildScanWalMsgParam* pParam = taosMemoryMalloc(sizeof(SBuildScanWalMsgParam));
pParam->pTq = pTq;
pParam->numOfTasks = numOfTasks;
if (pMeta->scanInfo.scanTimer == NULL) {
pMeta->scanInfo.scanTimer = taosTmrStart(doStartScanWal, idleDuration, pParam, tqTimer);
} else {
taosTmrReset(doStartScanWal, idleDuration, pParam, tqTimer, &pMeta->scanInfo.scanTimer);
}
return TSDB_CODE_SUCCESS;
}
int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { int32_t tqScanWalAsync(STQ* pTq, bool ckPause) {
int32_t vgId = TD_VID(pTq->pVnode); int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta; SStreamMeta* pMeta = pTq->pStreamMeta;
@ -78,23 +126,23 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) {
return 0; return 0;
} }
pMeta->walScanCounter += 1; pMeta->scanInfo.scanCounter += 1;
if (pMeta->walScanCounter > MAX_REPEAT_SCAN_THRESHOLD) { if (pMeta->scanInfo.scanCounter > MAX_REPEAT_SCAN_THRESHOLD) {
pMeta->walScanCounter = MAX_REPEAT_SCAN_THRESHOLD; pMeta->scanInfo.scanCounter = MAX_REPEAT_SCAN_THRESHOLD;
} }
if (pMeta->walScanCounter > 1) { if (pMeta->scanInfo.scanCounter > 1) {
tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter); tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->scanInfo.scanCounter);
streamMetaWUnLock(pMeta); streamMetaWUnLock(pMeta);
return 0; return 0;
} }
int32_t numOfPauseTasks = pTq->pStreamMeta->numOfPausedTasks; int32_t numOfPauseTasks = pMeta->numOfPausedTasks;
if (ckPause && numOfTasks == numOfPauseTasks) { if (ckPause && numOfTasks == numOfPauseTasks) {
tqDebug("vgId:%d ignore all submit, all streams had been paused, reset the walScanCounter", vgId); tqDebug("vgId:%d ignore all submit, all streams had been paused, reset the walScanCounter", vgId);
// reset the counter value, since we do not launch the scan wal operation. // reset the counter value, since we do not launch the scan wal operation.
pMeta->walScanCounter = 0; pMeta->scanInfo.scanCounter = 0;
streamMetaWUnLock(pMeta); streamMetaWUnLock(pMeta);
return 0; return 0;
} }

View File

@ -359,7 +359,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
goto _err; goto _err;
} }
pMeta->walScanCounter = 0; pMeta->scanInfo.scanCounter = 0;
pMeta->vgId = vgId; pMeta->vgId = vgId;
pMeta->ahandle = ahandle; pMeta->ahandle = ahandle;
pMeta->expandFunc = expandFunc; pMeta->expandFunc = expandFunc;