enh(stream): add checkpoint queue for source tasks.

This commit is contained in:
Haojun Liao 2025-01-26 18:19:37 +08:00
parent 2e24c8aaa9
commit 67853b4a54
3 changed files with 113 additions and 26 deletions

View File

@ -136,6 +136,7 @@ enum {
enum { enum {
STREAM_QUEUE__SUCESS = 1, STREAM_QUEUE__SUCESS = 1,
STREAM_QUEUE__FAILED, STREAM_QUEUE__FAILED,
STREAM_QUEUE__CHKPTFAILED,
STREAM_QUEUE__PROCESSING, STREAM_QUEUE__PROCESSING,
}; };

View File

@ -144,6 +144,8 @@ struct SStreamQueue {
STaosQall* qall; STaosQall* qall;
void* qItem; void* qItem;
int8_t status; int8_t status;
STaosQueue* pChkptQueue;
void* qChkptItem;
}; };
struct SStreamQueueItem { struct SStreamQueueItem {

View File

@ -47,7 +47,9 @@ static void streamQueueCleanup(SStreamQueue* pQueue) {
int32_t streamQueueOpen(int64_t cap, SStreamQueue** pQ) { int32_t streamQueueOpen(int64_t cap, SStreamQueue** pQ) {
*pQ = NULL; *pQ = NULL;
int32_t code = 0; int32_t code = 0;
int32_t lino = 0;
SStreamQueue* pQueue = taosMemoryCalloc(1, sizeof(SStreamQueue)); SStreamQueue* pQueue = taosMemoryCalloc(1, sizeof(SStreamQueue));
if (pQueue == NULL) { if (pQueue == NULL) {
@ -55,24 +57,26 @@ int32_t streamQueueOpen(int64_t cap, SStreamQueue** pQ) {
} }
code = taosOpenQueue(&pQueue->pQueue); code = taosOpenQueue(&pQueue->pQueue);
if (code) { TSDB_CHECK_CODE(code, lino, _error);
taosMemoryFreeClear(pQueue);
return code;
}
code = taosAllocateQall(&pQueue->qall); code = taosAllocateQall(&pQueue->qall);
if (code) { TSDB_CHECK_CODE(code, lino, _error);
taosCloseQueue(pQueue->pQueue);
taosMemoryFree(pQueue); code = taosOpenQueue(&pQueue->pChkptQueue);
return code; TSDB_CHECK_CODE(code, lino, _error);
}
pQueue->status = STREAM_QUEUE__SUCESS; pQueue->status = STREAM_QUEUE__SUCESS;
taosSetQueueCapacity(pQueue->pQueue, cap); taosSetQueueCapacity(pQueue->pQueue, cap);
taosSetQueueMemoryCapacity(pQueue->pQueue, cap * 1024); taosSetQueueMemoryCapacity(pQueue->pQueue, cap * 1024);
*pQ = pQueue; *pQ = pQueue;
return code; return code;
_error:
streamQueueClose(pQueue, 0);
stError("failed to open stream queue at line:%d, code:%s", lino, tstrerror(code));
return code;
} }
void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) { void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) {
@ -82,6 +86,11 @@ void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) {
taosFreeQall(pQueue->qall); taosFreeQall(pQueue->qall);
taosCloseQueue(pQueue->pQueue); taosCloseQueue(pQueue->pQueue);
pQueue->pQueue = NULL;
taosCloseQueue(pQueue->pChkptQueue);
pQueue->pChkptQueue = NULL;
taosMemoryFree(pQueue); taosMemoryFree(pQueue);
} }
@ -94,6 +103,46 @@ void streamQueueNextItem(SStreamQueue* pQueue, SStreamQueueItem** pItem) {
} else { } else {
pQueue->qItem = NULL; pQueue->qItem = NULL;
(void) taosGetQitem(pQueue->qall, &pQueue->qItem); (void) taosGetQitem(pQueue->qall, &pQueue->qItem);
if (pQueue->qItem == NULL) {
(void) taosReadAllQitems(pQueue->pQueue, pQueue->qall);
(void) taosGetQitem(pQueue->qall, &pQueue->qItem);
}
*pItem = streamQueueCurItem(pQueue);
}
}
void streamQueueNextItemInSourceQ(SStreamQueue* pQueue, SStreamQueueItem** pItem, ETaskStatus status) {
*pItem = NULL;
int8_t flag = atomic_exchange_8(&pQueue->status, STREAM_QUEUE__PROCESSING);
if (flag == STREAM_QUEUE__CHKPTFAILED) {
*pItem = pQueue->qChkptItem;
} else {
pQueue->qChkptItem = NULL;
taosReadQitem(pQueue->pChkptQueue, (void**) &pQueue->qChkptItem);
if (pQueue->qChkptItem != NULL) {
stDebug("read data from checkpoint queue, status:%d", status);
*pItem = pQueue->qChkptItem;
return;
}
// if in checkpoint status, not read data from ordinary input q.
if (status == TASK_STATUS__CK) {
stDebug("in checkpoint status, not ready data in normal queue");
return;
}
}
// let's try the ordinary input q
if (flag == STREAM_QUEUE__FAILED) {
*pItem = streamQueueCurItem(pQueue);
} else {
pQueue->qItem = NULL;
(void) taosGetQitem(pQueue->qall, &pQueue->qItem);
if (pQueue->qItem == NULL) { if (pQueue->qItem == NULL) {
(void) taosReadAllQitems(pQueue->pQueue, pQueue->qall); (void) taosReadAllQitems(pQueue->pQueue, pQueue->qall);
(void) taosGetQitem(pQueue->qall, &pQueue->qItem); (void) taosGetQitem(pQueue->qall, &pQueue->qItem);
@ -110,6 +159,7 @@ void streamQueueProcessSuccess(SStreamQueue* queue) {
} }
queue->qItem = NULL; queue->qItem = NULL;
queue->qChkptItem = NULL;
atomic_store_8(&queue->status, STREAM_QUEUE__SUCESS); atomic_store_8(&queue->status, STREAM_QUEUE__SUCESS);
} }
@ -121,6 +171,14 @@ void streamQueueProcessFail(SStreamQueue* queue) {
atomic_store_8(&queue->status, STREAM_QUEUE__FAILED); atomic_store_8(&queue->status, STREAM_QUEUE__FAILED);
} }
void streamQueueGetSourceChkptFailed(SStreamQueue* pQueue) {
if (atomic_load_8(&pQueue->status) != STREAM_QUEUE__PROCESSING) {
stError("invalid queue status:%d, expect:%d", atomic_load_8(&pQueue->status), STREAM_QUEUE__PROCESSING);
return;
}
atomic_store_8(&pQueue->status, STREAM_QUEUE__CHKPTFAILED);
}
bool streamQueueIsFull(const SStreamQueue* pQueue) { bool streamQueueIsFull(const SStreamQueue* pQueue) {
int32_t numOfItems = streamQueueGetNumOfItems(pQueue); int32_t numOfItems = streamQueueGetNumOfItems(pQueue);
if (numOfItems >= STREAM_TASK_QUEUE_CAPACITY) { if (numOfItems >= STREAM_TASK_QUEUE_CAPACITY) {
@ -175,8 +233,9 @@ const char* streamQueueItemGetTypeStr(int32_t type) {
EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks,
int32_t* blockSize) { int32_t* blockSize) {
const char* id = pTask->id.idStr; const char* id = pTask->id.idStr;
int32_t taskLevel = pTask->info.taskLevel; int32_t taskLevel = pTask->info.taskLevel;
SStreamQueue* pQueue = pTask->inputq.queue;
*pInput = NULL; *pInput = NULL;
*numOfBlocks = 0; *numOfBlocks = 0;
@ -189,13 +248,19 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte
} }
while (1) { while (1) {
if (streamTaskShouldPause(pTask) || streamTaskShouldStop(pTask)) { ETaskStatus status = streamTaskGetStatus(pTask).state;
stDebug("s-task:%s task should pause, extract input blocks:%d", id, *numOfBlocks); if (status == TASK_STATUS__PAUSE || status == TASK_STATUS__STOP || status == TASK_STATUS__STOP) {
stDebug("s-task:%s task should pause/stop, extract input blocks:%d", id, *numOfBlocks);
return EXEC_CONTINUE; return EXEC_CONTINUE;
} }
SStreamQueueItem* qItem = NULL; SStreamQueueItem* qItem = NULL;
streamQueueNextItem(pTask->inputq.queue, (SStreamQueueItem**)&qItem); if (taskLevel == TASK_LEVEL__SOURCE) {
streamQueueNextItemInSourceQ(pQueue, &qItem, status);
} else {
streamQueueNextItem(pQueue, &qItem);
}
if (qItem == NULL) { if (qItem == NULL) {
// restore the token to bucket // restore the token to bucket
if (*numOfBlocks > 0) { if (*numOfBlocks > 0) {
@ -225,14 +290,19 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte
*numOfBlocks = 1; *numOfBlocks = 1;
*pInput = qItem; *pInput = qItem;
return EXEC_CONTINUE; return EXEC_CONTINUE;
} else { // previous existed blocks needs to be handle, before handle the checkpoint msg block } else { // previous existed blocks needs to be handled, before handle the checkpoint msg block
stDebug("s-task:%s %s msg extracted, handle previous blocks, numOfBlocks:%d", id, p, *numOfBlocks); stDebug("s-task:%s %s msg extracted, handle previous blocks, numOfBlocks:%d", id, p, *numOfBlocks);
*blockSize = streamQueueItemGetSize(*pInput); *blockSize = streamQueueItemGetSize(*pInput);
if (taskLevel == TASK_LEVEL__SINK) { if (taskLevel == TASK_LEVEL__SINK) {
streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize); streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize);
} }
streamQueueProcessFail(pTask->inputq.queue); if ((type == STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__CHECKPOINT) &&
(taskLevel == TASK_LEVEL__SOURCE)) {
streamQueueGetSourceChkptFailed(pQueue);
} else {
streamQueueProcessFail(pQueue);
}
return EXEC_CONTINUE; return EXEC_CONTINUE;
} }
} else { } else {
@ -252,7 +322,7 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte
streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize); streamTaskConsumeQuota(pTask->outputInfo.pTokenBucket, *blockSize);
} }
streamQueueProcessFail(pTask->inputq.queue); streamQueueProcessFail(pQueue);
return EXEC_CONTINUE; return EXEC_CONTINUE;
} }
@ -260,7 +330,7 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte
} }
*numOfBlocks += 1; *numOfBlocks += 1;
streamQueueProcessSuccess(pTask->inputq.queue); streamQueueProcessSuccess(pQueue);
if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) { if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) {
stDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM); stDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM);
@ -279,6 +349,7 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte
int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) { int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) {
int8_t type = pItem->type; int8_t type = pItem->type;
STaosQueue* pQueue = pTask->inputq.queue->pQueue; STaosQueue* pQueue = pTask->inputq.queue->pQueue;
int32_t level = pTask->info.taskLevel;
int32_t total = streamQueueGetNumOfItems(pTask->inputq.queue) + 1; int32_t total = streamQueueGetNumOfItems(pTask->inputq.queue) + 1;
if (type == STREAM_INPUT__DATA_SUBMIT) { if (type == STREAM_INPUT__DATA_SUBMIT) {
@ -326,15 +397,28 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
stDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); stDebug("s-task:%s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size);
} else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER || } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER ||
type == STREAM_INPUT__TRANS_STATE || type == STREAM_INPUT__DATA_RETRIEVE) { type == STREAM_INPUT__TRANS_STATE || type == STREAM_INPUT__DATA_RETRIEVE) {
int32_t code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) {
streamFreeQitem(pItem);
return code;
}
double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue)); int32_t code = 0;
stDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, if ((type == STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__CHECKPOINT) && level == TASK_LEVEL__SOURCE) {
pTask->info.taskLevel, streamQueueItemGetTypeStr(type), total, size); STaosQueue* pChkptQ = pTask->inputq.queue->pChkptQueue;
code = taosWriteQitem(pChkptQ, pItem);
double size = SIZE_IN_MiB(taosQueueMemorySize(pChkptQ));
int32_t num = taosQueueItemSize(pChkptQ);
stDebug("s-task:%s level:%d %s checkpoint enqueue ctrl queue, total in queue:%d, size:%.2fMiB, data queue:%d",
pTask->id.idStr, pTask->info.taskLevel, streamQueueItemGetTypeStr(type), num, size, (total - 1));
} else {
code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) {
streamFreeQitem(pItem);
return code;
}
double size = SIZE_IN_MiB(taosQueueMemorySize(pQueue));
stDebug("s-task:%s level:%d %s blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr,
pTask->info.taskLevel, streamQueueItemGetTypeStr(type), total, size);
}
} else if (type == STREAM_INPUT__GET_RES) { } else if (type == STREAM_INPUT__GET_RES) {
// use the default memory limit, refactor later. // use the default memory limit, refactor later.
int32_t code = taosWriteQitem(pQueue, pItem); int32_t code = taosWriteQitem(pQueue, pItem);