From 472020a2b968e28ffe14630ae0979ec57f919608 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 18 Feb 2024 14:07:15 +0800 Subject: [PATCH 01/18] enh(stream): handle event async function. --- include/libs/stream/tstream.h | 5 +- source/dnode/vnode/src/tq/tq.c | 67 ++++++++----- source/libs/stream/inc/streamsm.h | 20 ++-- source/libs/stream/src/streamStart.c | 2 +- source/libs/stream/src/streamTaskSm.c | 139 ++++++++++++++++++++++---- 5 files changed, 176 insertions(+), 57 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 2135bb706b..587e762448 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -799,7 +799,10 @@ int8_t streamTaskSetSchedStatusInactive(SStreamTask* pTask); int32_t streamTaskClearHTaskAttr(SStreamTask* pTask, bool metaLock); int32_t streamTaskHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event); -int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent event); + +typedef int32_t (*__state_trans_user_fn)(SStreamTask*, void* param); +int32_t streamTaskHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, __state_trans_user_fn callbackFn, void* param); +int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent event, __state_trans_user_fn callbackFn, void* param); void streamTaskRestoreStatus(SStreamTask* pTask); int32_t streamTaskStop(SStreamTask* pTask); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index bde6889ecd..940a8e0c49 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -916,6 +916,22 @@ static void doStartFillhistoryStep2(SStreamTask* pTask, SStreamTask* pStreamTask } } +int32_t handleStep2Async(SStreamTask* pStreamTask, void* param) { + STQ* pTq = param; + + SStreamMeta* pMeta = pStreamTask->pMeta; + STaskId hId = pStreamTask->hTaskInfo.id; + SStreamTask* pTask = streamMetaAcquireTask(pStreamTask->pMeta, hId.streamId, hId.taskId); + if (pTask == NULL) { + // todo handle error + } + + doStartFillhistoryStep2(pTask, pStreamTask, pTq); + + streamMetaReleaseTask(pMeta, pTask); + return 0; +} + // this function should be executed by only one thread, so we set an sentinel to protect this function int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)pMsg->pCont; @@ -988,7 +1004,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { streamReExecScanHistoryFuture(pTask, retInfo.idleTime); } else { SStreamTaskState* p = streamTaskGetStatus(pTask); - ETaskStatus s = p->state; + ETaskStatus s = p->state; if (s == TASK_STATUS__PAUSE) { tqDebug("s-task:%s is paused in the step1, elapsed time:%.2fs total:%.2fs, sched-status:%d", pTask->id.idStr, @@ -1006,37 +1022,34 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { // the following procedure should be executed, no matter status is stop/pause or not tqDebug("s-task:%s scan-history(step 1) ended, elapsed time:%.2fs", id, pTask->execInfo.step1El); - if (pTask->info.fillHistory) { - SStreamTask* pStreamTask = NULL; + ASSERT(pTask->info.fillHistory == 1); - // 1. get the related stream task - pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId); - if (pStreamTask == NULL) { - tqError("failed to find s-task:0x%" PRIx64 ", it may have been destroyed, drop related fill-history task:%s", - pTask->streamTaskId.taskId, pTask->id.idStr); + // 1. get the related stream task + SStreamTask* pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId); + if (pStreamTask == NULL) { + tqError("failed to find s-task:0x%" PRIx64 ", it may have been destroyed, drop related fill-history task:%s", + pTask->streamTaskId.taskId, pTask->id.idStr); - tqDebug("s-task:%s fill-history task set status to be dropping", id); - streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id); + tqDebug("s-task:%s fill-history task set status to be dropping", id); + streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id); - atomic_store_32(&pTask->status.inScanHistorySentinel, 0); - streamMetaReleaseTask(pMeta, pTask); - return -1; - } - - ASSERT(pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE); - - code = streamTaskHandleEvent(pStreamTask->status.pSM, TASK_EVENT_HALT); - if (code == TSDB_CODE_SUCCESS) { - doStartFillhistoryStep2(pTask, pStreamTask, pTq); - } else { - tqError("s-task:%s failed to halt s-task:%s, not launch step2", id, pStreamTask->id.idStr); - } - - streamMetaReleaseTask(pMeta, pStreamTask); - } else { - ASSERT(0); + atomic_store_32(&pTask->status.inScanHistorySentinel, 0); + streamMetaReleaseTask(pMeta, pTask); + return -1; } + ASSERT(pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE); + + // code = streamTaskHandleEvent(pStreamTask->status.pSM, TASK_EVENT_HALT); + code = streamTaskHandleEventAsync(pStreamTask->status.pSM, TASK_EVENT_HALT, handleStep2Async, pTq); +// if (code == TSDB_CODE_SUCCESS) { +// doStartFillhistoryStep2(pTask, pStreamTask, pTq); +// } else { +// tqError("s-task:%s failed to halt s-task:%s, not launch step2", id, pStreamTask->id.idStr); +// } + + streamMetaReleaseTask(pMeta, pStreamTask); + atomic_store_32(&pTask->status.inScanHistorySentinel, 0); streamMetaReleaseTask(pMeta, pTask); return code; diff --git a/source/libs/stream/inc/streamsm.h b/source/libs/stream/inc/streamsm.h index 22e1c4497b..47e0ce1b55 100644 --- a/source/libs/stream/inc/streamsm.h +++ b/source/libs/stream/inc/streamsm.h @@ -26,21 +26,21 @@ extern "C" { typedef int32_t (*__state_trans_fn)(SStreamTask*); typedef int32_t (*__state_trans_succ_fn)(SStreamTask*); -typedef struct SAttachedEventInfo { +typedef struct SFutureHandleEventInfo { ETaskStatus status; // required status that this event can be handled EStreamTaskEvent event; // the delayed handled event void* pParam; - void* pFn; -} SAttachedEventInfo; + __state_trans_user_fn callBackFn; +} SFutureHandleEventInfo; typedef struct STaskStateTrans { - bool autoInvokeEndFn; - SStreamTaskState state; - EStreamTaskEvent event; - SStreamTaskState next; - __state_trans_fn pAction; - __state_trans_succ_fn pSuccAction; - SAttachedEventInfo attachEvent; + bool autoInvokeEndFn; + SStreamTaskState state; + EStreamTaskEvent event; + SStreamTaskState next; + __state_trans_fn pAction; + __state_trans_succ_fn pSuccAction; + SFutureHandleEventInfo attachEvent; } STaskStateTrans; struct SStreamTaskSM { diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index ee98bc801b..dd99f59f91 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -385,7 +385,7 @@ int32_t streamTaskOnScanhistoryTaskReady(SStreamTask* pTask) { void doProcessDownstreamReadyRsp(SStreamTask* pTask) { EStreamTaskEvent event = (pTask->info.fillHistory == 0) ? TASK_EVENT_INIT : TASK_EVENT_INIT_SCANHIST; - streamTaskOnHandleEventSuccess(pTask->status.pSM, event); + streamTaskOnHandleEventSuccess(pTask->status.pSM, event, NULL, NULL); int64_t initTs = pTask->execInfo.init; int64_t startTs = pTask->execInfo.start; diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 83e71c42bc..ecd3fba725 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -63,16 +63,20 @@ static int32_t streamTaskSendTransSuccessMsg(SStreamTask* pTask); static STaskStateTrans createStateTransform(ETaskStatus current, ETaskStatus next, EStreamTaskEvent event, __state_trans_fn fn, __state_trans_succ_fn succFn, - SAttachedEventInfo* pEventInfo, bool autoInvoke); + SFutureHandleEventInfo* pEventInfo, bool autoInvoke); static int32_t dummyFn(SStreamTask* UNUSED_PARAM(p)) { return TSDB_CODE_SUCCESS; } -static int32_t attachEvent(SStreamTask* pTask, SAttachedEventInfo* pEvtInfo) { +static int32_t attachNextHandledEvent(SStreamTask* pTask, SFutureHandleEventInfo* pEvtInfo) { char* p = streamTaskGetStatus(pTask)->name; stDebug("s-task:%s status:%s attach event:%s required status:%s, since not allowed to handle it", pTask->id.idStr, p, GET_EVT_NAME(pEvtInfo->event), StreamTaskStatusList[pEvtInfo->status].name); - taosArrayPush(pTask->status.pSM->pWaitingEventList, pEvtInfo); + + SArray* pList = pTask->status.pSM->pWaitingEventList; + taosArrayPush(pList, pEvtInfo); + + stDebug("s-task:%s add into waiting list, total waiting events:%d", pTask->id.idStr, (int32_t)taosArrayGetSize(pList)); return 0; } @@ -170,9 +174,11 @@ static int32_t doHandleWaitingEvent(SStreamTaskSM* pSM, const char* pEventName, stDebug("s-task:%s handle event:%s completed, elapsed time:%" PRId64 "ms state:%s -> %s", pTask->id.idStr, pEventName, el, pSM->prev.state.name, pSM->current.name); - SAttachedEventInfo* pEvtInfo = taosArrayGet(pSM->pWaitingEventList, 0); + ASSERT(taosArrayGetSize(pSM->pWaitingEventList) == 1); - // OK, let's handle the attached event, since the task has reached the required status now + SFutureHandleEventInfo* pEvtInfo = taosArrayGet(pSM->pWaitingEventList, 0); + + // OK, let's handle the waiting event, since the task has reached the required status now if (pSM->current.state == pEvtInfo->status) { stDebug("s-task:%s handle the event:%s in waiting list, state:%s", pTask->id.idStr, GET_EVT_NAME(pEvtInfo->event), pSM->current.name); @@ -189,7 +195,7 @@ static int32_t doHandleWaitingEvent(SStreamTaskSM* pSM, const char* pEventName, code = pNextTrans->pAction(pSM->pTask); if (pNextTrans->autoInvokeEndFn) { - return streamTaskOnHandleEventSuccess(pSM, pNextTrans->event); + return streamTaskOnHandleEventSuccess(pSM, pNextTrans->event, pEvtInfo->callBackFn, pEvtInfo->pParam); } else { return code; } @@ -242,7 +248,7 @@ SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask) { } pSM->pTask = pTask; - pSM->pWaitingEventList = taosArrayInit(4, sizeof(SAttachedEventInfo)); + pSM->pWaitingEventList = taosArrayInit(4, sizeof(SFutureHandleEventInfo)); if (pSM->pWaitingEventList == NULL) { taosMemoryFree(pSM); @@ -273,7 +279,7 @@ static int32_t doHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event, STaskSt const char* id = pTask->id.idStr; if (pTrans->attachEvent.event != 0) { - attachEvent(pTask, &pTrans->attachEvent); + attachNextHandledEvent(pTask, &pTrans->attachEvent); taosThreadMutexUnlock(&pTask->lock); while (1) { @@ -303,7 +309,53 @@ static int32_t doHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event, STaskSt // todo handle error code; if (pTrans->autoInvokeEndFn) { - streamTaskOnHandleEventSuccess(pSM, event); + streamTaskOnHandleEventSuccess(pSM, event, NULL, NULL); + } + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t doHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, STaskStateTrans* pTrans, __state_trans_user_fn callbackFn, void* param) { + SStreamTask* pTask = pSM->pTask; + const char* id = pTask->id.idStr; + + if (pTrans->attachEvent.event != 0) { + SFutureHandleEventInfo info = pTrans->attachEvent; + info.pParam = param; + info.callBackFn = callbackFn; + + attachNextHandledEvent(pTask, &info); + taosThreadMutexUnlock(&pTask->lock); + +// while (1) { +// // wait for the task to be here +// taosThreadMutexLock(&pTask->lock); +// ETaskStatus s = streamTaskGetStatus(pTask)->state; +// taosThreadMutexUnlock(&pTask->lock); +// +// if ((s == pTrans->next.state) && (pSM->prev.evt == pTrans->event)) {// this event has been handled already +// stDebug("s-task:%s attached event:%s handled", id, GET_EVT_NAME(pTrans->event)); +// return TSDB_CODE_SUCCESS; +// } else if (s != TASK_STATUS__DROPPING && s != TASK_STATUS__STOP && s != TASK_STATUS__UNINIT) { +// stDebug("s-task:%s not handle event:%s yet, wait for 100ms and recheck", id, GET_EVT_NAME(event)); +// taosMsleep(100); +// } else { +// stDebug("s-task:%s is dropped or stopped already, not wait.", id); +// return TSDB_CODE_STREAM_INVALID_STATETRANS; +// } +// } + + } else { // override current active trans + pSM->pActiveTrans = pTrans; + pSM->startTs = taosGetTimestampMs(); + taosThreadMutexUnlock(&pTask->lock); + + int32_t code = pTrans->pAction(pTask); + // todo handle error code; + + if (pTrans->autoInvokeEndFn) { + streamTaskOnHandleEventSuccess(pSM, event, NULL, NULL); } } @@ -349,6 +401,46 @@ int32_t streamTaskHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event) { return code; } +int32_t streamTaskHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, __state_trans_user_fn callbackFn, void* param) { + int32_t code = TSDB_CODE_SUCCESS; + SStreamTask* pTask = pSM->pTask; + STaskStateTrans* pTrans = NULL; + + while (1) { + taosThreadMutexLock(&pTask->lock); + + if (pSM->pActiveTrans != NULL && pSM->pActiveTrans->autoInvokeEndFn) { + EStreamTaskEvent evt = pSM->pActiveTrans->event; + taosThreadMutexUnlock(&pTask->lock); + + stDebug("s-task:%s status:%s handling event:%s by some other thread, wait for 100ms and check if completed", + pTask->id.idStr, pSM->current.name, GET_EVT_NAME(evt)); + ASSERT(0); + taosMsleep(100); + } else { + // no active event trans exists, handle this event directly + pTrans = streamTaskFindTransform(pSM->current.state, event); + if (pTrans == NULL) { + stDebug("s-task:%s failed to handle event:%s", pTask->id.idStr, GET_EVT_NAME(event)); + taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_STREAM_INVALID_STATETRANS; + } + + if (pSM->pActiveTrans != NULL) { + // currently in some state transfer procedure, not auto invoke transfer, quit from this prcedure + stDebug("s-task:%s event:%s handle procedure quit, status %s -> %s failed, handle event %s now", + pTask->id.idStr, GET_EVT_NAME(pSM->pActiveTrans->event), pSM->current.name, + pSM->pActiveTrans->next.name, GET_EVT_NAME(event)); + } + + code = doHandleEventAsync(pSM, event, pTrans, callbackFn, param); + break; + } + } + + return code; +} + static void keepPrevInfo(SStreamTaskSM* pSM) { STaskStateTrans* pTrans = pSM->pActiveTrans; @@ -356,8 +448,9 @@ static void keepPrevInfo(SStreamTaskSM* pSM) { pSM->prev.evt = pTrans->event; } -int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent event) { +int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent event, __state_trans_user_fn callbackFn, void* param) { SStreamTask* pTask = pSM->pTask; + const char* id = pTask->id.idStr; // do update the task status taosThreadMutexLock(&pTask->lock); @@ -369,16 +462,16 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even s == TASK_STATUS__UNINIT || s == TASK_STATUS__READY); // the pSM->prev.evt may be 0, so print string is not appropriate. - stDebug("s-task:%s event:%s handled failed, current status:%s, trigger event:%s", pTask->id.idStr, - GET_EVT_NAME(event), pSM->current.name, GET_EVT_NAME(pSM->prev.evt)); + stDebug("s-task:%s event:%s handled failed, current status:%s, trigger event:%s", id, GET_EVT_NAME(event), + pSM->current.name, GET_EVT_NAME(pSM->prev.evt)); taosThreadMutexUnlock(&pTask->lock); return TSDB_CODE_STREAM_INVALID_STATETRANS; } if (pTrans->event != event) { - stWarn("s-task:%s handle event:%s failed, current status:%s, active trans evt:%s", pTask->id.idStr, - GET_EVT_NAME(event), pSM->current.name, GET_EVT_NAME(pTrans->event)); + stWarn("s-task:%s handle event:%s failed, current status:%s, active trans evt:%s", id, GET_EVT_NAME(event), + pSM->current.name, GET_EVT_NAME(pTrans->event)); taosThreadMutexUnlock(&pTask->lock); return TSDB_CODE_STREAM_INVALID_STATETRANS; } @@ -388,16 +481,26 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even pSM->current = pTrans->next; pSM->pActiveTrans = NULL; + // todo remove it // on success callback, add into lock if necessary, or maybe we should add an option for this? pTrans->pSuccAction(pTask); + // after handling the callback function assigned by invoker, go on handling the waiting tasks + if (callbackFn != NULL) { + stDebug("s-task:%s start to handle user-specified callback fn for event:%s", id, GET_EVT_NAME(pTrans->event)); + callbackFn(pSM->pTask, param); + + stDebug("s-task:%s handle user-specified callback fn for event:%s completed", id, GET_EVT_NAME(pTrans->event)); + } + + // tasks in waiting list if (taosArrayGetSize(pSM->pWaitingEventList) > 0) { doHandleWaitingEvent(pSM, GET_EVT_NAME(pTrans->event), pTask); } else { taosThreadMutexUnlock(&pTask->lock); int64_t el = (taosGetTimestampMs() - pSM->startTs); - stDebug("s-task:%s handle event:%s completed, elapsed time:%" PRId64 "ms state:%s -> %s", pTask->id.idStr, + stDebug("s-task:%s handle event:%s completed, elapsed time:%" PRId64 "ms state:%s -> %s", id, GET_EVT_NAME(pTrans->event), el, pSM->prev.state.name, pSM->current.name); } @@ -453,7 +556,7 @@ void streamTaskSetStatusReady(SStreamTask* pTask) { } STaskStateTrans createStateTransform(ETaskStatus current, ETaskStatus next, EStreamTaskEvent event, __state_trans_fn fn, - __state_trans_succ_fn succFn, SAttachedEventInfo* pEventInfo, bool autoInvoke) { + __state_trans_succ_fn succFn, SFutureHandleEventInfo* pEventInfo, bool autoInvoke) { STaskStateTrans trans = {0}; trans.state = StreamTaskStatusList[current]; trans.next = StreamTaskStatusList[next]; @@ -497,7 +600,7 @@ void doInitStateTransferTable(void) { trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); - SAttachedEventInfo info = {.status = TASK_STATUS__READY, .event = TASK_EVENT_HALT}; + SFutureHandleEventInfo info = {.status = TASK_STATUS__READY, .event = TASK_EVENT_HALT}; trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true); taosArrayPush(streamTaskSMTrans, &trans); @@ -518,7 +621,7 @@ void doInitStateTransferTable(void) { trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); - info = (SAttachedEventInfo){.status = TASK_STATUS__READY, .event = TASK_EVENT_PAUSE}; + info = (SFutureHandleEventInfo){.status = TASK_STATUS__READY, .event = TASK_EVENT_PAUSE}; trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, &info, true); taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, &info, true); From dbea34ce0ea9621fc01d08b35dbc266de708a55f Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 18 Feb 2024 14:34:42 +0800 Subject: [PATCH 02/18] fix(stream): set the callback function for handle event. --- source/libs/stream/src/streamTaskSm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index ecd3fba725..4ca0040941 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -355,7 +355,7 @@ static int32_t doHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, ST // todo handle error code; if (pTrans->autoInvokeEndFn) { - streamTaskOnHandleEventSuccess(pSM, event, NULL, NULL); + streamTaskOnHandleEventSuccess(pSM, event, callbackFn, param); } } From c4f9bee62922e7b738f872cf66e2d9c30d29f939 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 18 Feb 2024 15:46:37 +0800 Subject: [PATCH 03/18] fix(stream): add into buffer pool before start trans, to avoid false alarm on orphan task. --- source/dnode/mnode/impl/src/mndStream.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 173d28c705..cc6b2eadcb 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -720,6 +720,13 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } + // add into buffer firstly + // to make sure when the hb from vnode arrived, the newly created tasks have been in the task map already. + taosThreadMutexLock(&execInfo.lock); + mDebug("stream tasks register into node list"); + saveStreamTasksInfo(&streamObj, &execInfo); + taosThreadMutexUnlock(&execInfo.lock); + // execute creation if (mndTransPrepare(pMnode, pTrans) != 0) { mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); @@ -729,12 +736,6 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { mndTransDrop(pTrans); - taosThreadMutexLock(&execInfo.lock); - - mDebug("stream tasks register into node list"); - saveStreamTasksInfo(&streamObj, &execInfo); - taosThreadMutexUnlock(&execInfo.lock); - SName dbname = {0}; tNameFromString(&dbname, createReq.sourceDB, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); From 245f0ef806493242470db3cbc8346cdc782af3f9 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 18 Feb 2024 17:05:00 +0800 Subject: [PATCH 04/18] fix(stream): fix deadlock. --- source/dnode/vnode/src/tq/tqStreamTask.c | 2 +- source/libs/stream/src/streamTaskSm.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 280c110711..73508202d9 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -28,8 +28,8 @@ static int32_t tqScanWalInFuture(STQ* pTq, int32_t numOfTasks, int32_t idleDurat // extract data blocks(submit/delete) from WAL, and add them into the input queue for all the sources tasks. int32_t tqScanWal(STQ* pTq) { - int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; + int32_t vgId = pMeta->vgId; int64_t st = taosGetTimestampMs(); tqDebug("vgId:%d continue to check if data in wal are available, scanCounter:%d", vgId, pMeta->scanInfo.scanCounter); diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 4ca0040941..a44e107851 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -415,7 +415,6 @@ int32_t streamTaskHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, _ stDebug("s-task:%s status:%s handling event:%s by some other thread, wait for 100ms and check if completed", pTask->id.idStr, pSM->current.name, GET_EVT_NAME(evt)); - ASSERT(0); taosMsleep(100); } else { // no active event trans exists, handle this event directly @@ -485,6 +484,9 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even // on success callback, add into lock if necessary, or maybe we should add an option for this? pTrans->pSuccAction(pTask); + taosThreadMutexUnlock(&pTask->lock); + + // todo: add parameter to control lock // after handling the callback function assigned by invoker, go on handling the waiting tasks if (callbackFn != NULL) { stDebug("s-task:%s start to handle user-specified callback fn for event:%s", id, GET_EVT_NAME(pTrans->event)); @@ -493,6 +495,8 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even stDebug("s-task:%s handle user-specified callback fn for event:%s completed", id, GET_EVT_NAME(pTrans->event)); } + taosThreadMutexLock(&pTask->lock); + // tasks in waiting list if (taosArrayGetSize(pSM->pWaitingEventList) > 0) { doHandleWaitingEvent(pSM, GET_EVT_NAME(pTrans->event), pTask); From d5b316839dae1135d4cc2ff9f0a5015e0d4d1a4e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 19 Feb 2024 09:14:21 +0800 Subject: [PATCH 05/18] refactor: do some internal refactor. --- source/libs/stream/src/streamTaskSm.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index a44e107851..593c2c5754 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -89,11 +89,6 @@ int32_t streamTaskInitStatus(SStreamTask* pTask) { return 0; } -static int32_t streamTaskDoCheckpoint(SStreamTask* pTask) { - stDebug("s-task:%s start to do checkpoint", pTask->id.idStr); - return 0; -} - int32_t streamTaskSendTransSuccessMsg(SStreamTask* pTask) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { streamTaskSendCheckpointSourceRsp(pTask); @@ -612,9 +607,9 @@ void doInitStateTransferTable(void) { taosArrayPush(streamTaskSMTrans, &trans); // checkpoint related event - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, streamTaskDoCheckpoint, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, streamTaskDoCheckpoint, NULL, true); + trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__READY, TASK_EVENT_CHECKPOINT_DONE, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); From 7e866c55278f86b892f50a906f86734ad992495d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 19 Feb 2024 11:32:04 +0800 Subject: [PATCH 06/18] refactor: do some internal refactor. --- source/dnode/vnode/src/tq/tq.c | 7 -- source/libs/stream/src/streamMeta.c | 9 +- source/libs/stream/src/streamTaskSm.c | 113 ++++++++++---------------- 3 files changed, 50 insertions(+), 79 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 940a8e0c49..5ac44cbbae 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1039,14 +1039,7 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { } ASSERT(pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE); - - // code = streamTaskHandleEvent(pStreamTask->status.pSM, TASK_EVENT_HALT); code = streamTaskHandleEventAsync(pStreamTask->status.pSM, TASK_EVENT_HALT, handleStep2Async, pTq); -// if (code == TSDB_CODE_SUCCESS) { -// doStartFillhistoryStep2(pTask, pStreamTask, pTq); -// } else { -// tqError("s-task:%s failed to halt s-task:%s, not launch step2", id, pStreamTask->id.idStr); -// } streamMetaReleaseTask(pMeta, pStreamTask); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index db74ce9897..774a32b265 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -669,6 +669,13 @@ static void doRemoveIdFromList(SStreamMeta* pMeta, int32_t num, SStreamTaskId* i } } +static int32_t streamTaskSendTransSuccessMsg(SStreamTask* pTask, void* param) { + if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + streamTaskSendCheckpointSourceRsp(pTask); + } + return 0; +} + int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t taskId) { SStreamTask* pTask = NULL; @@ -687,7 +694,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t } // handle the dropping event - streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_DROPPING); + streamTaskHandleEventAsync(pTask->status.pSM, TASK_EVENT_DROPPING, streamTaskSendTransSuccessMsg, NULL); } else { stDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId); streamMetaWUnLock(pMeta); diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 593c2c5754..9ca5248157 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -59,15 +59,14 @@ static int32_t streamTaskInitStatus(SStreamTask* pTask); static int32_t streamTaskKeepCurrentVerInWal(SStreamTask* pTask); static int32_t initStateTransferTable(); static void doInitStateTransferTable(void); -static int32_t streamTaskSendTransSuccessMsg(SStreamTask* pTask); static STaskStateTrans createStateTransform(ETaskStatus current, ETaskStatus next, EStreamTaskEvent event, __state_trans_fn fn, __state_trans_succ_fn succFn, - SFutureHandleEventInfo* pEventInfo, bool autoInvoke); + SFutureHandleEventInfo* pEventInfo); static int32_t dummyFn(SStreamTask* UNUSED_PARAM(p)) { return TSDB_CODE_SUCCESS; } -static int32_t attachNextHandledEvent(SStreamTask* pTask, SFutureHandleEventInfo* pEvtInfo) { +static int32_t attachWaitedEvent(SStreamTask* pTask, SFutureHandleEventInfo* pEvtInfo) { char* p = streamTaskGetStatus(pTask)->name; stDebug("s-task:%s status:%s attach event:%s required status:%s, since not allowed to handle it", pTask->id.idStr, p, @@ -89,13 +88,6 @@ int32_t streamTaskInitStatus(SStreamTask* pTask) { return 0; } -int32_t streamTaskSendTransSuccessMsg(SStreamTask* pTask) { - if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { - streamTaskSendCheckpointSourceRsp(pTask); - } - return 0; -} - int32_t streamTaskKeepCurrentVerInWal(SStreamTask* pTask) { if (!HAS_RELATED_FILLHISTORY_TASK(pTask)) { stError("s-task:%s no related fill-history task, since it may have been dropped already", pTask->id.idStr); @@ -274,7 +266,7 @@ static int32_t doHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event, STaskSt const char* id = pTask->id.idStr; if (pTrans->attachEvent.event != 0) { - attachNextHandledEvent(pTask, &pTrans->attachEvent); + attachWaitedEvent(pTask, &pTrans->attachEvent); taosThreadMutexUnlock(&pTask->lock); while (1) { @@ -313,34 +305,13 @@ static int32_t doHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event, STaskSt static int32_t doHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, STaskStateTrans* pTrans, __state_trans_user_fn callbackFn, void* param) { SStreamTask* pTask = pSM->pTask; - const char* id = pTask->id.idStr; - if (pTrans->attachEvent.event != 0) { SFutureHandleEventInfo info = pTrans->attachEvent; info.pParam = param; info.callBackFn = callbackFn; - attachNextHandledEvent(pTask, &info); + attachWaitedEvent(pTask, &info); taosThreadMutexUnlock(&pTask->lock); - -// while (1) { -// // wait for the task to be here -// taosThreadMutexLock(&pTask->lock); -// ETaskStatus s = streamTaskGetStatus(pTask)->state; -// taosThreadMutexUnlock(&pTask->lock); -// -// if ((s == pTrans->next.state) && (pSM->prev.evt == pTrans->event)) {// this event has been handled already -// stDebug("s-task:%s attached event:%s handled", id, GET_EVT_NAME(pTrans->event)); -// return TSDB_CODE_SUCCESS; -// } else if (s != TASK_STATUS__DROPPING && s != TASK_STATUS__STOP && s != TASK_STATUS__UNINIT) { -// stDebug("s-task:%s not handle event:%s yet, wait for 100ms and recheck", id, GET_EVT_NAME(event)); -// taosMsleep(100); -// } else { -// stDebug("s-task:%s is dropped or stopped already, not wait.", id); -// return TSDB_CODE_STREAM_INVALID_STATETRANS; -// } -// } - } else { // override current active trans pSM->pActiveTrans = pTrans; pSM->startTs = taosGetTimestampMs(); @@ -415,13 +386,13 @@ int32_t streamTaskHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, _ // no active event trans exists, handle this event directly pTrans = streamTaskFindTransform(pSM->current.state, event); if (pTrans == NULL) { - stDebug("s-task:%s failed to handle event:%s", pTask->id.idStr, GET_EVT_NAME(event)); + stDebug("s-task:%s failed to handle event:%s, status:%s", pTask->id.idStr, GET_EVT_NAME(event), pSM->current.name); taosThreadMutexUnlock(&pTask->lock); return TSDB_CODE_STREAM_INVALID_STATETRANS; } if (pSM->pActiveTrans != NULL) { - // currently in some state transfer procedure, not auto invoke transfer, quit from this prcedure + // currently in some state transfer procedure, not auto invoke transfer, quit from this procedure stDebug("s-task:%s event:%s handle procedure quit, status %s -> %s failed, handle event %s now", pTask->id.idStr, GET_EVT_NAME(pSM->pActiveTrans->event), pSM->current.name, pSM->pActiveTrans->next.name, GET_EVT_NAME(event)); @@ -555,7 +526,7 @@ void streamTaskSetStatusReady(SStreamTask* pTask) { } STaskStateTrans createStateTransform(ETaskStatus current, ETaskStatus next, EStreamTaskEvent event, __state_trans_fn fn, - __state_trans_succ_fn succFn, SFutureHandleEventInfo* pEventInfo, bool autoInvoke) { + __state_trans_succ_fn succFn, SFutureHandleEventInfo* pEventInfo) { STaskStateTrans trans = {0}; trans.state = StreamTaskStatusList[current]; trans.next = StreamTaskStatusList[next]; @@ -570,7 +541,7 @@ STaskStateTrans createStateTransform(ETaskStatus current, ETaskStatus next, EStr trans.pAction = (fn != NULL) ? fn : dummyFn; trans.pSuccAction = (succFn != NULL) ? succFn : dummyFn; - trans.autoInvokeEndFn = autoInvoke; + trans.autoInvokeEndFn = (fn == NULL); return trans; } @@ -584,93 +555,93 @@ void doInitStateTransferTable(void) { streamTaskSMTrans = taosArrayInit(8, sizeof(STaskStateTrans)); // initialization event handle - STaskStateTrans trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, streamTaskInitStatus, streamTaskOnNormalTaskReady, false, false); + STaskStateTrans trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, streamTaskInitStatus, streamTaskOnNormalTaskReady, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, streamTaskOnScanhistoryTaskReady, false, false); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, streamTaskOnScanhistoryTaskReady, NULL); taosArrayPush(streamTaskSMTrans, &trans); // scan-history related event - trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); // halt stream task, from other task status - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); + trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL); taosArrayPush(streamTaskSMTrans, &trans); SFutureHandleEventInfo info = {.status = TASK_STATUS__READY, .event = TASK_EVENT_HALT}; - trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); + trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL); taosArrayPush(streamTaskSMTrans, &trans); // checkpoint related event - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__READY, TASK_EVENT_CHECKPOINT_DONE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__READY, TASK_EVENT_CHECKPOINT_DONE, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); // pause & resume related event handle - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); info = (SFutureHandleEventInfo){.status = TASK_STATUS__READY, .event = TASK_EVENT_PAUSE}; - trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, &info, true); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, &info); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, &info, true); + trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, &info); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__PAUSE, TASK_EVENT_PAUSE, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__STOP, TASK_STATUS__STOP, TASK_EVENT_PAUSE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__STOP, TASK_STATUS__STOP, TASK_EVENT_PAUSE, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__DROPPING, TASK_STATUS__DROPPING, TASK_EVENT_PAUSE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__DROPPING, TASK_STATUS__DROPPING, TASK_EVENT_PAUSE, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); // resume is completed by restore status of state-machine // stop related event - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__DROPPING, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__DROPPING, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__STOP, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__STOP, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__STOP, TASK_EVENT_STOP, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); // dropping related event - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__DROPPING, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__DROPPING, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__STOP, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__STOP, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, streamTaskSendTransSuccessMsg, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL); taosArrayPush(streamTaskSMTrans, &trans); } //clang-format on \ No newline at end of file From d3e8adf2ebc3b1e941f29085f7694c1640a94bb7 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 19 Feb 2024 13:36:57 +0800 Subject: [PATCH 07/18] enh(stream): async handle pause event --- source/dnode/vnode/src/tqCommon/tqCommon.c | 2 +- source/libs/stream/src/streamExec.c | 13 ++++++++----- source/libs/stream/src/streamTask.c | 8 ++++++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index c4973b7c1e..3cdb3d8ecf 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -837,7 +837,7 @@ int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg){ pHistoryTask = streamMetaAcquireTask(pMeta, pTask->hTaskInfo.id.streamId, pTask->hTaskInfo.id.taskId); if (pHistoryTask == NULL) { tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%" PRIx64 - ", it may have been dropped already", + ", it may have been dropped already", pMeta->vgId, pTask->hTaskInfo.id.taskId); streamMetaReleaseTask(pMeta, pTask); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 7fb8095acd..a2cf8bdab5 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -410,6 +410,11 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } +static int32_t haltCallback(SStreamTask* pTask, void* param) { + streamTaskOpenAllUpstreamInput(pTask); + streamTaskSendCheckpointReq(pTask); +} + int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { int32_t code = TSDB_CODE_SUCCESS; SStreamMeta* pMeta = pTask->pMeta; @@ -419,11 +424,12 @@ int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { int32_t level = pTask->info.taskLevel; if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SOURCE) { // do transfer task operator states. code = streamDoTransferStateToStreamTask(pTask); - } else { // no state transfer for sink tasks, and drop fill-history task, followed by opening inputQ of sink task. + } else { + // no state transfer for sink tasks, and drop fill-history task, followed by opening inputQ of sink task. SStreamTask* pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId); if (pStreamTask != NULL) { // halt the related stream sink task - code = streamTaskHandleEvent(pStreamTask->status.pSM, TASK_EVENT_HALT); + code = streamTaskHandleEventAsync(pStreamTask->status.pSM, TASK_EVENT_HALT, haltCallback, NULL); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s halt stream task:%s failed, code:%s not transfer state to stream task", pTask->id.idStr, pStreamTask->id.idStr, tstrerror(code)); @@ -432,9 +438,6 @@ int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { } else { stDebug("s-task:%s halt by related fill-history task:%s", pStreamTask->id.idStr, pTask->id.idStr); } - - streamTaskOpenAllUpstreamInput(pStreamTask); - streamTaskSendCheckpointReq(pStreamTask); streamMetaReleaseTask(pMeta, pStreamTask); } } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index fef733c9f3..bba49e1226 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -847,8 +847,8 @@ void streamTaskStatusCopy(STaskStatusEntry* pDst, const STaskStatusEntry* pSrc) pDst->chkpointTransId = pSrc->chkpointTransId; } -void streamTaskPause(SStreamMeta* pMeta, SStreamTask* pTask) { - streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_PAUSE); +static int32_t taskPauseCallback(SStreamTask* pTask, void* param) { + SStreamMeta* pMeta = pTask->pMeta; int32_t num = atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1); stInfo("vgId:%d s-task:%s pause stream task. pause task num:%d", pMeta->vgId, pTask->id.idStr, num); @@ -862,6 +862,10 @@ void streamTaskPause(SStreamMeta* pMeta, SStreamTask* pTask) { stDebug("vgId:%d s-task:%s set pause flag and pause task", pMeta->vgId, pTask->id.idStr); } +void streamTaskPause(SStreamMeta* pMeta, SStreamTask* pTask) { + streamTaskHandleEventAsync(pTask->status.pSM, TASK_EVENT_PAUSE, taskPauseCallback, NULL); +} + void streamTaskResume(SStreamTask* pTask) { SStreamTaskState prevState = *streamTaskGetStatus(pTask); SStreamMeta* pMeta = pTask->pMeta; From 730edf24f905d77556c12eb9340894078f3601d2 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 19 Feb 2024 13:38:14 +0800 Subject: [PATCH 08/18] fix(stream): fix syntax error. --- source/libs/stream/src/streamTask.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index bba49e1226..5c03aced32 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -860,6 +860,7 @@ static int32_t taskPauseCallback(SStreamTask* pTask, void* param) { } stDebug("vgId:%d s-task:%s set pause flag and pause task", pMeta->vgId, pTask->id.idStr); + return TSDB_CODE_SUCCESS; } void streamTaskPause(SStreamMeta* pMeta, SStreamTask* pTask) { From c5b26b406f6e0eb493951734fee87805c2ef829f Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 19 Feb 2024 13:38:56 +0800 Subject: [PATCH 09/18] fix(stream): fix syntax error. --- source/libs/stream/src/streamExec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index a2cf8bdab5..8d33eaae62 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -413,6 +413,7 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { static int32_t haltCallback(SStreamTask* pTask, void* param) { streamTaskOpenAllUpstreamInput(pTask); streamTaskSendCheckpointReq(pTask); + return TSDB_CODE_SUCCESS; } int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { From bf242a4f64d34987f1e1bf942dd65a5babf8e366 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 19 Feb 2024 15:57:21 +0800 Subject: [PATCH 10/18] fix(stream): handle pause event in waiting list. --- include/libs/stream/tstream.h | 2 +- source/dnode/snode/src/snode.c | 1 + source/dnode/vnode/src/tq/tq.c | 1 + source/libs/stream/src/streamTask.c | 21 +++------ source/libs/stream/src/streamTaskSm.c | 63 ++++++++++++++++++++------- 5 files changed, 57 insertions(+), 31 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 587e762448..17b87f2355 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -803,7 +803,7 @@ int32_t streamTaskHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event); typedef int32_t (*__state_trans_user_fn)(SStreamTask*, void* param); int32_t streamTaskHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, __state_trans_user_fn callbackFn, void* param); int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent event, __state_trans_user_fn callbackFn, void* param); -void streamTaskRestoreStatus(SStreamTask* pTask); +int32_t streamTaskRestoreStatus(SStreamTask* pTask); int32_t streamTaskStop(SStreamTask* pTask); int32_t streamSendCheckRsp(const SStreamMeta* pMeta, const SStreamTaskCheckReq* pReq, SStreamTaskCheckRsp* pRsp, diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index f173c327c7..3a72146a41 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -87,6 +87,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer // checkpoint ver is the kept version, handled data should be the next version. if (pTask->chkInfo.checkpointId != 0) { pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1; + pChkInfo->processedVer = pChkInfo->checkpointVer; sndInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 5ac44cbbae..81d9a9f13f 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -835,6 +835,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t nextProcessVer) { // checkpoint ver is the kept version, handled data should be the next version. if (pChkInfo->checkpointId != 0) { pChkInfo->nextProcessVer = pChkInfo->checkpointVer + 1; + pChkInfo->processedVer = pChkInfo->checkpointVer; tqInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " currentVer:%" PRId64, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 5c03aced32..aab2e6ab95 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -636,8 +636,6 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE stDebug("s-task:0x%x update the dispatch info, task:0x%x(nodeId:%d) newEpSet:%s", pTask->id.taskId, pDispatcher->taskId, nodeId, buf); } - } else { - // do nothing } } @@ -869,20 +867,15 @@ void streamTaskPause(SStreamMeta* pMeta, SStreamTask* pTask) { void streamTaskResume(SStreamTask* pTask) { SStreamTaskState prevState = *streamTaskGetStatus(pTask); + SStreamMeta* pMeta = pTask->pMeta; - - if (prevState.state == TASK_STATUS__PAUSE || prevState.state == TASK_STATUS__HALT) { - streamTaskRestoreStatus(pTask); - - char* pNew = streamTaskGetStatus(pTask)->name; - if (prevState.state == TASK_STATUS__PAUSE) { - int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1); - stInfo("s-task:%s status:%s resume from %s, paused task(s):%d", pTask->id.idStr, pNew, prevState.name, num); - } else { - stInfo("s-task:%s status:%s resume from %s", pTask->id.idStr, pNew, prevState.name); - } + int32_t code = streamTaskRestoreStatus(pTask); + if (code == TSDB_CODE_SUCCESS) { + char* pNew = streamTaskGetStatus(pTask)->name; + int32_t num = atomic_sub_fetch_32(&pMeta->numOfPausedTasks, 1); + stInfo("s-task:%s status:%s resume from %s, paused task(s):%d", pTask->id.idStr, pNew, prevState.name, num); } else { - stDebug("s-task:%s status:%s not in pause/halt status, no need to resume", pTask->id.idStr, prevState.name); + stInfo("s-task:%s status:%s no need to resume, paused task(s):%d", pTask->id.idStr, prevState.name, pMeta->numOfPausedTasks); } } diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 9ca5248157..6aa215586a 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -196,30 +196,61 @@ static int32_t doHandleWaitingEvent(SStreamTaskSM* pSM, const char* pEventName, return code; } -void streamTaskRestoreStatus(SStreamTask* pTask) { +static int32_t removeEventInWaitingList(SStreamTask* pTask, EStreamTaskEvent event) { SStreamTaskSM* pSM = pTask->status.pSM; + bool removed = false; taosThreadMutexLock(&pTask->lock); - ASSERT(pSM->pActiveTrans == NULL); - ASSERT(pSM->current.state == TASK_STATUS__PAUSE || pSM->current.state == TASK_STATUS__HALT); + int32_t num = taosArrayGetSize(pSM->pWaitingEventList); + for (int32_t i = 0; i < num; ++i) { + SFutureHandleEventInfo* pInfo = taosArrayGet(pSM->pWaitingEventList, i); + if (pInfo->event == event) { + taosArrayRemove(pSM->pWaitingEventList, i); + stDebug("s-task:%s pause event in waiting list not be handled yet, remove it from waiting list, remaining:%d", + pTask->id.idStr, pInfo->event); + removed = true; + break; + } + } - SStreamTaskState state = pSM->current; - pSM->current = pSM->prev.state; - - pSM->prev.state = state; - pSM->prev.evt = 0; - - pSM->startTs = taosGetTimestampMs(); - - if (taosArrayGetSize(pSM->pWaitingEventList) > 0) { - stDebug("s-task:%s restore status, %s -> %s, and then handle waiting event", pTask->id.idStr, pSM->prev.state.name, pSM->current.name); - doHandleWaitingEvent(pSM, "restore-pause/halt", pTask); - } else { - stDebug("s-task:%s restore status, %s -> %s", pTask->id.idStr, pSM->prev.state.name, pSM->current.name); + if (!removed) { + stDebug("s-task:%s failed to remove event:%s in waiting list", pTask->id.idStr, StreamTaskEventList[event].name); } taosThreadMutexUnlock(&pTask->lock); + return TSDB_CODE_SUCCESS; +} + +int32_t streamTaskRestoreStatus(SStreamTask* pTask) { + SStreamTaskSM* pSM = pTask->status.pSM; + int32_t code = 0; + + taosThreadMutexLock(&pTask->lock); + + if (pSM->current.state == TASK_STATUS__PAUSE && pSM->pActiveTrans == NULL) { + SStreamTaskState state = pSM->current; + pSM->current = pSM->prev.state; + + pSM->prev.state = state; + pSM->prev.evt = 0; + + pSM->startTs = taosGetTimestampMs(); + + if (taosArrayGetSize(pSM->pWaitingEventList) > 0) { + stDebug("s-task:%s restore status, %s -> %s, and then handle waiting event", pTask->id.idStr, + pSM->prev.state.name, pSM->current.name); + doHandleWaitingEvent(pSM, "restore-pause/halt", pTask); + } else { + stDebug("s-task:%s restore status, %s -> %s", pTask->id.idStr, pSM->prev.state.name, pSM->current.name); + } + } else { + removeEventInWaitingList(pTask, TASK_EVENT_PAUSE); + code = -1; // failed to restore the status + } + + taosThreadMutexUnlock(&pTask->lock); + return code; } SStreamTaskSM* streamCreateStateMachine(SStreamTask* pTask) { From eec6b668772e32983d4b54e32c3c51400a2514cb Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 28 Feb 2024 17:05:11 +0800 Subject: [PATCH 11/18] fix:[TD-28869]get error in askEp because consumer is dropped when unsubscribe topic --- source/client/src/clientTmq.c | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 9b74456da2..3ce4e97e22 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -1009,19 +1009,8 @@ int32_t tmq_unsubscribe(tmq_t* tmq) { } taosSsleep(2); // sleep 2s for hb to send offset and rows to server - int32_t rsp; - int32_t retryCnt = 0; tmq_list_t* lst = tmq_list_new(); - while (1) { - rsp = tmq_subscribe(tmq, lst); - if (rsp != TSDB_CODE_MND_CONSUMER_NOT_READY || retryCnt > 5) { - break; - } else { - retryCnt++; - taosMsleep(500); - } - } - + int32_t rsp = tmq_subscribe(tmq, lst); tmq_list_destroy(lst); return rsp; } @@ -1271,10 +1260,9 @@ int32_t tmq_subscribe(tmq_t* tmq, const tmq_list_t* topic_list) { } int32_t retryCnt = 0; - while (syncAskEp(tmq) != 0) { - if (retryCnt++ > MAX_RETRY_COUNT) { + while ((code = syncAskEp(tmq)) != 0) { + if (retryCnt++ > MAX_RETRY_COUNT || code == TSDB_CODE_MND_CONSUMER_NOT_EXIST) { tscError("consumer:0x%" PRIx64 ", mnd not ready for subscribe, retry more than 2 minutes", tmq->consumerId); - code = TSDB_CODE_MND_CONSUMER_NOT_READY; goto FAIL; } @@ -2154,18 +2142,8 @@ int32_t tmq_consumer_close(tmq_t* tmq) { } taosSsleep(2); // sleep 2s for hb to send offset and rows to server - int32_t retryCnt = 0; tmq_list_t* lst = tmq_list_new(); - while (1) { - int32_t rsp = tmq_subscribe(tmq, lst); - if (rsp != TSDB_CODE_MND_CONSUMER_NOT_READY || retryCnt > 5) { - break; - } else { - retryCnt++; - taosMsleep(500); - } - } - + tmq_subscribe(tmq, lst); tmq_list_destroy(lst); } else { tscInfo("consumer:0x%" PRIx64 " not in ready state, close it directly", tmq->consumerId); From e17832e27eb5cdb0860504111fba46d4ba28e6d7 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 29 Feb 2024 14:38:09 +0800 Subject: [PATCH 12/18] fix:[TD-28869]get error in askEp because consumer is dropped when unsubscribe --- source/client/src/clientTmq.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 3ce4e97e22..11e88dc306 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -2135,16 +2135,19 @@ int32_t tmq_consumer_close(tmq_t* tmq) { if (tmq->status == TMQ_CONSUMER_STATUS__READY) { // if auto commit is set, commit before close consumer. Otherwise, do nothing. if (tmq->autoCommit) { - int32_t rsp = tmq_commit_sync(tmq, NULL); - if (rsp != 0) { - return rsp; + int32_t code = tmq_commit_sync(tmq, NULL); + if (code != 0) { + return code; } } taosSsleep(2); // sleep 2s for hb to send offset and rows to server tmq_list_t* lst = tmq_list_new(); - tmq_subscribe(tmq, lst); + int32_t code = tmq_subscribe(tmq, lst); tmq_list_destroy(lst); + if (code != 0) { + return code; + } } else { tscInfo("consumer:0x%" PRIx64 " not in ready state, close it directly", tmq->consumerId); } From 3febcb96a0dab5ed7199d72d308b78f1b6699733 Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao> Date: Fri, 1 Mar 2024 10:57:57 +0800 Subject: [PATCH 13/18] reset flush state --- source/libs/stream/src/streamSessionState.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/source/libs/stream/src/streamSessionState.c b/source/libs/stream/src/streamSessionState.c index 3d0241df75..723f04c499 100644 --- a/source/libs/stream/src/streamSessionState.c +++ b/source/libs/stream/src/streamSessionState.c @@ -156,6 +156,7 @@ int32_t getSessionWinResultBuff(SStreamFileState* pFileState, SSessionKey* pKey, (*pVal) = pPos; SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; pPos->beUsed = true; + pPos->beFlushed = false; *pKey = *pDestWinKey; goto _end; } @@ -167,6 +168,7 @@ int32_t getSessionWinResultBuff(SStreamFileState* pFileState, SSessionKey* pKey, (*pVal) = pPos; SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; pPos->beUsed = true; + pPos->beFlushed = false; *pKey = *pDestWinKey; goto _end; } @@ -380,6 +382,14 @@ static SStreamStateCur* seekKeyCurrentPrev_buff(SStreamFileState* pFileState, co (*pWins) = pWinStates; } + if (size > 0 && index == -1) { + SRowBuffPos* pPos = taosArrayGetP(pWinStates, 0); + SSessionKey* pWin = (SSessionKey*)pPos->pKey; + if (pWinKey->win.skey == pWin->win.skey) { + index = 0; + } + } + if (index >= 0) { pCur = createSessionStateCursor(pFileState); pCur->buffIndex = index; @@ -387,6 +397,7 @@ static SStreamStateCur* seekKeyCurrentPrev_buff(SStreamFileState* pFileState, co *pIndex = index; } } + return pCur; } @@ -666,6 +677,7 @@ int32_t getStateWinResultBuff(SStreamFileState* pFileState, SSessionKey* key, ch (*pVal) = pPos; SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; pPos->beUsed = true; + pPos->beFlushed = false; *key = *pDestWinKey; goto _end; } @@ -679,6 +691,7 @@ int32_t getStateWinResultBuff(SStreamFileState* pFileState, SSessionKey* key, ch (*pVal) = pPos; SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; pPos->beUsed = true; + pPos->beFlushed = false; *key = *pDestWinKey; goto _end; } @@ -771,6 +784,7 @@ int32_t getCountWinResultBuff(SStreamFileState* pFileState, SSessionKey* pKey, C (*pVal) = pPos; SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; pPos->beUsed = true; + pPos->beFlushed = false; *pWinKey = *pDestWinKey; goto _end; } @@ -799,6 +813,7 @@ int32_t getCountWinResultBuff(SStreamFileState* pFileState, SSessionKey* pKey, C (*pVal) = pPos; SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; pPos->beUsed = true; + pPos->beFlushed = false; *pWinKey = *pDestWinKey; goto _end; } From 9a5219d5943bd315d3f2e27ecb1d5f0d6b2a972f Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Thu, 29 Feb 2024 13:19:26 +0800 Subject: [PATCH 14/18] fix: memory free sequence of sub request caused memory use after free --- source/client/inc/clientInt.h | 1 + source/client/src/clientEnv.c | 32 +++++++++++++++++-- source/client/src/clientMain.c | 56 +++++++++++----------------------- 3 files changed, 49 insertions(+), 40 deletions(-) diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index 989c6614a6..59eee6fd9d 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -354,6 +354,7 @@ SRequestObj* acquireRequest(int64_t rid); int32_t releaseRequest(int64_t rid); int32_t removeRequest(int64_t rid); void doDestroyRequest(void* p); +int64_t removeFromMostPrevReq(SRequestObj* pRequest); char* getDbOfConnection(STscObj* pObj); void setConnectionDB(STscObj* pTscObj, const char* db); diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 1df50a51da..6c20813118 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -385,6 +385,33 @@ int32_t releaseRequest(int64_t rid) { return taosReleaseRef(clientReqRefPool, ri int32_t removeRequest(int64_t rid) { return taosRemoveRef(clientReqRefPool, rid); } +/// return the most previous req ref id +int64_t removeFromMostPrevReq(SRequestObj* pRequest) { + int64_t mostPrevReqRefId = pRequest->self; + SRequestObj* pTmp = pRequest; + while (pTmp->relation.prevRefId) { + pTmp = acquireRequest(pTmp->relation.prevRefId); + if (pTmp) { + mostPrevReqRefId = pTmp->self; + releaseRequest(mostPrevReqRefId); + } else { + break; + } + } + removeRequest(mostPrevReqRefId); + return mostPrevReqRefId; +} + +void destroyNextReq(int64_t nextRefId) { + if (nextRefId) { + SRequestObj* pObj = acquireRequest(nextRefId); + if (pObj) { + releaseRequest(nextRefId); + releaseRequest(nextRefId); + } + } +} + void destroySubRequests(SRequestObj *pRequest) { int32_t reqIdx = -1; SRequestObj *pReqList[16] = {NULL}; @@ -435,7 +462,7 @@ void doDestroyRequest(void *p) { uint64_t reqId = pRequest->requestId; tscTrace("begin to destroy request %" PRIx64 " p:%p", reqId, pRequest); - destroySubRequests(pRequest); + int64_t nextReqRefId = pRequest->relation.nextRefId; taosHashRemove(pRequest->pTscObj->pRequests, &pRequest->self, sizeof(pRequest->self)); @@ -471,6 +498,7 @@ void doDestroyRequest(void *p) { taosMemoryFreeClear(pRequest->sqlstr); taosMemoryFree(pRequest); tscTrace("end to destroy request %" PRIx64 " p:%p", reqId, pRequest); + destroyNextReq(nextReqRefId); } void destroyRequest(SRequestObj *pRequest) { @@ -479,7 +507,7 @@ void destroyRequest(SRequestObj *pRequest) { } taos_stop_query(pRequest); - removeRequest(pRequest->self); + removeFromMostPrevReq(pRequest); } void taosStopQueryImpl(SRequestObj *pRequest) { diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 275ca0d2aa..e9379946b1 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -1254,54 +1254,34 @@ void doAsyncQuery(SRequestObj *pRequest, bool updateMetaForce) { } void restartAsyncQuery(SRequestObj *pRequest, int32_t code) { - int32_t reqIdx = 0; - SRequestObj *pReqList[16] = {NULL}; - SRequestObj *pUserReq = NULL; - pReqList[0] = pRequest; - uint64_t tmpRefId = 0; - SRequestObj *pTmp = pRequest; - while (pTmp->relation.prevRefId) { - tmpRefId = pTmp->relation.prevRefId; - pTmp = acquireRequest(tmpRefId); - if (pTmp) { - pReqList[++reqIdx] = pTmp; - releaseRequest(tmpRefId); - } else { - tscError("prev req ref 0x%" PRIx64 " is not there", tmpRefId); + tscInfo("restart request: %s p: %p", pRequest->sqlstr, pRequest); + SRequestObj* pUserReq = pRequest; + acquireRequest(pRequest->self); + while (pUserReq) { + if (pUserReq->self == pUserReq->relation.userRefId || pUserReq->relation.userRefId == 0) { break; - } - } - - tmpRefId = pRequest->relation.nextRefId; - while (tmpRefId) { - pTmp = acquireRequest(tmpRefId); - if (pTmp) { - tmpRefId = pTmp->relation.nextRefId; - removeRequest(pTmp->self); - releaseRequest(pTmp->self); } else { - tscError("next req ref 0x%" PRIx64 " is not there", tmpRefId); - break; + int64_t nextRefId = pUserReq->relation.nextRefId; + releaseRequest(pUserReq->self); + if (nextRefId) { + pUserReq = acquireRequest(nextRefId); + } } } - - for (int32_t i = reqIdx; i >= 0; i--) { - destroyCtxInRequest(pReqList[i]); - if (pReqList[i]->relation.userRefId == pReqList[i]->self || 0 == pReqList[i]->relation.userRefId) { - pUserReq = pReqList[i]; - } else { - removeRequest(pReqList[i]->self); - } - } - + bool hasSubRequest = pUserReq != pRequest || pRequest->relation.prevRefId != 0; if (pUserReq) { + destroyCtxInRequest(pUserReq); pUserReq->prevCode = code; memset(&pUserReq->relation, 0, sizeof(pUserReq->relation)); } else { - tscError("user req is missing"); + tscError("User req is missing"); + removeFromMostPrevReq(pRequest); return; } - + if (hasSubRequest) + removeFromMostPrevReq(pRequest); + else + releaseRequest(pUserReq->self); doAsyncQuery(pUserReq, true); } From 8ff0fd346f913d9937569de5f4a86b6d5379f758 Mon Sep 17 00:00:00 2001 From: chenhaoran Date: Mon, 4 Mar 2024 02:42:03 +0800 Subject: [PATCH 15/18] test: add subscribe and commits in compatibility test --- .../system-test/0-others/com_alltypedata.json | 2 +- tests/system-test/0-others/compatibility.py | 83 +++++++++++++++++-- 2 files changed, 76 insertions(+), 9 deletions(-) diff --git a/tests/system-test/0-others/com_alltypedata.json b/tests/system-test/0-others/com_alltypedata.json index 0e6d8e3a07..1499ca7670 100644 --- a/tests/system-test/0-others/com_alltypedata.json +++ b/tests/system-test/0-others/com_alltypedata.json @@ -22,7 +22,7 @@ "vgroups": 2, "replica": 1, "precision": "ms", - "stt_trigger": 8, + "stt_trigger": 1, "minRows": 100, "maxRows": 4096 }, diff --git a/tests/system-test/0-others/compatibility.py b/tests/system-test/0-others/compatibility.py index c936cf1ae4..8163177a3b 100644 --- a/tests/system-test/0-others/compatibility.py +++ b/tests/system-test/0-others/compatibility.py @@ -1,11 +1,13 @@ from urllib.parse import uses_relative import taos +import taosws import sys import os import time import platform import inspect from taos.tmq import Consumer +from taos.tmq import * from pathlib import Path from util.log import * @@ -17,7 +19,7 @@ from util.dnodes import TDDnode from util.cluster import * import subprocess -BASEVERSION = "3.0.2.3" +BASEVERSION = "3.2.0.0" class TDTestCase: def caseDescription(self): f''' @@ -30,7 +32,7 @@ class TDTestCase: self.replicaVar = int(replicaVar) tdLog.debug(f"start to excute {__file__}") tdSql.init(conn.cursor()) - self.deletedDataSql= '''drop database if exists deldata;create database deldata duration 300 stt_trigger 4; ;use deldata; + self.deletedDataSql= '''drop database if exists deldata;create database deldata duration 300 stt_trigger 1; ;use deldata; create table deldata.stb1 (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) tags (t1 int); create table deldata.ct1 using deldata.stb1 tags ( 1 ); insert into deldata.ct1 values ( now()-0s, 0, 0, 0, 0, 0.0, 0.0, 0, 'binary0', 'nchar0', now()+0a ) ( now()-10s, 1, 11111, 111, 11, 1.11, 11.11, 1, 'binary1', 'nchar1', now()+1a ) ( now()-20s, 2, 22222, 222, 22, 2.22, 22.22, 0, 'binary2', 'nchar2', now()+2a ) ( now()-30s, 3, 33333, 333, 33, 3.33, 33.33, 1, 'binary3', 'nchar3', now()+3a ); @@ -104,8 +106,19 @@ class TDTestCase: print(f"{packageName} has been exists") os.system(f" cd {packagePath} && tar xvf {packageName} && cd {packageTPath} && ./install.sh -e no " ) tdDnodes.stop(1) - print(f"start taosd: rm -rf {dataPath}/* && nohup taosd -c {cPath} & ") - os.system(f"rm -rf {dataPath}/* && nohup taosd -c {cPath} & " ) + print(f"start taosd: rm -rf {dataPath}/* && nohup /usr/bin/taosd -c {cPath} & ") + os.system(f"rm -rf {dataPath}/* && nohup /usr/bin/taosd -c {cPath} & " ) + os.system(f"killall taosadapter" ) + os.system(f"cp /etc/taos/taosadapter.toml {cPath}/taosadapter.toml " ) + taosadapter_cfg = cPath + "/taosadapter.toml" + taosadapter_log_path = cPath + "/../log/" + print(f"taosadapter_cfg:{taosadapter_cfg},taosadapter_log_path:{taosadapter_log_path} ") + self.alter_string_in_file(taosadapter_cfg,"#path = \"/var/log/taos\"",f"path = \"{taosadapter_log_path}\"") + self.alter_string_in_file(taosadapter_cfg,"taosConfigDir = \"\"",f"taosConfigDir = \"{cPath}\"") + print("/usr/bin/taosadapter --version") + os.system(f" /usr/bin/taosadapter --version" ) + print(f" LD_LIBRARY_PATH=/usr/lib -c {taosadapter_cfg} 2>&1 & ") + os.system(f" LD_LIBRARY_PATH=/usr/lib /usr/bin/taosadapter -c {taosadapter_cfg} 2>&1 & " ) sleep(5) @@ -116,7 +129,24 @@ class TDTestCase: def is_list_same_as_ordered_list(self,unordered_list, ordered_list): sorted_list = sorted(unordered_list) return sorted_list == ordered_list - + + def alter_string_in_file(self,file,old_str,new_str): + """ + replace str in file + :param file + :param old_str + :param new_str + :return: + """ + file_data = "" + with open(file, "r", encoding="utf-8") as f: + for line in f: + if old_str in line: + line = line.replace(old_str,new_str) + file_data += line + with open(file,"w",encoding="utf-8") as f: + f.write(file_data) + def run(self): scriptsPath = os.path.dirname(os.path.realpath(__file__)) distro_id = distro.id() @@ -131,7 +161,7 @@ class TDTestCase: dbname = "test" stb = f"{dbname}.meters" self.installTaosd(bPath,cPath) - os.system("echo 'debugFlag 143' > /etc/taos/taos.cfg ") + # os.system(f"echo 'debugFlag 143' >> {cPath}/taos.cfg ") tableNumbers=100 recordNumbers1=100 recordNumbers2=1000 @@ -163,11 +193,46 @@ class TDTestCase: # os.system(f"LD_LIBRARY_PATH=/usr/lib taos -s 'use test;create stream current_stream into current_stream_output_stb as select _wstart as `start`, _wend as wend, max(current) as max_current from meters where voltage <= 220 interval (5s);' ") # os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;create stream power_stream into power_stream_output_stb as select ts, concat_ws(\\".\\", location, tbname) as meter_location, current*voltage*cos(phase) as active_power, current*voltage*sin(phase) as reactive_power from meters partition by tbname;" ') # os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;show streams;" ') - os.system(f"sed -i 's/\/etc\/taos/{cPath}/' 0-others/tmqBasic.json ") + self.alter_string_in_file("0-others/tmqBasic.json", "/etc/taos/", cPath) # os.system("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 0-others/tmqBasic.json -y ") os.system('LD_LIBRARY_PATH=/usr/lib taos -s "create topic if not exists tmq_test_topic as select current,voltage,phase from test.meters where voltage <= 106 and current <= 5;" ') os.system('LD_LIBRARY_PATH=/usr/lib taos -s "use test;show topics;" ') + os.system(f" /usr/bin/taosadapter --version " ) + consumer_dict = { + "group.id": "g1", + "td.connect.user": "root", + "td.connect.pass": "taosdata", + "auto.offset.reset": "earliest", + } + consumer = taosws.Consumer(conf={"group.id": "local", "td.connect.websocket.scheme": "ws"}) + try: + consumer.subscribe(["tmq_test_topic"]) + except TmqError: + tdLog.exit(f"subscribe error") + + while True: + message = consumer.poll(timeout=1.0) + if message: + print("message") + id = message.vgroup() + topic = message.topic() + database = message.database() + + for block in message: + nrows = block.nrows() + ncols = block.ncols() + for row in block: + print(row) + values = block.fetchall() + print(nrows, ncols) + + consumer.commit(message) + else: + print("break") + break + + consumer.close() tdLog.info(" LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 0-others/compa4096.json -y ") os.system("LD_LIBRARY_PATH=/usr/lib taosBenchmark -f 0-others/compa4096.json -y") os.system("LD_LIBRARY_PATH=/usr/lib taos -s 'flush database db4096 '") @@ -184,7 +249,8 @@ class TDTestCase: os.system("pkill taosd") # make sure all the data are saved in disk. self.checkProcessPid("taosd") - + os.system("pkill taosadapter") # make sure all the data are saved in disk. + self.checkProcessPid("taosadapter") tdLog.printNoPrefix("==========step2:update new version ") self.buildTaosd(bPath) @@ -193,6 +259,7 @@ class TDTestCase: tdsql=tdCom.newTdSql() print(tdsql) cmd = f" LD_LIBRARY_PATH=/usr/lib taos -h localhost ;" + print(os.system(cmd)) if os.system(cmd) == 0: raise Exception("failed to execute system command. cmd: %s" % cmd) From 376872acc6bdd2e18c58711fe3921f7a4765d989 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Mon, 4 Mar 2024 09:53:49 +0800 Subject: [PATCH 16/18] fix(test/last_both): remove last_row query --- tests/script/tsim/parser/last_both.sim | 6 +- tests/script/tsim/parser/last_both_query.sim | 496 ++++++++++++++++++ tests/script/tsim/parser/last_cache_query.sim | 106 ---- 3 files changed, 499 insertions(+), 109 deletions(-) create mode 100644 tests/script/tsim/parser/last_both_query.sim diff --git a/tests/script/tsim/parser/last_both.sim b/tests/script/tsim/parser/last_both.sim index d7daf4d333..7b6c40c127 100644 --- a/tests/script/tsim/parser/last_both.sim +++ b/tests/script/tsim/parser/last_both.sim @@ -69,13 +69,13 @@ sql drop table tbf; sql alter table st2 add column c1 int; sql alter table st2 drop column c1; -run tsim/parser/last_cache_query.sim +run tsim/parser/last_both_query.sim sql flush database $db system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start -run tsim/parser/last_cache_query.sim +run tsim/parser/last_both_query.sim system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start @@ -145,6 +145,6 @@ sql alter database $db cachemodel 'both' sql alter database $db cachesize 2 sleep 11000 -run tsim/parser/last_cache_query.sim +run tsim/parser/last_both_query.sim system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/parser/last_both_query.sim b/tests/script/tsim/parser/last_both_query.sim new file mode 100644 index 0000000000..775fd482ea --- /dev/null +++ b/tests/script/tsim/parser/last_both_query.sim @@ -0,0 +1,496 @@ + +sql connect + +$db = testdb +sql use $db +print "test tb1" + +sql select last(ts) from tb1 +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-12 10:10:12.000@ then + print $data00 + return -1 +endi + +sql select last(f1) from tb1 +if $rows != 1 then + return -1 +endi +if $data00 != 6 then + print $data00 + return -1 +endi + +sql select last(*) from tb1 +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-12 10:10:12.000@ then + print $data00 + return -1 +endi +if $data01 != 6 then + return -1 +endi +if $data02 != 5.000000000 then + print $data02 + return -1 +endi +if $data03 != 3 then + print expect 3, actual: $data03 + return -1 +endi +if $data04 != @70-01-01 07:59:57.000@ then + return -1 +endi + +sql select last(tb1.*,ts,f4) from tb1 +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-12 10:10:12.000@ then + print $data00 + return -1 +endi +if $data01 != 6 then + return -1 +endi +if $data02 != 5.000000000 then + print $data02 + return -1 +endi +if $data03 != 3 then + return -1 +endi +if $data04 != @70-01-01 07:59:57.000@ then + return -1 +endi +if $data05 != @21-05-12 10:10:12.000@ then + print $data00 + return -1 +endi +if $data06 != @70-01-01 07:59:57.000@ then + return -1 +endi + +print "test tb2" +sql select last(ts) from tb2 +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-11 10:11:15.000@ then + print $data00 + return -1 +endi + +sql select last(f1) from tb2 +if $rows != 1 then + return -1 +endi +if $data00 != -6 then + print $data00 + return -1 +endi + +sql select last(*) from tb2 +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-11 10:11:15.000@ then + print $data00 + return -1 +endi +if $data01 != -6 then + return -1 +endi +if $data02 != -7.000000000 then + print $data02 + return -1 +endi +if $data03 != -8 then + return -1 +endi +if $data04 != @70-01-01 07:59:56.999@ then + if $data04 != @70-01-01 07:59:57.-01@ then + return -1 + endi +endi + +sql select last(tb2.*,ts,f4) from tb2 +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-11 10:11:15.000@ then + print $data00 + return -1 +endi +if $data01 != -6 then + return -1 +endi +if $data02 != -7.000000000 then + print $data02 + return -1 +endi +if $data03 != -8 then + return -1 +endi +if $data04 != @70-01-01 07:59:56.999@ then + if $data04 != @70-01-01 07:59:57.-01@ then + return -1 + endi +endi +if $data05 != @21-05-11 10:11:15.000@ then + print $data00 + return -1 +endi +if $data06 != @70-01-01 07:59:56.999@ then + if $data04 != @70-01-01 07:59:57.-01@ then + return -1 + endi +endi + +print "test tbd" +sql select last(*) from tbd +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-11 10:12:29.000@ then + print $data00 + return -1 +endi +if $data01 != NULL then + return -1 +endi +if $data02 != NULL then + print $data02 + return -1 +endi +if $data03 != NULL then + return -1 +endi +if $data04 != NULL then + return -1 +endi + +print "test tbe" +sql select last(*) from tbe +if $rows != 0 then + return -1 +endi + +print "test stable" +sql select last(ts) from st2 +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-12 10:10:12.000@ then + print $data00 + return -1 +endi + +sql select last(f1) from st2 +if $rows != 1 then + return -1 +endi +if $data00 != 6 then + print $data00 + return -1 +endi + +sql select last(*) from st2 +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-12 10:10:12.000@ then + print $data00 + return -1 +endi +if $data01 != 6 then + return -1 +endi +if $data02 != 37.000000000 then + print expect 37.000000000 actual: $data02 + return -1 +endi +if $data03 != 27 then + return -1 +endi +if $data04 != @70-01-01 07:59:57.000@ then + return -1 +endi + + +sql select last(st2.*,ts,f4) from st2 +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-12 10:10:12.000@ then + print $data00 + return -1 +endi +if $data01 != 6 then + return -1 +endi +if $data02 != 37.000000000 then + print expect 37.000000000, acutal: $data02 + return -1 +endi +if $data03 != 27 then + return -1 +endi +if $data04 != @70-01-01 07:59:57.000@ then + return -1 +endi +if $data05 != @21-05-12 10:10:12.000@ then + print $data00 + return -1 +endi +if $data06 != @70-01-01 07:59:57.000@ then + return -1 +endi + +sql select last(*), id from st2 group by id order by id +print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 $data16 $data17 $data18 $data19 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 $data26 $data27 $data28 $data29 +print ===> $data30 $data31 $data32 $data33 $data34 $data35 $data36 $data37 $data38 $data39 +print ===> $data40 $data41 $data42 $data43 $data44 $data45 $data46 $data47 $data48 $data49 + +if $rows != 5 then + return -1 +endi +if $data00 != @21-05-12 10:10:12.000@ then + return -1 +endi +if $data01 != 6 then + return -1 +endi +if $data02 != 5.000000000 then + print $data02 + return -1 +endi +if $data03 != 21 then + return -1 +endi +if $data04 != @70-01-01 07:59:57.000@ then + return -1 +endi +if $data05 != 1 then + return -1 +endi +if $data10 != @21-05-11 10:12:23.000@ then + return -1 +endi +if $data11 != 22 then + return -1 +endi +if $data12 != 23.000000000 then + print $data02 + return -1 +endi +if $data13 != -8 then + return -1 +endi +if $data14 != @70-01-01 07:59:58.-04@ then + return -1 +endi +if $data15 != 2 then + return -1 +endi +if $data20 != @21-05-10 10:12:24.000@ then + return -1 +endi +if $data21 != 24 then + return -1 +endi +if $data22 != 11.000000000 then + print expect 11.000000000 actual: $data22 + return -1 +endi +if $data23 != 25 then + return -1 +endi +if $data24 != @70-01-01 07:59:57.-04@ then = + return -1 +endi +if $data25 != 3 then + return -1 +endi +if $data30 != @21-05-11 10:12:25.000@ then + return -1 +endi +if $data31 != 26 then + return -1 +endi +if $data32 != 17.000000000 then + print $data02 + return -1 +endi +if $data33 != 27 then + return -1 +endi +if $data34 != @70-01-01 07:59:56.-04@ then + return -1 +endi +if $data35 != 4 then + return -1 +endi +if $data40 != @21-05-11 10:12:29.000@ then + return -1 +endi +if $data41 != 36 then + return -1 +endi +if $data42 != 37.000000000 then + print $data02 + return -1 +endi +if $data43 != 35 then + return -1 +endi +if $data44 != @70-01-01 07:59:56.-05@ then + return -1 +endi +if $data45 != 5 then + return -1 +endi + +#sql select last_row(*), id from st2 group by id order by id +#print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 +#print ===> $data10 $data11 $data12 $data13 $data14 $data15 $data16 $data17 $data18 $data19 +#print ===> $data20 $data21 $data22 $data23 $data24 $data25 $data26 $data27 $data28 $data29 +#print ===> $data30 $data31 $data32 $data33 $data34 $data35 $data36 $data37 $data38 $data39 +#print ===> $data40 $data41 $data42 $data43 $data44 $data45 $data46 $data47 $data48 $data49 +# +#if $rows != 5 then +# return -1 +#endi +#if $data00 != @21-05-12 10:10:12.000@ then +# return -1 +#endi +#if $data01 != 6 then +# return -1 +#endi +#if $data02 != NULL then +# print $data02 +# return -1 +#endi +#if $data03 != NULL then +# return -1 +#endi +#if $data04 != @70-01-01 07:59:57.000@ then +# return -1 +#endi +#if $data05 != 1 then +# return -1 +#endi +#if $data10 != @21-05-11 10:12:23.000@ then +# return -1 +#endi +#if $data11 != 22 then +# return -1 +#endi +#if $data12 != 23.000000000 then +# print $data02 +# return -1 +#endi +#if $data13 != NULL then +# return -1 +#endi +#if $data14 != @70-01-01 07:59:58.-04@ then +# return -1 +#endi +#if $data15 != 2 then +# return -1 +#endi +#if $data20 != @21-05-10 10:12:24.000@ then +# return -1 +#endi +#if $data21 != 24 then +# return -1 +#endi +#if $data22 != NULL then +# print expect NULL actual: $data22 +# return -1 +#endi +#if $data23 != 25 then +# return -1 +#endi +#if $data24 != @70-01-01 07:59:57.-04@ then = +# return -1 +#endi +#if $data25 != 3 then +# return -1 +#endi +#if $data30 != @21-05-11 10:12:25.000@ then +# return -1 +#endi +#if $data31 != 26 then +# return -1 +#endi +#if $data32 != NULL then +# print $data02 +# return -1 +#endi +#if $data33 != 27 then +# return -1 +#endi +#if $data34 != @70-01-01 07:59:56.-04@ then +# return -1 +#endi +#if $data35 != 4 then +# return -1 +#endi +#if $data40 != @21-05-11 10:12:29.000@ then +# return -1 +#endi +#if $data41 != NULL then +# return -1 +#endi +#if $data42 != NULL then +# print $data02 +# return -1 +#endi +#if $data43 != NULL then +# return -1 +#endi +#if $data44 != NULL then +# return -1 +#endi +#if $data45 != 5 then +# return -1 +#endi + +print "test tbn" +sql create table if not exists tbn (ts timestamp, f1 int, f2 double, f3 binary(10), f4 timestamp) +sql insert into tbn values ("2021-05-09 10:10:10", 1, 2.0, '3', -1000) +sql insert into tbn values ("2021-05-10 10:10:11", 4, 5.0, NULL, -2000) +sql insert into tbn values ("2021-05-12 10:10:12", 6,NULL, NULL, -3000) +sql insert into tbn values ("2021-05-13 10:10:12", NULL,NULL, NULL,NULL) + +sql select last(*) from tbn; +if $rows != 1 then + return -1 +endi +if $data00 != @21-05-13 10:10:12.000@ then + print $data00 + return -1 +endi +if $data01 != 6 then + return -1 +endi +if $data02 != 5.000000000 then + print $data02 + return -1 +endi +if $data03 != 3 then + return -1 +endi +if $data04 != @70-01-01 07:59:57.000@ then + return -1 +endi + +sql alter table tbn add column c1 int; +sql alter table tbn drop column c1; diff --git a/tests/script/tsim/parser/last_cache_query.sim b/tests/script/tsim/parser/last_cache_query.sim index c5961f2183..30196e0b62 100644 --- a/tests/script/tsim/parser/last_cache_query.sim +++ b/tests/script/tsim/parser/last_cache_query.sim @@ -357,112 +357,6 @@ if $data45 != 5 then return -1 endi -sql select last_row(*), id from st2 group by id order by id -print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 -print ===> $data10 $data11 $data12 $data13 $data14 $data15 $data16 $data17 $data18 $data19 -print ===> $data20 $data21 $data22 $data23 $data24 $data25 $data26 $data27 $data28 $data29 -print ===> $data30 $data31 $data32 $data33 $data34 $data35 $data36 $data37 $data38 $data39 -print ===> $data40 $data41 $data42 $data43 $data44 $data45 $data46 $data47 $data48 $data49 - -if $rows != 5 then - return -1 -endi -if $data00 != @21-05-12 10:10:12.000@ then - return -1 -endi -if $data01 != 6 then - return -1 -endi -if $data02 != NULL then - print $data02 - return -1 -endi -if $data03 != NULL then - return -1 -endi -if $data04 != @70-01-01 07:59:57.000@ then - return -1 -endi -if $data05 != 1 then - return -1 -endi -if $data10 != @21-05-11 10:12:23.000@ then - return -1 -endi -if $data11 != 22 then - return -1 -endi -if $data12 != 23.000000000 then - print $data02 - return -1 -endi -if $data13 != NULL then - return -1 -endi -if $data14 != @70-01-01 07:59:58.-04@ then - return -1 -endi -if $data15 != 2 then - return -1 -endi -if $data20 != @21-05-10 10:12:24.000@ then - return -1 -endi -if $data21 != 24 then - return -1 -endi -if $data22 != NULL then - print expect NULL actual: $data22 - return -1 -endi -if $data23 != 25 then - return -1 -endi -if $data24 != @70-01-01 07:59:57.-04@ then = - return -1 -endi -if $data25 != 3 then - return -1 -endi -if $data30 != @21-05-11 10:12:25.000@ then - return -1 -endi -if $data31 != 26 then - return -1 -endi -if $data32 != NULL then - print $data02 - return -1 -endi -if $data33 != 27 then - return -1 -endi -if $data34 != @70-01-01 07:59:56.-04@ then - return -1 -endi -if $data35 != 4 then - return -1 -endi -if $data40 != @21-05-11 10:12:29.000@ then - return -1 -endi -if $data41 != 36 then - return -1 -endi -if $data42 != 37.000000000 then - print $data02 - return -1 -endi -if $data43 != NULL then - return -1 -endi -if $data44 != @70-01-01 07:59:56.-05@ then - return -1 -endi -if $data45 != 5 then - return -1 -endi - print "test tbn" sql create table if not exists tbn (ts timestamp, f1 int, f2 double, f3 binary(10), f4 timestamp) sql insert into tbn values ("2021-05-09 10:10:10", 1, 2.0, '3', -1000) From eba7b2fa82db2b1a93971990ff846597953a80e2 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Mon, 4 Mar 2024 15:58:25 +0800 Subject: [PATCH 17/18] fix(test/last_row): remove random columns' checking --- tests/script/tsim/parser/last_both.sim | 2 +- tests/script/tsim/parser/last_both_query.sim | 190 +++++++++---------- 2 files changed, 96 insertions(+), 96 deletions(-) diff --git a/tests/script/tsim/parser/last_both.sim b/tests/script/tsim/parser/last_both.sim index 7b6c40c127..e01a966744 100644 --- a/tests/script/tsim/parser/last_both.sim +++ b/tests/script/tsim/parser/last_both.sim @@ -6,7 +6,7 @@ sql connect print ======================== dnode1 start $db = testdb sql drop database if exists $db -sql create database $db cachemodel 'both' minrows 10 stt_trigger 1 +sql create database $db cachemodel 'none' minrows 10 stt_trigger 1 sql use $db sql create stable st2 (ts timestamp, f1 int, f2 double, f3 binary(10), f4 timestamp) tags (id int) diff --git a/tests/script/tsim/parser/last_both_query.sim b/tests/script/tsim/parser/last_both_query.sim index 775fd482ea..5f86412199 100644 --- a/tests/script/tsim/parser/last_both_query.sim +++ b/tests/script/tsim/parser/last_both_query.sim @@ -357,95 +357,95 @@ if $data45 != 5 then return -1 endi -#sql select last_row(*), id from st2 group by id order by id -#print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 -#print ===> $data10 $data11 $data12 $data13 $data14 $data15 $data16 $data17 $data18 $data19 -#print ===> $data20 $data21 $data22 $data23 $data24 $data25 $data26 $data27 $data28 $data29 -#print ===> $data30 $data31 $data32 $data33 $data34 $data35 $data36 $data37 $data38 $data39 -#print ===> $data40 $data41 $data42 $data43 $data44 $data45 $data46 $data47 $data48 $data49 -# -#if $rows != 5 then -# return -1 -#endi -#if $data00 != @21-05-12 10:10:12.000@ then -# return -1 -#endi -#if $data01 != 6 then -# return -1 -#endi -#if $data02 != NULL then -# print $data02 -# return -1 -#endi -#if $data03 != NULL then -# return -1 -#endi -#if $data04 != @70-01-01 07:59:57.000@ then -# return -1 -#endi -#if $data05 != 1 then -# return -1 -#endi -#if $data10 != @21-05-11 10:12:23.000@ then -# return -1 -#endi -#if $data11 != 22 then -# return -1 -#endi -#if $data12 != 23.000000000 then -# print $data02 -# return -1 -#endi -#if $data13 != NULL then -# return -1 -#endi -#if $data14 != @70-01-01 07:59:58.-04@ then -# return -1 -#endi -#if $data15 != 2 then -# return -1 -#endi -#if $data20 != @21-05-10 10:12:24.000@ then -# return -1 -#endi -#if $data21 != 24 then -# return -1 -#endi -#if $data22 != NULL then -# print expect NULL actual: $data22 -# return -1 -#endi -#if $data23 != 25 then -# return -1 -#endi -#if $data24 != @70-01-01 07:59:57.-04@ then = -# return -1 -#endi -#if $data25 != 3 then -# return -1 -#endi -#if $data30 != @21-05-11 10:12:25.000@ then -# return -1 -#endi -#if $data31 != 26 then -# return -1 -#endi -#if $data32 != NULL then -# print $data02 -# return -1 -#endi -#if $data33 != 27 then -# return -1 -#endi -#if $data34 != @70-01-01 07:59:56.-04@ then -# return -1 -#endi -#if $data35 != 4 then -# return -1 -#endi -#if $data40 != @21-05-11 10:12:29.000@ then -# return -1 -#endi +sql select last_row(*), id from st2 group by id order by id +print ===> $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 $data08 $data09 +print ===> $data10 $data11 $data12 $data13 $data14 $data15 $data16 $data17 $data18 $data19 +print ===> $data20 $data21 $data22 $data23 $data24 $data25 $data26 $data27 $data28 $data29 +print ===> $data30 $data31 $data32 $data33 $data34 $data35 $data36 $data37 $data38 $data39 +print ===> $data40 $data41 $data42 $data43 $data44 $data45 $data46 $data47 $data48 $data49 + +if $rows != 5 then + return -1 +endi +if $data00 != @21-05-12 10:10:12.000@ then + return -1 +endi +if $data01 != 6 then + return -1 +endi +if $data02 != NULL then + print $data02 + return -1 +endi +if $data03 != NULL then + return -1 +endi +if $data04 != @70-01-01 07:59:57.000@ then + return -1 +endi +if $data05 != 1 then + return -1 +endi +if $data10 != @21-05-11 10:12:23.000@ then + return -1 +endi +if $data11 != 22 then + return -1 +endi +if $data12 != 23.000000000 then + print $data02 + return -1 +endi +if $data13 != NULL then + return -1 +endi +if $data14 != @70-01-01 07:59:58.-04@ then + return -1 +endi +if $data15 != 2 then + return -1 +endi +if $data20 != @21-05-10 10:12:24.000@ then + return -1 +endi +if $data21 != 24 then + return -1 +endi +if $data22 != NULL then + print expect NULL actual: $data22 + return -1 +endi +if $data23 != 25 then + return -1 +endi +if $data24 != @70-01-01 07:59:57.-04@ then = + return -1 +endi +if $data25 != 3 then + return -1 +endi +if $data30 != @21-05-11 10:12:25.000@ then + return -1 +endi +if $data31 != 26 then + return -1 +endi +if $data32 != NULL then + print $data02 + return -1 +endi +if $data33 != 27 then + return -1 +endi +if $data34 != @70-01-01 07:59:56.-04@ then + return -1 +endi +if $data35 != 4 then + return -1 +endi +if $data40 != @21-05-11 10:12:29.000@ then + return -1 +endi #if $data41 != NULL then # return -1 #endi @@ -453,15 +453,15 @@ endi # print $data02 # return -1 #endi -#if $data43 != NULL then -# return -1 -#endi +if $data43 != NULL then + return -1 +endi #if $data44 != NULL then # return -1 #endi -#if $data45 != 5 then -# return -1 -#endi +if $data45 != 5 then + return -1 +endi print "test tbn" sql create table if not exists tbn (ts timestamp, f1 int, f2 double, f3 binary(10), f4 timestamp) From aaa1c1d4a8dc2097058724f5a2a488d3b1392136 Mon Sep 17 00:00:00 2001 From: facetosea <25808407@qq.com> Date: Mon, 4 Mar 2024 09:35:08 +0800 Subject: [PATCH 18/18] fix: memleak --- source/libs/executor/src/tsort.c | 38 ++++++++++++++------------------ 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 9ff903cdb9..10220426a3 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1199,6 +1199,18 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { return code; } +static void freeSSortSource(SSortSource* source) { + if (NULL == source) return; + if (source->param && !source->onlyRef) { + taosMemoryFree(source->param); + } + if (!source->onlyRef && source->src.pBlock) { + blockDataDestroy(source->src.pBlock); + source->src.pBlock = NULL; + } + taosMemoryFree(source); +} + static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) { int32_t code = 0; size_t sortBufSize = pHandle->numOfPages * pHandle->pageSize; @@ -1231,14 +1243,7 @@ static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) { code = blockDataMerge(pHandle->pDataBlock, pBlock); if (code != TSDB_CODE_SUCCESS) { - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); - } - if (!source->onlyRef && source->src.pBlock) { - blockDataDestroy(source->src.pBlock); - source->src.pBlock = NULL; - } - taosMemoryFree(source); + freeSSortSource(source); return code; } @@ -1248,15 +1253,7 @@ static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) { int64_t p = taosGetTimestampUs(); code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); if (code != 0) { - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); - } - if (!source->onlyRef && source->src.pBlock) { - blockDataDestroy(source->src.pBlock); - source->src.pBlock = NULL; - } - - taosMemoryFree(source); + freeSSortSource(source); return code; } @@ -1265,16 +1262,13 @@ static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) { if (pHandle->pqMaxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->pqMaxRows); code = doAddToBuf(pHandle->pDataBlock, pHandle); if (code != TSDB_CODE_SUCCESS) { + freeSSortSource(source); return code; } } } - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); - } - - taosMemoryFree(source); + freeSSortSource(source); if (pHandle->pDataBlock != NULL && pHandle->pDataBlock->info.rows > 0) { size_t size = blockDataGetSize(pHandle->pDataBlock);