From 9cc2aec9d66d27aa5a9d1873291f9be797d064b1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 15 Nov 2024 17:12:01 +0800 Subject: [PATCH 1/5] fix(stream): set correct value and open inputQ for stream if reset checkpoint. --- source/dnode/mnode/impl/src/mndStream.c | 7 ++++++- source/libs/stream/src/streamCheckpoint.c | 15 +++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 81db427afd..6336cd6e49 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -2434,7 +2434,12 @@ static void doAddReportStreamTask(SArray *pList, int64_t reportChkptId, const SC mDebug("s-task:0x%x expired checkpoint-report msg in checkpoint-report list update from %" PRId64 "->%" PRId64, pReport->taskId, p->checkpointId, pReport->checkpointId); - memcpy(p, pReport, sizeof(STaskChkptInfo)); + // update the checkpoint report info + p->checkpointId = pReport->checkpointId; + p->ts = pReport->checkpointTs; + p->version = pReport->checkpointVer; + p->transId = pReport->transId; + p->dropHTask = pReport->dropHTask; } else { mWarn("taskId:0x%x already in checkpoint-report list", pReport->taskId); } diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 7724d1c5ff..df13b9a585 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -255,8 +255,7 @@ static int32_t doCheckBeforeHandleChkptTrigger(SStreamTask* pTask, int64_t check "the interrupted checkpoint", id, vgId, pBlock->srcTaskId); - streamTaskOpenUpstreamInput(pTask, pBlock->srcTaskId); - return code; + return TSDB_CODE_STREAM_INVLD_CHKPT; } if (streamTaskGetStatus(pTask).state == TASK_STATUS__CK) { @@ -264,14 +263,14 @@ static int32_t doCheckBeforeHandleChkptTrigger(SStreamTask* pTask, int64_t check stError("s-task:%s vgId:%d active checkpointId:%" PRId64 ", recv invalid checkpoint-trigger checkpointId:%" PRId64 " discard", id, vgId, pActiveInfo->activeId, checkpointId); - return code; + return TSDB_CODE_STREAM_INVLD_CHKPT; } else { // checkpointId == pActiveInfo->activeId if (pActiveInfo->allUpstreamTriggerRecv == 1) { stDebug( "s-task:%s vgId:%d all upstream checkpoint-trigger recv, discard this checkpoint-trigger, " "checkpointId:%" PRId64 " transId:%d", id, vgId, checkpointId, transId); - return code; + return TSDB_CODE_STREAM_INVLD_CHKPT; } if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { @@ -286,14 +285,14 @@ static int32_t doCheckBeforeHandleChkptTrigger(SStreamTask* pTask, int64_t check stWarn("s-task:%s repeatly recv checkpoint-source msg from task:0x%x vgId:%d, checkpointId:%" PRId64 ", prev recvTs:%" PRId64 " discard", pTask->id.idStr, p->upstreamTaskId, p->upstreamNodeId, p->checkpointId, p->recvTs); - return code; + return TSDB_CODE_STREAM_INVLD_CHKPT; } } } } } - return 0; + return TSDB_CODE_SUCCESS; } int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { @@ -317,6 +316,10 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock code = doCheckBeforeHandleChkptTrigger(pTask, checkpointId, pBlock, transId); streamMutexUnlock(&pTask->lock); if (code) { + if (taskLevel != TASK_LEVEL__SOURCE) { // the checkpoint-trigger is discard, open the inputQ for upstream tasks + streamTaskOpenUpstreamInput(pTask, pBlock->srcTaskId); + } + streamFreeQitem((SStreamQueueItem*)pBlock); return code; } From 628808c9a4bbfe83d8efff189b284442de1ed066 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 15 Nov 2024 17:16:47 +0800 Subject: [PATCH 2/5] fix(stream): fix dead lock. --- source/libs/stream/src/streamDispatch.c | 2 ++ source/libs/stream/src/streamStartTask.c | 1 + 2 files changed, 3 insertions(+) diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index e0fa199199..5807240f5e 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -1170,6 +1170,7 @@ int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { if (taosArrayGetSize(pTask->upstreamInfo.pList) != num) { stError("s-task:%s invalid number of sent readyMsg:%d to upstream:%d", id, num, (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList)); + streamMutexUnlock(&pActiveInfo->lock); return TSDB_CODE_STREAM_INTERNAL_ERROR; } @@ -1412,6 +1413,7 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa if (size > 0) { STaskCheckpointReadyInfo* pReady = taosArrayGet(pActiveInfo->pReadyMsgList, 0); if (pReady == NULL) { + streamMutexUnlock(&pActiveInfo->lock); return terrno; } diff --git a/source/libs/stream/src/streamStartTask.c b/source/libs/stream/src/streamStartTask.c index ed12687e41..9c16ff036e 100644 --- a/source/libs/stream/src/streamStartTask.c +++ b/source/libs/stream/src/streamStartTask.c @@ -433,6 +433,7 @@ int32_t streamMetaStopAllTasks(SStreamMeta* pMeta) { // send hb msg to mnode before closing all tasks. int32_t code = streamMetaSendMsgBeforeCloseTasks(pMeta, &pTaskList); if (code != TSDB_CODE_SUCCESS) { + streamMetaRUnLock(pMeta); return code; } From f304a92229ab4e4cc072165cdb8a5632a87682f6 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 15 Nov 2024 17:21:20 +0800 Subject: [PATCH 3/5] refactor: add new error code. --- include/util/taoserror.h | 1 + source/util/src/terror.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 9a8b39b84c..b563c186c3 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -1010,6 +1010,7 @@ int32_t taosGetErrSize(); #define TSDB_CODE_STREAM_CONFLICT_EVENT TAOS_DEF_ERROR_CODE(0, 0x4106) #define TSDB_CODE_STREAM_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x4107) #define TSDB_CODE_STREAM_INPUTQ_FULL TAOS_DEF_ERROR_CODE(0, 0x4108) +#define TSDB_CODE_STREAM_INVLD_CHKPT TAOS_DEF_ERROR_CODE(0, 0x4109) // TDLite #define TSDB_CODE_TDLITE_IVLD_OPEN_FLAGS TAOS_DEF_ERROR_CODE(0, 0x5100) diff --git a/source/util/src/terror.c b/source/util/src/terror.c index ce209cc718..ce754e8795 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -852,7 +852,8 @@ TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_TASK_IVLD_STATUS, "Invalid task status TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_CONFLICT_EVENT, "Stream conflict event") TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_INTERNAL_ERROR, "Stream internal error") TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_NOT_LEADER, "Stream task not on leader vnode") -TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_INPUTQ_FULL, "Task input queue is full") +TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_INPUTQ_FULL, "Task input queue is full") +TAOS_DEFINE_ERROR(TSDB_CODE_STREAM_INVLD_CHKPT, "Invalid checkpoint trigger msg") // TDLite TAOS_DEFINE_ERROR(TSDB_CODE_TDLITE_IVLD_OPEN_FLAGS, "Invalid TDLite open flags") From b1981d309fefe53bd85a6bf1e5b034127fb2e372 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 15 Nov 2024 17:41:14 +0800 Subject: [PATCH 4/5] fix(stream): return error code. --- source/libs/stream/src/streamCheckpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index df13b9a585..6c4c285ded 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -222,14 +222,14 @@ static int32_t doCheckBeforeHandleChkptTrigger(SStreamTask* pTask, int64_t check stError("s-task:%s vgId:%d current checkpointId:%" PRId64 " recv expired checkpoint-trigger block, checkpointId:%" PRId64 " transId:%d, discard", id, vgId, pTask->chkInfo.checkpointId, checkpointId, transId); - return code; + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } if (pActiveInfo->failedId >= checkpointId) { stError("s-task:%s vgId:%d checkpointId:%" PRId64 " transId:%d, has been marked failed, failedId:%" PRId64 " discard the checkpoint-trigger block", id, vgId, checkpointId, transId, pActiveInfo->failedId); - return code; + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } if (pTask->chkInfo.checkpointId == checkpointId) { From 55239964a979cd44f5ca965b4aa92a9e72b81a0d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 16 Nov 2024 01:56:33 +0800 Subject: [PATCH 5/5] refactor: injection error. --- source/libs/stream/src/streamCheckpoint.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 6c4c285ded..2280b7f06f 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -362,6 +362,10 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock } } +#if 0 + taosMsleep(20*1000); +#endif + if (taskLevel == TASK_LEVEL__SOURCE) { int8_t type = pTask->outputInfo.type; pActiveInfo->allUpstreamTriggerRecv = 1;