diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 2ee48a18e0..34272abd43 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -3331,7 +3331,7 @@ typedef struct { SMsgHead head; int64_t streamId; int32_t taskId; -} SVPauseStreamTaskReq, SVResetStreamTaskReq, SVDropHTaskReq; +} SVPauseStreamTaskReq, SVResetStreamTaskReq; typedef struct { int8_t reserved; diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 92035101f6..d884227249 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -69,12 +69,6 @@ typedef struct SNodeEntry { int64_t hbTimestamp; // second } SNodeEntry; -typedef struct SFailedCheckpointInfo { - int64_t streamUid; - int64_t checkpointId; - int32_t transId; -} SFailedCheckpointInfo; - #define MND_STREAM_CREATE_NAME "stream-create" #define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint" #define MND_STREAM_PAUSE_NAME "stream-pause" @@ -97,9 +91,14 @@ int32_t mndAddtoCheckpointWaitingList(SStreamObj *pStream, int64_t checkpointId) bool mndStreamTransConflictCheck(SMnode *pMnode, int64_t streamUid, const char *pTransName, bool lock); int32_t mndStreamGetRelTrans(SMnode *pMnode, int64_t streamUid); +typedef struct SOrphanTask { + int64_t streamId; + int32_t taskId; + int32_t nodeId; +} SOrphanTask; + // for sma // TODO refactor -int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams); int32_t mndGetNumOfStreamTasks(const SStreamObj *pStream); SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady); @@ -119,7 +118,8 @@ void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode) int32_t initStreamNodeList(SMnode *pMnode); int32_t mndStreamSetResumeAction(STrans *pTrans, SMnode *pMnode, SStreamObj* pStream, int8_t igUntreated); int32_t mndStreamSetPauseAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); - +int32_t mndStreamSetDropAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); +int32_t mndStreamSetDropActionFromList(SMnode *pMnode, STrans *pTrans, SArray *pList); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndSma.c b/source/dnode/mnode/impl/src/mndSma.c index a89136e7d3..e6027a0332 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -865,7 +865,7 @@ static int32_t mndDropSma(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SSmaObj *p sdbRelease(pMnode->pSdb, pStream); goto _OVER; } else { - if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { + if (mndStreamSetDropAction(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to drop task since %s", pStream->name, terrstr()); sdbRelease(pMnode->pSdb, pStream); goto _OVER; @@ -917,7 +917,7 @@ int32_t mndDropSmasByStb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SStbObj *p SStreamObj *pStream = mndAcquireStream(pMnode, streamName); if (pStream != NULL && pStream->smaId == pSma->uid) { - if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { + if (mndStreamSetDropAction(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to drop task since %s", pStream->name, terrstr()); mndReleaseStream(pMnode, pStream); goto _OVER; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 7b348172f2..46c7a06079 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -608,50 +608,6 @@ _OVER: return -1; } -static int32_t mndPersistTaskDropReq(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) { - SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); - if (pReq == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - pReq->head.vgId = htonl(pTask->info.nodeId); - pReq->taskId = pTask->id.taskId; - pReq->streamId = pTask->id.streamId; - - SEpSet epset = {0}; - bool hasEpset = false; - int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); - if (code != TSDB_CODE_SUCCESS || !hasEpset) { // no valid epset, return directly without redoAction - terrno = code; - return -1; - } - - // The epset of nodeId of this task may have been expired now, let's use the newest epset from mnode. - code = setTransAction(pTrans, pReq, sizeof(SVDropStreamTaskReq), TDMT_STREAM_TASK_DROP, &epset, 0); - if (code != 0) { - taosMemoryFree(pReq); - return -1; - } - - return 0; -} - -int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { - int32_t lv = taosArrayGetSize(pStream->tasks); - for (int32_t i = 0; i < lv; i++) { - SArray *pTasks = taosArrayGetP(pStream->tasks, i); - int32_t sz = taosArrayGetSize(pTasks); - for (int32_t j = 0; j < sz; j++) { - SStreamTask *pTask = taosArrayGetP(pTasks, j); - if (mndPersistTaskDropReq(pMnode, pTrans, pTask) < 0) { - return -1; - } - } - } - return 0; -} - static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { // check for number of existed tasks int32_t numOfStream = 0; SStreamObj *pStream = NULL; @@ -1200,7 +1156,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pStream->uid); // drop all tasks - if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { + if (mndStreamSetDropAction(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); @@ -1264,7 +1220,7 @@ int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { return -1; } else { #if 0 - if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { + if (mndStreamSetDropAction(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to drop task since %s", pStream->name, terrstr()); sdbRelease(pMnode->pSdb, pStream); sdbCancelFetch(pSdb, pIter); diff --git a/source/dnode/mnode/impl/src/mndStreamHb.c b/source/dnode/mnode/impl/src/mndStreamHb.c index e4599edbd4..5a6faadebb 100644 --- a/source/dnode/mnode/impl/src/mndStreamHb.c +++ b/source/dnode/mnode/impl/src/mndStreamHb.c @@ -16,6 +16,12 @@ #include "mndStream.h" #include "mndTrans.h" +typedef struct SFailedCheckpointInfo { + int64_t streamUid; + int64_t checkpointId; + int32_t transId; +} SFailedCheckpointInfo; + static void doExtractTasksFromStream(SMnode *pMnode) { SSdb *pSdb = pMnode->pSdb; SStreamObj *pStream = NULL; @@ -177,10 +183,51 @@ static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { return TSDB_CODE_SUCCESS; } +static int32_t mndDropOrphanTasks(SMnode* pMnode, SArray* pList) { + SOrphanTask* pTask = taosArrayGet(pList, 0); + + // check if it is conflict with other trans in both sourceDb and targetDb. + bool conflict = mndStreamTransConflictCheck(pMnode, pTask->streamId, MND_STREAM_DROP_NAME, false); + if (conflict) { + return -1; + } + + SStreamObj dummyObj = {.uid = pTask->streamId, .sourceDb = "", .targetSTbName = ""}; + STrans* pTrans = doCreateTrans(pMnode, &dummyObj, NULL, MND_STREAM_DROP_NAME, "drop stream"); + if (pTrans == NULL) { + mError("failed to create trans to drop orphan tasks since %s", terrstr()); + return -1; + } + + int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pTask->streamId); + + // drop all tasks + if (mndStreamSetDropActionFromList(pMnode, pTrans, pList) < 0) { + mError("failed to create trans to drop orphan tasks since %s", terrstr()); + mndTransDrop(pTrans); + return -1; + } + + // drop stream + if (mndPersistTransLog(&dummyObj, pTrans, SDB_STATUS_DROPPED) < 0) { + mndTransDrop(pTrans); + return -1; + } + + if (mndTransPrepare(pMnode, pTrans) != 0) { + mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); + mndTransDrop(pTrans); + return -1; + } + + return 0; +} + int32_t mndProcessStreamHb(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SStreamHbMsg req = {0}; - SArray *pList = taosArrayInit(4, sizeof(SFailedCheckpointInfo)); + SArray *pFailedTasks = taosArrayInit(4, sizeof(SFailedCheckpointInfo)); + SArray *pOrphanTasks = taosArrayInit(3, sizeof(SOrphanTask)); SDecoder decoder = {0}; tDecoderInit(&decoder, pReq->pCont, pReq->contLen); @@ -198,8 +245,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { taosThreadMutexLock(&execInfo.lock); // extract stream task list - int32_t numOfExisted = taosHashGetSize(execInfo.pTaskMap); - if (numOfExisted == 0) { + if (taosHashGetSize(execInfo.pTaskMap) == 0) { doExtractTasksFromStream(pMnode); } @@ -218,6 +264,9 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { STaskStatusEntry *pTaskEntry = taosHashGet(execInfo.pTaskMap, &p->id, sizeof(p->id)); if (pTaskEntry == NULL) { mError("s-task:0x%" PRIx64 " not found in mnode task list", p->id.taskId); + + SOrphanTask oTask = {.streamId = p->id.streamId, .taskId = p->id.taskId, .nodeId = p->nodeId}; + taosArrayPush(pOrphanTasks, &oTask); continue; } @@ -240,15 +289,13 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { } streamTaskStatusCopy(pTaskEntry, p); - if (p->checkpointId != 0) { - if (p->checkpointFailed) { - mError("stream task:0x%" PRIx64 " checkpointId:%" PRIx64 " transId:%d failed, kill it", p->id.taskId, - p->checkpointId, p->chkpointTransId); + if ((p->checkpointId != 0) && p->checkpointFailed) { + mError("stream task:0x%" PRIx64 " checkpointId:%" PRIx64 " transId:%d failed, kill it", p->id.taskId, + p->checkpointId, p->chkpointTransId); - SFailedCheckpointInfo info = { - .transId = p->chkpointTransId, .checkpointId = p->checkpointId, .streamUid = p->id.streamId}; - addIntoCheckpointList(pList, &info); - } + SFailedCheckpointInfo info = { + .transId = p->chkpointTransId, .checkpointId = p->checkpointId, .streamUid = p->id.streamId}; + addIntoCheckpointList(pFailedTasks, &info); } } @@ -266,15 +313,15 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { // current checkpoint is failed, rollback from the checkpoint trans // kill the checkpoint trans and then set all tasks status to be normal - if (taosArrayGetSize(pList) > 0) { + if (taosArrayGetSize(pFailedTasks) > 0) { bool allReady = true; SArray *p = mndTakeVgroupSnapshot(pMnode, &allReady); taosArrayDestroy(p); if (allReady || snodeChanged) { // if the execInfo.activeCheckpoint == 0, the checkpoint is restoring from wal - for(int32_t i = 0; i < taosArrayGetSize(pList); ++i) { - SFailedCheckpointInfo *pInfo = taosArrayGet(pList, i); + for(int32_t i = 0; i < taosArrayGetSize(pFailedTasks); ++i) { + SFailedCheckpointInfo *pInfo = taosArrayGet(pFailedTasks, i); mInfo("checkpointId:%" PRId64 " transId:%d failed, issue task-reset trans to reset all tasks status", pInfo->checkpointId, pInfo->transId); @@ -285,9 +332,16 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { } } + // handle the orphan tasks that are invalid but not removed in some vnodes or snode due to some unknown errors. + if (taosArrayGetSize(pOrphanTasks) > 0) { + mndDropOrphanTasks(pMnode, pOrphanTasks); + } + taosThreadMutexUnlock(&execInfo.lock); streamMetaClearHbMsg(&req); - taosArrayDestroy(pList); + taosArrayDestroy(pFailedTasks); + taosArrayDestroy(pOrphanTasks); + return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/mnode/impl/src/mndStreamUtil.c b/source/dnode/mnode/impl/src/mndStreamUtil.c index 2ee73528e0..5d6e34856b 100644 --- a/source/dnode/mnode/impl/src/mndStreamUtil.c +++ b/source/dnode/mnode/impl/src/mndStreamUtil.c @@ -18,6 +18,66 @@ #include "tmisce.h" #include "mndVgroup.h" +typedef struct SStreamTaskIter { + SStreamObj *pStream; + int32_t level; + int32_t ordinalIndex; + int32_t totalLevel; + SStreamTask *pTask; +} SStreamTaskIter; + +SStreamTaskIter* createTaskIter(SStreamObj* pStream) { + SStreamTaskIter* pIter = taosMemoryCalloc(1, sizeof(SStreamTaskIter)); + if (pIter == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + pIter->level = -1; + pIter->ordinalIndex = 0; + pIter->pStream = pStream; + pIter->totalLevel = taosArrayGetSize(pStream->tasks); + pIter->pTask = NULL; + + return pIter; +} + +bool taskIterNextTask(SStreamTaskIter* pIter) { + if (pIter->level >= pIter->totalLevel) { + pIter->pTask = NULL; + return false; + } + + if (pIter->level == -1) { + pIter->level += 1; + } + + while(pIter->level < pIter->totalLevel) { + SArray *pList = taosArrayGetP(pIter->pStream->tasks, pIter->level); + if (pIter->ordinalIndex >= taosArrayGetSize(pList)) { + pIter->level += 1; + pIter->ordinalIndex = 0; + pIter->pTask = NULL; + continue; + } + + pIter->pTask = taosArrayGetP(pList, pIter->ordinalIndex); + pIter->ordinalIndex += 1; + return true; + } + + pIter->pTask = NULL; + return false; +} + +SStreamTask* taskIterGetCurrent(SStreamTaskIter* pIter) { + return pIter->pTask; +} + +void destroyTaskIter(SStreamTaskIter* pIter) { + taosMemoryFree(pIter); +} + SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; @@ -251,24 +311,103 @@ static int32_t doSetPauseAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTa } int32_t mndStreamSetPauseAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { - SArray *tasks = pStream->tasks; + SStreamTaskIter *pIter = createTaskIter(pStream); - int32_t size = taosArrayGetSize(tasks); - for (int32_t i = 0; i < size; i++) { - SArray *pTasks = taosArrayGetP(tasks, i); - int32_t sz = taosArrayGetSize(pTasks); - for (int32_t j = 0; j < sz; j++) { - SStreamTask *pTask = taosArrayGetP(pTasks, j); - - if (doSetPauseAction(pMnode, pTrans, pTask) < 0) { - return -1; - } - - if (atomic_load_8(&pTask->status.taskStatus) != TASK_STATUS__PAUSE) { - atomic_store_8(&pTask->status.statusBackup, pTask->status.taskStatus); - atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); - } + while (taskIterNextTask(pIter)) { + SStreamTask *pTask = taskIterGetCurrent(pIter); + if (doSetPauseAction(pMnode, pTrans, pTask) < 0) { + destroyTaskIter(pIter); + return -1; } + + if (atomic_load_8(&pTask->status.taskStatus) != TASK_STATUS__PAUSE) { + atomic_store_8(&pTask->status.statusBackup, pTask->status.taskStatus); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE); + } + } + + destroyTaskIter(pIter); + return 0; +} + +static int32_t doSetDropAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) { + SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pReq->head.vgId = htonl(pTask->info.nodeId); + pReq->taskId = pTask->id.taskId; + pReq->streamId = pTask->id.streamId; + + SEpSet epset = {0}; + bool hasEpset = false; + int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); + if (code != TSDB_CODE_SUCCESS || !hasEpset) { // no valid epset, return directly without redoAction + terrno = code; + return -1; + } + + // The epset of nodeId of this task may have been expired now, let's use the newest epset from mnode. + code = setTransAction(pTrans, pReq, sizeof(SVDropStreamTaskReq), TDMT_STREAM_TASK_DROP, &epset, 0); + if (code != 0) { + taosMemoryFree(pReq); + return -1; + } + + return 0; +} + +int32_t mndStreamSetDropAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { + SStreamTaskIter *pIter = createTaskIter(pStream); + + while(taskIterNextTask(pIter)) { + SStreamTask *pTask = taskIterGetCurrent(pIter); + if (doSetDropAction(pMnode, pTrans, pTask) < 0) { + destroyTaskIter(pIter); + return -1; + } + } + destroyTaskIter(pIter); + return 0; +} + +static int32_t doSetDropActionFromId(SMnode *pMnode, STrans *pTrans, SOrphanTask* pTask) { + SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); + if (pReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pReq->head.vgId = htonl(pTask->nodeId); + pReq->taskId = pTask->taskId; + pReq->streamId = pTask->streamId; + + SEpSet epset = {0}; + bool hasEpset = false; + int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->taskId, pTask->nodeId); + if (code != TSDB_CODE_SUCCESS || (!hasEpset)) { // no valid epset, return directly without redoAction + terrno = code; + taosMemoryFree(pReq); + return -1; + } + + // The epset of nodeId of this task may have been expired now, let's use the newest epset from mnode. + code = setTransAction(pTrans, pReq, sizeof(SVDropStreamTaskReq), TDMT_STREAM_TASK_DROP, &epset, 0); + if (code != 0) { + taosMemoryFree(pReq); + return -1; + } + + return 0; +} + +int32_t mndStreamSetDropActionFromList(SMnode *pMnode, STrans *pTrans, SArray* pList) { + for(int32_t i = 0; i < taosArrayGetSize(pList); ++i) { + SOrphanTask* pTask = taosArrayGet(pList, i); + mDebug("add drop task:0x%x action to drop orphan task", pTask->taskId); + doSetDropActionFromId(pMnode, pTrans, pTask); } return 0; } \ No newline at end of file