diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index dff212b15c..5f322be99b 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -686,7 +686,7 @@ int32_t streamExecScanHistoryInFuture(SStreamTask* pTask, int32_t idleDuration); bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t latestVer); // checkpoint related -int32_t streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTransId, int64_t* pCheckpointId); +void streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTransId, int64_t* pCheckpointId); int32_t streamTaskSetActiveCheckpointInfo(SStreamTask* pTask, int64_t activeCheckpointId); int32_t streamTaskSetFailedChkptInfo(SStreamTask* pTask, int32_t transId, int64_t checkpointId); bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId); @@ -770,9 +770,9 @@ bool streamMetaAllTasksReady(const SStreamMeta* pMeta); int32_t streamTaskSendRestoreChkptMsg(SStreamTask* pTask); // timer -tmr_h streamTimerGetInstance(); -void streamTmrReset(TAOS_TMR_CALLBACK fp, int32_t mseconds, void* param, void* handle, tmr_h* pTmrId, int32_t vgId, - const char* pMsg); +int32_t streamTimerGetInstance(tmr_h* pTmr); +void streamTmrReset(TAOS_TMR_CALLBACK fp, int32_t mseconds, void* param, void* handle, tmr_h* pTmrId, int32_t vgId, + const char* pMsg); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); @@ -809,6 +809,9 @@ void streamTaskSendRetrieveRsp(SStreamRetrieveReq* pReq, SRpcMsg* pRsp); int32_t streamProcessHeartbeatRsp(SStreamMeta* pMeta, SMStreamHbRspMsg* pRsp); int32_t streamTaskSendCheckpointsourceRsp(SStreamTask* pTask); +void streamMutexLock(TdThreadMutex *pMutex); +void streamMutexUnlock(TdThreadMutex *pMutex); +void streamMutexDestroy(TdThreadMutex *pMutex); #ifdef __cplusplus } diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index d12db23a43..6746d0343b 100644 
--- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -2491,7 +2491,7 @@ int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, cha .tags = tags, .stbFullName = stbFullName, .stbFullNameLen = strlen(stbFullName), .ctbShortName = cname}; int32_t code = buildChildTableName(&rname); - if(code != TSDB_CODE_SUCCESS){ + if (code != TSDB_CODE_SUCCESS) { return code; } taosArrayDestroy(tags); diff --git a/source/common/src/tmisce.c b/source/common/src/tmisce.c index 7b349e91b0..154fcc3f6b 100644 --- a/source/common/src/tmisce.c +++ b/source/common/src/tmisce.c @@ -150,7 +150,6 @@ int32_t epsetToStr(const SEpSet* pEpSet, char* pBuf, int32_t cap) { cap -= nwrite; for (int _i = 0; (_i < pEpSet->numOfEps) && (cap > 0); _i++) { - int32_t ret = 0; if (_i == pEpSet->numOfEps - 1) { ret = snprintf(pBuf + nwrite, cap, "%d. %s:%d", _i, pEpSet->eps[_i].fqdn, pEpSet->eps[_i].port); } else { diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 0b6b6a9ef2..bd0d97e34d 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -96,57 +96,58 @@ typedef struct STaskChkptInfo { int8_t dropHTask; }STaskChkptInfo; -int32_t mndInitStream(SMnode *pMnode); -void mndCleanupStream(SMnode *pMnode); -SStreamObj *mndAcquireStream(SMnode *pMnode, char *streamName); -void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); -int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); -int32_t mndPersistStream(STrans *pTrans, SStreamObj *pStream); -int32_t mndStreamRegisterTrans(STrans *pTrans, const char *pTransName, int64_t streamId); -int32_t mndStreamClearFinishedTrans(SMnode *pMnode, int32_t *pNumOfActiveChkpt); -bool mndStreamTransConflictCheck(SMnode *pMnode, int64_t streamId, const char *pTransName, bool lock); -int32_t mndStreamGetRelTrans(SMnode *pMnode, int64_t streamId); +int32_t mndInitStream(SMnode *pMnode); +void mndCleanupStream(SMnode 
*pMnode); +int32_t mndAcquireStream(SMnode *pMnode, char *streamName, SStreamObj **pStream); +void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); +int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); +int32_t mndPersistStream(STrans *pTrans, SStreamObj *pStream); +int32_t mndStreamRegisterTrans(STrans *pTrans, const char *pTransName, int64_t streamId); +int32_t mndStreamClearFinishedTrans(SMnode *pMnode, int32_t *pNumOfActiveChkpt); +bool mndStreamTransConflictCheck(SMnode *pMnode, int64_t streamId, const char *pTransName, bool lock); +int32_t mndStreamGetRelTrans(SMnode *pMnode, int64_t streamId); int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams); int32_t mndGetNumOfStreamTasks(const SStreamObj *pStream); -SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady); +int32_t mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady, SArray** pList); void mndKillTransImpl(SMnode *pMnode, int32_t transId, const char *pDbName); int32_t setTransAction(STrans *pTrans, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset, int32_t retryCode, int32_t acceptCode); -STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, ETrnConflct conflict, const char *name, const char *pMsg); +int32_t doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, ETrnConflct conflict, const char *name, + const char *pMsg, STrans **pTrans1); int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans, int32_t status); SSdbRaw *mndStreamActionEncode(SStreamObj *pStream); void killAllCheckpointTrans(SMnode *pMnode, SVgroupChangeInfo *pChangeInfo); int32_t mndStreamSetUpdateEpsetAction(SMnode *pMnode, SStreamObj *pStream, SVgroupChangeInfo *pInfo, STrans *pTrans); -SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId); -int32_t extractNodeEpset(SMnode *pMnode, SEpSet *pEpSet, bool *hasEpset, int32_t taskId, int32_t nodeId); -int32_t mndProcessStreamHb(SRpcMsg *pReq); -void 
saveTaskAndNodeInfoIntoBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); -int32_t extractStreamNodeList(SMnode *pMnode); -int32_t mndStreamSetResumeAction(STrans *pTrans, SMnode *pMnode, SStreamObj *pStream, int8_t igUntreated); -int32_t mndStreamSetPauseAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); -int32_t mndStreamSetDropAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); -int32_t mndStreamSetDropActionFromList(SMnode *pMnode, STrans *pTrans, SArray *pList); -int32_t mndStreamSetResetTaskAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); -int32_t mndCreateStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream); -int32_t mndStreamSetUpdateChkptAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); -int32_t mndCreateStreamChkptInfoUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SArray *pChkptInfoList); -int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq); -int32_t mndCreateSetConsensusChkptIdTrans(SMnode *pMnode, SStreamObj *pStream, int32_t taskId, int64_t checkpointId, - int64_t ts); -void removeTasksInBuf(SArray *pTaskIds, SStreamExecInfo *pExecInfo); +int32_t mndGetStreamObj(SMnode *pMnode, int64_t streamId, SStreamObj** pStream); +int32_t extractNodeEpset(SMnode *pMnode, SEpSet *pEpSet, bool *hasEpset, int32_t taskId, int32_t nodeId); +int32_t mndProcessStreamHb(SRpcMsg *pReq); +void saveTaskAndNodeInfoIntoBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); +int32_t extractStreamNodeList(SMnode *pMnode); +int32_t mndStreamSetResumeAction(STrans *pTrans, SMnode *pMnode, SStreamObj *pStream, int8_t igUntreated); +int32_t mndStreamSetPauseAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); +int32_t mndStreamSetDropAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); +int32_t mndStreamSetDropActionFromList(SMnode *pMnode, STrans *pTrans, SArray *pList); +int32_t mndStreamSetResetTaskAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); +int32_t mndCreateStreamResetStatusTrans(SMnode 
*pMnode, SStreamObj *pStream); +int32_t mndStreamSetUpdateChkptAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); +int32_t mndCreateStreamChkptInfoUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SArray *pChkptInfoList); +int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq); +int32_t mndCreateSetConsensusChkptIdTrans(SMnode *pMnode, SStreamObj *pStream, int32_t taskId, int64_t checkpointId, + int64_t ts); +void removeTasksInBuf(SArray *pTaskIds, SStreamExecInfo *pExecInfo); -SStreamTaskIter *createStreamTaskIter(SStreamObj *pStream); -void destroyStreamTaskIter(SStreamTaskIter *pIter); -bool streamTaskIterNextTask(SStreamTaskIter *pIter); -SStreamTask *streamTaskIterGetCurrent(SStreamTaskIter *pIter); -void mndInitExecInfo(); -void mndInitStreamExecInfo(SMnode *pMnode, SStreamExecInfo *pExecInfo); -int32_t removeExpiredNodeEntryAndTaskInBuf(SArray *pNodeSnapshot); -void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); +int32_t createStreamTaskIter(SStreamObj *pStream, SStreamTaskIter **pIter); +void destroyStreamTaskIter(SStreamTaskIter *pIter); +bool streamTaskIterNextTask(SStreamTaskIter *pIter); +int32_t streamTaskIterGetCurrent(SStreamTaskIter *pIter, SStreamTask **pTask); +int32_t mndInitExecInfo(); +void mndInitStreamExecInfo(SMnode *pMnode, SStreamExecInfo *pExecInfo); +int32_t removeExpiredNodeEntryAndTaskInBuf(SArray *pNodeSnapshot); +void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); -SCheckpointConsensusInfo *mndGetConsensusInfo(SHashObj *pHash, int64_t streamId, int32_t numOfTasks); +int32_t mndGetConsensusInfo(SHashObj *pHash, int64_t streamId, int32_t numOfTasks, SCheckpointConsensusInfo **pInfo); void mndAddConsensusTasks(SCheckpointConsensusInfo *pInfo, const SRestoreCheckpointInfo *pRestoreInfo); void mndClearConsensusRspEntry(SCheckpointConsensusInfo *pInfo); int64_t mndClearConsensusCheckpointId(SHashObj* pHash, int64_t streamId); diff --git a/source/dnode/mnode/impl/src/mndSma.c 
b/source/dnode/mnode/impl/src/mndSma.c index 108bafeb09..704b4a5ea9 100644 --- a/source/dnode/mnode/impl/src/mndSma.c +++ b/source/dnode/mnode/impl/src/mndSma.c @@ -815,8 +815,8 @@ static int32_t mndProcessCreateSmaReq(SRpcMsg *pReq) { char streamName[TSDB_TABLE_FNAME_LEN] = {0}; mndGetStreamNameFromSmaName(streamName, createReq.name); - pStream = mndAcquireStream(pMnode, streamName); - if (pStream != NULL) { + code = mndAcquireStream(pMnode, streamName, &pStream); + if (pStream != NULL || code == 0) { mError("sma:%s, failed to create since stream:%s already exist", createReq.name, streamName); code = TSDB_CODE_MND_STREAM_ALREADY_EXIST; goto _OVER; @@ -991,8 +991,10 @@ static int32_t mndDropSma(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb, SSmaObj *p char streamName[TSDB_TABLE_FNAME_LEN] = {0}; mndGetStreamNameFromSmaName(streamName, pSma->name); - SStreamObj *pStream = mndAcquireStream(pMnode, streamName); - if (pStream == NULL || pStream->smaId != pSma->uid) { + SStreamObj *pStream = NULL; + + code = mndAcquireStream(pMnode, streamName, &pStream); + if (pStream == NULL || pStream->smaId != pSma->uid || code != 0) { sdbRelease(pMnode->pSdb, pStream); goto _OVER; } else { @@ -1050,10 +1052,11 @@ int32_t mndDropSmasByStb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SStbObj *p char streamName[TSDB_TABLE_FNAME_LEN] = {0}; mndGetStreamNameFromSmaName(streamName, pSma->name); - SStreamObj *pStream = mndAcquireStream(pMnode, streamName); - if (pStream != NULL && pStream->smaId == pSma->uid) { + SStreamObj *pStream = NULL; + code = mndAcquireStream(pMnode, streamName, &pStream); + if ((pStream != NULL && pStream->smaId == pSma->uid) || code != 0) { if ((code = mndStreamSetDropAction(pMnode, pTrans, pStream)) < 0) { - mError("stream:%s, failed to drop task since %s", pStream->name, tstrerror(code)); + mError("stream:%s, failed to drop task since %s", pStream->name, terrstr()); mndReleaseStream(pMnode, pStream); goto _OVER; } @@ -1800,6 +1803,7 @@ static int32_t 
mndProcessCreateTSMAReq(SRpcMsg* pReq) { code = 0; goto _OVER; } + if (pSma) { code = TSDB_CODE_MND_SMA_ALREADY_EXIST; goto _OVER; @@ -1813,8 +1817,8 @@ static int32_t mndProcessCreateTSMAReq(SRpcMsg* pReq) { goto _OVER; } - pStream = mndAcquireStream(pMnode, streamName); - if (pStream != NULL) { + code = mndAcquireStream(pMnode, streamName, &pStream); + if (pStream != NULL || code != TSDB_CODE_MND_STREAM_NOT_EXIST) { mError("tsma:%s, failed to create since stream:%s already exist", createReq.name, streamName); code = TSDB_CODE_MND_SMA_ALREADY_EXIST; goto _OVER; @@ -2292,7 +2296,7 @@ static int32_t mndGetSomeTsmas(SMnode* pMnode, STableTSMAInfoRsp* pRsp, tsmaFilt SSmaObj * pBaseTsma = NULL; SSdb * pSdb = pMnode->pSdb; void * pIter = NULL; - SStreamObj * pStreamObj = NULL; + SStreamObj * pStream = NULL; SStbObj * pStb = NULL; while (1) { @@ -2314,14 +2318,16 @@ static int32_t mndGetSomeTsmas(SMnode* pMnode, STableTSMAInfoRsp* pRsp, tsmaFilt char streamName[TSDB_TABLE_FNAME_LEN] = {0}; tNameFromString(&smaName, pSma->name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); sprintf(streamName, "%d.%s", smaName.acctId, smaName.tname); - pStreamObj = mndAcquireStream(pMnode, streamName); - if (!pStreamObj) { + pStream = NULL; + + code = mndAcquireStream(pMnode, streamName, &pStream); + if (!pStream || (code != 0)) { sdbRelease(pSdb, pSma); continue; } - int64_t streamId = pStreamObj->uid; - mndReleaseStream(pMnode, pStreamObj); + int64_t streamId = pStream->uid; + mndReleaseStream(pMnode, pStream); STableTSMAInfo *pTsma = taosMemoryCalloc(1, sizeof(STableTSMAInfo)); if (!pTsma) { diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index d57dc6e52e..df8800aee4 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -134,17 +134,18 @@ int32_t mndInitStream(SMnode *pMnode) { mndAddShowRetrieveHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndRetrieveStreamTask); mndAddShowFreeIterHandle(pMnode, 
TSDB_MGMT_TABLE_STREAM_TASKS, mndCancelGetNextStreamTask); - mndInitExecInfo(); - - if (sdbSetTable(pMnode->pSdb, table) != 0) { - return -1; + int32_t code = mndInitExecInfo(); + if (code) { + return code; } - if (sdbSetTable(pMnode->pSdb, tableSeq) != 0) { - return -1; + code = sdbSetTable(pMnode->pSdb, table); + if (code) { + return terrno; } - return 0; + code = sdbSetTable(pMnode->pSdb, tableSeq); + return code; } void mndCleanupStream(SMnode *pMnode) { @@ -251,13 +252,15 @@ static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStream return 0; } -SStreamObj *mndAcquireStream(SMnode *pMnode, char *streamName) { - SSdb *pSdb = pMnode->pSdb; - SStreamObj *pStream = sdbAcquire(pSdb, SDB_STREAM, streamName); - if (pStream == NULL && terrno == TSDB_CODE_SDB_OBJ_NOT_THERE) { +int32_t mndAcquireStream(SMnode *pMnode, char *streamName, SStreamObj **pStream) { + terrno = 0; + + SSdb *pSdb = pMnode->pSdb; + (*pStream) = sdbAcquire(pSdb, SDB_STREAM, streamName); + if ((*pStream) == NULL && terrno == TSDB_CODE_SDB_OBJ_NOT_THERE) { terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; } - return pStream; + return terrno; } void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream) { @@ -530,9 +533,21 @@ int32_t mndPersistTaskDeployReq(STrans *pTrans, SStreamTask *pTask) { } int32_t mndPersistStreamTasks(STrans *pTrans, SStreamObj *pStream) { - SStreamTaskIter *pIter = createStreamTaskIter(pStream); + SStreamTaskIter *pIter = NULL; + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + mError("failed to create task iter for stream:%s", pStream->name); + return code; + } + while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code) { + destroyStreamTaskIter(pIter); + return code; + } + if (mndPersistTaskDeployReq(pTrans, pTask) < 0) { destroyStreamTaskIter(pIter); return -1; @@ -706,7 +721,7 @@ static int32_t 
mndProcessCreateStreamReq(SRpcMsg *pReq) { char *sql = NULL; int32_t sqlLen = 0; const char *pMsg = "create stream tasks on dnodes"; - + int32_t code = 0; terrno = TSDB_CODE_SUCCESS; SCMCreateStreamReq createReq = {0}; @@ -726,8 +741,8 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - pStream = mndAcquireStream(pMnode, createReq.name); - if (pStream != NULL) { + code = mndAcquireStream(pMnode, createReq.name, &pStream); + if (pStream != NULL || code == 0) { if (createReq.igExists) { mInfo("stream:%s, already exist, ignore exist is set", createReq.name); goto _OVER; @@ -760,8 +775,9 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - STrans *pTrans = doCreateTrans(pMnode, &streamObj, pReq, TRN_CONFLICT_DB, MND_STREAM_CREATE_NAME, pMsg); - if (pTrans == NULL) { + STrans *pTrans = NULL; + code = doCreateTrans(pMnode, &streamObj, pReq, TRN_CONFLICT_DB, MND_STREAM_CREATE_NAME, pMsg, &pTrans); + if (pTrans == NULL || code) { goto _OVER; } @@ -802,11 +818,10 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { // add into buffer firstly // to make sure when the hb from vnode arrived, the newly created tasks have been in the task map already. - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); mDebug("stream stream:%s start to register tasks into task nodeList and set initial checkpointId", createReq.name); saveTaskAndNodeInfoIntoBuf(&streamObj, &execInfo); -// mndRegisterConsensusChkptId(execInfo.pStreamConsensus, streamObj.uid); - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); // execute creation if (mndTransPrepare(pMnode, pTrans) != 0) { @@ -867,7 +882,7 @@ int64_t mndStreamGenChkptId(SMnode *pMnode, bool lock) { { // check the max checkpoint id from all vnodes. 
int64_t maxCheckpointId = -1; if (lock) { - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); } for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) { @@ -888,7 +903,7 @@ int64_t mndStreamGenChkptId(SMnode *pMnode, bool lock) { } if (lock) { - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); } if (maxCheckpointId > maxChkptId) { @@ -989,11 +1004,13 @@ static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStre return -1; } - STrans *pTrans = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_CHECKPOINT_NAME, - "gen checkpoint for stream"); - if (pTrans == NULL) { + STrans *pTrans = NULL; + code = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_CHECKPOINT_NAME, + "gen checkpoint for stream", &pTrans); + if (pTrans == NULL || code) { + code = TSDB_CODE_MND_TRANS_CONFLICT; mError("failed to checkpoint of stream name%s, checkpointId: %" PRId64 ", reason:%s", pStream->name, checkpointId, - tstrerror(TSDB_CODE_MND_TRANS_CONFLICT)); + tstrerror(code)); goto _ERR; } @@ -1033,7 +1050,7 @@ static int32_t mndProcessStreamCheckpointTrans(SMnode *pMnode, SStreamObj *pStre taosWUnLockLatch(&pStream->lock); if ((code = mndPersistTransLog(pStream, pTrans, SDB_STATUS_READY)) != TSDB_CODE_SUCCESS) { - return code; + goto _ERR; } if ((code = mndTransPrepare(pMnode, pTrans)) != TSDB_CODE_SUCCESS) { @@ -1057,13 +1074,13 @@ int32_t extractStreamNodeList(SMnode *pMnode) { static bool taskNodeIsUpdated(SMnode *pMnode) { // check if the node update happens or not - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); int32_t numOfNodes = extractStreamNodeList(pMnode); if (numOfNodes == 0) { mDebug("stream task node change checking done, no vgroups exist, do nothing"); execInfo.ts = taosGetTimestampSec(); - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); return false; } @@ -1071,17 +1088,22 @@ static bool 
taskNodeIsUpdated(SMnode *pMnode) { SNodeEntry *pNodeEntry = taosArrayGet(execInfo.pNodeList, i); if (pNodeEntry->stageUpdated) { mDebug("stream task not ready due to node update detected, checkpoint not issued"); - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); return true; } } bool allReady = true; - SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); + SArray *pNodeSnapshot = NULL; + + int32_t code = mndTakeVgroupSnapshot(pMnode, &allReady, &pNodeSnapshot); + if (code) { + mError("failed to get the vgroup snapshot, ignore it and continue"); + } if (!allReady) { mWarn("not all vnodes ready, quit from vnodes status check"); taosArrayDestroy(pNodeSnapshot); - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); return true; } @@ -1097,7 +1119,7 @@ static bool taskNodeIsUpdated(SMnode *pMnode) { mDebug("stream tasks not ready due to node update"); } - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); return nodeUpdated; } @@ -1107,7 +1129,7 @@ static int32_t mndCheckTaskAndNodeStatus(SMnode *pMnode) { return -1; } - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); if (taosArrayGetSize(execInfo.pNodeList) == 0) { mDebug("stream task node change checking done, no vgroups exist, do nothing"); ASSERT(taosArrayGetSize(execInfo.pTaskList) == 0); @@ -1152,7 +1174,7 @@ static int32_t mndCheckTaskAndNodeStatus(SMnode *pMnode) { removeTasksInBuf(pInvalidList, &execInfo); taosArrayDestroy(pInvalidList); - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); return ready ? 
0 : -1; } @@ -1215,14 +1237,14 @@ static int32_t mndProcessStreamCheckpoint(SRpcMsg *pReq) { continue; } - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); int64_t startTs = getStreamTaskLastReadyState(execInfo.pTaskList, pStream->uid); if (startTs != -1 && (now - startTs) < tsStreamCheckpointInterval * 1000) { - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); sdbRelease(pSdb, pStream); continue; } - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); SCheckpointInterval in = {.streamId = pStream->uid, .duration = duration}; taosArrayPush(pList, &in); @@ -1265,8 +1287,9 @@ static int32_t mndProcessStreamCheckpoint(SRpcMsg *pReq) { for (int32_t i = 0; i < numOfQual; ++i) { SCheckpointInterval *pCheckpointInfo = taosArrayGet(pList, i); - SStreamObj *p = mndGetStreamObj(pMnode, pCheckpointInfo->streamId); - if (p != NULL) { + SStreamObj *p = NULL; + code = mndGetStreamObj(pMnode, pCheckpointInfo->streamId, &p); + if (p != NULL && code == 0) { code = mndProcessStreamCheckpointTrans(pMnode, p, checkpointId, 1, true); sdbRelease(pSdb, p); @@ -1289,6 +1312,7 @@ static int32_t mndProcessStreamCheckpoint(SRpcMsg *pReq) { static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SStreamObj *pStream = NULL; + int32_t code = 0; SMDropStreamReq dropReq = {0}; if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) { @@ -1299,8 +1323,8 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { mDebug("recv drop stream:%s msg", dropReq.name); - pStream = mndAcquireStream(pMnode, dropReq.name); - if (pStream == NULL) { + code = mndAcquireStream(pMnode, dropReq.name, &pStream); + if (pStream == NULL || code != 0) { if (dropReq.igNotExists) { mInfo("stream:%s not exist, ignore not exist is set, drop stream exec done with success", dropReq.name); sdbRelease(pMnode->pSdb, pStream); @@ -1356,15 +1380,16 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { 
return -1; } - STrans *pTrans = doCreateTrans(pMnode, pStream, pReq, TRN_CONFLICT_NOTHING, MND_STREAM_DROP_NAME, "drop stream"); - if (pTrans == NULL) { + STrans *pTrans = NULL; + code = doCreateTrans(pMnode, pStream, pReq, TRN_CONFLICT_NOTHING, MND_STREAM_DROP_NAME, "drop stream", &pTrans); + if (pTrans == NULL || code) { mError("stream:%s uid:0x%" PRIx64 " failed to drop since %s", dropReq.name, pStream->uid, terrstr()); sdbRelease(pMnode->pSdb, pStream); tFreeMDropStreamReq(&dropReq); return -1; } - int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pStream->uid); + code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pStream->uid); // drop all tasks if (mndStreamSetDropAction(pMnode, pTrans, pStream) < 0) { @@ -1857,9 +1882,9 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock int32_t numOfRows = 0; SStreamObj *pStream = NULL; - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); mndInitStreamExecInfo(pMnode, &execInfo); - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); while (numOfRows < rowsCapacity) { pShow->pIter = sdbFetch(pSdb, SDB_STREAM, pShow->pIter, (void **)&pStream); @@ -1876,11 +1901,24 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock } // add row for each task - SStreamTaskIter *pIter = createStreamTaskIter(pStream); - while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); + SStreamTaskIter *pIter = NULL; + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + taosRUnLockLatch(&pStream->lock); + sdbRelease(pSdb, pStream); + mError("failed to create task iter for stream:%s", pStream->name); + continue; + } - int32_t code = setTaskAttrInResBlock(pStream, pTask, pBlock, numOfRows); + while (streamTaskIterNextTask(pIter)) { + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code) { + destroyStreamTaskIter(pIter); + break; + } 
+ + code = setTaskAttrInResBlock(pStream, pTask, pBlock, numOfRows); if (code == TSDB_CODE_SUCCESS) { numOfRows++; } @@ -1906,6 +1944,7 @@ static void mndCancelGetNextStreamTask(SMnode *pMnode, void *pIter) { static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SStreamObj *pStream = NULL; + int32_t code = 0; SMPauseStreamReq pauseReq = {0}; if (tDeserializeSMPauseStreamReq(pReq->pCont, pReq->contLen, &pauseReq) < 0) { @@ -1913,9 +1952,8 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { return -1; } - pStream = mndAcquireStream(pMnode, pauseReq.name); - - if (pStream == NULL) { + code = mndAcquireStream(pMnode, pauseReq.name, &pStream); + if (pStream == NULL || code != 0) { if (pauseReq.igNotExists) { mInfo("stream:%s, not exist, not pause stream", pauseReq.name); return 0; @@ -1955,7 +1993,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { { // check for tasks, if tasks are not ready, not allowed to pause bool found = false; bool readyToPause = true; - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) { STaskId *p = taosArrayGet(execInfo.pTaskList, i); @@ -1978,7 +2016,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { found = true; } - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); if (!found) { mError("stream:%s task not report status yet, not ready for pause", pauseReq.name); sdbRelease(pMnode->pSdb, pStream); @@ -1992,42 +2030,49 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { } } - STrans *pTrans = - doCreateTrans(pMnode, pStream, pReq, TRN_CONFLICT_NOTHING, MND_STREAM_PAUSE_NAME, "pause the stream"); - if (pTrans == NULL) { + STrans *pTrans = NULL; + code = doCreateTrans(pMnode, pStream, pReq, TRN_CONFLICT_NOTHING, MND_STREAM_PAUSE_NAME, "pause the stream", &pTrans); + if (pTrans == NULL || code) { mError("stream:%s failed to pause stream since %s", 
pauseReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); - return -1; + return code; } - int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_PAUSE_NAME, pStream->uid); + code = mndStreamRegisterTrans(pTrans, MND_STREAM_PAUSE_NAME, pStream->uid); + if (code) { + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return code; + } // if nodeUpdate happened, not send pause trans - if (mndStreamSetPauseAction(pMnode, pTrans, pStream) < 0) { + code = mndStreamSetPauseAction(pMnode, pTrans, pStream); + if (code) { mError("stream:%s, failed to pause task since %s", pauseReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } // pause stream taosWLockLatch(&pStream->lock); pStream->status = STREAM_STATUS__PAUSE; - if (mndPersistTransLog(pStream, pTrans, SDB_STATUS_READY) < 0) { + code = mndPersistTransLog(pStream, pTrans, SDB_STATUS_READY); + if (code) { taosWUnLockLatch(&pStream->lock); - sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } taosWUnLockLatch(&pStream->lock); - if (mndTransPrepare(pMnode, pTrans) != 0) { + code = mndTransPrepare(pMnode, pTrans); + if (code) { mError("trans:%d, failed to prepare pause stream trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } sdbRelease(pMnode->pSdb, pStream); @@ -2039,6 +2084,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SStreamObj *pStream = NULL; + int32_t code = 0; if ((terrno = grantCheckExpire(TSDB_GRANT_STREAMS)) < 0) { return -1; @@ -2050,9 +2096,8 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { return -1; } - pStream = mndAcquireStream(pMnode, resumeReq.name); - - if (pStream == NULL) { + code = mndAcquireStream(pMnode, resumeReq.name, &pStream); + if (pStream == NULL || code != 0) { if (resumeReq.igNotExists) { 
mInfo("stream:%s not exist, not resume stream", resumeReq.name); sdbRelease(pMnode->pSdb, pStream); @@ -2081,22 +2126,28 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { return -1; } - STrans *pTrans = - doCreateTrans(pMnode, pStream, pReq, TRN_CONFLICT_NOTHING, MND_STREAM_RESUME_NAME, "resume the stream"); - if (pTrans == NULL) { + STrans *pTrans = NULL; + code = + doCreateTrans(pMnode, pStream, pReq, TRN_CONFLICT_NOTHING, MND_STREAM_RESUME_NAME, "resume the stream", &pTrans); + if (pTrans == NULL || code) { mError("stream:%s, failed to resume stream since %s", resumeReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); - return -1; + return code; } - int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_RESUME_NAME, pStream->uid); + code = mndStreamRegisterTrans(pTrans, MND_STREAM_RESUME_NAME, pStream->uid); + if (code) { + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return code; + } // set the resume action if (mndStreamSetResumeAction(pTrans, pMnode, pStream, resumeReq.igUntreated) < 0) { mError("stream:%s, failed to drop task since %s", resumeReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } // resume stream @@ -2107,7 +2158,7 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } taosWUnLockLatch(&pStream->lock); @@ -2115,7 +2166,7 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { mError("trans:%d, failed to prepare pause stream trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } sdbRelease(pMnode->pSdb, pStream); @@ -2189,6 +2240,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange SStreamObj *pStream = NULL; void *pIter = NULL; STrans *pTrans = NULL; + int32_t code = 0; // conflict check for nodeUpdate trans, here we randomly chose one stream to add into 
the trans pool while (1) { @@ -2215,12 +2267,11 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange // here create only one trans if (pTrans == NULL) { - pTrans = - doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_TASK_UPDATE_NAME, "update task epsets"); - if (pTrans == NULL) { + code = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_TASK_UPDATE_NAME, "update task epsets", &pTrans); + if (pTrans == NULL || code) { sdbRelease(pSdb, pStream); sdbCancelFetch(pSdb, pIter); - return terrno; + return terrno = code; } mndStreamRegisterTrans(pTrans, MND_STREAM_TASK_UPDATE_NAME, pStream->uid); @@ -2237,7 +2288,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange mDebug("stream:0x%" PRIx64 " %s involved node changed, create update trans, transId:%d", pStream->uid, pStream->name, pTrans->id); - int32_t code = mndStreamSetUpdateEpsetAction(pMnode, pStream, pChangeInfo, pTrans); + code = mndStreamSetUpdateEpsetAction(pMnode, pStream, pChangeInfo, pTrans); // todo: not continue, drop all and retry again if (code != TSDB_CODE_SUCCESS) { @@ -2252,7 +2303,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange if (code != TSDB_CODE_SUCCESS) { sdbCancelFetch(pSdb, pIter); - return -1; + return code; } } @@ -2261,16 +2312,17 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange return 0; } - if (mndTransPrepare(pMnode, pTrans) != 0) { + code = mndTransPrepare(pMnode, pTrans); + if (code) { mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return 0; + return code; } static int32_t extractNodeListFromStream(SMnode *pMnode, SArray *pNodeList) { @@ -2287,9 +2339,21 @@ static int32_t extractNodeListFromStream(SMnode *pMnode, SArray 
*pNodeList) { taosWLockLatch(&pStream->lock); - SStreamTaskIter *pTaskIter = createStreamTaskIter(pStream); + SStreamTaskIter *pTaskIter = NULL; + int32_t code = createStreamTaskIter(pStream, &pTaskIter); + if (code) { + taosWUnLockLatch(&pStream->lock); + sdbRelease(pSdb, pStream); + mError("failed to create task iter for stream:%s", pStream->name); + continue; + } + while (streamTaskIterNextTask(pTaskIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pTaskIter); + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pTaskIter, &pTask); + if (code) { + break; + } SNodeEntry entry = {.hbTimestamp = -1, .nodeId = pTask->info.nodeId}; epsetAssign(&entry.epset, &pTask->info.epSet); @@ -2336,9 +2400,9 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { SMnode *pMnode = pMsg->info.node; - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); int32_t numOfNodes = extractStreamNodeList(pMnode); - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); if (numOfNodes == 0) { mDebug("end to do stream task(s) node change checking, no stream tasks exist, do nothing"); @@ -2348,7 +2412,13 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { } bool allReady = true; - SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); + SArray *pNodeSnapshot = NULL; + + code = mndTakeVgroupSnapshot(pMnode, &allReady, &pNodeSnapshot); + if (code) { + mError("failed to take the vgroup snapshot, ignore it and continue"); + } + if (!allReady) { taosArrayDestroy(pNodeSnapshot); atomic_store_32(&mndNodeCheckSentinel, 0); @@ -2356,7 +2426,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { return 0; } - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); removeExpiredNodeEntryAndTaskInBuf(pNodeSnapshot); @@ -2380,7 +2450,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { } taosArrayDestroy(pNodeSnapshot); - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); 
taosArrayDestroy(changeInfo.pUpdateNodeList); taosHashCleanup(changeInfo.pDBMap); @@ -2406,9 +2476,19 @@ static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { } void saveTaskAndNodeInfoIntoBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { - SStreamTaskIter *pIter = createStreamTaskIter(pStream); + SStreamTaskIter *pIter = NULL; + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + mError("failed to create task iter for stream:%s", pStream->name); + return; + } + while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code) { + break; + } STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; void *p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); @@ -2478,10 +2558,11 @@ int32_t mndProcessStreamReqCheckpoint(SRpcMsg *pReq) { mDebug("receive stream task checkpoint req msg, vgId:%d, s-task:0x%x", req.nodeId, req.taskId); // register to the stream task done map, if all tasks has sent this kinds of message, start the checkpoint trans. 
- taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); - SStreamObj *pStream = mndGetStreamObj(pMnode, req.streamId); - if (pStream == NULL) { + SStreamObj *pStream = NULL; + int32_t code = mndGetStreamObj(pMnode, req.streamId, &pStream); + if (pStream == NULL || code != 0) { mWarn("failed to find the stream:0x%" PRIx64 ", not handle the checkpoint req, try to acquire in buf", req.streamId); @@ -2492,7 +2573,7 @@ int32_t mndProcessStreamReqCheckpoint(SRpcMsg *pReq) { if (p == NULL) { mError("failed to find the stream:0x%" PRIx64 " in buf, not handle the checkpoint req", req.streamId); terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); return -1; } else { mDebug("s-task:0x%" PRIx64 "-0x%x in buf not in mnode/meta, create stream trans may not complete yet", @@ -2537,7 +2618,7 @@ int32_t mndProcessStreamReqCheckpoint(SRpcMsg *pReq) { mndReleaseStream(pMnode, pStream); } - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); { SRpcMsg rsp = {.code = 0, .info = pReq->info, .contLen = sizeof(SMStreamReqCheckpointRsp)}; @@ -2597,10 +2678,11 @@ int32_t mndProcessCheckpointReport(SRpcMsg *pReq) { req.nodeId, req.taskId, req.checkpointId, req.checkpointVer, req.transId); // register to the stream task done map, if all tasks has sent this kinds of message, start the checkpoint trans. 
- taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); - SStreamObj *pStream = mndGetStreamObj(pMnode, req.streamId); - if (pStream == NULL) { + SStreamObj *pStream = NULL; + int32_t code = mndGetStreamObj(pMnode, req.streamId, &pStream); + if (pStream == NULL || code != 0) { mWarn("failed to find the stream:0x%" PRIx64 ", not handle checkpoint-report, try to acquire in buf", req.streamId); // not in meta-store yet, try to acquire the task in exec buffer @@ -2610,7 +2692,7 @@ int32_t mndProcessCheckpointReport(SRpcMsg *pReq) { if (p == NULL) { mError("failed to find the stream:0x%" PRIx64 " in buf, not handle the checkpoint-report", req.streamId); terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); return -1; } else { mDebug("s-task:0x%" PRIx64 "-0x%x in buf not in mnode/meta, create stream trans may not complete yet", @@ -2642,7 +2724,7 @@ int32_t mndProcessCheckpointReport(SRpcMsg *pReq) { mndReleaseStream(pMnode, pStream); } - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); doSendQuickRsp(&pReq->info, sizeof(SMStreamUpdateChkptRsp), req.nodeId, TSDB_CODE_SUCCESS); return 0; @@ -2707,7 +2789,7 @@ static void doSendQuickRsp(SRpcHandleInfo *pInfo, int32_t msgSize, int32_t vgId, // req.nodeId, req.streamId, req.taskId, req.checkpointId); // // // register to the stream task done map, if all tasks has sent this kinds of message, start the checkpoint trans. 
-// taosThreadMutexLock(&execInfo.lock); +// streamMutexLock(&execInfo.lock); // // // mnode handle the create stream transaction too slow may cause this problem // SStreamObj *pStream = mndGetStreamObj(pMnode, req.streamId); @@ -2721,7 +2803,7 @@ static void doSendQuickRsp(SRpcHandleInfo *pInfo, int32_t msgSize, int32_t vgId, // if (p == NULL) { // mError("failed to find the stream:0x%" PRIx64 " in buf, not handle consensus-checkpointId", req.streamId); // terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; -// taosThreadMutexUnlock(&execInfo.lock); +// streamMutexUnlock(&execInfo.lock); // // doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); // return -1; @@ -2737,7 +2819,7 @@ static void doSendQuickRsp(SRpcHandleInfo *pInfo, int32_t msgSize, int32_t vgId, // // int32_t numOfTasks = (pStream == NULL) ? 0 : mndGetNumOfStreamTasks(pStream); // if ((pStream != NULL) && (pStream->checkpointId == 0)) { // not generated checkpoint yet, return 0 directly -// taosThreadMutexUnlock(&execInfo.lock); +// streamMutexUnlock(&execInfo.lock); // mndCreateSetConsensusChkptIdTrans(pMnode, pStream, req.taskId, 0, req.startTs); // // doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); @@ -2754,7 +2836,7 @@ static void doSendQuickRsp(SRpcHandleInfo *pInfo, int32_t msgSize, int32_t vgId, // SCheckpointConsensusInfo *pInfo = mndGetConsensusInfo(execInfo.pStreamConsensus, req.streamId, numOfTasks); // mndAddConsensusTasks(pInfo, &req); // -// taosThreadMutexUnlock(&execInfo.lock); +// streamMutexUnlock(&execInfo.lock); // doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); // return 0; // } @@ -2764,7 +2846,7 @@ static void doSendQuickRsp(SRpcHandleInfo *pInfo, int32_t msgSize, int32_t vgId, // req.nodeId, req.streamId, pStream->name, chkId, pStream->checkpointId); // mndCreateSetConsensusChkptIdTrans(pMnode, pStream, req.taskId, chkId, req.startTs); // -// taosThreadMutexUnlock(&execInfo.lock); 
+// streamMutexUnlock(&execInfo.lock); // doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); // return 0; // } @@ -2777,7 +2859,7 @@ static void doSendQuickRsp(SRpcHandleInfo *pInfo, int32_t msgSize, int32_t vgId, // mndReleaseStream(pMnode, pStream); // } // -// taosThreadMutexUnlock(&execInfo.lock); +// streamMutexUnlock(&execInfo.lock); // doSendQuickRsp(&pMsg->info, sizeof(SMStreamReqConsensChkptRsp), req.nodeId, terrno); // return 0; //} @@ -2790,15 +2872,21 @@ int32_t mndProcessConsensusInTmr(SRpcMsg *pMsg) { mDebug("start to process consensus-checkpointId in tmr"); bool allReady = true; - SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); + SArray *pNodeSnapshot = NULL; + + int32_t code = mndTakeVgroupSnapshot(pMnode, &allReady, &pNodeSnapshot); taosArrayDestroy(pNodeSnapshot); + if (code) { + mError("failed to get the vgroup snapshot, ignore it and continue"); + } + if (!allReady) { mWarn("not all vnodes are ready, end to process the consensus-checkpointId in tmr process"); taosArrayDestroy(pStreamList); return 0; } - taosThreadMutexLock(&execInfo.lock); + streamMutexLock(&execInfo.lock); void *pIter = NULL; while ((pIter = taosHashIterate(execInfo.pStreamConsensus, pIter)) != NULL) { @@ -2808,8 +2896,9 @@ int32_t mndProcessConsensusInTmr(SRpcMsg *pMsg) { int32_t num = taosArrayGetSize(pInfo->pTaskList); SArray *pList = taosArrayInit(4, sizeof(int32_t)); - SStreamObj *pStream = mndGetStreamObj(pMnode, pInfo->streamId); - if (pStream == NULL) { // stream has been dropped already + SStreamObj *pStream = NULL; + code = mndGetStreamObj(pMnode, pInfo->streamId, &pStream); + if (pStream == NULL || code != 0) { // stream has been dropped already mDebug("stream:0x%" PRIx64 " dropped already, continue", pInfo->streamId); taosArrayDestroy(pList); continue; @@ -2868,14 +2957,14 @@ int32_t mndProcessConsensusInTmr(SRpcMsg *pMsg) { for (int32_t i = 0; i < taosArrayGetSize(pStreamList); ++i) { int64_t *pStreamId = (int64_t 
*)taosArrayGet(pStreamList, i); - mndClearConsensusCheckpointId(execInfo.pStreamConsensus, *pStreamId); + code = mndClearConsensusCheckpointId(execInfo.pStreamConsensus, *pStreamId); } - taosThreadMutexUnlock(&execInfo.lock); + streamMutexUnlock(&execInfo.lock); taosArrayDestroy(pStreamList); mDebug("end to process consensus-checkpointId in tmr"); - return TSDB_CODE_SUCCESS; + return code; } static int32_t mndProcessCreateStreamReqFromMNode(SRpcMsg *pReq) { @@ -2926,32 +3015,41 @@ void addAllStreamTasksIntoBuf(SMnode *pMnode, SStreamExecInfo *pExecInfo) { } int32_t mndCreateStreamChkptInfoUpdateTrans(SMnode *pMnode, SStreamObj *pStream, SArray *pChkptInfoList) { - STrans *pTrans = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_CHKPT_UPDATE_NAME, - "update checkpoint-info"); - if (pTrans == NULL) { - return terrno; + STrans *pTrans = NULL; + int32_t code = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_CHKPT_UPDATE_NAME, + "update checkpoint-info", &pTrans); + if (pTrans == NULL || code) { + sdbRelease(pMnode->pSdb, pStream); + return code; } - /*int32_t code = */ mndStreamRegisterTrans(pTrans, MND_STREAM_CHKPT_UPDATE_NAME, pStream->uid); - int32_t code = mndStreamSetUpdateChkptAction(pMnode, pTrans, pStream); - if (code != 0) { + code = mndStreamRegisterTrans(pTrans, MND_STREAM_CHKPT_UPDATE_NAME, pStream->uid); + if (code){ + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return code; + } + + code = mndStreamSetUpdateChkptAction(pMnode, pTrans, pStream); + if (code) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); return code; } code = mndPersistTransLog(pStream, pTrans, SDB_STATUS_READY); - if (code != TSDB_CODE_SUCCESS) { + if (code) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } - if (mndTransPrepare(pMnode, pTrans) != 0) { + code = mndTransPrepare(pMnode, pTrans); + if (code) { mError("trans:%d, failed to prepare update checkpoint-info meta trans 
since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } sdbRelease(pMnode->pSdb, pStream); diff --git a/source/dnode/mnode/impl/src/mndStreamHb.c b/source/dnode/mnode/impl/src/mndStreamHb.c index bc10ec211d..c5297b5ba8 100644 --- a/source/dnode/mnode/impl/src/mndStreamHb.c +++ b/source/dnode/mnode/impl/src/mndStreamHb.c @@ -61,15 +61,23 @@ void addIntoCheckpointList(SArray *pList, const SFailedCheckpointInfo *pInfo) { } int32_t mndCreateStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { - STrans *pTrans = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_TASK_RESET_NAME, - " reset from failed checkpoint"); - if (pTrans == NULL) { + STrans *pTrans = NULL; + int32_t code = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_TASK_RESET_NAME, + " reset from failed checkpoint", &pTrans); + if (pTrans == NULL || code) { + sdbRelease(pMnode->pSdb, pStream); return terrno; } - /*int32_t code = */ mndStreamRegisterTrans(pTrans, MND_STREAM_TASK_RESET_NAME, pStream->uid); - int32_t code = mndStreamSetResetTaskAction(pMnode, pTrans, pStream); - if (code != 0) { + code = mndStreamRegisterTrans(pTrans, MND_STREAM_TASK_RESET_NAME, pStream->uid); + if (code) { + sdbRelease(pMnode->pSdb, pStream); + mndTransDrop(pTrans); + return code; + } + + code = mndStreamSetResetTaskAction(pMnode, pTrans, pStream); + if (code) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); return code; @@ -79,14 +87,15 @@ int32_t mndCreateStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { if (code != TSDB_CODE_SUCCESS) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } - if (mndTransPrepare(pMnode, pTrans) != 0) { + code = mndTransPrepare(pMnode, pTrans); + if (code != 0) { mError("trans:%d, failed to prepare update stream trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + 
return code; } sdbRelease(pMnode->pSdb, pStream); @@ -99,8 +108,9 @@ int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int64_t streamId, int32_t t int32_t code = TSDB_CODE_SUCCESS; mndKillTransImpl(pMnode, transId, ""); - SStreamObj *pStream = mndGetStreamObj(pMnode, streamId); - if (pStream == NULL) { + SStreamObj *pStream = NULL; + code = mndGetStreamObj(pMnode, streamId, &pStream); + if (pStream == NULL || code != 0) { code = TSDB_CODE_STREAM_TASK_NOT_EXIST; mError("failed to acquire the streamObj:0x%" PRIx64 " to reset checkpoint, may have been dropped", pStream->uid); } else { @@ -159,34 +169,39 @@ int32_t mndDropOrphanTasks(SMnode *pMnode, SArray *pList) { } SStreamObj dummyObj = {.uid = pTask->streamId, .sourceDb = "", .targetSTbName = ""}; - STrans *pTrans = doCreateTrans(pMnode, &dummyObj, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_DROP_NAME, "drop stream"); - if (pTrans == NULL) { + STrans *pTrans = NULL; + int32_t code = + doCreateTrans(pMnode, &dummyObj, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_DROP_NAME, "drop stream", &pTrans); + if (pTrans == NULL || code != 0) { mError("failed to create trans to drop orphan tasks since %s", terrstr()); - return -1; + return code; } - int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pTask->streamId); - + code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pTask->streamId); + if (code) { + return code; + } // drop all tasks - if (mndStreamSetDropActionFromList(pMnode, pTrans, pList) < 0) { + if ((code = mndStreamSetDropActionFromList(pMnode, pTrans, pList)) < 0) { mError("failed to create trans to drop orphan tasks since %s", terrstr()); mndTransDrop(pTrans); - return -1; + return code; } // drop stream - if (mndPersistTransLog(&dummyObj, pTrans, SDB_STATUS_DROPPED) < 0) { + if ((code = mndPersistTransLog(&dummyObj, pTrans, SDB_STATUS_DROPPED)) < 0) { mndTransDrop(pTrans); - return -1; + return code; } - if (mndTransPrepare(pMnode, pTrans) != 0) { + if ((code = mndTransPrepare(pMnode, pTrans)) 
!= 0) { mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); mndTransDrop(pTrans); - return -1; + return code; } + mndTransDrop(pTrans); - return 0; + return code; } int32_t suspendAllStreams(SMnode *pMnode, SRpcHandleInfo *info) { @@ -228,10 +243,11 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { SStreamHbMsg req = {0}; SArray *pFailedChkpt = NULL; SArray *pOrphanTasks = NULL; + int32_t code = 0; - if ((terrno = grantCheckExpire(TSDB_GRANT_STREAMS)) < 0) { + if ((code = grantCheckExpire(TSDB_GRANT_STREAMS)) < 0) { if (suspendAllStreams(pMnode, &pReq->info) < 0) { - return -1; + return code; } } @@ -241,8 +257,8 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (tDecodeStreamHbMsg(&decoder, &req) < 0) { tCleanupStreamHbMsg(&req); tDecoderClear(&decoder); - terrno = TSDB_CODE_INVALID_MSG; - return -1; + code = terrno = TSDB_CODE_INVALID_MSG; + return code; } tDecoderClear(&decoder); @@ -257,12 +273,12 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (!validateHbMsg(execInfo.pNodeList, req.vgId)) { mError("vgId:%d not exists in nodeList buf, discarded", req.vgId); - terrno = TSDB_CODE_INVALID_MSG; + code = terrno = TSDB_CODE_INVALID_MSG; doSendHbMsgRsp(terrno, &pReq->info, req.vgId, req.msgId); taosThreadMutexUnlock(&execInfo.lock); cleanupAfterProcessHbMsg(&req, pFailedChkpt, pOrphanTasks); - return -1; + return code; } int32_t numOfUpdated = taosArrayGetSize(req.pUpdateNodes); @@ -293,11 +309,23 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { .startTs = pChkInfo->consensusTs, }; - SStreamObj *pStream = mndGetStreamObj(pMnode, p->id.streamId); - int32_t numOfTasks = mndGetNumOfStreamTasks(pStream); + SStreamObj *pStream = NULL; + code = mndGetStreamObj(pMnode, p->id.streamId, &pStream); + if (code) { + code = TSDB_CODE_STREAM_TASK_NOT_EXIST; + continue; + } + + int32_t numOfTasks = mndGetNumOfStreamTasks(pStream); + SCheckpointConsensusInfo *pInfo = NULL; + + code = mndGetConsensusInfo(execInfo.pStreamConsensus, p->id.streamId, 
numOfTasks, &pInfo); + if (code == 0) { + mndAddConsensusTasks(pInfo, &cp); + } else { + mError("failed to get consensus checkpoint-info"); + } - SCheckpointConsensusInfo *pInfo = mndGetConsensusInfo(execInfo.pStreamConsensus, p->id.streamId, numOfTasks); - mndAddConsensusTasks(pInfo, &cp); mndReleaseStream(pMnode, pStream); } @@ -338,9 +366,15 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { // kill the checkpoint trans and then set all tasks status to be normal if (taosArrayGetSize(pFailedChkpt) > 0) { bool allReady = true; + if (pMnode != NULL) { - SArray *p = mndTakeVgroupSnapshot(pMnode, &allReady); + SArray *p = NULL; + + code = mndTakeVgroupSnapshot(pMnode, &allReady, &p); taosArrayDestroy(p); + if (code) { + mError("failed to get the vgroup snapshot, ignore it and continue"); + } } else { allReady = false; } @@ -374,7 +408,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { doSendHbMsgRsp(terrno, &pReq->info, req.vgId, req.msgId); cleanupAfterProcessHbMsg(&req, pFailedChkpt, pOrphanTasks); - return TSDB_CODE_SUCCESS; + return terrno; } void mndStreamStartUpdateCheckpointInfo(SMnode *pMnode) { // here reuse the doCheckpointmsg diff --git a/source/dnode/mnode/impl/src/mndStreamTrans.c b/source/dnode/mnode/impl/src/mndStreamTrans.c index f252791618..c0a869fb77 100644 --- a/source/dnode/mnode/impl/src/mndStreamTrans.c +++ b/source/dnode/mnode/impl/src/mndStreamTrans.c @@ -153,27 +153,30 @@ int32_t mndStreamGetRelTrans(SMnode *pMnode, int64_t streamId) { return 0; } -STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, ETrnConflct conflict, const char *name, - const char *pMsg) { - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, conflict, pReq, name); - if (pTrans == NULL) { +int32_t doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, ETrnConflct conflict, const char *name, + const char *pMsg, STrans ** pTrans1) { + *pTrans1 = NULL; + terrno = 0; + + STrans *p = mndTransCreate(pMnode, TRN_POLICY_RETRY, conflict, pReq, 
name); + if (p == NULL) { mError("failed to build trans:%s, reason: %s", name, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; + return terrno; } - mInfo("stream:0x%" PRIx64 " start to build trans %s, transId:%d", pStream->uid, pMsg, pTrans->id); + mInfo("stream:0x%" PRIx64 " start to build trans %s, transId:%d", pStream->uid, pMsg, p->id); - mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetSTbName); - if (mndTransCheckConflict(pMnode, pTrans) != 0) { + mndTransSetDbName(p, pStream->sourceDb, pStream->targetSTbName); + if (mndTransCheckConflict(pMnode, p) != 0) { terrno = TSDB_CODE_MND_TRANS_CONFLICT; mError("failed to build trans:%s for stream:0x%" PRIx64 " code:%s", name, pStream->uid, tstrerror(terrno)); - mndTransDrop(pTrans); - return NULL; + mndTransDrop(p); + return terrno; } - terrno = 0; - return pTrans; + *pTrans1 = p; + return 0; } SSdbRaw *mndStreamActionEncode(SStreamObj *pStream) { @@ -272,8 +275,9 @@ int32_t doKillCheckpointTrans(SMnode *pMnode, const char *pDBName, size_t len) { continue; } - SStreamObj *pStream = mndGetStreamObj(pMnode, pTransInfo->streamId); - if (pStream != NULL) { + SStreamObj *pStream = NULL; + int32_t code = mndGetStreamObj(pMnode, pTransInfo->streamId, &pStream); + if (pStream != NULL && code == 0) { if (identicalName(pStream->sourceDb, pDBName, len)) { mndKillTransImpl(pMnode, pTransInfo->transId, pStream->sourceDb); } else if (identicalName(pStream->targetDb, pDBName, len)) { diff --git a/source/dnode/mnode/impl/src/mndStreamUtil.c b/source/dnode/mnode/impl/src/mndStreamUtil.c index 843c024286..0b96626536 100644 --- a/source/dnode/mnode/impl/src/mndStreamUtil.c +++ b/source/dnode/mnode/impl/src/mndStreamUtil.c @@ -28,20 +28,20 @@ struct SStreamTaskIter { int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId); -SStreamTaskIter* createStreamTaskIter(SStreamObj* pStream) { - SStreamTaskIter* pIter = taosMemoryCalloc(1, sizeof(SStreamTaskIter)); - if (pIter == 
NULL) { +int32_t createStreamTaskIter(SStreamObj* pStream, SStreamTaskIter** pIter) { + *pIter = taosMemoryCalloc(1, sizeof(SStreamTaskIter)); + if (*pIter == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; + return terrno; } - pIter->level = -1; - pIter->ordinalIndex = 0; - pIter->pStream = pStream; - pIter->totalLevel = taosArrayGetSize(pStream->tasks); - pIter->pTask = NULL; + (*pIter)->level = -1; + (*pIter)->ordinalIndex = 0; + (*pIter)->pStream = pStream; + (*pIter)->totalLevel = taosArrayGetSize(pStream->tasks); + (*pIter)->pTask = NULL; - return pIter; + return 0; } bool streamTaskIterNextTask(SStreamTaskIter* pIter) { @@ -72,19 +72,27 @@ bool streamTaskIterNextTask(SStreamTaskIter* pIter) { return false; } -SStreamTask* streamTaskIterGetCurrent(SStreamTaskIter* pIter) { - return pIter->pTask; +int32_t streamTaskIterGetCurrent(SStreamTaskIter* pIter, SStreamTask** pTask) { + if (pTask) { + *pTask = pIter->pTask; + if (*pTask != NULL) { + return TSDB_CODE_SUCCESS; + } + } + + return TSDB_CODE_INVALID_PARA; } void destroyStreamTaskIter(SStreamTaskIter* pIter) { taosMemoryFree(pIter); } -SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { +int32_t mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady, SArray** pList) { SSdb *pSdb = pMnode->pSdb; void *pIter = NULL; SVgObj *pVgroup = NULL; int32_t replica = -1; // do the replica check + int32_t code = 0; *allReady = true; SArray *pVgroupList = taosArrayInit(4, sizeof(SNodeEntry)); @@ -131,10 +139,15 @@ SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { } char buf[256] = {0}; - epsetToStr(&entry.epset, buf, tListLen(buf)); + (void) epsetToStr(&entry.epset, buf, tListLen(buf)); + + void* p = taosArrayPush(pVgroupList, &entry); + if (p == NULL) { + mError("failed to put entry in vgroup list, nodeId:%d code:out of memory", entry.nodeId); + } else { + mDebug("take node snapshot, nodeId:%d %s", entry.nodeId, buf); + } - mDebug("take node snapshot, nodeId:%d %s", entry.nodeId, buf); - 
taosArrayPush(pVgroupList, &entry); sdbRelease(pSdb, pVgroup); } @@ -145,43 +158,57 @@ SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { break; } - SNodeEntry entry = {0}; - addEpIntoEpSet(&entry.epset, pObj->pDnode->fqdn, pObj->pDnode->port); - entry.nodeId = SNODE_HANDLE; + SNodeEntry entry = {.nodeId = SNODE_HANDLE}; + code = addEpIntoEpSet(&entry.epset, pObj->pDnode->fqdn, pObj->pDnode->port); + if (code) { + sdbRelease(pSdb, pObj); + continue; + } char buf[256] = {0}; - epsetToStr(&entry.epset, buf, tListLen(buf)); - mDebug("take snode snapshot, nodeId:%d %s", entry.nodeId, buf); + (void) epsetToStr(&entry.epset, buf, tListLen(buf)); + + void* p = taosArrayPush(pVgroupList, &entry); + if (p == NULL) { + mError("failed to put entry in vgroup list, nodeId:%d code:out of memory", entry.nodeId); + } else { + mDebug("take snode snapshot, nodeId:%d %s", entry.nodeId, buf); + } - taosArrayPush(pVgroupList, &entry); sdbRelease(pSdb, pObj); } - return pVgroupList; + *pList = pVgroupList; + return code; } -SStreamObj *mndGetStreamObj(SMnode *pMnode, int64_t streamId) { - void *pIter = NULL; - SSdb *pSdb = pMnode->pSdb; - SStreamObj *pStream = NULL; +int32_t mndGetStreamObj(SMnode *pMnode, int64_t streamId, SStreamObj **pStream) { + void *pIter = NULL; + SSdb *pSdb = pMnode->pSdb; + *pStream = NULL; - while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { - if (pStream->uid == streamId) { + SStreamObj *p = NULL; + while ((pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&p)) != NULL) { + if (p->uid == streamId) { sdbCancelFetch(pSdb, pIter); - return pStream; + *pStream = p; + return TSDB_CODE_SUCCESS; } - sdbRelease(pSdb, pStream); + sdbRelease(pSdb, p); } - return NULL; + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } void mndKillTransImpl(SMnode *pMnode, int32_t transId, const char *pDbName) { STrans *pTrans = mndAcquireTrans(pMnode, transId); if (pTrans != NULL) { mInfo("kill active transId:%d in Db:%s", transId, pDbName); - 
mndKillTrans(pMnode, pTrans); + int32_t code = mndKillTrans(pMnode, pTrans); mndReleaseTrans(pMnode, pTrans); + if (code) { + mError("failed to kill trans:%d", pTrans->id); + } } else { mError("failed to acquire trans in Db:%s, transId:%d", pDbName, transId); } @@ -197,11 +224,16 @@ int32_t extractNodeEpset(SMnode *pMnode, SEpSet *pEpSet, bool *hasEpset, int32_t pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, pIter, (void **)&pObj); if (pIter != NULL) { - addEpIntoEpSet(pEpSet, pObj->pDnode->fqdn, pObj->pDnode->port); + int32_t code = addEpIntoEpSet(pEpSet, pObj->pDnode->fqdn, pObj->pDnode->port); sdbRelease(pMnode->pSdb, pObj); sdbCancelFetch(pMnode->pSdb, pIter); - *hasEpset = true; - return TSDB_CODE_SUCCESS; + if (code) { + *hasEpset = false; + mError("failed to set epset"); + } else { + *hasEpset = true; + } + return code; } else { mError("failed to acquire snode epset"); return TSDB_CODE_INVALID_PARA; @@ -223,12 +255,14 @@ int32_t extractNodeEpset(SMnode *pMnode, SEpSet *pEpSet, bool *hasEpset, int32_t } static int32_t doSetResumeAction(STrans *pTrans, SMnode *pMnode, SStreamTask *pTask, int8_t igUntreated) { + terrno = 0; + SVResumeStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVResumeStreamTaskReq)); if (pReq == NULL) { mError("failed to malloc in resume stream, size:%" PRIzu ", code:%s", sizeof(SVResumeStreamTaskReq), tstrerror(TSDB_CODE_OUT_OF_MEMORY)); terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno; } pReq->head.vgId = htonl(pTask->info.nodeId); @@ -242,31 +276,45 @@ static int32_t doSetResumeAction(STrans *pTrans, SMnode *pMnode, SStreamTask *pT if (code != TSDB_CODE_SUCCESS || (!hasEpset)) { terrno = code; taosMemoryFree(pReq); - return -1; + return terrno; } code = setTransAction(pTrans, pReq, sizeof(SVResumeStreamTaskReq), TDMT_STREAM_TASK_RESUME, &epset, 0, 0); if (code != 0) { taosMemoryFree(pReq); - return -1; + return terrno; } mDebug("set the resume action for trans:%d", pTrans->id); return 0; } -SStreamTask 
*mndGetStreamTask(STaskId *pId, SStreamObj *pStream) { - SStreamTaskIter *pIter = createStreamTaskIter(pStream); +int32_t mndGetStreamTask(STaskId *pId, SStreamObj *pStream, SStreamTask **pTask) { + *pTask = NULL; + + SStreamTask *p = NULL; + SStreamTaskIter *pIter = NULL; + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + mError("failed to create stream task iter:%s", pStream->name); + return code; + } + while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); - if (pTask->id.taskId == pId->taskId) { + code = streamTaskIterGetCurrent(pIter, &p); + if (code) { + continue; + } + + if (p->id.taskId == pId->taskId) { destroyStreamTaskIter(pIter); - return pTask; + *pTask = p; + return 0; } } destroyStreamTaskIter(pIter); - return NULL; + return TSDB_CODE_FAILED; } int32_t mndGetNumOfStreamTasks(const SStreamObj *pStream) { @@ -280,13 +328,25 @@ int32_t mndGetNumOfStreamTasks(const SStreamObj *pStream) { } int32_t mndStreamSetResumeAction(STrans *pTrans, SMnode *pMnode, SStreamObj *pStream, int8_t igUntreated) { - SStreamTaskIter *pIter = createStreamTaskIter(pStream); + SStreamTaskIter *pIter = NULL; + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + mError("failed to create stream task iter:%s", pStream->name); + return code; + } while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); - if (doSetResumeAction(pTrans, pMnode, pTask, igUntreated) < 0) { + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code || pTask == NULL) { destroyStreamTaskIter(pIter); - return -1; + return code; + } + + code = doSetResumeAction(pTrans, pMnode, pTask, igUntreated); + if (code) { + destroyStreamTaskIter(pIter); + return code; } if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__PAUSE) { @@ -303,7 +363,7 @@ static int32_t doSetPauseAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTa mError("failed to malloc in pause 
stream, size:%" PRIzu ", code:%s", sizeof(SVPauseStreamTaskReq), tstrerror(TSDB_CODE_OUT_OF_MEMORY)); terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno; } pReq->head.vgId = htonl(pTask->info.nodeId); @@ -320,25 +380,38 @@ static int32_t doSetPauseAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTa } char buf[256] = {0}; - epsetToStr(&epset, buf, tListLen(buf)); + (void) epsetToStr(&epset, buf, tListLen(buf)); mDebug("pause stream task in node:%d, epset:%s", pTask->info.nodeId, buf); code = setTransAction(pTrans, pReq, sizeof(SVPauseStreamTaskReq), TDMT_STREAM_TASK_PAUSE, &epset, 0, 0); if (code != 0) { taosMemoryFree(pReq); - return -1; + return code; } return 0; } int32_t mndStreamSetPauseAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { - SStreamTaskIter *pIter = createStreamTaskIter(pStream); + SStreamTaskIter *pIter = NULL; + + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + mError("failed to create stream task iter:%s", pStream->name); + return code; + } while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); - if (doSetPauseAction(pMnode, pTrans, pTask) < 0) { + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code) { destroyStreamTaskIter(pIter); - return -1; + return code; + } + + code = doSetPauseAction(pMnode, pTrans, pTask); + if (code) { + destroyStreamTaskIter(pIter); + return code; } if (atomic_load_8(&pTask->status.taskStatus) != TASK_STATUS__PAUSE) { @@ -348,14 +421,14 @@ int32_t mndStreamSetPauseAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStr } destroyStreamTaskIter(pIter); - return 0; + return code; } static int32_t doSetDropAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask) { SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno; } pReq->head.vgId = htonl(pTask->info.nodeId); @@ -366,28 +439,40 @@ 
static int32_t doSetDropAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTas bool hasEpset = false; int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); if (code != TSDB_CODE_SUCCESS || !hasEpset) { // no valid epset, return directly without redoAction - terrno = code; - return -1; + return code; } // The epset of nodeId of this task may have been expired now, let's use the newest epset from mnode. code = setTransAction(pTrans, pReq, sizeof(SVDropStreamTaskReq), TDMT_STREAM_TASK_DROP, &epset, 0, 0); if (code != 0) { taosMemoryFree(pReq); - return -1; + return code; } return 0; } int32_t mndStreamSetDropAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { - SStreamTaskIter *pIter = createStreamTaskIter(pStream); + SStreamTaskIter *pIter = NULL; + + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + mError("failed to create stream task iter:%s", pStream->name); + return code; + } while(streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); - if (doSetDropAction(pMnode, pTrans, pTask) < 0) { + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code) { destroyStreamTaskIter(pIter); - return -1; + return code; + } + + code = doSetDropAction(pMnode, pTrans, pTask); + if (code) { + destroyStreamTaskIter(pIter); + return code; } } destroyStreamTaskIter(pIter); @@ -398,7 +483,7 @@ static int32_t doSetDropActionFromId(SMnode *pMnode, STrans *pTrans, SOrphanTask SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq)); if (pReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno; } pReq->head.vgId = htonl(pTask->nodeId); @@ -409,16 +494,15 @@ static int32_t doSetDropActionFromId(SMnode *pMnode, STrans *pTrans, SOrphanTask bool hasEpset = false; int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->taskId, pTask->nodeId); if (code != TSDB_CODE_SUCCESS || (!hasEpset)) { // no valid 
epset, return directly without redoAction - terrno = code; taosMemoryFree(pReq); - return -1; + return code; } // The epset of nodeId of this task may have been expired now, let's use the newest epset from mnode. code = setTransAction(pTrans, pReq, sizeof(SVDropStreamTaskReq), TDMT_STREAM_TASK_DROP, &epset, 0, 0); if (code != 0) { taosMemoryFree(pReq); - return -1; + return code; } return 0; @@ -427,19 +511,35 @@ static int32_t doSetDropActionFromId(SMnode *pMnode, STrans *pTrans, SOrphanTask int32_t mndStreamSetDropActionFromList(SMnode *pMnode, STrans *pTrans, SArray* pList) { for(int32_t i = 0; i < taosArrayGetSize(pList); ++i) { SOrphanTask* pTask = taosArrayGet(pList, i); - mDebug("add drop task:0x%x action to drop orphan task", pTask->taskId); - doSetDropActionFromId(pMnode, pTrans, pTask); + int32_t code = doSetDropActionFromId(pMnode, pTrans, pTask); + if (code != 0) { + return code; + } else { + mDebug("add drop task:0x%x action to drop orphan task", pTask->taskId); + } } return 0; } static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg *pMsg, const SVgroupChangeInfo *pInfo, SStreamTaskId *pId, int32_t transId) { + int32_t code = 0; + pMsg->streamId = pId->streamId; pMsg->taskId = pId->taskId; pMsg->transId = transId; pMsg->pNodeList = taosArrayInit(taosArrayGetSize(pInfo->pUpdateNodeList), sizeof(SNodeUpdateInfo)); - taosArrayAddAll(pMsg->pNodeList, pInfo->pUpdateNodeList); + if (pMsg->pNodeList == NULL) { + mError("failed to prepare node list, code:out of memory"); + code = TSDB_CODE_OUT_OF_MEMORY; + } + + if (code == 0) { + void *p = taosArrayAddAll(pMsg->pNodeList, pInfo->pUpdateNodeList); + if (p == NULL) { + mError("failed to add update node list into nodeList"); + } + } } static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupChangeInfo *pInfo, int32_t nodeId, @@ -454,7 +554,7 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha if (code < 0) { terrno = TSDB_CODE_OUT_OF_MEMORY; 
taosArrayDestroy(req.pNodeList); - return -1; + return terrno; } int32_t tlen = sizeof(SMsgHead) + blen; @@ -463,13 +563,18 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha if (buf == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosArrayDestroy(req.pNodeList); - return -1; + return terrno; } void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); SEncoder encoder; tEncoderInit(&encoder, abuf, tlen); - tEncodeStreamTaskUpdateMsg(&encoder, &req); + code = tEncodeStreamTaskUpdateMsg(&encoder, &req); + if (code == -1) { + tEncoderClear(&encoder); + taosArrayDestroy(req.pNodeList); + return code; + } SMsgHead *pMsgHead = (SMsgHead *)buf; pMsgHead->contLen = htonl(tlen); @@ -487,15 +592,20 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha static int32_t doSetUpdateTaskAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTask, SVgroupChangeInfo *pInfo) { void *pBuf = NULL; int32_t len = 0; - streamTaskUpdateEpsetInfo(pTask, pInfo->pUpdateNodeList); + int32_t code = streamTaskUpdateEpsetInfo(pTask, pInfo->pUpdateNodeList); + if (code) { + return code; + } - doBuildStreamTaskUpdateMsg(&pBuf, &len, pInfo, pTask->info.nodeId, &pTask->id, pTrans->id); + code = doBuildStreamTaskUpdateMsg(&pBuf, &len, pInfo, pTask->info.nodeId, &pTask->id, pTrans->id); + if (code) { + return code; + } SEpSet epset = {0}; bool hasEpset = false; - int32_t code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); + code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); if (code != TSDB_CODE_SUCCESS || !hasEpset) { - terrno = code; return code; } @@ -510,16 +620,30 @@ static int32_t doSetUpdateTaskAction(SMnode *pMnode, STrans *pTrans, SStreamTask // build trans to update the epset int32_t mndStreamSetUpdateEpsetAction(SMnode *pMnode, SStreamObj *pStream, SVgroupChangeInfo *pInfo, STrans *pTrans) { mDebug("stream:0x%" PRIx64 " set tasks epset update action", pStream->uid); 
- taosWLockLatch(&pStream->lock); + SStreamTaskIter *pIter = NULL; + + taosWLockLatch(&pStream->lock); + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + taosWUnLockLatch(&pStream->lock); + mError("failed to create stream task iter:%s", pStream->name); + return code; + } - SStreamTaskIter *pIter = createStreamTaskIter(pStream); while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); - int32_t code = doSetUpdateTaskAction(pMnode, pTrans, pTask, pInfo); + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code) { + destroyStreamTaskIter(pIter); + taosWUnLockLatch(&pStream->lock); + return code; + } + + code = doSetUpdateTaskAction(pMnode, pTrans, pTask, pInfo); if (code != TSDB_CODE_SUCCESS) { destroyStreamTaskIter(pIter); taosWUnLockLatch(&pStream->lock); - return -1; + return code; } } @@ -558,16 +682,30 @@ static int32_t doSetResetAction(SMnode *pMnode, STrans *pTrans, SStreamTask *pTa } int32_t mndStreamSetResetTaskAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { - taosWLockLatch(&pStream->lock); + SStreamTaskIter *pIter = NULL; + + taosWLockLatch(&pStream->lock); + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + taosWUnLockLatch(&pStream->lock); + mError("failed to create stream task iter:%s", pStream->name); + return code; + } - SStreamTaskIter *pIter = createStreamTaskIter(pStream); while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); - int32_t code = doSetResetAction(pMnode, pTrans, pTask); + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code) { + destroyStreamTaskIter(pIter); + taosWUnLockLatch(&pStream->lock); + return code; + } + + code = doSetResetAction(pMnode, pTrans, pTask); if (code != TSDB_CODE_SUCCESS) { destroyStreamTaskIter(pIter); taosWUnLockLatch(&pStream->lock); - return -1; + return code; } } @@ -581,8 +719,12 @@ static void 
freeTaskList(void* param) { taosArrayDestroy(*pList); } -void mndInitExecInfo() { - taosThreadMutexInit(&execInfo.lock, NULL); +int32_t mndInitExecInfo() { + int32_t code = taosThreadMutexInit(&execInfo.lock, NULL); + if (code) { + return code; + } + _hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR); execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId)); @@ -596,6 +738,7 @@ void mndInitExecInfo() { taosHashSetFreeFp(execInfo.pTransferStateStreams, freeTaskList); taosHashSetFreeFp(execInfo.pChkptStreams, freeTaskList); taosHashSetFreeFp(execInfo.pStreamConsensus, freeTaskList); + return 0; } void removeExpiredNodeInfo(const SArray *pNodeSnapshot) { @@ -608,7 +751,10 @@ void removeExpiredNodeInfo(const SArray *pNodeSnapshot) { for (int32_t j = 0; j < size; ++j) { SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j); if (pEntry->nodeId == p->nodeId) { - taosArrayPush(pValidList, p); + void* px = taosArrayPush(pValidList, p); + if (px == NULL) { + mError("failed to put node into list, nodeId:%d", p->nodeId); + } break; } } @@ -626,7 +772,10 @@ int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) { return TSDB_CODE_SUCCESS; } - taosHashRemove(pExecNode->pTaskMap, pRemovedId, sizeof(*pRemovedId)); + int32_t code = taosHashRemove(pExecNode->pTaskMap, pRemovedId, sizeof(*pRemovedId)); + if (code) { + return code; + } for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { STaskId *pId = taosArrayGet(pExecNode->pTaskList, k); @@ -645,28 +794,45 @@ int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) { void removeTasksInBuf(SArray *pTaskIds, SStreamExecInfo* pExecInfo) { for (int32_t i = 0; i < taosArrayGetSize(pTaskIds); ++i) { STaskId *pId = taosArrayGet(pTaskIds, i); - doRemoveTasks(pExecInfo, pId); + int32_t code = doRemoveTasks(pExecInfo, pId); + if (code) { + mError("failed to remove task in buffer list, 0x%"PRIx64, pId->taskId); + } } } void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo 
*pExecNode) { - taosThreadMutexLock(&pExecNode->lock); + SStreamTaskIter *pIter = NULL; + streamMutexLock(&pExecNode->lock); // 1. remove task entries - SStreamTaskIter *pIter = createStreamTaskIter(pStream); + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + streamMutexUnlock(&pExecNode->lock); + mError("failed to create stream task iter:%s", pStream->name); + return; + } + while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code) { + continue; + } STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; - doRemoveTasks(pExecNode, &id); + code = doRemoveTasks(pExecNode, &id); + if (code) { + mError("failed to remove task in buffer list, 0x%"PRIx64, id.taskId); + } } ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList)); // 2. remove stream entry in consensus hash table - mndClearConsensusCheckpointId(execInfo.pStreamConsensus, pStream->uid); + (void) mndClearConsensusCheckpointId(execInfo.pStreamConsensus, pStream->uid); - taosThreadMutexUnlock(&pExecNode->lock); + streamMutexUnlock(&pExecNode->lock); destroyStreamTaskIter(pIter); } @@ -697,7 +863,10 @@ int32_t removeExpiredNodeEntryAndTaskInBuf(SArray *pNodeSnapshot) { bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId); if (!existed) { - taosArrayPush(pRemovedTasks, pId); + void* p = taosArrayPush(pRemovedTasks, pId); + if (p == NULL) { + mError("failed to put task entry into remove list, taskId:0x%" PRIx64, pId->taskId); + } } } @@ -759,45 +928,64 @@ static int32_t doSetUpdateChkptAction(SMnode *pMnode, STrans *pTrans, SStreamTas } int32_t mndStreamSetUpdateChkptAction(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) { + SStreamTaskIter *pIter = NULL; + taosWLockLatch(&pStream->lock); + int32_t code = createStreamTaskIter(pStream, &pIter); + if (code) { + taosWUnLockLatch(&pStream->lock); + 
mError("failed to create stream task iter:%s", pStream->name); + return code; + } - SStreamTaskIter *pIter = createStreamTaskIter(pStream); while (streamTaskIterNextTask(pIter)) { - SStreamTask *pTask = streamTaskIterGetCurrent(pIter); + SStreamTask *pTask = NULL; + code = streamTaskIterGetCurrent(pIter, &pTask); + if (code) { + destroyStreamTaskIter(pIter); + taosWUnLockLatch(&pStream->lock); + return code; + } - int32_t code = doSetUpdateChkptAction(pMnode, pTrans, pTask); + code = doSetUpdateChkptAction(pMnode, pTrans, pTask); if (code != TSDB_CODE_SUCCESS) { destroyStreamTaskIter(pIter); taosWUnLockLatch(&pStream->lock); - return -1; + return code; } } destroyStreamTaskIter(pIter); taosWUnLockLatch(&pStream->lock); - return 0; + return code; } int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; void *pIter = NULL; SArray *pDropped = taosArrayInit(4, sizeof(int64_t)); + int32_t code = 0; mDebug("start to scan checkpoint report info"); while ((pIter = taosHashIterate(execInfo.pChkptStreams, pIter)) != NULL) { SArray *pList = *(SArray **)pIter; - STaskChkptInfo* pInfo = taosArrayGet(pList, 0); - SStreamObj* pStream = mndGetStreamObj(pMnode, pInfo->streamId); - if (pStream == NULL) { + STaskChkptInfo *pInfo = taosArrayGet(pList, 0); + SStreamObj *pStream = NULL; + code = mndGetStreamObj(pMnode, pInfo->streamId, &pStream); + if (pStream == NULL || code != 0) { mDebug("failed to acquire stream:0x%" PRIx64 " remove it from checkpoint-report list", pInfo->streamId); - taosArrayPush(pDropped, &pInfo->streamId); + void* p = taosArrayPush(pDropped, &pInfo->streamId); + if (p == NULL) { + mError("failed to put stream into drop list:0x%" PRIx64, pInfo->streamId); + } + continue; } int32_t total = mndGetNumOfStreamTasks(pStream); - int32_t existed = (int32_t) taosArrayGetSize(pList); + int32_t existed = (int32_t)taosArrayGetSize(pList); if (total == existed) { mDebug("stream:0x%" PRIx64 " %s all %d tasks send checkpoint-report, start to 
update checkpoint-info", @@ -805,17 +993,21 @@ int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq) { bool conflict = mndStreamTransConflictCheck(pMnode, pStream->uid, MND_STREAM_CHKPT_UPDATE_NAME, false); if (!conflict) { - int32_t code = mndCreateStreamChkptInfoUpdateTrans(pMnode, pStream, pList); - if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { // remove this entry - taosArrayPush(pDropped, &pInfo->streamId); - mDebug("stream:0x%" PRIx64 " removed", pInfo->streamId); + code = mndCreateStreamChkptInfoUpdateTrans(pMnode, pStream, pList); + if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { // remove this entry + void* p = taosArrayPush(pDropped, &pInfo->streamId); + if (p == NULL) { + mError("failed to remove stream:0x%" PRIx64, pInfo->streamId); + } else { + mDebug("stream:0x%" PRIx64 " removed", pInfo->streamId); + } } else { mDebug("stream:0x%" PRIx64 " not launch chkpt-meta update trans, due to checkpoint not finished yet", pInfo->streamId); } break; } else { - mDebug("stream:0x%"PRIx64" active checkpoint trans not finished yet, wait", pInfo->streamId); + mDebug("stream:0x%" PRIx64 " active checkpoint trans not finished yet, wait", pInfo->streamId); } } else { mDebug("stream:0x%" PRIx64 " %s %d/%d tasks send checkpoint-report, %d not send", pInfo->streamId, pStream->name, @@ -829,7 +1021,10 @@ int32_t mndScanCheckpointReportInfo(SRpcMsg *pReq) { if (size > 0) { for (int32_t i = 0; i < size; ++i) { int64_t streamId = *(int64_t *)taosArrayGet(pDropped, i); - taosHashRemove(execInfo.pChkptStreams, &streamId, sizeof(streamId)); + code = taosHashRemove(execInfo.pChkptStreams, &streamId, sizeof(streamId)); + if (code) { + mError("failed to remove stream in buf:0x%"PRIx64, streamId); + } } int32_t numOfStreams = taosHashGetSize(execInfo.pChkptStreams); @@ -854,29 +1049,30 @@ static int32_t mndStreamSetChkptIdAction(SMnode *pMnode, STrans *pTrans, SStream int32_t blen; tEncodeSize(tEncodeRestoreCheckpointInfo, &req, blen, 
code); if (code < 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno = TSDB_CODE_OUT_OF_MEMORY; } int32_t tlen = sizeof(SMsgHead) + blen; void *pBuf = taosMemoryMalloc(tlen); if (pBuf == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + return terrno = TSDB_CODE_OUT_OF_MEMORY; } void *abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); SEncoder encoder; tEncoderInit(&encoder, abuf, tlen); - tEncodeRestoreCheckpointInfo(&encoder, &req); + code = tEncodeRestoreCheckpointInfo(&encoder, &req); + tEncoderClear(&encoder); + if (code == -1) { + taosMemoryFree(pBuf); + return code; + } SMsgHead *pMsgHead = (SMsgHead *)pBuf; pMsgHead->contLen = htonl(tlen); pMsgHead->vgId = htonl(pTask->info.nodeId); - tEncoderClear(&encoder); - SEpSet epset = {0}; bool hasEpset = false; code = extractNodeEpset(pMnode, &epset, &hasEpset, pTask->id.taskId, pTask->info.nodeId); @@ -898,17 +1094,28 @@ int32_t mndCreateSetConsensusChkptIdTrans(SMnode *pMnode, SStreamObj *pStream, i char msg[128] = {0}; snprintf(msg, tListLen(msg), "set consen-chkpt-id for task:0x%x", taskId); - STrans *pTrans = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_CHKPT_CONSEN_NAME, msg); - if (pTrans == NULL) { + STrans *pTrans = NULL; + int32_t code = doCreateTrans(pMnode, pStream, NULL, TRN_CONFLICT_NOTHING, MND_STREAM_CHKPT_CONSEN_NAME, msg, &pTrans); + if (pTrans == NULL || code != 0) { return terrno; } STaskId id = {.streamId = pStream->uid, .taskId = taskId}; - SStreamTask *pTask = mndGetStreamTask(&id, pStream); - ASSERT(pTask); + SStreamTask *pTask = NULL; + code = mndGetStreamTask(&id, pStream, &pTask); + if (code) { + mError("failed to get task:0x%x in stream:%s, failed to create consensus-checkpointId", taskId, pStream->name); + sdbRelease(pMnode->pSdb, pStream); + return code; + } - /*int32_t code = */ mndStreamRegisterTrans(pTrans, MND_STREAM_CHKPT_CONSEN_NAME, pStream->uid); - int32_t code = mndStreamSetChkptIdAction(pMnode, pTrans, pTask, checkpointId, ts); 
+ code = mndStreamRegisterTrans(pTrans, MND_STREAM_CHKPT_CONSEN_NAME, pStream->uid); + if (code) { + sdbRelease(pMnode->pSdb, pStream); + return code; + } + + code = mndStreamSetChkptIdAction(pMnode, pTrans, pTask, checkpointId, ts); if (code != 0) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); @@ -916,17 +1123,18 @@ int32_t mndCreateSetConsensusChkptIdTrans(SMnode *pMnode, SStreamObj *pStream, i } code = mndPersistTransLog(pStream, pTrans, SDB_STATUS_READY); - if (code != TSDB_CODE_SUCCESS) { + if (code) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } - if (mndTransPrepare(pMnode, pTrans) != 0) { + code = mndTransPrepare(pMnode, pTrans); + if (code) { mError("trans:%d, failed to prepare set consensus-chkptId trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - return -1; + return code; } sdbRelease(pMnode->pSdb, pStream); @@ -935,10 +1143,13 @@ int32_t mndCreateSetConsensusChkptIdTrans(SMnode *pMnode, SStreamObj *pStream, i return TSDB_CODE_ACTION_IN_PROGRESS; } -SCheckpointConsensusInfo* mndGetConsensusInfo(SHashObj* pHash, int64_t streamId, int32_t numOfTasks) { - void* pInfo = taosHashGet(pHash, &streamId, sizeof(streamId)); - if (pInfo != NULL) { - return (SCheckpointConsensusInfo*)pInfo; +int32_t mndGetConsensusInfo(SHashObj* pHash, int64_t streamId, int32_t numOfTasks, SCheckpointConsensusInfo **pInfo) { + *pInfo = NULL; + + void* px = taosHashGet(pHash, &streamId, sizeof(streamId)); + if (px != NULL) { + *pInfo = px; + return 0; } SCheckpointConsensusInfo p = { @@ -947,10 +1158,14 @@ SCheckpointConsensusInfo* mndGetConsensusInfo(SHashObj* pHash, int64_t streamId, .streamId = streamId, }; - taosHashPut(pHash, &streamId, sizeof(streamId), &p, sizeof(p)); - - void* pChkptInfo = (SCheckpointConsensusInfo*)taosHashGet(pHash, &streamId, sizeof(streamId)); - return pChkptInfo; + int32_t code = taosHashPut(pHash, &streamId, sizeof(streamId), &p, sizeof(p)); + if 
(code == 0) { + void *pChkptInfo = (SCheckpointConsensusInfo *)taosHashGet(pHash, &streamId, sizeof(streamId)); + *pInfo = pChkptInfo; + } else { + *pInfo = NULL; + } + return code; } // no matter existed or not, add the request into info list anyway, since we need to send rsp mannually @@ -971,11 +1186,15 @@ void mndAddConsensusTasks(SCheckpointConsensusInfo *pInfo, const SRestoreCheckpo } } - taosArrayPush(pInfo->pTaskList, &info); - int32_t num = taosArrayGetSize(pInfo->pTaskList); - mDebug("s-task:0x%x checkpointId:%" PRId64 " added into consensus-checkpointId list, stream:0x%" PRIx64 - " waiting tasks:%d", - pRestoreInfo->taskId, pRestoreInfo->checkpointId, pRestoreInfo->streamId, num); + void *p = taosArrayPush(pInfo->pTaskList, &info); + if (p == NULL) { + mError("s-task:0x%x failed to put task into consensus-checkpointId list, code: out of memory", info.req.taskId); + } else { + int32_t num = taosArrayGetSize(pInfo->pTaskList); + mDebug("s-task:0x%x checkpointId:%" PRId64 " added into consensus-checkpointId list, stream:0x%" PRIx64 + " waiting tasks:%d", + pRestoreInfo->taskId, pRestoreInfo->checkpointId, pRestoreInfo->streamId, num); + } } void mndClearConsensusRspEntry(SCheckpointConsensusInfo* pInfo) { @@ -984,22 +1203,14 @@ void mndClearConsensusRspEntry(SCheckpointConsensusInfo* pInfo) { } int64_t mndClearConsensusCheckpointId(SHashObj* pHash, int64_t streamId) { - taosHashRemove(pHash, &streamId, sizeof(streamId)); - int32_t numOfStreams = taosHashGetSize(pHash); - mDebug("drop stream:0x%" PRIx64 " in consensus-checkpointId list after new checkpoint generated, remain:%d", streamId, - numOfStreams); - return TSDB_CODE_SUCCESS; -} + int32_t code = taosHashRemove(pHash, &streamId, sizeof(streamId)); + if (code == 0) { + int32_t numOfStreams = taosHashGetSize(pHash); + mDebug("drop stream:0x%" PRIx64 " in consensus-checkpointId list after new checkpoint generated, remain:%d", + streamId, numOfStreams); + } else { + mError("failed to remove 
stream:0x%"PRIx64" in consensus-checkpointId list", streamId); + } -//int32_t mndRegisterConsensusChkptId(SHashObj* pHash, int64_t streamId) { -// void* pInfo = taosHashGet(pHash, &streamId, sizeof(streamId)); -// ASSERT(pInfo == NULL); -// -// SCheckpointConsensusInfo p = {.genTs = taosGetTimestampMs(), .checkpointId = 0, .pTaskList = NULL}; -// taosHashPut(pHash, &streamId, sizeof(streamId), &p, sizeof(p)); -// -// SCheckpointConsensusInfo* pChkptInfo = (SCheckpointConsensusInfo*)taosHashGet(pHash, &streamId, sizeof(streamId)); -// ASSERT(pChkptInfo->genTs > 0 && pChkptInfo->checkpointId == 0); -// mDebug("s-task:0x%" PRIx64 " set the initial consensus-checkpointId:0", streamId); -// return TSDB_CODE_SUCCESS; -//} \ No newline at end of file + return code; +} \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index e2bedc258a..c61949b316 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -44,11 +44,16 @@ static void mndCancelGetNextSubscribe(SMnode *pMnode, void *pIter); static void mndCheckConsumer(SRpcMsg *pMsg, SHashObj *hash); static int32_t mndSetSubCommitLogs(STrans *pTrans, SMqSubscribeObj *pSub) { + int32_t code = 0; SSdbRaw *pCommitRaw = mndSubActionEncode(pSub); - if (pCommitRaw == NULL) return -1; - if (mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) return -1; - if (sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY) != 0) return -1; - return 0; + if (pCommitRaw == NULL) { + code = TSDB_CODE_MND_RETURN_VALUE_NULL; + if (terrno != 0) code = terrno; + TAOS_RETURN(code); + } + TAOS_CHECK_RETURN(mndTransAppendCommitlog(pTrans, pCommitRaw)); + TAOS_CHECK_RETURN(sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY)); + TAOS_RETURN(code); } int32_t mndInitSubscribe(SMnode *pMnode) { @@ -75,6 +80,7 @@ int32_t mndInitSubscribe(SMnode *pMnode) { } static SMqSubscribeObj *mndCreateSubscription(SMnode *pMnode, const SMqTopicObj *pTopic, const 
char *subKey) { + int32_t code = 0; SMqSubscribeObj *pSub = tNewSubscribeObj(subKey); if (pSub == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -86,7 +92,7 @@ static SMqSubscribeObj *mndCreateSubscription(SMnode *pMnode, const SMqTopicObj pSub->subType = pTopic->subType; pSub->withMeta = pTopic->withMeta; - if (mndSchedInitSubEp(pMnode, pTopic, pSub) < 0) { + if ((terrno = mndSchedInitSubEp(pMnode, pTopic, pSub)) < 0) { tDeleteSubscribeObj(pSub); taosMemoryFree(pSub); return NULL; @@ -97,6 +103,7 @@ static SMqSubscribeObj *mndCreateSubscription(SMnode *pMnode, const SMqTopicObj static int32_t mndBuildSubChangeReq(void **pBuf, int32_t *pLen, SMqSubscribeObj *pSub, const SMqRebOutputVg *pRebVg, SSubplan *pPlan) { + int32_t code = 0; SMqRebVgReq req = {0}; req.oldConsumerId = pRebVg->oldConsumerId; req.newConsumerId = pRebVg->newConsumerId; @@ -106,8 +113,8 @@ static int32_t mndBuildSubChangeReq(void **pBuf, int32_t *pLen, SMqSubscribeObj pPlan->execNode.nodeId = pRebVg->pVgEp->vgId; int32_t msgLen; if (qSubPlanToString(pPlan, &req.qmsg, &msgLen) < 0) { - terrno = TSDB_CODE_QRY_INVALID_INPUT; - return -1; + code = TSDB_CODE_QRY_INVALID_INPUT; + TAOS_RETURN(code); } } else { req.qmsg = taosStrdup(""); @@ -122,7 +129,7 @@ static int32_t mndBuildSubChangeReq(void **pBuf, int32_t *pLen, SMqSubscribeObj tEncodeSize(tEncodeSMqRebVgReq, &req, tlen, ret); if (ret < 0) { taosMemoryFree(req.qmsg); - return -1; + TAOS_RETURN(ret); } tlen += sizeof(SMsgHead); @@ -130,7 +137,7 @@ static int32_t mndBuildSubChangeReq(void **pBuf, int32_t *pLen, SMqSubscribeObj if (buf == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(req.qmsg); - return -1; + TAOS_RETURN(ret); } SMsgHead *pMsgHead = (SMsgHead *)buf; @@ -140,40 +147,41 @@ static int32_t mndBuildSubChangeReq(void **pBuf, int32_t *pLen, SMqSubscribeObj SEncoder encoder = {0}; tEncoderInit(&encoder, POINTER_SHIFT(buf, sizeof(SMsgHead)), tlen); - if (tEncodeSMqRebVgReq(&encoder, &req) < 0) { + if ((code = 
tEncodeSMqRebVgReq(&encoder, &req)) < 0) { taosMemoryFreeClear(buf); tEncoderClear(&encoder); taosMemoryFree(req.qmsg); - return -1; + TAOS_RETURN(code); } tEncoderClear(&encoder); *pBuf = buf; *pLen = tlen; taosMemoryFree(req.qmsg); - return 0; + TAOS_RETURN(code); } static int32_t mndPersistSubChangeVgReq(SMnode *pMnode, STrans *pTrans, SMqSubscribeObj *pSub, const SMqRebOutputVg *pRebVg, SSubplan *pPlan) { + int32_t code = 0; if (pRebVg->oldConsumerId == pRebVg->newConsumerId) { if (pRebVg->oldConsumerId == -1) return 0; // drop stream, no consumer, while split vnode,all consumerId is -1 - terrno = TSDB_CODE_MND_INVALID_SUB_OPTION; - return -1; + code = TSDB_CODE_MND_INVALID_SUB_OPTION; + TAOS_RETURN(code); } void *buf; int32_t tlen; - if (mndBuildSubChangeReq(&buf, &tlen, pSub, pRebVg, pPlan) < 0) { - return -1; + if ((code = mndBuildSubChangeReq(&buf, &tlen, pSub, pRebVg, pPlan)) < 0) { + TAOS_RETURN(code); } int32_t vgId = pRebVg->pVgEp->vgId; SVgObj *pVgObj = mndAcquireVgroup(pMnode, vgId); if (pVgObj == NULL) { taosMemoryFree(buf); - terrno = TSDB_CODE_MND_VGROUP_NOT_EXIST; - return -1; + code = TSDB_CODE_MND_VGROUP_NOT_EXIST; + TAOS_RETURN(code); } STransAction action = {0}; @@ -183,11 +191,11 @@ static int32_t mndPersistSubChangeVgReq(SMnode *pMnode, STrans *pTrans, SMqSubsc action.msgType = TDMT_VND_TMQ_SUBSCRIBE; mndReleaseVgroup(pMnode, pVgObj); - if (mndTransAppendRedoAction(pTrans, &action) != 0) { + if ((code = mndTransAppendRedoAction(pTrans, &action)) != 0) { taosMemoryFree(buf); - return -1; + TAOS_RETURN(code); } - return 0; + TAOS_RETURN(code); } static int32_t mndSplitSubscribeKey(const char *key, char *topic, char *cgroup, bool fullName) { @@ -209,6 +217,7 @@ static int32_t mndSplitSubscribeKey(const char *key, char *topic, char *cgroup, } static SMqRebInfo *mndGetOrCreateRebSub(SHashObj *pHash, const char *key) { + terrno = 0; SMqRebInfo *pRebInfo = taosHashGet(pHash, key, strlen(key) + 1); if (pRebInfo == NULL) { pRebInfo = 
tNewSMqRebSubscribe(key); @@ -612,7 +621,6 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu if (strcmp(pOutput->pSub->qmsg, "") != 0) { code = qStringToSubplan(pOutput->pSub->qmsg, &pPlan); if (code != 0) { - terrno = code; goto END; } } @@ -623,7 +631,8 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pMsg, "tmq-reb"); if (pTrans == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_MND_RETURN_VALUE_NULL; + if (terrno != 0) code = terrno; goto END; } @@ -665,7 +674,7 @@ static int32_t mndPersistRebResult(SMnode *pMnode, SRpcMsg *pMsg, const SMqRebOu END: nodesDestroyNode((SNode *)pPlan); mndTransDrop(pTrans); - return code; + TAOS_RETURN(code); } static void freeRebalanceItem(void *param) { @@ -827,6 +836,7 @@ static void checkConsumer(SMnode *pMnode, SMqSubscribeObj *pSub) { } static int32_t buildRebOutput(SMnode *pMnode, SMqRebInputObj *rebInput, SMqRebOutputObj *rebOutput) { + int32_t code = 0; const char *key = rebInput->pRebInfo->key; SMqSubscribeObj *pSub = mndAcquireSubscribeByKey(pMnode, key); @@ -838,8 +848,10 @@ static int32_t buildRebOutput(SMnode *pMnode, SMqRebInputObj *rebInput, SMqRebOu SMqTopicObj *pTopic = mndAcquireTopic(pMnode, topic); if (pTopic == NULL) { + code = TSDB_CODE_MND_RETURN_VALUE_NULL; + if (terrno != 0) code = terrno; mError("[rebalance] mq rebalance %s ignored since topic %s doesn't exist", key, topic); - return -1; + TAOS_RETURN(code); } taosRLockLatch(&pTopic->lock); @@ -848,10 +860,12 @@ static int32_t buildRebOutput(SMnode *pMnode, SMqRebInputObj *rebInput, SMqRebOu rebOutput->pSub = mndCreateSubscription(pMnode, pTopic, key); if (rebOutput->pSub == NULL) { - mError("[rebalance] mq rebalance %s failed create sub since %s, ignore", key, terrstr()); + code = TSDB_CODE_MND_RETURN_VALUE_NULL; + if (terrno != 0) code = terrno; + mError("[rebalance] mq rebalance %s failed create 
sub since %s, ignore", key, tstrerror(code)); taosRUnLockLatch(&pTopic->lock); mndReleaseTopic(pMnode, pTopic); - return -1; + TAOS_RETURN(code); } memcpy(rebOutput->pSub->dbName, pTopic->db, TSDB_DB_FNAME_LEN); @@ -869,7 +883,7 @@ static int32_t buildRebOutput(SMnode *pMnode, SMqRebInputObj *rebInput, SMqRebOu mInfo("[rebalance] sub topic:%s has %d consumers sub till now", key, rebInput->oldConsumerNum); mndReleaseSubscribe(pMnode, pSub); } - return 0; + TAOS_RETURN(code); } static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { @@ -885,6 +899,7 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { SHashObj *rebSubHash = taosHashInit(64, MurmurHash3_32, true, HASH_NO_LOCK); if (rebSubHash == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; + if (terrno != 0) code = terrno; goto END; } taosHashSetFreeFp(rebSubHash, freeRebalanceItem); @@ -915,8 +930,8 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { mndDoRebalance(pMnode, &rebInput, &rebOutput); - if (mndPersistRebResult(pMnode, pMsg, &rebOutput) != 0) { - mError("mq rebalance persist output error, possibly vnode splitted or dropped,msg:%s", terrstr()) + if ((code = mndPersistRebResult(pMnode, pMsg, &rebOutput)) != 0) { + mError("mq rebalance persist output error, possibly vnode splitted or dropped,msg:%s", tstrerror(code)) } clearRebOutput(&rebOutput); @@ -931,7 +946,7 @@ END: taosHashCleanup(rebSubHash); mndRebCntDec(); - return code; + TAOS_RETURN(code); } static int32_t sendDeleteSubToVnode(SMnode *pMnode, SMqSubscribeObj *pSub, STrans *pTrans) { @@ -950,8 +965,7 @@ static int32_t sendDeleteSubToVnode(SMnode *pMnode, SMqSubscribeObj *pSub, STran } SMqVDeleteReq *pReq = taosMemoryCalloc(1, sizeof(SMqVDeleteReq)); if (pReq == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - ret = -1; + ret = TSDB_CODE_OUT_OF_MEMORY; goto END; } pReq->head.vgId = htonl(pVgObj->vgId); @@ -968,15 +982,14 @@ static int32_t sendDeleteSubToVnode(SMnode *pMnode, SMqSubscribeObj *pSub, STran action.acceptableCode = 
TSDB_CODE_MND_VGROUP_NOT_EXIST; sdbRelease(pMnode->pSdb, pVgObj); - if (mndTransAppendRedoAction(pTrans, &action) != 0) { - ret = -1; + if ((ret = mndTransAppendRedoAction(pTrans, &action)) != 0) { goto END; } } END: sdbRelease(pMnode->pSdb, pVgObj); sdbCancelFetch(pMnode->pSdb, pIter); - return ret; + TAOS_RETURN(ret); } static int32_t mndDropConsumerByGroup(SMnode *pMnode, STrans *pTrans, char *cgroup, char *topic) { @@ -1012,7 +1025,7 @@ static int32_t mndDropConsumerByGroup(SMnode *pMnode, STrans *pTrans, char *cgro END: sdbRelease(pMnode->pSdb, pConsumer); sdbCancelFetch(pMnode->pSdb, pIter); - return ret; + TAOS_RETURN(ret); } static int32_t mndProcessDropCgroupReq(SRpcMsg *pMsg) { @@ -1022,8 +1035,8 @@ static int32_t mndProcessDropCgroupReq(SRpcMsg *pMsg) { int32_t code = TSDB_CODE_ACTION_IN_PROGRESS; if (tDeserializeSMDropCgroupReq(pMsg->pCont, pMsg->contLen, &dropReq) != 0) { - terrno = TSDB_CODE_INVALID_MSG; - return -1; + code = TSDB_CODE_INVALID_MSG; + TAOS_RETURN(code); } SMqSubscribeObj *pSub = mndAcquireSubscribe(pMnode, dropReq.cgroup, dropReq.topic); @@ -1032,24 +1045,24 @@ static int32_t mndProcessDropCgroupReq(SRpcMsg *pMsg) { mInfo("cgroup:%s on topic:%s, not exist, ignore not exist is set", dropReq.cgroup, dropReq.topic); return 0; } else { - terrno = TSDB_CODE_MND_SUBSCRIBE_NOT_EXIST; - mError("topic:%s, cgroup:%s, failed to drop since %s", dropReq.topic, dropReq.cgroup, terrstr()); - return -1; + code = TSDB_CODE_MND_SUBSCRIBE_NOT_EXIST; + mError("topic:%s, cgroup:%s, failed to drop since %s", dropReq.topic, dropReq.cgroup, tstrerror(code)); + TAOS_RETURN(code); } } taosWLockLatch(&pSub->lock); if (taosHashGetSize(pSub->consumerHash) != 0) { - terrno = TSDB_CODE_MND_CGROUP_USED; - mError("cgroup:%s on topic:%s, failed to drop since %s", dropReq.cgroup, dropReq.topic, terrstr()); - code = -1; + code = TSDB_CODE_MND_CGROUP_USED; + mError("cgroup:%s on topic:%s, failed to drop since %s", dropReq.cgroup, dropReq.topic, tstrerror(code)); goto end; 
} pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pMsg, "drop-cgroup"); if (pTrans == NULL) { - mError("cgroup: %s on topic:%s, failed to drop since %s", dropReq.cgroup, dropReq.topic, terrstr()); - code = -1; + code = TSDB_CODE_MND_RETURN_VALUE_NULL; + if (terrno != 0) code = terrno; + mError("cgroup: %s on topic:%s, failed to drop since %s", dropReq.cgroup, dropReq.topic, tstrerror(code)); goto end; } @@ -1088,9 +1101,9 @@ end: if (code != 0) { mError("cgroup %s on topic:%s, failed to drop", dropReq.cgroup, dropReq.topic); - return code; + TAOS_RETURN(code); } - return TSDB_CODE_ACTION_IN_PROGRESS; + TAOS_RETURN(TSDB_CODE_ACTION_IN_PROGRESS); } void mndCleanupSubscribe(SMnode *pMnode) {} @@ -1340,7 +1353,7 @@ END: sdbRelease(pSdb, pSub); sdbCancelFetch(pSdb, pIter); - return code; + TAOS_RETURN(code); } static int32_t buildResult(SSDataBlock *pBlock, int32_t *numOfRows, int64_t consumerId, const char *topic, diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 573b75ff5a..70d0b858f6 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -123,15 +123,16 @@ _OUT: } static int32_t mndTransValidateImp(SMnode *pMnode, STrans *pTrans) { + int32_t code = 0; if (pTrans->stage == TRN_STAGE_PREPARE) { - if (mndTransCheckConflict(pMnode, pTrans) < 0) { + if ((code = mndTransCheckConflict(pMnode, pTrans)) < 0) { mError("trans:%d, failed to validate trans conflicts.", pTrans->id); - return -1; + TAOS_RETURN(code); } return mndTransValidatePrepareStage(pMnode, pTrans); } - return 0; + TAOS_RETURN(code); } static int32_t mndTransValidate(SMnode *pMnode, SSdbRaw *pRaw) { @@ -139,10 +140,18 @@ static int32_t mndTransValidate(SMnode *pMnode, SSdbRaw *pRaw) { int32_t code = -1; SSdbRow *pRow = mndTransDecode(pRaw); - if (pRow == NULL) goto _OUT; + if (pRow == NULL) { + code = TSDB_CODE_MND_RETURN_VALUE_NULL; + if (terrno != 0) code = terrno; + goto _OUT; + } pTrans = 
sdbGetRowObj(pRow); - if (pTrans == NULL) goto _OUT; + if (pTrans == NULL) { + code = TSDB_CODE_MND_RETURN_VALUE_NULL; + if (terrno != 0) code = terrno; + goto _OUT; + } code = mndTransValidateImp(pMnode, pTrans); @@ -150,11 +159,10 @@ _OUT: if (pTrans) mndTransDropData(pTrans); if (pRow) taosMemoryFreeClear(pRow); if (code) terrno = (terrno ? terrno : TSDB_CODE_MND_TRANS_CONFLICT); - return code; + TAOS_RETURN(code); } int32_t mndProcessWriteMsg(SMnode *pMnode, SRpcMsg *pMsg, SFsmCbMeta *pMeta) { - terrno = TSDB_CODE_SUCCESS; SSyncMgmt *pMgmt = &pMnode->syncMgmt; SSdbRaw *pRaw = pMsg->pCont; STrans *pTrans = NULL; @@ -163,7 +171,7 @@ int32_t mndProcessWriteMsg(SMnode *pMnode, SRpcMsg *pMsg, SFsmCbMeta *pMeta) { if (transId <= 0) { mError("trans:%d, invalid commit msg, cache transId:%d seq:%" PRId64, transId, pMgmt->transId, pMgmt->transSeq); - terrno = TSDB_CODE_INVALID_MSG; + code = TSDB_CODE_INVALID_MSG; goto _OUT; } @@ -176,7 +184,7 @@ int32_t mndProcessWriteMsg(SMnode *pMnode, SRpcMsg *pMsg, SFsmCbMeta *pMeta) { if (code != 0) { mError("trans:%d, failed to validate requested trans since %s", transId, terrstr()); code = 0; - pMeta->code = terrno; + pMeta->code = code; goto _OUT; } @@ -184,13 +192,15 @@ int32_t mndProcessWriteMsg(SMnode *pMnode, SRpcMsg *pMsg, SFsmCbMeta *pMeta) { if (code != 0) { mError("trans:%d, failed to write to sdb since %s", transId, terrstr()); code = 0; - pMeta->code = terrno; + pMeta->code = code; goto _OUT; } pTrans = mndAcquireTrans(pMnode, transId); if (pTrans == NULL) { - mError("trans:%d, not found while execute in mnode since %s", transId, terrstr()); + code = TSDB_CODE_MND_RETURN_VALUE_NULL; + if (terrno != 0) code = terrno; + mError("trans:%d, not found while execute in mnode since %s", transId, tstrerror(code)); goto _OUT; } @@ -207,7 +217,7 @@ int32_t mndProcessWriteMsg(SMnode *pMnode, SRpcMsg *pMsg, SFsmCbMeta *pMeta) { _OUT: if (pTrans) mndReleaseTrans(pMnode, pTrans); - return code; + TAOS_RETURN(code); } static int32_t 
mndPostMgmtCode(SMnode *pMnode, int32_t code) { @@ -258,7 +268,7 @@ _OUT: mndPostMgmtCode(pMnode, code ? code : pMeta->code); rpcFreeCont(pMsg->pCont); pMsg->pCont = NULL; - return code; + TAOS_RETURN(code); } SyncIndex mndSyncAppliedIndex(const SSyncFSM *pFSM) { @@ -466,16 +476,18 @@ int32_t mndInitSync(SMnode *pMnode) { pNode->clusterId); } + int32_t code = 0; tsem_init(&pMgmt->syncSem, 0, 0); pMgmt->sync = syncOpen(&syncInfo, true); if (pMgmt->sync <= 0) { - mError("failed to open sync since %s", terrstr()); - return -1; + if (terrno != 0) code = terrno; + mError("failed to open sync since %s", tstrerror(code)); + TAOS_RETURN(code); } pMnode->pSdb->sync = pMgmt->sync; mInfo("mnode-sync is opened, id:%" PRId64, pMgmt->sync); - return 0; + TAOS_RETURN(code); } void mndCleanupSync(SMnode *pMnode) { @@ -518,10 +530,10 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) { SSyncMgmt *pMgmt = &pMnode->syncMgmt; SRpcMsg req = {.msgType = TDMT_MND_APPLY_MSG, .contLen = sdbGetRawTotalSize(pRaw)}; - if (req.contLen <= 0) return -1; + if (req.contLen <= 0) return TSDB_CODE_OUT_OF_MEMORY; req.pCont = rpcMallocCont(req.contLen); - if (req.pCont == NULL) return -1; + if (req.pCont == NULL) return TSDB_CODE_OUT_OF_MEMORY; memcpy(req.pCont, pRaw, req.contLen); taosThreadMutexLock(&pMgmt->lock); @@ -531,8 +543,7 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) { mError("trans:%d, can't be proposed since trans:%d already waiting for confirm", transId, pMgmt->transId); taosThreadMutexUnlock(&pMgmt->lock); rpcFreeCont(req.pCont); - terrno = TSDB_CODE_MND_LAST_TRANS_NOT_FINISHED; - return terrno; + TAOS_RETURN(TSDB_CODE_MND_LAST_TRANS_NOT_FINISHED); } mInfo("trans:%d, will be proposed", transId); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index d3582ab8f3..56a3c00fee 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -155,8 +155,8 @@ int32_t 
tqSetDstTableDataPayload(uint64_t suid, const STSchema* pTSchema, int32_ SSubmitTbData* pTableData, int64_t earlyTs, const char* id); int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, const char* id); -SVCreateTbReq* buildAutoCreateTableReq(const char* stbFullName, int64_t suid, int32_t numOfCols, - SSDataBlock* pDataBlock, SArray* pTagArray, bool newSubTableRule); +int32_t buildAutoCreateTableReq(const char* stbFullName, int64_t suid, int32_t numOfCols, SSDataBlock* pDataBlock, + SArray* pTagArray, bool newSubTableRule, SVCreateTbReq** pReq); #define TQ_ERR_GO_TO_END(c) \ do { \ diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index fe3117de49..201e496140 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -201,7 +201,12 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * SSubmitTbData tbData = {.suid = suid, .uid = 0, .sver = pTSchema->version, .flags = SUBMIT_REQ_AUTO_CREATE_TABLE}; int32_t cid = taosArrayGetSize(pDataBlock->pDataBlock) + 1; - tbData.pCreateTbReq = buildAutoCreateTableReq(stbFullName, suid, cid, pDataBlock, tagArray, true); + + code = buildAutoCreateTableReq(stbFullName, suid, cid, pDataBlock, tagArray, true, &tbData.pCreateTbReq); + if (code) { + smaError("failed to build create-table req, code:%d", code); + continue; + } { uint64_t groupId = pDataBlock->info.id.groupId; diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 3efc653f64..45212df1dd 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -18,8 +18,6 @@ #include "tmsg.h" #include "tq.h" -#define MAX_CACHE_TABLE_INFO_NUM 10240 - typedef struct STableSinkInfo { uint64_t uid; tstr name; @@ -43,7 +41,7 @@ static int32_t doRemoveFromCache(SSHashObj* pSinkTableMap, uint64_t groupId, con static bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, 
const char* ctbName, int64_t suid); static int32_t initCreateTableMsg(SVCreateTbReq* pCreateTableReq, uint64_t suid, const char* stbFullName, int32_t numOfTags); -static SArray* createDefaultTagColName(); +static int32_t createDefaultTagColName(SArray** pList); static void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSDataBlock* pDataBlock, const char* stbFullName, int64_t gid, bool newSubTableRule); static int32_t doCreateSinkInfo(const char* pDstTableName, STableSinkInfo** pInfo); @@ -95,7 +93,10 @@ int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* p SSingleDeleteReq req = {.startTs = skey, .endTs = ekey}; strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1); - taosArrayPush(deleteReq->deleteReqs, &req); + void* p = taosArrayPush(deleteReq->deleteReqs, &req); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } if (originName) name = originName; taosMemoryFreeClear(name); @@ -149,14 +150,20 @@ static bool tqGetTableInfo(SSHashObj* pTableInfoMap, uint64_t groupId, STableSin static int32_t tqPutReqToQueue(SVnode* pVnode, SVCreateTbBatchReq* pReqs) { void* buf = NULL; int32_t tlen = 0; - encodeCreateChildTableForRPC(pReqs, TD_VID(pVnode), &buf, &tlen); + + int32_t code = encodeCreateChildTableForRPC(pReqs, TD_VID(pVnode), &buf, &tlen); + if (code) { + tqError("vgId:%d failed to encode create table msg, create table failed, code:%s", TD_VID(pVnode), tstrerror(code)); + return code; + } SRpcMsg msg = {.msgType = TDMT_VND_CREATE_TABLE, .pCont = buf, .contLen = tlen}; - if (tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg) != 0) { + code = tmsgPutToQueue(&pVnode->msgCb, WRITE_QUEUE, &msg); + if (code) { tqError("failed to put into write-queue since %s", terrstr()); } - return TSDB_CODE_SUCCESS; + return code; } int32_t initCreateTableMsg(SVCreateTbReq* pCreateTableReq, uint64_t suid, const char* stbFullName, int32_t numOfTags) { @@ -166,18 +173,36 @@ int32_t initCreateTableMsg(SVCreateTbReq* pCreateTableReq, uint64_t suid, 
const // set super table name SName name = {0}; - tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - pCreateTableReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name)); + + int32_t code = tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); + if (code == 0) { + pCreateTableReq->ctb.stbName = taosStrdup((char*)tNameGetTableName(&name)); + if (pCreateTableReq->ctb.stbName == NULL) { // ignore this error code + tqError("failed to duplicate the stb name:%s, failed to init create-table msg and create req table", stbFullName); + } + } pCreateTableReq->ctb.tagNum = numOfTags; - return TSDB_CODE_SUCCESS; + return code; } -SArray* createDefaultTagColName() { +int32_t createDefaultTagColName(SArray** pColNameList) { + *pColNameList = NULL; + SArray* pTagColNameList = taosArrayInit(1, TSDB_COL_NAME_LEN); - char tagNameStr[TSDB_COL_NAME_LEN] = "group_id"; - taosArrayPush(pTagColNameList, tagNameStr); - return pTagColNameList; + if (pTagColNameList == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + char tagNameStr[TSDB_COL_NAME_LEN] = "group_id"; + void* p = taosArrayPush(pTagColNameList, tagNameStr); + if (p == NULL) { + taosArrayDestroy(pTagColNameList); + return TSDB_CODE_OUT_OF_MEMORY; + } + + *pColNameList = pTagColNameList; + return TSDB_CODE_SUCCESS; } void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSDataBlock* pDataBlock, const char* stbFullName, @@ -201,18 +226,20 @@ void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSDataBlock* pDa static int32_t doBuildAndSendCreateTableMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* pDataBlock, SStreamTask* pTask, int64_t suid) { - STSchema* pTSchema = pTask->outputInfo.tbSink.pTSchema; - int32_t rows = pDataBlock->info.rows; - SArray* tagArray = taosArrayInit(4, sizeof(STagVal)); + STSchema* pTSchema = pTask->outputInfo.tbSink.pTSchema; + int32_t rows = pDataBlock->info.rows; + SArray* tagArray = taosArrayInit(4, sizeof(STagVal)); + 
const char* id = pTask->id.idStr; + int32_t vgId = pTask->pMeta->vgId; - tqDebug("s-task:%s build create %d table(s) msg", pTask->id.idStr, rows); + tqDebug("s-task:%s build create %d table(s) msg", id, rows); int32_t code = 0; SVCreateTbBatchReq reqs = {0}; SArray* crTblArray = reqs.pArray = taosArrayInit(1, sizeof(SVCreateTbReq)); if (NULL == reqs.pArray) { - tqError("s-task:%s failed to init create table msg, code:%s", pTask->id.idStr, tstrerror(terrno)); + tqError("s-task:%s failed to init create table msg, code:%s", id, tstrerror(terrno)); goto _end; } @@ -222,15 +249,26 @@ static int32_t doBuildAndSendCreateTableMsg(SVnode* pVnode, char* stbFullName, S int32_t size = taosArrayGetSize(pDataBlock->pDataBlock); int32_t numOfTags = TMAX(size - UD_TAG_COLUMN_INDEX, 1); - initCreateTableMsg(pCreateTbReq, suid, stbFullName, numOfTags); + code = initCreateTableMsg(pCreateTbReq, suid, stbFullName, numOfTags); + if (code) { + tqError("s-task:%s vgId:%d failed to init create table msg", id, vgId); + continue; + } taosArrayClear(tagArray); if (size == 2) { STagVal tagVal = { .cid = pTSchema->numOfCols + 1, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId}; - taosArrayPush(tagArray, &tagVal); - pCreateTbReq->ctb.tagName = createDefaultTagColName(); + void* p = taosArrayPush(tagArray, &tagVal); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + code = createDefaultTagColName(&pCreateTbReq->ctb.tagName); + if (code) { + return code; + } } else { for (int32_t tagId = UD_TAG_COLUMN_INDEX, step = 1; tagId < size; tagId++, step++) { SColumnInfoData* pTagData = taosArrayGet(pDataBlock->pDataBlock, tagId); @@ -245,14 +283,19 @@ static int32_t doBuildAndSendCreateTableMsg(SVnode* pVnode, char* stbFullName, S } else { memcpy(&tagVal.i64, pData, pTagData->info.bytes); } - taosArrayPush(tagArray, &tagVal); + void* p = taosArrayPush(tagArray, &tagVal); + if (p == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } } } - tTagNew(tagArray, 1, false, 
(STag**)&pCreateTbReq->ctb.pTag); + code = tTagNew(tagArray, 1, false, (STag**)&pCreateTbReq->ctb.pTag); taosArrayDestroy(tagArray); tagArray = NULL; - if (pCreateTbReq->ctb.pTag == NULL) { + + if (pCreateTbReq->ctb.pTag == NULL || (code != 0)) { tdDestroySVCreateTbReq(pCreateTbReq); code = TSDB_CODE_OUT_OF_MEMORY; goto _end; @@ -270,22 +313,34 @@ static int32_t doBuildAndSendCreateTableMsg(SVnode* pVnode, char* stbFullName, S setCreateTableMsgTableName(pCreateTbReq, pDataBlock, stbFullName, gid, pTask->ver >= SSTREAM_TASK_SUBTABLE_CHANGED_VER && pTask->subtableWithoutMd5 != 1); - taosArrayPush(reqs.pArray, pCreateTbReq); + void* p = taosArrayPush(reqs.pArray, pCreateTbReq); + if (p == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _end; + } STableSinkInfo* pInfo = NULL; bool alreadyCached = tqGetTableInfo(pTask->outputInfo.tbSink.pTblInfo, gid, &pInfo); if (!alreadyCached) { code = doCreateSinkInfo(pCreateTbReq->name, &pInfo); - doPutIntoCache(pTask->outputInfo.tbSink.pTblInfo, pInfo, gid, pTask->id.idStr); + if (code) { + tqError("vgId:%d failed to create sink tableInfo for table:%s, s-task:%s", vgId, pCreateTbReq->name, id); + continue; + } + + code = doPutIntoCache(pTask->outputInfo.tbSink.pTblInfo, pInfo, gid, id); + if (code) { + tqError("vgId:%d failed to put sink tableInfo:%s into cache, s-task:%s", vgId, pCreateTbReq->name, id); + } } - tqDebug("s-task:%s build create table:%s msg complete", pTask->id.idStr, pCreateTbReq->name); + tqDebug("s-task:%s build create table:%s msg complete", id, pCreateTbReq->name); } reqs.nReqs = taosArrayGetSize(reqs.pArray); code = tqPutReqToQueue(pVnode, &reqs); if (code != TSDB_CODE_SUCCESS) { - tqError("s-task:%s failed to send create table msg", pTask->id.idStr); + tqError("s-task:%s failed to send create table msg", id); } _end: @@ -348,15 +403,26 @@ int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, c SRow* pOldRow = *(SRow**)TARRAY_GET_ELEM(pExisted->aRowP, k); if (pNewRow->ts < 
pOldRow->ts) { - taosArrayPush(pFinal, &pNewRow); + void* p = taosArrayPush(pFinal, &pNewRow); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } j += 1; } else if (pNewRow->ts > pOldRow->ts) { - taosArrayPush(pFinal, &pOldRow); + void* p = taosArrayPush(pFinal, &pOldRow); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + k += 1; } else { // check for the existance of primary key if (pNewRow->numOfPKs == 0) { - taosArrayPush(pFinal, &pNewRow); + void* p = taosArrayPush(pFinal, &pNewRow); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + k += 1; j += 1; tRowDestroy(pOldRow); @@ -369,7 +435,11 @@ int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, c int32_t ret = tRowKeyCompare(&kNew, &kOld); if (ret <= 0) { - taosArrayPush(pFinal, &pNewRow); + void* p = taosArrayPush(pFinal, &pNewRow); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + j += 1; if (ret == 0) { @@ -377,7 +447,11 @@ int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, c tRowDestroy(pOldRow); } } else { - taosArrayPush(pFinal, &pOldRow); + void* p = taosArrayPush(pFinal, &pOldRow); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + k += 1; } } @@ -386,12 +460,18 @@ int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, c while (j < newLen) { SRow* pRow = *(SRow**)TARRAY_GET_ELEM(pNew->aRowP, j++); - taosArrayPush(pFinal, &pRow); + void* p = taosArrayPush(pFinal, &pRow); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } while (k < oldLen) { SRow* pRow = *(SRow**)TARRAY_GET_ELEM(pExisted->aRowP, k++); - taosArrayPush(pFinal, &pRow); + void* p = taosArrayPush(pFinal, &pRow); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } taosArrayDestroy(pNew->aRowP); @@ -425,34 +505,40 @@ bool isValidDstChildTable(SMetaReader* pReader, int32_t vgId, const char* ctbNam return true; } -SVCreateTbReq* buildAutoCreateTableReq(const char* stbFullName, int64_t suid, int32_t numOfCols, - 
SSDataBlock* pDataBlock, SArray* pTagArray, bool newSubTableRule) { +int32_t buildAutoCreateTableReq(const char* stbFullName, int64_t suid, int32_t numOfCols, SSDataBlock* pDataBlock, + SArray* pTagArray, bool newSubTableRule, SVCreateTbReq** pReq) { + *pReq = NULL; + SVCreateTbReq* pCreateTbReq = taosMemoryCalloc(1, sizeof(SVCreateTbReq)); if (pCreateTbReq == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; + return TSDB_CODE_OUT_OF_MEMORY; } taosArrayClear(pTagArray); - initCreateTableMsg(pCreateTbReq, suid, stbFullName, 1); - - STagVal tagVal = {.cid = numOfCols, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId}; - taosArrayPush(pTagArray, &tagVal); - - tTagNew(pTagArray, 1, false, (STag**)&pCreateTbReq->ctb.pTag); - - if (pCreateTbReq->ctb.pTag == NULL) { - tdDestroySVCreateTbReq(pCreateTbReq); - taosMemoryFreeClear(pCreateTbReq); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; + int32_t code = initCreateTableMsg(pCreateTbReq, suid, stbFullName, 1); + if (code != 0) { + return code; } - pCreateTbReq->ctb.tagName = createDefaultTagColName(); + STagVal tagVal = {.cid = numOfCols, .type = TSDB_DATA_TYPE_UBIGINT, .i64 = pDataBlock->info.id.groupId}; + void* p = taosArrayPush(pTagArray, &tagVal); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + code = tTagNew(pTagArray, 1, false, (STag**)&pCreateTbReq->ctb.pTag); + if (pCreateTbReq->ctb.pTag == NULL || (code != 0)) { + tdDestroySVCreateTbReq(pCreateTbReq); + taosMemoryFreeClear(pCreateTbReq); + return code; + } + + code = createDefaultTagColName(&pCreateTbReq->ctb.tagName); // set table name setCreateTableMsgTableName(pCreateTbReq, pDataBlock, stbFullName, pDataBlock->info.id.groupId, newSubTableRule); - return pCreateTbReq; + *pReq = pCreateTbReq; + return code; } int32_t buildSubmitMsgImpl(SSubmitReq2* pSubmitReq, int32_t vgId, void** pMsg, int32_t* msgLen) { @@ -555,7 +641,10 @@ int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDat break; } 
SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); - taosArrayPush(pVals, &cv); + void* p = taosArrayPush(pVals, &cv); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } else { SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); if (colDataIsNull_s(pColData, j)) { @@ -566,7 +655,11 @@ int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDat } SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); - taosArrayPush(pVals, &cv); + void* p = taosArrayPush(pVals, &cv); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + dataIndex++; } else { void* colData = colDataGetData(pColData, j); @@ -574,12 +667,18 @@ int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDat SValue sv = (SValue){.type = pCol->type, .nData = varDataLen(colData), .pData = (uint8_t*)varDataVal(colData)}; SColVal cv = COL_VAL_VALUE(pCol->colId, sv); - taosArrayPush(pVals, &cv); + void* p = taosArrayPush(pVals, &cv); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } else { SValue sv = {.type = pCol->type}; memcpy(&sv.val, colData, tDataTypes[pCol->type].bytes); SColVal cv = COL_VAL_VALUE(pCol->colId, sv); - taosArrayPush(pVals, &cv); + void* p = taosArrayPush(pVals, &cv); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } dataIndex++; } @@ -596,7 +695,10 @@ int32_t doConvertRows(SSubmitTbData* pTableData, const STSchema* pTSchema, SSDat } ASSERT(pRow); - taosArrayPush(pTableData->aRowP, &pRow); + void* p = taosArrayPush(pTableData->aRowP, &pRow); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } taosArrayDestroy(pVals); @@ -665,6 +767,7 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat STSchema* pTSchema = pTask->outputInfo.tbSink.pTSchema; int32_t vgId = TD_VID(pVnode); STableSinkInfo* pTableSinkInfo = NULL; + int32_t code = 0; bool alreadyCached = tqGetTableInfo(pTask->outputInfo.tbSink.pTblInfo, groupId, &pTableSinkInfo); @@ -686,7 +789,11 @@ int32_t 
setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat } else { // this groupId has not been kept in cache yet if (dstTableName[0] == 0) { memset(dstTableName, 0, TSDB_TABLE_NAME_LEN); - buildCtbNameByGroupIdImpl(stbFullName, groupId, dstTableName); + code = buildCtbNameByGroupIdImpl(stbFullName, groupId, dstTableName); + if (code) { + tqDebug("s-task:%s failed to build auto create table-name:%s, groupId:0x%" PRId64, id, dstTableName, groupId); + return code; + } } else { if (pTask->subtableWithoutMd5 != 1 && !isAutoTableName(dstTableName) && !alreadyAddGroupId(dstTableName, groupId) && groupId != 0) { @@ -699,8 +806,13 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat } } - int32_t code = doCreateSinkInfo(dstTableName, &pTableSinkInfo); - tqDebug("s-task:%s build new sinkTableInfo to add cache, dstTable:%s", id, dstTableName); + code = doCreateSinkInfo(dstTableName, &pTableSinkInfo); + if (code == 0) { + tqDebug("s-task:%s build new sinkTableInfo to add cache, dstTable:%s", id, dstTableName); + } else { + tqDebug("s-task:%s failed to build new sinkTableInfo, dstTable:%s", id, dstTableName); + return code; + } } if (alreadyCached) { @@ -731,20 +843,20 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat SArray* pTagArray = taosArrayInit(pTSchema->numOfCols + 1, sizeof(STagVal)); pTableData->flags = SUBMIT_REQ_AUTO_CREATE_TABLE; - pTableData->pCreateTbReq = + code = buildAutoCreateTableReq(stbFullName, suid, pTSchema->numOfCols + 1, pDataBlock, pTagArray, - pTask->ver >= SSTREAM_TASK_SUBTABLE_CHANGED_VER && pTask->subtableWithoutMd5 != 1); + (pTask->ver >= SSTREAM_TASK_SUBTABLE_CHANGED_VER && pTask->subtableWithoutMd5 != 1), + &pTableData->pCreateTbReq); taosArrayDestroy(pTagArray); - if (pTableData->pCreateTbReq == NULL) { - tqError("s-task:%s failed to build auto create dst-table req:%s, code:%s", id, dstTableName, - tstrerror(terrno)); + if (code) { + tqError("s-task:%s failed to 
build auto create dst-table req:%s, code:%s", id, dstTableName, tstrerror(code)); taosMemoryFree(pTableSinkInfo); - return terrno; + return code; } pTableSinkInfo->uid = 0; - doPutIntoCache(pTask->outputInfo.tbSink.pTblInfo, pTableSinkInfo, groupId, id); + code = doPutIntoCache(pTask->outputInfo.tbSink.pTblInfo, pTableSinkInfo, groupId, id); } else { metaReaderClear(&mr); @@ -765,12 +877,12 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat pTableSinkInfo->uid = mr.me.uid; metaReaderClear(&mr); - doPutIntoCache(pTask->outputInfo.tbSink.pTblInfo, pTableSinkInfo, groupId, id); + code = doPutIntoCache(pTask->outputInfo.tbSink.pTblInfo, pTableSinkInfo, groupId, id); } } } - return TDB_CODE_SUCCESS; + return code; } int32_t tqSetDstTableDataPayload(uint64_t suid, const STSchema *pTSchema, int32_t blockIndex, SSDataBlock* pDataBlock, @@ -864,14 +976,21 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { if (tbData.pCreateTbReq != NULL) { tdDestroySVCreateTbReq(tbData.pCreateTbReq); - doRemoveFromCache(pTask->outputInfo.tbSink.pTblInfo, pDataBlock->info.id.groupId, id); + (void) doRemoveFromCache(pTask->outputInfo.tbSink.pTblInfo, pDataBlock->info.id.groupId, id); tbData.pCreateTbReq = NULL; } continue; } - taosArrayPush(submitReq.aSubmitTbData, &tbData); + void* p = taosArrayPush(submitReq.aSubmitTbData, &tbData); + if (p == NULL) { + tqDebug("vgId:%d, s-task:%s failed to build submit msg, data lost", vgId, id); + } + code = doBuildAndSendSubmitMsg(pVnode, pTask, &submitReq, 1); + if (code) { // failed and continue + tqDebug("vgId:%d, s-task:%s submit msg failed, data lost", vgId, id); + } } } } else { @@ -918,16 +1037,24 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { if (tbData.pCreateTbReq != NULL) { tdDestroySVCreateTbReq(tbData.pCreateTbReq); - 
doRemoveFromCache(pTask->outputInfo.tbSink.pTblInfo, groupId, id); + (void) doRemoveFromCache(pTask->outputInfo.tbSink.pTblInfo, groupId, id); tbData.pCreateTbReq = NULL; } continue; } - taosArrayPush(submitReq.aSubmitTbData, &tbData); + void* p = taosArrayPush(submitReq.aSubmitTbData, &tbData); + if (p == NULL) { + tqError("vgId:%d, s-task:%s failed to build submit msg, data lost", vgId, id); + continue; + } int32_t size = (int32_t)taosArrayGetSize(submitReq.aSubmitTbData) - 1; - taosHashPut(pTableIndexMap, &groupId, sizeof(groupId), &size, sizeof(size)); + code = taosHashPut(pTableIndexMap, &groupId, sizeof(groupId), &size, sizeof(size)); + if (code) { + tqError("vgId:%d, s-task:%s failed to put group into index map, code:%s", vgId, id, tstrerror(code)); + continue; + } } else { code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, earlyTs, id); if (code != TSDB_CODE_SUCCESS || tbData.aRowP == NULL) { @@ -951,7 +1078,10 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { taosHashCleanup(pTableIndexMap); if (hasSubmit) { - doBuildAndSendSubmitMsg(pVnode, pTask, &submitReq, numOfBlocks); + code = doBuildAndSendSubmitMsg(pVnode, pTask, &submitReq, numOfBlocks); + if (code) { // failed and continue + tqError("vgId:%d failed to build and send submit msg", vgId); + } } else { tDestroySubmitReq(&submitReq, TSDB_MSG_FLG_ENCODE); tqDebug("vgId:%d, s-task:%s write results completed", vgId, id); @@ -989,7 +1119,11 @@ int32_t doRemoveFromCache(SSHashObj* pSinkTableMap, uint64_t groupId, const char } int32_t code = tSimpleHashRemove(pSinkTableMap, &groupId, sizeof(groupId)); - tqDebug("s-task:%s remove cached table meta for groupId:%" PRId64, id, groupId); + if (code == 0) { + tqDebug("s-task:%s remove cached table meta for groupId:%" PRId64, id, groupId); + } else { + tqError("s-task:%s failed to remove table meta from hashmap, groupId:%" PRId64, id, groupId); + } return code; } @@ -1019,10 +1153,14 @@ int32_t 
doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* void* serializedDeleteReq = rpcMallocCont(len + sizeof(SMsgHead)); void* abuf = POINTER_SHIFT(serializedDeleteReq, sizeof(SMsgHead)); tEncoderInit(&encoder, abuf, len); - tEncodeSBatchDeleteReq(&encoder, &deleteReq); + code = tEncodeSBatchDeleteReq(&encoder, &deleteReq); tEncoderClear(&encoder); taosArrayDestroy(deleteReq.deleteReqs); + if (code) { + return code; + } + ((SMsgHead*)serializedDeleteReq)->vgId = TD_VID(pVnode); SRpcMsg msg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = serializedDeleteReq, .contLen = len + sizeof(SMsgHead)}; diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 0e5b1b6fb7..c84e016459 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -37,7 +37,12 @@ int32_t tqScanWal(STQ* pTq) { // check all tasks int32_t numOfTasks = 0; bool shouldIdle = true; - doScanWalForAllTasks(pMeta, &shouldIdle); + + int32_t code = doScanWalForAllTasks(pMeta, &shouldIdle); + if (code) { + tqError("vgId:%d failed to start all tasks, try next time", vgId); + return code; + } streamMetaWLock(pMeta); int32_t times = (--pMeta->scanInfo.scanCounter); @@ -51,9 +56,13 @@ int32_t tqScanWal(STQ* pTq) { if (times > 0) { tqDebug("vgId:%d scan wal for stream tasks for %d times in %dms", vgId, times, SCAN_WAL_IDLE_DURATION); - tqScanWalInFuture(pTq, numOfTasks, SCAN_WAL_IDLE_DURATION); + code = tqScanWalInFuture(pTq, numOfTasks, SCAN_WAL_IDLE_DURATION); + if (code) { + tqError("vgId:%d sched scan wal in %dms failed, ignore this failure", vgId, SCAN_WAL_IDLE_DURATION); + } } - return 0; + + return code; } typedef struct SBuildScanWalMsgParam { @@ -69,28 +78,44 @@ static void doStartScanWal(void* param, void* tmrId) { tqDebug("vgId:%d create msg to start wal scan, numOfTasks:%d, vnd restored:%d", vgId, pParam->numOfTasks, pTq->pVnode->restored); - /*int32_t code = */ streamTaskSchedTask(&pTq->pVnode->msgCb, 
vgId, 0, 0, STREAM_EXEC_T_EXTRACT_WAL_DATA); + int32_t code = streamTaskSchedTask(&pTq->pVnode->msgCb, vgId, 0, 0, STREAM_EXEC_T_EXTRACT_WAL_DATA); taosMemoryFree(pParam); + + if (code) { + tqError("vgId:%d failed sched task to scan wal", vgId); + } } int32_t tqScanWalInFuture(STQ* pTq, int32_t numOfTasks, int32_t idleDuration) { SStreamMeta* pMeta = pTq->pStreamMeta; + int32_t code = 0; + int32_t vgId = TD_VID(pTq->pVnode); SBuildScanWalMsgParam* pParam = taosMemoryMalloc(sizeof(SBuildScanWalMsgParam)); + if (pParam == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } pParam->pTq = pTq; pParam->numOfTasks = numOfTasks; - tmr_h pTimer = streamTimerGetInstance(); - ASSERT(pTimer); + tmr_h pTimer = NULL; + code = streamTimerGetInstance(&pTimer); + if (code) { + tqError("vgId:%d failed to get tmr ctrl during sched scan wal", vgId); + return code; + } if (pMeta->scanInfo.scanTimer == NULL) { pMeta->scanInfo.scanTimer = taosTmrStart(doStartScanWal, idleDuration, pParam, pTimer); } else { - taosTmrReset(doStartScanWal, idleDuration, pParam, pTimer, &pMeta->scanInfo.scanTimer); + code = taosTmrReset(doStartScanWal, idleDuration, pParam, pTimer, &pMeta->scanInfo.scanTimer); + if (code) { + tqError("vgId:%d failed to start scan wal in:%dms", vgId, idleDuration); + } } - return TSDB_CODE_SUCCESS; + return code; } int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { @@ -207,7 +232,11 @@ bool handleFillhistoryScanComplete(SStreamTask* pTask, int64_t ver) { double el = (taosGetTimestampMs() - pTask->execInfo.step2Start) / 1000.0; qDebug("s-task:%s scan-history from WAL stage(step 2) ended, range:%" PRId64 "-%" PRId64 ", elapsed time:%.2fs", id, pTask->step2Range.minVer, maxVer, el); - /*int32_t code = */streamTaskPutTranstateIntoInputQ(pTask); + int32_t code = streamTaskPutTranstateIntoInputQ(pTask); + if (code) { + qError("s-task:%s failed to put trans-state into inputQ", id); + } + return true; } else { qWarn("s-task:%s fill-history scan WAL, nextProcessVer:%" PRId64 " out of the ver 
range:%" PRId64 "-%" PRId64 @@ -290,8 +319,12 @@ bool doPutDataIntoInputQ(SStreamTask* pTask, int64_t maxVer, int32_t* numOfItems break; } } else { - walReaderSeekVer(pTask->exec.pWalReader, pTask->chkInfo.nextProcessVer); tqTrace("s-task:%s append input queue failed, code:too many items, ver:%" PRId64, id, pTask->chkInfo.nextProcessVer); + code = walReaderSeekVer(pTask->exec.pWalReader, pTask->chkInfo.nextProcessVer); + if (code) { + tqError("s-task:%s failed to seek ver to:%"PRId64 " in wal", id, pTask->chkInfo.nextProcessVer); + } + break; } } @@ -347,18 +380,18 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { int32_t numOfItems = streamQueueGetNumOfItems(pTask->inputq.queue); int64_t maxVer = (pTask->info.fillHistory == 1) ? pTask->step2Range.maxVer : INT64_MAX; - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); SStreamTaskState pState = streamTaskGetStatus(pTask); if (pState.state != TASK_STATUS__READY) { tqDebug("s-task:%s not ready for submit block from wal, status:%s", pTask->id.idStr, pState.name); - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamMetaReleaseTask(pStreamMeta, pTask); continue; } bool hasNewData = doPutDataIntoInputQ(pTask, maxVer, &numOfItems); - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); if ((numOfItems > 0) || hasNewData) { noDataInWal = false; @@ -366,7 +399,7 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { if (code != TSDB_CODE_SUCCESS) { streamMetaReleaseTask(pStreamMeta, pTask); taosArrayDestroy(pTaskList); - return -1; + return code; } } @@ -379,5 +412,5 @@ int32_t doScanWalForAllTasks(SStreamMeta* pStreamMeta, bool* pScanIdle) { } taosArrayDestroy(pTaskList); - return 0; + return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/vnode/src/tqCommon/tqCommon.c b/source/dnode/vnode/src/tqCommon/tqCommon.c index 3c6100a8f4..b56c474ed5 100644 --- a/source/dnode/vnode/src/tqCommon/tqCommon.c +++ 
b/source/dnode/vnode/src/tqCommon/tqCommon.c @@ -160,6 +160,7 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS}; int64_t st = taosGetTimestampMs(); bool updated = false; + int32_t code = 0; SStreamTaskNodeUpdateMsg req = {0}; @@ -258,26 +259,40 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM } } - // save + // stream do update the nodeEp info, write it into stream meta. if (updated) { tqDebug("s-task:%s vgId:%d save task after update epset, and stop task", idstr, vgId); - streamMetaSaveTask(pMeta, pTask); + code = streamMetaSaveTask(pMeta, pTask); + if (code) { + tqError("s-task:%s vgId:%d failed to save task, code:%s", idstr, vgId, tstrerror(code)); + } + if (ppHTask != NULL) { - streamMetaSaveTask(pMeta, *ppHTask); + code = streamMetaSaveTask(pMeta, *ppHTask); + if (code) { + tqError("s-task:%s vgId:%d failed to save related history task, code:%s", idstr, vgId, tstrerror(code)); + } } } else { tqDebug("s-task:%s vgId:%d not save task since not update epset actually, stop task", idstr, vgId); } - streamTaskStop(pTask); + code = streamTaskStop(pTask); + if (code) { + tqError("s-task:%s vgId:%d failed to stop task, code:%s", idstr, vgId, tstrerror(code)); + } + if (ppHTask != NULL) { - streamTaskStop(*ppHTask); + code = streamTaskStop(*ppHTask); + if (code) { + tqError("s-task:%s vgId:%d failed to stop related history task, code:%s", idstr, vgId, tstrerror(code)); + } } // keep info streamMetaAddIntoUpdateTaskList(pMeta, pTask, (ppHTask != NULL) ? (*ppHTask) : NULL, req.transId, st); - rsp.code = 0; + rsp.code = TSDB_CODE_SUCCESS; // possibly only handle the stream task. 
int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); @@ -305,13 +320,16 @@ int32_t tqStreamTaskProcessUpdateReq(SStreamMeta* pMeta, SMsgCb* cb, SRpcMsg* pM #if 0 taosMSleep(5000);// for test purpose, to trigger the leader election #endif - tqStreamTaskStartAsync(pMeta, cb, true); + code = tqStreamTaskStartAsync(pMeta, cb, true); + if (code) { + tqError("vgId:%d async start all tasks, failed, code:%s", vgId, tstrerror(code)); + } } } streamMetaWUnLock(pMeta); taosArrayDestroy(req.pNodeList); - return rsp.code; + return rsp.code; // always return true } int32_t tqStreamTaskProcessDispatchReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { @@ -333,7 +351,7 @@ int32_t tqStreamTaskProcessDispatchReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, req.streamId, req.taskId, &pTask); - if (pTask) { + if (pTask && (code == 0)) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; if (streamProcessDispatchMsg(pTask, &req, &rsp) != 0) { return -1; @@ -393,14 +411,14 @@ int32_t tqStreamTaskProcessDispatchRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pRsp->streamId, pRsp->upstreamTaskId, &pTask); - if (pTask) { - streamProcessDispatchRsp(pTask, pRsp, pMsg->code); + if (pTask && (code == 0)) { + code = streamProcessDispatchRsp(pTask, pRsp, pMsg->code); streamMetaReleaseTask(pMeta, pTask); - return TSDB_CODE_SUCCESS; + return code; } else { tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, pRsp->upstreamTaskId); terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST; - return terrno; + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } } @@ -408,16 +426,22 @@ int32_t tqStreamTaskProcessRetrieveReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { char* msgStr = pMsg->pCont; char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + int32_t code = 0; SDecoder decoder; SStreamRetrieveReq req; tDecoderInit(&decoder, 
(uint8_t*)msgBody, msgLen); - tDecodeStreamRetrieveReq(&decoder, &req); + code = tDecodeStreamRetrieveReq(&decoder, &req); tDecoderClear(&decoder); + if (code) { + tqError("vgId:%d failed to decode retrieve msg, quit handling it", pMeta->vgId); + return code; + } + SStreamTask* pTask = NULL; - int32_t code = streamMetaAcquireTask(pMeta, req.streamId, req.dstTaskId, &pTask); - if (pTask == NULL) { + code = streamMetaAcquireTask(pMeta, req.streamId, req.dstTaskId, &pTask); + if (pTask == NULL || code != 0) { tqError("vgId:%d process retrieve req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, req.dstTaskId); tCleanupStreamRetrieveReq(&req); @@ -446,6 +470,7 @@ int32_t tqStreamTaskProcessCheckReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { char* msgStr = pMsg->pCont; char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + int32_t code = 0; SStreamTaskCheckReq req; SStreamTaskCheckRsp rsp = {0}; @@ -453,9 +478,14 @@ int32_t tqStreamTaskProcessCheckReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); - tDecodeStreamTaskCheckReq(&decoder, &req); + code = tDecodeStreamTaskCheckReq(&decoder, &req); tDecoderClear(&decoder); + if (code) { + tqError("vgId:%d decode check msg failed, not handle this msg", pMeta->vgId); + return code; + } + streamTaskProcessCheckMsg(pMeta, &req, &rsp); return streamTaskSendCheckRsp(pMeta, req.upstreamNodeId, &rsp, &pMsg->info, req.upstreamTaskId); } @@ -490,7 +520,7 @@ int32_t tqStreamTaskProcessCheckRsp(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLe SStreamTask* pTask = NULL; code = streamMetaAcquireTask(pMeta, rsp.streamId, rsp.upstreamTaskId, &pTask); - if (pTask == NULL) { + if ((pTask == NULL) || (code != 0)) { return streamMetaAddFailedTask(pMeta, rsp.streamId, rsp.upstreamTaskId); } @@ -518,19 +548,25 @@ int32_t tqStreamTaskProcessCheckpointReadyMsg(SStreamMeta* pMeta, SRpcMsg* pMsg) SStreamTask* pTask = 
NULL; code = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId, &pTask); - if (pTask == NULL) { + if (code != 0) { tqError("vgId:%d failed to find s-task:0x%x, it may have been destroyed already", vgId, req.downstreamTaskId); - return TSDB_CODE_STREAM_TASK_NOT_EXIST; + return code; } tqDebug("vgId:%d s-task:%s received the checkpoint-ready msg from task:0x%x (vgId:%d), handle it", vgId, pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId); - streamProcessCheckpointReadyMsg(pTask, req.checkpointId, req.downstreamTaskId, req.downstreamNodeId); + code = streamProcessCheckpointReadyMsg(pTask, req.checkpointId, req.downstreamTaskId, req.downstreamNodeId); streamMetaReleaseTask(pMeta, pTask); + if (code) { + return code; + } { // send checkpoint ready rsp SMStreamCheckpointReadyRspMsg* pReadyRsp = rpcMallocCont(sizeof(SMStreamCheckpointReadyRspMsg)); + if (pReadyRsp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } pReadyRsp->upstreamTaskId = req.upstreamTaskId; pReadyRsp->upstreamNodeId = req.upstreamNodeId; @@ -606,8 +642,8 @@ int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, SMsgCb* cb, int64_t sve if (restored) { SStreamTask* p = NULL; code = streamMetaAcquireTask(pMeta, streamId, taskId, &p); - if ((p != NULL) && (p->info.fillHistory == 0)) { - tqStreamStartOneTaskAsync(pMeta, cb, streamId, taskId); + if ((p != NULL) && (code == 0) && (p->info.fillHistory == 0)) { + code = tqStreamStartOneTaskAsync(pMeta, cb, streamId, taskId); } if (p != NULL) { @@ -631,6 +667,7 @@ int32_t tqStreamTaskProcessDeployReq(SStreamMeta* pMeta, SMsgCb* cb, int64_t sve int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; + int32_t code = 0; int32_t vgId = pMeta->vgId; STaskId hTaskId = {0}; tqDebug("vgId:%d receive msg to drop s-task:0x%x", vgId, pReq->taskId); @@ -649,8 +686,12 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen } 
streamTaskSetRemoveBackendFiles(pTask); - streamTaskClearHTaskAttr(pTask, pReq->resetRelHalt); + code = streamTaskClearHTaskAttr(pTask, pReq->resetRelHalt); streamMetaReleaseTask(pMeta, pTask); + + if (code) { + tqError("s-task:0x%x failed to clear related fill-history info, still exists", pReq->taskId); + } } streamMetaWUnLock(pMeta); @@ -658,11 +699,17 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen // drop the related fill-history task firstly if (hTaskId.taskId != 0 && hTaskId.streamId != 0) { tqDebug("s-task:0x%x vgId:%d drop rel fill-history task:0x%x firstly", pReq->taskId, vgId, (int32_t)hTaskId.taskId); - streamMetaUnregisterTask(pMeta, hTaskId.streamId, hTaskId.taskId); + code = streamMetaUnregisterTask(pMeta, hTaskId.streamId, hTaskId.taskId); + if (code) { + tqDebug("s-task:0x%x vgId:%d drop rel fill-history task:0x%x failed", pReq->taskId, vgId, (int32_t)hTaskId.taskId); + } } // drop the stream task now - streamMetaUnregisterTask(pMeta, pReq->streamId, pReq->taskId); + code = streamMetaUnregisterTask(pMeta, pReq->streamId, pReq->taskId); + if (code) { + tqDebug("s-task:0x%x vgId:%d drop task failed", pReq->taskId, vgId); + } // commit the update streamMetaWLock(pMeta); @@ -674,12 +721,13 @@ int32_t tqStreamTaskProcessDropReq(SStreamMeta* pMeta, char* msg, int32_t msgLen } streamMetaWUnLock(pMeta); - return 0; + return 0; // always return success } int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, bool restored, char* msg) { SVUpdateCheckpointInfoReq* pReq = (SVUpdateCheckpointInfoReq*)msg; + int32_t code = 0; int32_t vgId = pMeta->vgId; tqDebug("vgId:%d receive msg to update-checkpoint-info for s-task:0x%x", vgId, pReq->taskId); @@ -689,7 +737,7 @@ int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, bool restored SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (ppTask != NULL && (*ppTask) != NULL) { - streamTaskUpdateTaskCheckpointInfo(*ppTask, 
restored, pReq); + code = streamTaskUpdateTaskCheckpointInfo(*ppTask, restored, pReq); } else { // failed to get the task. int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); tqError( @@ -700,7 +748,7 @@ int32_t tqStreamTaskProcessUpdateCheckpointReq(SStreamMeta* pMeta, bool restored streamMetaWUnLock(pMeta); // always return success when handling the requirement issued by mnode during transaction. - return TSDB_CODE_SUCCESS; + return code; } static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { @@ -746,7 +794,7 @@ static int32_t restartStreamTasks(SStreamMeta* pMeta, bool isLeader) { if (isLeader && !tsDisableStream) { streamMetaWUnLock(pMeta); - streamMetaStartAllTasks(pMeta); + code = streamMetaStartAllTasks(pMeta); } else { streamMetaResetStartInfo(&pMeta->startInfo, pMeta->vgId); pMeta->startInfo.restartCount = 0; @@ -765,16 +813,16 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead int32_t vgId = pMeta->vgId; if (type == STREAM_EXEC_T_START_ONE_TASK) { - streamMetaStartOneTask(pMeta, pReq->streamId, pReq->taskId); + (void) streamMetaStartOneTask(pMeta, pReq->streamId, pReq->taskId); return 0; } else if (type == STREAM_EXEC_T_START_ALL_TASKS) { - streamMetaStartAllTasks(pMeta); + (void) streamMetaStartAllTasks(pMeta); return 0; } else if (type == STREAM_EXEC_T_RESTART_ALL_TASKS) { - restartStreamTasks(pMeta, isLeader); + (void) restartStreamTasks(pMeta, isLeader); return 0; } else if (type == STREAM_EXEC_T_STOP_ALL_TASKS) { - streamMetaStopAllTasks(pMeta); + (void) streamMetaStopAllTasks(pMeta); return 0; } else if (type == STREAM_EXEC_T_ADD_FAILED_TASK) { int32_t code = streamMetaAddFailedTask(pMeta, pReq->streamId, pReq->taskId); @@ -783,7 +831,7 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId, &pTask); - if (pTask != NULL) { + if (pTask != NULL && (code == 0)) { char* 
pStatus = NULL; if (streamTaskReadyToRun(pTask, &pStatus)) { int64_t execTs = pTask->status.lastExecTs; @@ -804,12 +852,12 @@ int32_t tqStreamTaskProcessRunReq(SStreamMeta* pMeta, SRpcMsg* pMsg, bool isLead SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId, &pTask); - if (pTask != NULL) { // even in halt status, the data in inputQ must be processed + if ((pTask != NULL) && (code == 0)) { // even in halt status, the data in inputQ must be processed char* p = NULL; if (streamTaskReadyToRun(pTask, &p)) { tqDebug("vgId:%d s-task:%s status:%s start to process block from inputQ, next checked ver:%" PRId64, vgId, pTask->id.idStr, p, pTask->chkInfo.nextProcessVer); - streamExecTask(pTask); + (void) streamExecTask(pTask); } else { int8_t status = streamTaskSetSchedStatusInactive(pTask); tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId, @@ -829,6 +877,7 @@ int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta) { STaskStartInfo* pStartInfo = &pMeta->startInfo; int32_t vgId = pMeta->vgId; bool scanWal = false; + int32_t code = 0; streamMetaWLock(pMeta); if (pStartInfo->startAllTasks == 1) { @@ -844,8 +893,7 @@ int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta) { pStartInfo->restartCount); streamMetaWUnLock(pMeta); - restartStreamTasks(pMeta, (pMeta->role == NODE_ROLE_LEADER)); - return TSDB_CODE_SUCCESS; + return restartStreamTasks(pMeta, (pMeta->role == NODE_ROLE_LEADER)); } else { if (pStartInfo->restartCount == 0) { tqDebug("vgId:%d start all tasks completed in callbackFn, restartCount is 0", pMeta->vgId); @@ -862,10 +910,10 @@ int32_t tqStartTaskCompleteCallback(SStreamMeta* pMeta) { if (scanWal && (vgId != SNODE_HANDLE)) { tqDebug("vgId:%d start scan wal for executing tasks", vgId); - tqScanWalAsync(pMeta->ahandle, true); + code = tqScanWalAsync(pMeta->ahandle, true); } - return TSDB_CODE_SUCCESS; + return code; } int32_t 
tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, char* pMsg) { @@ -873,7 +921,7 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, char* pMsg) { SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId, &pTask); - if (pTask == NULL) { + if (pTask == NULL || (code != 0)) { tqError("vgId:%d process task-reset req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, pReq->taskId); return TSDB_CODE_SUCCESS; @@ -881,7 +929,7 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, char* pMsg) { tqDebug("s-task:%s receive task-reset msg from mnode, reset status and ready for data processing", pTask->id.idStr); - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); streamTaskClearCheckInfo(pTask, true); // clear flag set during do checkpoint, and open inputQ for all upstream tasks @@ -904,7 +952,7 @@ int32_t tqStreamTaskProcessTaskResetReq(SStreamMeta* pMeta, char* pMsg) { tqDebug("s-task:%s status:%s do nothing after receiving reset-task from mnode", pTask->id.idStr, pState.name); } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); return TSDB_CODE_SUCCESS; @@ -915,7 +963,7 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->upstreamTaskId, &pTask); - if (pTask == NULL) { + if (pTask == NULL || (code != 0)) { tqError("vgId:%d process retrieve checkpoint trigger, checkpointId:%" PRId64 " from s-task:0x%x, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, pReq->checkpointId, (int32_t)pReq->downstreamTaskId, pReq->upstreamTaskId); @@ -929,11 +977,10 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) tqError("s-task:%s not ready for checkpoint-trigger retrieve from 0x%x, since downstream not ready", pTask->id.idStr, 
(int32_t)pReq->downstreamTaskId); - streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, pReq->downstreamNodeId, &pMsg->info, + code = streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, pReq->downstreamNodeId, &pMsg->info, TSDB_CODE_STREAM_TASK_IVLD_STATUS); streamMetaReleaseTask(pMeta, pTask); - - return TSDB_CODE_SUCCESS; + return code; } SStreamTaskState pState = streamTaskGetStatus(pTask); @@ -948,7 +995,7 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) // re-send the lost checkpoint-trigger msg to downstream task tqDebug("s-task:%s re-send checkpoint-trigger to:0x%x, checkpointId:%" PRId64 ", transId:%d", pTask->id.idStr, (int32_t)pReq->downstreamTaskId, checkpointId, transId); - streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, pReq->downstreamNodeId, &pMsg->info, + code = streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, pReq->downstreamNodeId, &pMsg->info, TSDB_CODE_SUCCESS); } else { // not send checkpoint-trigger yet, wait int32_t recv = 0, total = 0; @@ -962,7 +1009,7 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) "sending checkpoint-source/trigger", pTask->id.idStr, recv, total); } - streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, pReq->downstreamNodeId, &pMsg->info, + code = streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, pReq->downstreamNodeId, &pMsg->info, TSDB_CODE_ACTION_IN_PROGRESS); } } else { // upstream not recv the checkpoint-source/trigger till now @@ -971,12 +1018,12 @@ int32_t tqStreamTaskProcessRetrieveTriggerReq(SStreamMeta* pMeta, SRpcMsg* pMsg) "s-task:%s not recv checkpoint-source from mnode or checkpoint-trigger from upstream yet, wait for all " "upstream sending checkpoint-source/trigger", pTask->id.idStr); - streamTaskSendCheckpointTriggerMsg(pTask, pReq->downstreamTaskId, pReq->downstreamNodeId, &pMsg->info, + code = streamTaskSendCheckpointTriggerMsg(pTask, 
pReq->downstreamTaskId, pReq->downstreamNodeId, &pMsg->info, TSDB_CODE_ACTION_IN_PROGRESS); } streamMetaReleaseTask(pMeta, pTask); - return TSDB_CODE_SUCCESS; + return code; } int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { @@ -984,19 +1031,19 @@ int32_t tqStreamTaskProcessRetrieveTriggerRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pRsp->streamId, pRsp->taskId, &pTask); - if (pTask == NULL) { + if (pTask == NULL || (code != 0)) { tqError( "vgId:%d process retrieve checkpoint-trigger, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, pRsp->taskId); - return TSDB_CODE_STREAM_TASK_NOT_EXIST; + return code; } tqDebug("s-task:%s recv re-send checkpoint-trigger msg from upstream:0x%x, checkpointId:%" PRId64 ", transId:%d", pTask->id.idStr, pRsp->upstreamTaskId, pRsp->checkpointId, pRsp->transId); - streamTaskProcessCheckpointTriggerRsp(pTask, pRsp); + code = streamTaskProcessCheckpointTriggerRsp(pTask, pRsp); streamMetaReleaseTask(pMeta, pTask); - return TSDB_CODE_SUCCESS; + return code; } int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg) { @@ -1004,7 +1051,7 @@ int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg) { SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId, &pTask); - if (pTask == NULL) { + if (pTask == NULL || (code != 0)) { tqError("vgId:%d process pause req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, pReq->taskId); // since task is in [STOP|DROPPING] state, it is safe to assume the pause is active @@ -1018,7 +1065,7 @@ int32_t tqStreamTaskProcessTaskPauseReq(SStreamMeta* pMeta, char* pMsg) { if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { pHistoryTask = NULL; code = streamMetaAcquireTask(pMeta, pTask->hTaskInfo.id.streamId, pTask->hTaskInfo.id.taskId, &pHistoryTask); - if (pHistoryTask == NULL) { + if (pHistoryTask 
== NULL || (code != 0)) { tqError("vgId:%d process pause req, failed to acquire fill-history task:0x%" PRIx64 ", it may have been dropped already", pMeta->vgId, pTask->hTaskInfo.id.taskId); @@ -1042,6 +1089,8 @@ static int32_t tqProcessTaskResumeImpl(void* handle, SStreamTask* pTask, int64_t bool fromVnode) { SStreamMeta* pMeta = fromVnode ? ((STQ*)handle)->pStreamMeta : handle; int32_t vgId = pMeta->vgId; + int32_t code = 0; + if (pTask == NULL) { return -1; } @@ -1065,18 +1114,18 @@ static int32_t tqProcessTaskResumeImpl(void* handle, SStreamTask* pTask, int64_t if (level == TASK_LEVEL__SOURCE && pTask->info.fillHistory && status == TASK_STATUS__SCAN_HISTORY) { pTask->hTaskInfo.operatorOpen = false; - streamStartScanHistoryAsync(pTask, igUntreated); + code = streamStartScanHistoryAsync(pTask, igUntreated); } else if (level == TASK_LEVEL__SOURCE && (streamQueueGetNumOfItems(pTask->inputq.queue) == 0)) { - tqScanWalAsync((STQ*)handle, false); + code = tqScanWalAsync((STQ*)handle, false); } else { - streamTrySchedExec(pTask); + code = streamTrySchedExec(pTask); } } /*else { ASSERT(status != TASK_STATUS__UNINIT); }*/ streamMetaReleaseTask(pMeta, pTask); - return 0; + return code; } int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* msg, bool fromVnode) { @@ -1086,15 +1135,15 @@ int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* m SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pReq->streamId, pReq->taskId, &pTask); - if (pTask == NULL) { + if (pTask == NULL || (code != 0)) { tqError("s-task:0x%x failed to acquire task to resume, it may have been dropped or stopped", pReq->taskId); return TSDB_CODE_STREAM_TASK_IVLD_STATUS; } - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); SStreamTaskState pState = streamTaskGetStatus(pTask); tqDebug("s-task:%s start to resume from paused, current status:%s", pTask->id.idStr, pState.name); - taosThreadMutexUnlock(&pTask->lock); + 
streamMutexUnlock(&pTask->lock); code = tqProcessTaskResumeImpl(handle, pTask, sversion, pReq->igUntreated, fromVnode); if (code != 0) { @@ -1104,11 +1153,11 @@ int32_t tqStreamTaskProcessTaskResumeReq(void* handle, int64_t sversion, char* m STaskId* pHTaskId = &pTask->hTaskInfo.id; SStreamTask* pHTask = NULL; code = streamMetaAcquireTask(pMeta, pHTaskId->streamId, pHTaskId->taskId, &pHTask); - if (pHTask) { - taosThreadMutexLock(&pHTask->lock); + if (pHTask && (code == 0)) { + streamMutexLock(&pHTask->lock); SStreamTaskState p = streamTaskGetStatus(pHTask); tqDebug("s-task:%s related history task start to resume from paused, current status:%s", pHTask->id.idStr, p.name); - taosThreadMutexUnlock(&pHTask->lock); + streamMutexUnlock(&pHTask->lock); code = tqProcessTaskResumeImpl(handle, pHTask, sversion, pReq->igUntreated, fromVnode); } @@ -1139,15 +1188,15 @@ int32_t tqStreamProcessCheckpointReadyRsp(SStreamMeta* pMeta, SRpcMsg* pMsg) { SStreamTask* pTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pRsp->streamId, pRsp->downstreamTaskId, &pTask); - if (pTask == NULL) { + if (pTask == NULL || (code != 0)) { tqError("vgId:%d failed to acquire task:0x%x when handling checkpoint-ready msg, it may have been dropped", pRsp->downstreamNodeId, pRsp->downstreamTaskId); - return TSDB_CODE_STREAM_TASK_NOT_EXIST; + return code; } - streamTaskProcessCheckpointReadyRsp(pTask, pRsp->upstreamTaskId, pRsp->checkpointId); + code = streamTaskProcessCheckpointReadyRsp(pTask, pRsp->upstreamTaskId, pRsp->checkpointId); streamMetaReleaseTask(pMeta, pTask); - return TSDB_CODE_SUCCESS; + return code; } int32_t tqStreamTaskProcessConsenChkptIdReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { @@ -1173,11 +1222,11 @@ int32_t tqStreamTaskProcessConsenChkptIdReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { SStreamTask* pTask = NULL; code = streamMetaAcquireTask(pMeta, req.streamId, req.taskId, &pTask); - if (pTask == NULL) { + if (pTask == NULL || (code != 0)) { tqError("vgId:%d process set consensus 
checkpointId req, failed to acquire task:0x%x, it may have been dropped already", pMeta->vgId, req.taskId); - streamMetaAddFailedTask(pMeta, req.streamId, req.taskId); - return TSDB_CODE_SUCCESS; + (void)streamMetaAddFailedTask(pMeta, req.streamId, req.taskId); + return code; } // discard the rsp, since it is expired. @@ -1193,13 +1242,13 @@ int32_t tqStreamTaskProcessConsenChkptIdReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { tqDebug("s-task:%s vgId:%d checkpointId:%" PRId64 " restore to consensus-checkpointId:%" PRId64 " from mnode", pTask->id.idStr, vgId, pTask->chkInfo.checkpointId, req.checkpointId); - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); ASSERT(pTask->chkInfo.checkpointId >= req.checkpointId); if (pTask->chkInfo.consensusTransId >= req.transId) { tqDebug("s-task:%s vgId:%d latest consensus transId:%d, expired consensus trans:%d, discard", pTask->id.idStr, vgId, pTask->chkInfo.consensusTransId, req.transId); - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); return TSDB_CODE_SUCCESS; } @@ -1215,14 +1264,14 @@ int32_t tqStreamTaskProcessConsenChkptIdReq(SStreamMeta* pMeta, SRpcMsg* pMsg) { } pTask->chkInfo.consensusTransId = req.transId; - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); if (pMeta->role == NODE_ROLE_LEADER) { - /*code = */ tqStreamStartOneTaskAsync(pMeta, pTask->pMsgCb, req.streamId, req.taskId); + code = tqStreamStartOneTaskAsync(pMeta, pTask->pMsgCb, req.streamId, req.taskId); } else { tqDebug("vgId:%d follower not start task:%s", vgId, pTask->id.idStr); } streamMetaReleaseTask(pMeta, pTask); - return TSDB_CODE_SUCCESS; + return code; } \ No newline at end of file diff --git a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c index 46c3ba4785..27910d1746 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c @@ -294,6 +294,14 @@ void* 
tsdbCacherowsReaderClose(void* pReader) { taosMemoryFree(p->pCurrSchema); + if (p->rowKey.numOfPKs > 0) { + for (int32_t i = 0; i < p->rowKey.numOfPKs; i++) { + if (IS_VAR_DATA_TYPE(p->rowKey.pks[i].type)) { + taosMemoryFree(p->rowKey.pks[i].pData); + } + } + } + if (p->pLDataIterArray) { destroySttBlockReader(p->pLDataIterArray, NULL); } @@ -325,13 +333,26 @@ void* tsdbCacherowsReaderClose(void* pReader) { return NULL; } -static void freeItem(void* pItem) { +static void freeItemOfRow(void* pItem) { SLastCol* pCol = (SLastCol*)pItem; if (IS_VAR_DATA_TYPE(pCol->colVal.value.type) && pCol->colVal.value.pData) { taosMemoryFree(pCol->colVal.value.pData); } } +static void freeItemWithPk(void* pItem) { + SLastCol* pCol = (SLastCol*)pItem; + for (int i = 0; i < pCol->rowKey.numOfPKs; i++) { + if (IS_VAR_DATA_TYPE(pCol->rowKey.pks[i].type)) { + taosMemoryFree(pCol->rowKey.pks[i].pData); + } + } + + if (IS_VAR_DATA_TYPE(pCol->colVal.value.type) && pCol->colVal.value.pData) { + taosMemoryFree(pCol->colVal.value.pData); + } +} + static int32_t tsdbCacheQueryReseek(void* pQHandle) { int32_t code = 0; SCacheRowsReader* pReader = pQHandle; @@ -407,6 +428,16 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 struct STColumn* pCol = &pr->pSchema->columns[slotId]; SLastCol p = {.rowKey.ts = INT64_MIN, .colVal.value.type = pCol->type, .colVal.flag = CV_FLAG_NULL}; + if (pr->rowKey.numOfPKs > 0) { + p.rowKey.numOfPKs = pr->rowKey.numOfPKs; + for (int32_t j = 0; j < pr->rowKey.numOfPKs; j++) { + p.rowKey.pks[j].type = pr->pkColumn.type; + if (IS_VAR_DATA_TYPE(pr->pkColumn.type)) { + p.rowKey.pks[j].pData = taosMemoryCalloc(1, pr->pkColumn.bytes); + } + } + } + if (IS_VAR_DATA_TYPE(pCol->type)) { p.colVal.value.pData = taosMemoryCalloc(pCol->bytes, sizeof(char)); } @@ -420,7 +451,7 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 tsdbCacheGetBatch(pr->pTsdb, uid, pRow, pr, ltype); if (TARRAY_SIZE(pRow) <= 0 || 
COL_VAL_IS_NONE(&((SLastCol*)TARRAY_DATA(pRow))[0].colVal)) { - taosArrayClearEx(pRow, freeItem); + taosArrayClearEx(pRow, freeItemOfRow); continue; } @@ -432,7 +463,7 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 SLastCol* p = taosArrayGet(pLastCols, k); SLastCol* pColVal = (SLastCol*)taosArrayGet(pRow, k); - if (pColVal->rowKey.ts > p->rowKey.ts) { + if (tRowKeyCompare(&pColVal->rowKey, &p->rowKey) > 0) { if (!COL_VAL_IS_VALUE(&pColVal->colVal) && HASTYPE(pr->type, CACHESCAN_RETRIEVE_LAST)) { if (!COL_VAL_IS_VALUE(&p->colVal)) { hasNotNullRow = false; @@ -445,6 +476,15 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 hasRes = true; p->rowKey.ts = pColVal->rowKey.ts; + for (int32_t j = 0; j < p->rowKey.numOfPKs; j++) { + if (IS_VAR_DATA_TYPE(p->rowKey.pks[j].type)) { + memcpy(p->rowKey.pks[j].pData, pColVal->rowKey.pks[j].pData, pColVal->rowKey.pks[j].nData); + p->rowKey.pks[j].nData = pColVal->rowKey.pks[j].nData; + } else { + p->rowKey.pks[j].val = pColVal->rowKey.pks[j].val; + } + } + if (k == 0) { if (TARRAY_SIZE(pTableUidList) == 0) { taosArrayPush(pTableUidList, &uid); @@ -483,26 +523,26 @@ int32_t tsdbRetrieveCacheRows(void* pReader, SSDataBlock* pResBlock, const int32 } } - taosArrayClearEx(pRow, freeItem); + taosArrayClearEx(pRow, freeItemOfRow); } if (hasRes) { saveOneRow(pLastCols, pResBlock, pr, slotIds, dstSlotIds, pRes, pr->idstr); } - taosArrayDestroyEx(pLastCols, freeItem); + taosArrayDestroyEx(pLastCols, freeItemWithPk); } else if (HASTYPE(pr->type, CACHESCAN_RETRIEVE_TYPE_ALL)) { for (int32_t i = pr->tableIndex; i < pr->numOfTables; ++i) { tb_uid_t uid = pTableList[i].uid; tsdbCacheGetBatch(pr->pTsdb, uid, pRow, pr, ltype); if (TARRAY_SIZE(pRow) <= 0 || COL_VAL_IS_NONE(&((SLastCol*)TARRAY_DATA(pRow))[0].colVal)) { - taosArrayClearEx(pRow, freeItem); + taosArrayClearEx(pRow, freeItemOfRow); continue; } saveOneRow(pRow, pResBlock, pr, slotIds, dstSlotIds, pRes, pr->idstr); - 
taosArrayClearEx(pRow, freeItem); + taosArrayClearEx(pRow, freeItemOfRow); taosArrayPush(pTableUidList, &uid); diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 2fe86817e3..93d2edd639 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -231,14 +231,8 @@ void initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t up int32_t initCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamNodeId, int32_t upstreamTaskId, int32_t childId, int64_t checkpointId, SRpcMsg* pMsg); -typedef int32_t (*__stream_async_exec_fn_t)(void* param); - -int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code); void flushStateDataInExecutor(SStreamTask* pTask, SStreamQueueItem* pCheckpointBlock); -void streamMutexLock(TdThreadMutex *pMutex); -void streamMutexUnlock(TdThreadMutex *pMutex); -void streamMutexDestroy(TdThreadMutex *pMutex); #ifdef __cplusplus } diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 74fff23c6b..869877c9a8 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1540,7 +1540,7 @@ int32_t chkpLoadExtraInfo(char* pChkpIdDir, int64_t* chkpId, int64_t* processId) // compatible with previous version *processId = -1; code = 0; - stError("failed to open file to load extra info, file:%s, reason:%s", pDst, tstrerror(TAOS_SYSTEM_ERROR(errno))); + stWarn("failed to open file to load extra info, file:%s, reason:%s", pDst, tstrerror(TAOS_SYSTEM_ERROR(errno))); goto _EXIT; } @@ -2308,6 +2308,7 @@ _EXIT: taosMemoryFree(cfHandle); return code; } + void* taskDbAddRef(void* pTaskDb) { STaskDbWrapper* pBackend = pTaskDb; return taosAcquireRef(taskDbWrapperId, pBackend->refId); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 6f3b7d8b32..1283f8e20b 100644 --- 
a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -45,7 +45,7 @@ typedef struct STaskInitTs { SMetaRefMgt gMetaRefMgt; -void metaRefMgtInit(); +int32_t metaRefMgtInit(); void metaRefMgtCleanup(); int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid); @@ -56,9 +56,14 @@ static void streamMetaEnvInit() { streamMetaId = taosOpenRef(64, streamMetaCloseImpl); - metaRefMgtInit(); - int32_t code = streamTimerInit(); - if (code != 0) { + int32_t code = metaRefMgtInit(); + if (code) { + stError("failed to init stream meta mgmt env, start failed"); + return; + } + + code = streamTimerInit(); + if (code) { stError("failed to init stream meta env, start failed"); } } @@ -66,17 +71,29 @@ static void streamMetaEnvInit() { void streamMetaInit() { (void) taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } void streamMetaCleanup() { - taosCloseRef(streamBackendId); - taosCloseRef(streamBackendCfWrapperId); - taosCloseRef(streamMetaId); + (void) taosCloseRef(streamBackendId); + (void) taosCloseRef(streamBackendCfWrapperId); + (void) taosCloseRef(streamMetaId); metaRefMgtCleanup(); streamTimerCleanUp(); } -void metaRefMgtInit() { - taosThreadMutexInit(&(gMetaRefMgt.mutex), NULL); - gMetaRefMgt.pTable = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); +int32_t metaRefMgtInit() { + int32_t code = taosThreadMutexInit(&(gMetaRefMgt.mutex), NULL); + if (code) { + return code; + } + + if (code == 0) { + gMetaRefMgt.pTable = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); + } + + if (gMetaRefMgt.pTable == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } else { + return code; + } } void metaRefMgtCleanup() { @@ -96,20 +113,34 @@ void metaRefMgtCleanup() { } int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid) { + int32_t code = 0; + void* p = NULL; + streamMutexLock(&gMetaRefMgt.mutex); - void* p = taosHashGet(gMetaRefMgt.pTable, &vgId, sizeof(vgId)); + p = 
taosHashGet(gMetaRefMgt.pTable, &vgId, sizeof(vgId)); if (p == NULL) { SArray* list = taosArrayInit(8, sizeof(void*)); - taosArrayPush(list, &rid); - taosHashPut(gMetaRefMgt.pTable, &vgId, sizeof(vgId), &list, sizeof(void*)); + p = taosArrayPush(list, &rid); + if (p == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + } else { + code = taosHashPut(gMetaRefMgt.pTable, &vgId, sizeof(vgId), &list, sizeof(void*)); + } + if (code) { /* do not return here: gMetaRefMgt.mutex is held and is released below */ + stError("vgId:%d failed to put into metaRef table, rid:%" PRId64, (int32_t) vgId, *rid); + taosArrayDestroy(list); /* the new list was never published, so free it */ + } } else { SArray* list = *(SArray**)p; - taosArrayPush(list, &rid); + void* px = taosArrayPush(list, &rid); + if (px == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + } } streamMutexUnlock(&gMetaRefMgt.mutex); - return 0; + return code; } int32_t streamMetaOpenTdb(SStreamMeta* pMeta) { @@ -141,19 +172,25 @@ enum STREAM_STATE_VER { }; int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { - int8_t ret = STREAM_STATA_COMPATIBLE; - TBC* pCur = NULL; - - if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { // no task info, no stream - return ret; - } - + int8_t ret = STREAM_STATA_COMPATIBLE; + TBC* pCur = NULL; + int32_t code = 0; void* pKey = NULL; int32_t kLen = 0; void* pVal = NULL; int32_t vLen = 0; - tdbTbcMoveToFirst(pCur); + if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { // no task info, no stream + return ret; + } + + code = tdbTbcMoveToFirst(pCur); + if (code) { + (void) tdbTbcClose(pCur); + stError("vgId:%d failed to open stream meta file cursor, not perform compatible check", pMeta->vgId); + return ret; + } + while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { if (pVal == NULL || vLen == 0) { break; @@ -178,7 +215,7 @@ int32_t streamMetaCheckBackendCompatible(SStreamMeta* pMeta) { tdbFree(pKey); tdbFree(pVal); - tdbTbcClose(pCur); + (void) tdbTbcClose(pCur); return ret; } @@ -244,7 +281,11 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) streamMutexLock(&pMeta->backendMutex); void** ppBackend 
= taosHashGet(pMeta->pTaskDbUnique, key, strlen(key)); if ((ppBackend != NULL) && (*ppBackend != NULL)) { - taskDbAddRef(*ppBackend); + if (taskDbAddRef(*ppBackend) == NULL) { /* backendMutex is held here: release it before bailing out */ + streamMutexUnlock(&pMeta->backendMutex); + stError("s-task:0x%x failed to ref backend", pTask->id.taskId); + return TSDB_CODE_FAILED; + } STaskDbWrapper* pBackend = *ppBackend; pBackend->pMeta = pMeta; @@ -278,7 +319,10 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) if (processVer != -1) pTask->chkInfo.processedVer = processVer; - taosHashPut(pMeta->pTaskDbUnique, key, strlen(key), &pBackend, sizeof(void*)); + int32_t code = taosHashPut(pMeta->pTaskDbUnique, key, strlen(key), &pBackend, sizeof(void*)); + if (code) { + stError("s-task:0x%x failed to put taskDb backend, code:out of memory", pTask->id.taskId); + } streamMutexUnlock(&pMeta->backendMutex); stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pBackend); @@ -290,7 +334,10 @@ void streamMetaRemoveDB(void* arg, char* key) { SStreamMeta* pMeta = arg; streamMutexLock(&pMeta->backendMutex); - taosHashRemove(pMeta->pTaskDbUnique, key, strlen(key)); + int32_t code = taosHashRemove(pMeta->pTaskDbUnique, key, strlen(key)); + if (code) { + stError("vgId:%d failed to remove key:%s in taskDbUnique map", pMeta->vgId, key); + } streamMutexUnlock(&pMeta->backendMutex); } @@ -398,12 +445,22 @@ int32_t streamMetaOpen(const char* path, void* ahandle, FTaskBuild buildTaskFn, } #endif - taosThreadRwlockInit(&pMeta->lock, &attr); - taosThreadRwlockAttrDestroy(&attr); + code = taosThreadRwlockInit(&pMeta->lock, &attr); + if (code) { + goto _err; + } + + code = taosThreadRwlockAttrDestroy(&attr); + if (code) { + goto _err; + } int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); memcpy(pRid, &pMeta->rid, sizeof(pMeta->rid)); - metaRefMgtAdd(pMeta->vgId, pRid); + code = metaRefMgtAdd(pMeta->vgId, pRid); + if (code) { + goto _err; + } code = createMetaHbInfo(pRid, &pMeta->pHbInfo); if (code != TSDB_CODE_SUCCESS) { @@ -416,7 +473,8 @@ int32_t 
streamMetaOpen(const char* path, void* ahandle, FTaskBuild buildTaskFn, if (pMeta->bkdChkptMgt == NULL) { goto _err; } - taosThreadMutexInit(&pMeta->backendMutex, NULL); + + code = taosThreadMutexInit(&pMeta->backendMutex, NULL); *p = pMeta; return code; @@ -425,9 +483,9 @@ _err: taosMemoryFree(pMeta->path); if (pMeta->pTasksMap) taosHashCleanup(pMeta->pTasksMap); if (pMeta->pTaskList) taosArrayDestroy(pMeta->pTaskList); - if (pMeta->pTaskDb) tdbTbClose(pMeta->pTaskDb); - if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb); - if (pMeta->db) tdbClose(pMeta->db); + if (pMeta->pTaskDb) (void)tdbTbClose(pMeta->pTaskDb); + if (pMeta->pCheckpointDb) (void)tdbTbClose(pMeta->pCheckpointDb); + if (pMeta->db) (void) tdbClose(pMeta->db); if (pMeta->pHbInfo) taosMemoryFreeClear(pMeta->pHbInfo); if (pMeta->updateInfo.pTasks) taosHashCleanup(pMeta->updateInfo.pTasks); if (pMeta->startInfo.pReadyTaskSet) taosHashCleanup(pMeta->startInfo.pReadyTaskSet); @@ -473,7 +531,7 @@ void streamMetaClear(SStreamMeta* pMeta) { // release the ref by timer if (p->info.delaySchedParam != 0 && p->info.fillHistory == 0) { // one more ref in timer stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", p->id.idStr, p->refCnt); - taosTmrStop(p->schedInfo.pDelayTimer); + (void) taosTmrStop(p->schedInfo.pDelayTimer); p->info.delaySchedParam = 0; streamMetaReleaseTask(pMeta, p); } @@ -481,7 +539,11 @@ void streamMetaClear(SStreamMeta* pMeta) { streamMetaReleaseTask(pMeta, p); } - taosRemoveRef(streamBackendId, pMeta->streamBackendRid); + int32_t code = taosRemoveRef(streamBackendId, pMeta->streamBackendRid); + if (code) { + stError("vgId:%d remove stream backend Ref failed, rid:%"PRId64, pMeta->vgId, pMeta->streamBackendRid); + } + taosHashClear(pMeta->pTasksMap); taosArrayClear(pMeta->pTaskList); @@ -502,14 +564,7 @@ void streamMetaClose(SStreamMeta* pMeta) { if (pMeta == NULL) { return; } - - // int64_t rid = *(int64_t*)pMeta->pRid; - // if (taosTmrStop(pMeta->hbInfo.hbTmr)) { - // 
taosMemoryFree(pMeta->pRid); - // } else { - // // do nothing, stop by timer thread - // } - taosRemoveRef(streamMetaId, pMeta->rid); + (void) taosRemoveRef(streamMetaId, pMeta->rid); } void streamMetaCloseImpl(void* arg) { @@ -525,10 +580,11 @@ void streamMetaCloseImpl(void* arg) { streamMetaClear(pMeta); streamMetaWUnLock(pMeta); - tdbAbort(pMeta->db, pMeta->txn); - tdbTbClose(pMeta->pTaskDb); - tdbTbClose(pMeta->pCheckpointDb); - tdbClose(pMeta->db); + // already log the error, ignore here + (void) tdbAbort(pMeta->db, pMeta->txn); + (void) tdbTbClose(pMeta->pTaskDb); + (void) tdbTbClose(pMeta->pCheckpointDb); + (void) tdbClose(pMeta->db); taosArrayDestroy(pMeta->pTaskList); taosArrayDestroy(pMeta->chkpSaved); @@ -552,7 +608,7 @@ void streamMetaCloseImpl(void* arg) { bkdMgtDestroy(pMeta->bkdChkptMgt); pMeta->role = NODE_ROLE_UNINIT; - taosThreadRwlockDestroy(&pMeta->lock); + (void) taosThreadRwlockDestroy(&pMeta->lock); taosMemoryFree(pMeta); stDebug("vgId:%d end to close stream meta", vgId); @@ -568,9 +624,10 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { if (code < 0) { return -1; } + buf = taosMemoryCalloc(1, len); if (buf == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } if (pTask->ver < SSTREAM_TASK_SUBTABLE_CHANGED_VER) { @@ -579,13 +636,19 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, len); - tEncodeStreamTask(&encoder, pTask); + code = tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); + if (code == -1) { + stError("s-task:%s vgId:%d task meta encode failed, code:%s", pTask->id.idStr, vgId, tstrerror(code)); + return TSDB_CODE_INVALID_MSG; + } + int64_t id[2] = {pTask->id.streamId, pTask->id.taskId}; code = tdbTbUpsert(pMeta->pTaskDb, id, STREAM_TASK_KEY_LEN, buf, len, pMeta->txn); if (code != TSDB_CODE_SUCCESS) { + code = terrno; stError("s-task:%s vgId:%d task meta save to disk failed, code:%s", pTask->id.idStr, vgId, 
tstrerror(terrno)); } else { stDebug("s-task:%s vgId:%d task meta save to disk", pTask->id.idStr, vgId); @@ -612,33 +675,44 @@ int32_t streamMetaRemoveTask(SStreamMeta* pMeta, STaskId* pTaskId) { int32_t streamMetaRegisterTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask, bool* pAdded) { *pAdded = false; + int32_t code = 0; STaskId id = streamTaskGetTaskId(pTask); void* p = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (p != NULL) { - return 0; + stDebug("s-task:%" PRIx64 " already exist in meta, no need to register", id.taskId); + return code; } - if (pMeta->buildTaskFn(pMeta->ahandle, pTask, ver) < 0) { - return -1; + if ((code = pMeta->buildTaskFn(pMeta->ahandle, pTask, ver)) != 0) { + return code; } - taosArrayPush(pMeta->pTaskList, &pTask->id); - taosHashPut(pMeta->pTasksMap, &id, sizeof(id), &pTask, POINTER_BYTES); - - if (streamMetaSaveTask(pMeta, pTask) < 0) { - return -1; + p = taosArrayPush(pMeta->pTaskList, &pTask->id); + if (p == NULL) { + stError("s-task:0x%"PRIx64" failed to register task into meta-list, code: out of memory", id.taskId); + return TSDB_CODE_OUT_OF_MEMORY; } - if (streamMetaCommit(pMeta) < 0) { - return -1; + code = taosHashPut(pMeta->pTasksMap, &id, sizeof(id), &pTask, POINTER_BYTES); + if (code) { + stError("s-task:0x%"PRIx64" failed to register task into meta-list, code: out of memory", id.taskId); + return code; + } + + if ((code = streamMetaSaveTask(pMeta, pTask)) != 0) { + return code; + } + + if ((code = streamMetaCommit(pMeta)) != 0) { + return code; } if (pTask->info.fillHistory == 0) { - atomic_add_fetch_32(&pMeta->numOfStreamTasks, 1); + (void) atomic_add_fetch_32(&pMeta->numOfStreamTasks, 1); } *pAdded = true; - return 0; + return code; } int32_t streamMetaGetNumOfTasks(SStreamMeta* pMeta) { @@ -703,7 +777,7 @@ static void doRemoveIdFromList(SArray* pTaskList, int32_t num, SStreamTaskId* id static int32_t streamTaskSendTransSuccessMsg(SStreamTask* pTask, void* param) { if (pTask->info.taskLevel == 
TASK_LEVEL__SOURCE) { - streamTaskSendCheckpointSourceRsp(pTask); + (void) streamTaskSendCheckpointSourceRsp(pTask); } return 0; } @@ -726,7 +800,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t } // handle the dropping event - streamTaskHandleEventAsync(pTask->status.pSM, TASK_EVENT_DROPPING, streamTaskSendTransSuccessMsg, NULL); + (void) streamTaskHandleEventAsync(pTask->status.pSM, TASK_EVENT_DROPPING, streamTaskSendTransSuccessMsg, NULL); } else { stDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId); streamMetaWUnLock(pMeta); @@ -765,12 +839,12 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t pTask = *ppTask; // it is an fill-history task, remove the related stream task's id that points to it if (pTask->info.fillHistory == 0) { - atomic_sub_fetch_32(&pMeta->numOfStreamTasks, 1); + (void) atomic_sub_fetch_32(&pMeta->numOfStreamTasks, 1); } - taosHashRemove(pMeta->pTasksMap, &id, sizeof(id)); + (void) taosHashRemove(pMeta->pTasksMap, &id, sizeof(id)); doRemoveIdFromList(pMeta->pTaskList, (int32_t)taosArrayGetSize(pMeta->pTaskList), &pTask->id); - streamMetaRemoveTask(pMeta, &id); + (void) streamMetaRemoveTask(pMeta, &id); ASSERT(taosHashGetSize(pMeta->pTasksMap) == taosArrayGetSize(pMeta->pTaskList)); streamMetaWUnLock(pMeta); @@ -778,7 +852,7 @@ int32_t streamMetaUnregisterTask(SStreamMeta* pMeta, int64_t streamId, int32_t t ASSERT(pTask->status.timerActive == 0); if (pTask->info.delaySchedParam != 0 && pTask->info.fillHistory == 0) { stDebug("s-task:%s stop schedTimer, and (before) desc ref:%d", pTask->id.idStr, pTask->refCnt); - taosTmrStop(pTask->schedInfo.pDelayTimer); + (void) taosTmrStop(pTask->schedInfo.pDelayTimer); pTask->info.delaySchedParam = 0; streamMetaReleaseTask(pMeta, pTask); } @@ -823,9 +897,11 @@ int32_t streamMetaCommit(SStreamMeta* pMeta) { int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) { int64_t checkpointId = 0; + 
int32_t code = 0; TBC* pCur = NULL; if (tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL) < 0) { + stError("failed to open stream meta file, the latest checkpointId is 0, vgId:%d", pMeta->vgId); return checkpointId; } @@ -835,7 +911,13 @@ int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) { int32_t vLen = 0; SDecoder decoder; - tdbTbcMoveToFirst(pCur); + code = tdbTbcMoveToFirst(pCur); + if (code) { + (void) tdbTbcClose(pCur); + stError("failed to open stream meta file cursor, the latest checkpointId is 0, vgId:%d", pMeta->vgId); + return checkpointId; + } + while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { if (pVal == NULL || vLen == 0) { break; @@ -854,8 +936,8 @@ int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) { tdbFree(pKey); tdbFree(pVal); - tdbTbcClose(pCur); + (void)tdbTbcClose(pCur); return checkpointId; } @@ -867,23 +949,34 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { void* pVal = NULL; int32_t vLen = 0; SDecoder decoder; + int32_t vgId = 0; + int32_t code = 0; + SArray* pRecycleList = NULL; if (pMeta == NULL) { return; } - SArray* pRecycleList = taosArrayInit(4, sizeof(STaskId)); - int32_t vgId = pMeta->vgId; + pRecycleList = taosArrayInit(4, sizeof(STaskId)); + + vgId = pMeta->vgId; stInfo("vgId:%d load stream tasks from meta files", vgId); - int32_t code = tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL); + code = tdbTbcOpen(pMeta->pTaskDb, &pCur, NULL); if (code != TSDB_CODE_SUCCESS) { stError("vgId:%d failed to open stream meta, code:%s, not load any stream tasks", vgId, tstrerror(terrno)); taosArrayDestroy(pRecycleList); return; } - tdbTbcMoveToFirst(pCur); + code = tdbTbcMoveToFirst(pCur); + if (code) { + stError("vgId:%d failed to open stream meta cursor, code:%s, not load any stream tasks", vgId, tstrerror(terrno)); + taosArrayDestroy(pRecycleList); + (void) tdbTbcClose(pCur); + return; + } + while (tdbTbcNext(pCur, &pKey, &kLen, &pVal, &vLen) == 0) { if (pVal == NULL || vLen == 0) { break; @@ -913,7 +1006,7 @@ void 
streamMetaLoadAllTasks(SStreamMeta* pMeta) { tFreeStreamTask(pTask); STaskId id = streamTaskGetTaskId(pTask); - taosArrayPush(pRecycleList, &id); + (void) taosArrayPush(pRecycleList, &id); int32_t total = taosArrayGetSize(pRecycleList); stDebug("s-task:0x%x is already dropped, add into recycle list, total:%d", taskId, total); @@ -934,7 +1027,7 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { continue; } - taosArrayPush(pMeta->pTaskList, &pTask->id); + (void) taosArrayPush(pMeta->pTaskList, &pTask->id); } else { // todo this should replace the existed object put by replay creating stream task msg from mnode stError("s-task:0x%x already added into table meta by replaying WAL, need check", pTask->id.taskId); @@ -944,17 +1037,17 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { if (taosHashPut(pMeta->pTasksMap, &id, sizeof(id), &pTask, POINTER_BYTES) != 0) { stError("s-task:0x%x failed to put into hashTable, code:%s, continue", pTask->id.taskId, tstrerror(terrno)); - taosArrayPop(pMeta->pTaskList); + (void) taosArrayPop(pMeta->pTaskList); tFreeStreamTask(pTask); continue; } if (pTask->info.fillHistory == 0) { - atomic_add_fetch_32(&pMeta->numOfStreamTasks, 1); + (void) atomic_add_fetch_32(&pMeta->numOfStreamTasks, 1); } if (streamTaskShouldPause(pTask)) { - atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1); + (void) atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1); } ASSERT(pTask->status.downstreamReady == 0); @@ -970,7 +1063,7 @@ void streamMetaLoadAllTasks(SStreamMeta* pMeta) { if (taosArrayGetSize(pRecycleList) > 0) { for (int32_t i = 0; i < taosArrayGetSize(pRecycleList); ++i) { STaskId* pId = taosArrayGet(pRecycleList, i); - streamMetaRemoveTask(pMeta, pId); + (void) streamMetaRemoveTask(pMeta, pId); } } @@ -998,7 +1091,7 @@ bool streamMetaTaskInTimer(SStreamMeta* pMeta) { SStreamTask* pTask = *(SStreamTask**)pIter; if (pTask->status.timerActive >= 1) { stDebug("s-task:%s in timer, blocking tasks in vgId:%d restart, set closing again", pTask->id.idStr, 
pMeta->vgId); - streamTaskStop(pTask); + (void) streamTaskStop(pTask); inTimer = true; } } @@ -1031,7 +1124,7 @@ void streamMetaNotifyClose(SStreamMeta* pMeta) { SStreamTask* pTask = *(SStreamTask**)pIter; stDebug("vgId:%d s-task:%s set task closing flag", vgId, pTask->id.idStr); - streamTaskStop(pTask); + (void) streamTaskStop(pTask); } streamMetaWUnLock(pMeta); @@ -1050,7 +1143,16 @@ void streamMetaNotifyClose(SStreamMeta* pMeta) { void streamMetaStartHb(SStreamMeta* pMeta) { int64_t* pRid = taosMemoryMalloc(sizeof(int64_t)); - metaRefMgtAdd(pMeta->vgId, pRid); + if (pRid == NULL) { + stError("vgId:%d failed to prepare the metaHb to mnode, hbMsg will not started, code: out of memory", pMeta->vgId); + return; + } + + int32_t code = metaRefMgtAdd(pMeta->vgId, pRid); + if (code) { + return; + } + *pRid = pMeta->rid; streamMetaHbToMnode(pRid, NULL); } @@ -1069,7 +1171,7 @@ void streamMetaResetStartInfo(STaskStartInfo* pStartInfo, int32_t vgId) { void streamMetaRLock(SStreamMeta* pMeta) { // stTrace("vgId:%d meta-rlock", pMeta->vgId); - taosThreadRwlockRdlock(&pMeta->lock); + (void) taosThreadRwlockRdlock(&pMeta->lock); } void streamMetaRUnLock(SStreamMeta* pMeta) { @@ -1084,30 +1186,13 @@ void streamMetaRUnLock(SStreamMeta* pMeta) { void streamMetaWLock(SStreamMeta* pMeta) { // stTrace("vgId:%d meta-wlock", pMeta->vgId); - taosThreadRwlockWrlock(&pMeta->lock); + (void) taosThreadRwlockWrlock(&pMeta->lock); // stTrace("vgId:%d meta-wlock completed", pMeta->vgId); } void streamMetaWUnLock(SStreamMeta* pMeta) { // stTrace("vgId:%d meta-wunlock", pMeta->vgId); - taosThreadRwlockUnlock(&pMeta->lock); -} - -static void execHelper(struct SSchedMsg* pSchedMsg) { - __async_exec_fn_t execFn = (__async_exec_fn_t)pSchedMsg->ahandle; - int32_t code = execFn(pSchedMsg->thandle); - if (code != 0 && pSchedMsg->msg != NULL) { - *(int32_t*)pSchedMsg->msg = code; - } -} - -int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code) { - 
SSchedMsg schedMsg = {0}; - schedMsg.fp = execHelper; - schedMsg.ahandle = fn; - schedMsg.thandle = param; - schedMsg.msg = code; - return taosScheduleTask(pMeta->qHandle, &schedMsg); + (void) taosThreadRwlockUnlock(&pMeta->lock); } int32_t streamMetaSendMsgBeforeCloseTasks(SStreamMeta* pMeta, SArray** pList) { @@ -1195,10 +1280,10 @@ static int32_t prepareBeforeStartTasks(SStreamMeta* pMeta, SArray** pList, int64 taosHashClear(pMeta->startInfo.pFailedTaskSet); pMeta->startInfo.startTs = now; - streamMetaResetTaskStatus(pMeta); + int32_t code = streamMetaResetTaskStatus(pMeta); streamMetaWUnLock(pMeta); - return TSDB_CODE_SUCCESS; + return code; } // restore the checkpoint id by negotiating the latest consensus checkpoint id @@ -1233,7 +1318,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { code = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId, &pTask); if (pTask == NULL) { stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, pTaskId->taskId); - streamMetaAddFailedTask(pMeta, pTaskId->streamId, pTaskId->taskId); + (void) streamMetaAddFailedTask(pMeta, pTaskId->streamId, pTaskId->taskId); continue; } @@ -1256,7 +1341,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { code = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId, &pTask); if (pTask == NULL) { stError("vgId:%d failed to acquire task:0x%x during start tasks", pMeta->vgId, pTaskId->taskId); - streamMetaAddFailedTask(pMeta, pTaskId->streamId, pTaskId->taskId); + (void) streamMetaAddFailedTask(pMeta, pTaskId->streamId, pTaskId->taskId); continue; } @@ -1274,7 +1359,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { stDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task", pTask->id.idStr); - streamLaunchFillHistoryTask(pTask); + (void) streamLaunchFillHistoryTask(pTask); // todo: how about retry launch fill-history task? 
} (void) streamMetaAddTaskLaunchResult(pMeta, pTaskId->streamId, pTaskId->taskId, pInfo->checkTs, pInfo->readyTs, true); @@ -1340,7 +1425,7 @@ int32_t streamMetaStopAllTasks(SStreamMeta* pMeta) { continue; } - streamTaskStop(pTask); + (void) streamTaskStop(pTask); streamMetaReleaseTask(pMeta, pTask); } @@ -1380,7 +1465,7 @@ int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t tas code = streamMetaAcquireTask(pMeta, streamId, taskId, &pTask); if (pTask == NULL) { stError("vgId:%d failed to acquire task:0x%x when starting task", pMeta->vgId, taskId); - streamMetaAddFailedTask(pMeta, streamId, taskId); + (void) streamMetaAddFailedTask(pMeta, streamId, taskId); return TSDB_CODE_STREAM_TASK_IVLD_STATUS; } @@ -1471,7 +1556,10 @@ int32_t streamMetaAddTaskLaunchResult(SStreamMeta* pMeta, int64_t streamId, int3 SHashObj* pDst = ready ? pStartInfo->pReadyTaskSet : pStartInfo->pFailedTaskSet; STaskInitTs initTs = {.start = startTs, .end = endTs, .success = ready}; - taosHashPut(pDst, &id, sizeof(id), &initTs, sizeof(STaskInitTs)); + int32_t code = taosHashPut(pDst, &id, sizeof(id), &initTs, sizeof(STaskInitTs)); + if (code) { /* the launch result was not recorded; log it instead of silently dropping it */ + stError("vgId:%d s-task:0x%x failed to record the task launch result, code:%s", pMeta->vgId, taskId, tstrerror(code)); + } int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta); int32_t numOfRecv = taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet); @@ -1491,14 +1579,14 @@ int32_t streamMetaAddTaskLaunchResult(SStreamMeta* pMeta, int64_t streamId, int3 streamMetaResetStartInfo(pStartInfo, pMeta->vgId); streamMetaWUnLock(pMeta); - pStartInfo->completeFn(pMeta); + code = pStartInfo->completeFn(pMeta); } else { streamMetaWUnLock(pMeta); stDebug("vgId:%d recv check downstream results, s-task:0x%x succ:%d, received:%d, total:%d", pMeta->vgId, taskId, ready, numOfRecv, numOfTotal); } - return TSDB_CODE_SUCCESS; + return code; } int32_t streamMetaResetTaskStatus(SStreamMeta* pMeta) { @@ -1572,19 +1660,26 @@ void streamMetaAddIntoUpdateTaskList(SStreamMeta* pMeta, SStreamTask* pTask, SSt int64_t startTs) { const char* 
id = pTask->id.idStr; int32_t vgId = pTask->pMeta->vgId; + int32_t code = 0; // keep the already updated info STaskUpdateEntry entry = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId, .transId = transId}; - taosHashPut(pMeta->updateInfo.pTasks, &entry, sizeof(entry), NULL, 0); + code = taosHashPut(pMeta->updateInfo.pTasks, &entry, sizeof(entry), NULL, 0); + if (code != 0) { + stError("s-task:%s failed to put updateTask into update list", id); + } int64_t el = taosGetTimestampMs() - startTs; if (pHTask != NULL) { STaskUpdateEntry hEntry = {.streamId = pHTask->id.streamId, .taskId = pHTask->id.taskId, .transId = transId}; - taosHashPut(pMeta->updateInfo.pTasks, &hEntry, sizeof(hEntry), NULL, 0); - - stDebug("s-task:%s vgId:%d transId:%d task nodeEp update completed, streamTask/hTask closed, elapsed:%" PRId64 - " ms", - id, vgId, transId, el); + code = taosHashPut(pMeta->updateInfo.pTasks, &hEntry, sizeof(hEntry), NULL, 0); + if (code != 0) { + stError("s-task:%s failed to put updateTask into update list", id); + } else { + stDebug("s-task:%s vgId:%d transId:%d task nodeEp update completed, streamTask/hTask closed, elapsed:%" PRId64 + " ms", + id, vgId, transId, el); + } } else { stDebug("s-task:%s vgId:%d transId:%d task nodeEp update completed, streamTask closed, elapsed time:%" PRId64 "ms", id, vgId, transId, el); diff --git a/source/libs/stream/src/streamSnapshot.c b/source/libs/stream/src/streamSnapshot.c index 02e4ed8d8b..3c27210a23 100644 --- a/source/libs/stream/src/streamSnapshot.c +++ b/source/libs/stream/src/streamSnapshot.c @@ -130,7 +130,6 @@ int32_t streamGetFileSize(char* path, char* name, int64_t* sz) { int32_t ret = 0; char* fullname = taosMemoryCalloc(1, strlen(path) + 32); - sprintf(fullname, "%s%s%s", path, TD_DIRSEP, name); ret = taosStatFile(fullname, sz, NULL, NULL); @@ -185,48 +184,89 @@ void snapFileDebugInfo(SBackendSnapFile2* pSnapFile) { } int32_t snapFileGenMeta(SBackendSnapFile2* pSnapFile) { + void* p = NULL; SBackendFileItem 
item = {0}; item.ref = 1; + // current item.name = pSnapFile->pCurrent; item.type = ROCKSDB_CURRENT_TYPE; - streamGetFileSize(pSnapFile->path, item.name, &item.size); - taosArrayPush(pSnapFile->pFileList, &item); + int32_t code = streamGetFileSize(pSnapFile->path, item.name, &item.size); + if (code) { + stError("failed to get file size"); + return code; + } + + p = taosArrayPush(pSnapFile->pFileList, &item); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } // mainfest item.name = pSnapFile->pMainfest; item.type = ROCKSDB_MAINFEST_TYPE; - streamGetFileSize(pSnapFile->path, item.name, &item.size); - taosArrayPush(pSnapFile->pFileList, &item); + code = streamGetFileSize(pSnapFile->path, item.name, &item.size); + if (code) { + return code; + } + + p = taosArrayPush(pSnapFile->pFileList, &item); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } // options item.name = pSnapFile->pOptions; item.type = ROCKSDB_OPTIONS_TYPE; - streamGetFileSize(pSnapFile->path, item.name, &item.size); - taosArrayPush(pSnapFile->pFileList, &item); + code = streamGetFileSize(pSnapFile->path, item.name, &item.size); + if (code) { + return code; + } + + p = taosArrayPush(pSnapFile->pFileList, &item); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + // sst for (int32_t i = 0; i < taosArrayGetSize(pSnapFile->pSst); i++) { char* sst = taosArrayGetP(pSnapFile->pSst, i); item.name = sst; item.type = ROCKSDB_SST_TYPE; - streamGetFileSize(pSnapFile->path, item.name, &item.size); - taosArrayPush(pSnapFile->pFileList, &item); + code = streamGetFileSize(pSnapFile->path, item.name, &item.size); + if (code) { + return code; + } + + p = taosArrayPush(pSnapFile->pFileList, &item); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } + // meta item.name = pSnapFile->pCheckpointMeta; item.type = ROCKSDB_CHECKPOINT_META_TYPE; if (streamGetFileSize(pSnapFile->path, item.name, &item.size) == 0) { - taosArrayPush(pSnapFile->pFileList, &item); + p = taosArrayPush(pSnapFile->pFileList, 
&item); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } item.name = pSnapFile->pCheckpointSelfcheck; item.type = ROCKSDB_CHECKPOINT_SELFCHECK_TYPE; if (streamGetFileSize(pSnapFile->path, item.name, &item.size) == 0) { - taosArrayPush(pSnapFile->pFileList, &item); + p = taosArrayPush(pSnapFile->pFileList, &item); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } + return 0; } + int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { int32_t code = 0; TdDirPtr pDir = taosOpenDir(pSnapFile->path); @@ -288,12 +328,18 @@ int32_t snapFileReadMeta(SBackendSnapFile2* pSnapFile) { code = TSDB_CODE_OUT_OF_MEMORY; break; } - taosArrayPush(pSnapFile->pSst, &sst); + + void* p = taosArrayPush(pSnapFile->pSst, &sst); + if (p == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + break; + } } } - taosCloseDir(&pDir); - return code; + int32_t ret = taosCloseDir(&pDir); /* always close; an error raised in the loop takes precedence */ + return (code != 0) ? code : ret; } + int32_t streamBackendSnapInitFile(char* metaPath, SStreamTaskSnap* pSnap, SBackendSnapFile2* pSnapFile) { int32_t code = 0; int32_t nBytes = 0; @@ -359,13 +405,16 @@ void snapFileDestroy(SBackendSnapFile2* pSnap) { } taosArrayDestroy(pSnap->pFileList); taosArrayDestroy(pSnap->pSst); - taosCloseFile(&pSnap->fd); - - return; + int32_t code = taosCloseFile(&pSnap->fd); + if (code) { + stError("failed to close snapshot fd"); + } } + int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta) { - // impl later int32_t code = 0; + SArray* pDbSnapSet = NULL; + SArray* pSnapInfoSet = taosArrayInit(4, sizeof(SStreamTaskSnap)); if (pSnapInfoSet == NULL) { return TSDB_CODE_OUT_OF_MEMORY; @@ -374,15 +423,13 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta code = streamCreateTaskDbSnapInfo(pMeta, path, pSnapInfoSet); if (code != 0) { stError("failed to do task db snap info, reason:%s", tstrerror(code)); - taosArrayDestroy(pSnapInfoSet); - return code; + goto _err; } - SArray* pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); + pDbSnapSet = 
taosArrayInit(8, sizeof(SBackendSnapFile2)); if (pDbSnapSet == NULL) { - taosArrayDestroy(pSnapInfoSet); code = TSDB_CODE_OUT_OF_MEMORY; - return code; + goto _err; } for (int32_t i = 0; i < taosArrayGetSize(pSnapInfoSet); i++) { @@ -391,16 +438,24 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, void* pMeta SBackendSnapFile2 snapFile = {0}; code = streamBackendSnapInitFile(path, pSnap, &snapFile); ASSERT(code == 0); - taosArrayPush(pDbSnapSet, &snapFile); + + void* p = taosArrayPush(pDbSnapSet, &snapFile); + if (p == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } } pHandle->pDbSnapSet = pDbSnapSet; pHandle->pSnapInfoSet = pSnapInfoSet; pHandle->currIdx = 0; pHandle->pMeta = pMeta; - return 0; + + return code; _err: + taosArrayDestroy(pSnapInfoSet); + taosArrayDestroy(pDbSnapSet); streamSnapHandleDestroy(pHandle); return code; } @@ -414,7 +469,8 @@ void streamSnapHandleDestroy(SStreamSnapHandle* handle) { } taosArrayDestroy(handle->pDbSnapSet); } - streamDestroyTaskDbSnapInfo(handle->pMeta, handle->pSnapInfoSet); + + (void) streamDestroyTaskDbSnapInfo(handle->pMeta, handle->pSnapInfoSet); if (handle->pSnapInfoSet) { for (int32_t i = 0; i < taosArrayGetSize(handle->pSnapInfoSet); i++) { SStreamTaskSnap* pSnap = taosArrayGet(handle->pSnapInfoSet, i); @@ -422,8 +478,8 @@ void streamSnapHandleDestroy(SStreamSnapHandle* handle) { } taosArrayDestroy(handle->pSnapInfoSet); } + taosMemoryFree(handle->metaPath); - return; } int32_t streamSnapReaderOpen(void* pMeta, int64_t sver, int64_t chkpId, char* path, SStreamSnapReader** ppReader) { @@ -506,14 +562,22 @@ _NEXT: item->name, (int64_t)pSnapFile->offset, item->size, pSnapFile->currFileIdx); pSnapFile->offset += nread; if (pSnapFile->offset >= item->size || nread < kBlockSize) { - taosCloseFile(&pSnapFile->fd); + code = taosCloseFile(&pSnapFile->fd); + if (code) { + stError("failed to close snapshot fd"); + } + pSnapFile->offset = 0; pSnapFile->currFileIdx += 1; } } else { stDebug("%s no 
data read, close file no.%d, move to next file, open and read", STREAM_STATE_TRANSFER, pSnapFile->currFileIdx); - taosCloseFile(&pSnapFile->fd); + code = taosCloseFile(&pSnapFile->fd); + if (code) { + stError("failed to close snapshot fd"); + } + pSnapFile->offset = 0; pSnapFile->currFileIdx += 1; @@ -577,14 +641,22 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path pHandle->pDbSnapSet = taosArrayInit(8, sizeof(SBackendSnapFile2)); if (pHandle->pDbSnapSet == NULL) { - streamSnapWriterClose(pWriter, 0); + int32_t c = streamSnapWriterClose(pWriter, 0); // not override the error code, and igore this error code + if (c) { + stError("failed close snaphost writer"); + } + code = TSDB_CODE_OUT_OF_MEMORY; return code; } SBackendSnapFile2 snapFile = {0}; if (taosArrayPush(pHandle->pDbSnapSet, &snapFile) == NULL) { - streamSnapWriterClose(pWriter, 0); + int32_t c = streamSnapWriterClose(pWriter, 0); + if (c) { + stError("failed close snaphost writer"); + } + code = TSDB_CODE_OUT_OF_MEMORY; return code; } @@ -614,46 +686,62 @@ int32_t streamSnapWriteImpl(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t pHdr->name, tstrerror(code)); } } + if (strlen(pHdr->name) == strlen(pItem->name) && strcmp(pHdr->name, pItem->name) == 0) { int64_t bytes = taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset); if (bytes != pHdr->size) { code = TAOS_SYSTEM_ERROR(errno); stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); - return code; + goto _err; } else { stInfo("succ to write data %s", pItem->name); } pSnapFile->offset += bytes; } else { - taosCloseFile(&pSnapFile->fd); + code = taosCloseFile(&pSnapFile->fd); + if (code) { + stError("failed to close snapshot fd"); + } + pSnapFile->offset = 0; pSnapFile->currFileIdx += 1; SBackendFileItem item = {0}; item.name = taosStrdup(pHdr->name); item.type = pHdr->type; + if (item.name == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } - 
taosArrayPush(pSnapFile->pFileList, &item); + void* p = taosArrayPush(pSnapFile->pFileList, &item); + if (p == NULL) { // can NOT goto _err here. + return TSDB_CODE_OUT_OF_MEMORY; + } - SBackendFileItem* pItem = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); - pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + SBackendFileItem* pItem2 = taosArrayGet(pSnapFile->pFileList, pSnapFile->currFileIdx); + pSnapFile->fd = streamOpenFile(pSnapFile->path, pItem2->name, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pSnapFile->fd == NULL) { code = TAOS_SYSTEM_ERROR(errno); stError("%s failed to open file name:%s%s%s, reason:%s", STREAM_STATE_TRANSFER, pSnapFile->path, TD_DIRSEP, pHdr->name, tstrerror(code)); + return code; } + // open fd again, let's close fd during handle errors. if (taosPWriteFile(pSnapFile->fd, pHdr->data, pHdr->size, pSnapFile->offset) != pHdr->size) { code = TAOS_SYSTEM_ERROR(errno); stError("%s failed to write snap, file name:%s, reason:%s", STREAM_STATE_TRANSFER, pHdr->name, tstrerror(code)); - return code; + goto _err; } - stInfo("succ to write data %s", pItem->name); + + stInfo("succ to write data %s", pItem2->name); pSnapFile->offset += pHdr->size; } - code = 0; -_EXIT: + return TSDB_CODE_SUCCESS; + +_err: + (void) taosCloseFile(&pSnapFile->fd); return code; } @@ -688,7 +776,10 @@ int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nDa item.name = taosStrdup((char*)ROCKSDB_CURRENT); item.type = ROCKSDB_CURRENT_TYPE; - taosArrayPush(pDbSnapFile->pFileList, &item); + void* p = taosArrayPush(pDbSnapFile->pFileList, &item); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } pDbSnapFile->inited = 1; return streamSnapWriteImpl(pWriter, pData, nData, pDbSnapFile); @@ -697,9 +788,12 @@ int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nDa return streamSnapWriteImpl(pWriter, pData, nData, pDbSnapFile); } else { 
SBackendSnapFile2 snapFile = {0}; - taosArrayPush(pHandle->pDbSnapSet, &snapFile); - pHandle->currIdx += 1; + void* p = taosArrayPush(pHandle->pDbSnapSet, &snapFile); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + pHandle->currIdx += 1; return streamSnapWrite(pWriter, pData, nData); } } diff --git a/source/libs/stream/src/streamStartHistory.c b/source/libs/stream/src/streamStartHistory.c index 1efb2af381..db0784d572 100644 --- a/source/libs/stream/src/streamStartHistory.c +++ b/source/libs/stream/src/streamStartHistory.c @@ -29,19 +29,19 @@ typedef struct SLaunchHTaskInfo { STaskId hTaskId; } SLaunchHTaskInfo; -static int32_t streamSetParamForScanHistory(SStreamTask* pTask); -static void streamTaskSetRangeStreamCalc(SStreamTask* pTask); -static int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); -static SLaunchHTaskInfo* createHTaskLaunchInfo(SStreamMeta* pMeta, STaskId* pTaskId, int64_t hStreamId, - int32_t hTaskId); -static void tryLaunchHistoryTask(void* param, void* tmrId); -static void doExecScanhistoryInFuture(void* param, void* tmrId); -static int32_t doStartScanHistoryTask(SStreamTask* pTask); -static int32_t streamTaskStartScanHistory(SStreamTask* pTask); -static void checkFillhistoryTaskStatus(SStreamTask* pTask, SStreamTask* pHTask); -static int32_t launchNotBuiltFillHistoryTask(SStreamTask* pTask); -static void doRetryLaunchFillHistoryTask(SStreamTask* pTask, SLaunchHTaskInfo* pInfo, int64_t now); -static void notRetryLaunchFillHistoryTask(SStreamTask* pTask, SLaunchHTaskInfo* pInfo, int64_t now); +static int32_t streamSetParamForScanHistory(SStreamTask* pTask); +static int32_t streamTaskSetRangeStreamCalc(SStreamTask* pTask); +static void initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated); +static int32_t createHTaskLaunchInfo(SStreamMeta* pMeta, STaskId* pTaskId, int64_t hStreamId, int32_t hTaskId, + SLaunchHTaskInfo** pInfo); +static void 
tryLaunchHistoryTask(void* param, void* tmrId); +static void doExecScanhistoryInFuture(void* param, void* tmrId); +static int32_t doStartScanHistoryTask(SStreamTask* pTask); +static int32_t streamTaskStartScanHistory(SStreamTask* pTask); +static void checkFillhistoryTaskStatus(SStreamTask* pTask, SStreamTask* pHTask); +static int32_t launchNotBuiltFillHistoryTask(SStreamTask* pTask); +static void doRetryLaunchFillHistoryTask(SStreamTask* pTask, SLaunchHTaskInfo* pInfo, int64_t now); +static void notRetryLaunchFillHistoryTask(SStreamTask* pTask, SLaunchHTaskInfo* pInfo, int64_t now); static int32_t streamTaskSetReady(SStreamTask* pTask) { int32_t numOfDowns = streamTaskGetNumOfDownstream(pTask); @@ -65,22 +65,19 @@ static int32_t streamTaskSetReady(SStreamTask* pTask) { int32_t streamStartScanHistoryAsync(SStreamTask* pTask, int8_t igUntreated) { SStreamScanHistoryReq req; + int32_t code = 0; initScanHistoryReq(pTask, &req, igUntreated); int32_t len = sizeof(SStreamScanHistoryReq); void* serializedReq = rpcMallocCont(len); if (serializedReq == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } memcpy(serializedReq, &req, len); SRpcMsg rpcMsg = {.contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_SCAN_HISTORY}; - if (tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) { - /*ASSERT(0);*/ - } - - return 0; + return tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg); } int32_t streamExecScanHistoryInFuture(SStreamTask* pTask, int32_t idleDuration) { @@ -109,8 +106,8 @@ int32_t streamExecScanHistoryInFuture(SStreamTask* pTask, int32_t idleDuration) pTask->schedHistoryInfo.pTimer = taosTmrStart(doExecScanhistoryInFuture, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamTimer); } else { - taosTmrReset(doExecScanhistoryInFuture, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamTimer, - &pTask->schedHistoryInfo.pTimer); + streamTmrReset(doExecScanhistoryInFuture, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamTimer, + &pTask->schedHistoryInfo.pTimer, 
pTask->pMeta->vgId, " start-history-task-tmr"); } return TSDB_CODE_SUCCESS; @@ -135,9 +132,19 @@ int32_t streamTaskStartScanHistory(SStreamTask* pTask) { int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask) { const char* id = pTask->id.idStr; + int32_t code = 0; - streamTaskSetReady(pTask); - streamTaskSetRangeStreamCalc(pTask); + code = streamTaskSetReady(pTask); + if (code) { + stError("s-task:%s failed to set task status ready", id); + return code; + } + + code = streamTaskSetRangeStreamCalc(pTask); + if (code) { + stError("s-task:%s failed to set the time range for stream task", id); + return code; + } SStreamTaskState p = streamTaskGetStatus(pTask); ASSERT(p.state == TASK_STATUS__READY); @@ -155,19 +162,23 @@ int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask) { stDebug("s-task:%s level:%d status:%s sched-status:%d", id, pTask->info.taskLevel, p.name, schedStatus); } - return TSDB_CODE_SUCCESS; + return code; } int32_t streamTaskOnScanHistoryTaskReady(SStreamTask* pTask) { // set the state to be ready - streamTaskSetReady(pTask); - streamTaskSetRangeStreamCalc(pTask); + int32_t code = streamTaskSetReady(pTask); + if (code == 0) { + code = streamTaskSetRangeStreamCalc(pTask); + } - SStreamTaskState p = streamTaskGetStatus(pTask); - ASSERT((p.state == TASK_STATUS__SCAN_HISTORY) && (pTask->info.fillHistory == 1)); + if (code == 0) { + SStreamTaskState p = streamTaskGetStatus(pTask); + ASSERT((p.state == TASK_STATUS__SCAN_HISTORY) && (pTask->info.fillHistory == 1)); - stDebug("s-task:%s fill-history task enters into scan-history data stage, status:%s", pTask->id.idStr, p.name); - streamTaskStartScanHistory(pTask); + stDebug("s-task:%s fill-history task enters into scan-history data stage, status:%s", pTask->id.idStr, p.name); + code = streamTaskStartScanHistory(pTask); + } // NOTE: there will be an deadlock if launch fill history here. 
// start the related fill-history task, when current task is ready @@ -175,7 +186,7 @@ int32_t streamTaskOnScanHistoryTaskReady(SStreamTask* pTask) { // streamLaunchFillHistoryTask(pTask); // } - return TSDB_CODE_SUCCESS; + return code; } // common @@ -212,8 +223,7 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { stDebug("s-task:%s not launch related fill-history task:0x%" PRIx64 "-0x%x, status:%s", idStr, hStreamId, hTaskId, pStatus.name); - (void) streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); - return -1; // todo set the correct error code + return streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); } stDebug("s-task:%s start to launch related fill-history task:0x%" PRIx64 "-0x%x", idStr, hStreamId, hTaskId); @@ -257,12 +267,11 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -int32_t initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated) { +void initScanHistoryReq(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated) { pReq->msgHead.vgId = pTask->info.nodeId; pReq->streamId = pTask->id.streamId; pReq->taskId = pTask->id.taskId; pReq->igUntreated = igUntreated; - return 0; } void checkFillhistoryTaskStatus(SStreamTask* pTask, SStreamTask* pHTask) { @@ -281,7 +290,10 @@ void checkFillhistoryTaskStatus(SStreamTask* pTask, SStreamTask* pHTask) { } // check if downstream tasks have been ready - streamTaskHandleEvent(pHTask->status.pSM, TASK_EVENT_INIT_SCANHIST); + int32_t code = streamTaskHandleEvent(pHTask->status.pSM, TASK_EVENT_INIT_SCANHIST); + if (code) { + stError("s-task:%s handle event init_scanhist failed", pTask->id.idStr); + } } void notRetryLaunchFillHistoryTask(SStreamTask* pTask, SLaunchHTaskInfo* pInfo, int64_t now) { @@ -316,7 +328,8 @@ void 
doRetryLaunchFillHistoryTask(SStreamTask* pTask, SLaunchHTaskInfo* pInfo, i stDebug("s-task:%s status:%s failed to launch fill-history task:0x%x, retry launch:%dms, retryCount:%d", pTask->id.idStr, p, hTaskId, pHTaskInfo->waitInterval, pHTaskInfo->retryTimes); - taosTmrReset(tryLaunchHistoryTask, LAUNCH_HTASK_INTERVAL, pInfo, streamTimer, &pHTaskInfo->pTimer); + streamTmrReset(tryLaunchHistoryTask, LAUNCH_HTASK_INTERVAL, pInfo, streamTimer, &pHTaskInfo->pTimer, + pTask->pMeta->vgId, " start-history-task-tmr"); } } @@ -367,7 +380,8 @@ void tryLaunchHistoryTask(void* param, void* tmrId) { pHTaskInfo->tickCount -= 1; if (pHTaskInfo->tickCount > 0) { - taosTmrReset(tryLaunchHistoryTask, LAUNCH_HTASK_INTERVAL, pInfo, streamTimer, &pHTaskInfo->pTimer); + streamTmrReset(tryLaunchHistoryTask, LAUNCH_HTASK_INTERVAL, pInfo, streamTimer, &pHTaskInfo->pTimer, + pTask->pMeta->vgId, " start-history-task-tmr"); streamMetaReleaseTask(pMeta, pTask); return; } @@ -417,21 +431,21 @@ void tryLaunchHistoryTask(void* param, void* tmrId) { taosMemoryFree(pInfo); } -SLaunchHTaskInfo* createHTaskLaunchInfo(SStreamMeta* pMeta, STaskId* pTaskId, int64_t hStreamId, int32_t hTaskId) { - SLaunchHTaskInfo* pInfo = taosMemoryCalloc(1, sizeof(SLaunchHTaskInfo)); - if (pInfo == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; +int32_t createHTaskLaunchInfo(SStreamMeta* pMeta, STaskId* pTaskId, int64_t hStreamId, int32_t hTaskId, + SLaunchHTaskInfo** pInfo) { + *pInfo = taosMemoryCalloc(1, sizeof(SLaunchHTaskInfo)); + if ((*pInfo) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; } - pInfo->id.streamId = pTaskId->streamId; - pInfo->id.taskId = pTaskId->taskId; + (*pInfo)->id.streamId = pTaskId->streamId; + (*pInfo)->id.taskId = pTaskId->taskId; - pInfo->hTaskId.streamId = hStreamId; - pInfo->hTaskId.taskId = hTaskId; + (*pInfo)->hTaskId.streamId = hStreamId; + (*pInfo)->hTaskId.taskId = hTaskId; - pInfo->pMeta = pMeta; - return pInfo; + (*pInfo)->pMeta = pMeta; + return TSDB_CODE_SUCCESS; } 
int32_t launchNotBuiltFillHistoryTask(SStreamTask* pTask) { @@ -440,16 +454,18 @@ int32_t launchNotBuiltFillHistoryTask(SStreamTask* pTask) { const char* idStr = pTask->id.idStr; int64_t hStreamId = pTask->hTaskInfo.id.streamId; int32_t hTaskId = pTask->hTaskInfo.id.taskId; + SLaunchHTaskInfo* pInfo = NULL; + ASSERT(hTaskId != 0); stWarn("s-task:%s vgId:%d failed to launch history task:0x%x, since not built yet", idStr, pMeta->vgId, hTaskId); - STaskId id = streamTaskGetTaskId(pTask); - SLaunchHTaskInfo* pInfo = createHTaskLaunchInfo(pMeta, &id, hStreamId, hTaskId); - if (pInfo == NULL) { + STaskId id = streamTaskGetTaskId(pTask); + int32_t code = createHTaskLaunchInfo(pMeta, &id, hStreamId, hTaskId, &pInfo); + if (code) { stError("s-task:%s failed to launch related fill-history task, since Out Of Memory", idStr); - (void) streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); - return terrno; + (void)streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); + return code; } // set the launch time info @@ -475,7 +491,8 @@ int32_t launchNotBuiltFillHistoryTask(SStreamTask* pTask) { } else { // timer exists ASSERT(pTask->status.timerActive >= 1); stDebug("s-task:%s set timer active flag, task timer not null", idStr); - taosTmrReset(tryLaunchHistoryTask, WAIT_FOR_MINIMAL_INTERVAL, pInfo, streamTimer, &pTask->hTaskInfo.pTimer); + streamTmrReset(tryLaunchHistoryTask, WAIT_FOR_MINIMAL_INTERVAL, pInfo, streamTimer, &pTask->hTaskInfo.pTimer, + pTask->pMeta->vgId, " start-history-task-tmr"); } return TSDB_CODE_SUCCESS; @@ -510,7 +527,7 @@ bool streamHistoryTaskSetVerRangeStep2(SStreamTask* pTask, int64_t nextProcessVe } } -void streamTaskSetRangeStreamCalc(SStreamTask* pTask) { +int32_t streamTaskSetRangeStreamCalc(SStreamTask* pTask) { SDataRange* pRange = &pTask->dataRange; if (!HAS_RELATED_FILLHISTORY_TASK(pTask)) { @@ -523,10 +540,12 @@ void 
streamTaskSetRangeStreamCalc(SStreamTask* pTask) { "window:%" PRId64 "-%" PRId64 ", verRange:%" PRId64 "-%" PRId64, pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer); } + + return TSDB_CODE_SUCCESS; } else { ASSERT(pTask->info.fillHistory == 0); if (pTask->info.taskLevel >= TASK_LEVEL__AGG) { - return; + return TSDB_CODE_SUCCESS; } stDebug("s-task:%s level:%d related fill-history task exists, stream task timeWindow:%" PRId64 " - %" PRId64 @@ -536,7 +555,7 @@ void streamTaskSetRangeStreamCalc(SStreamTask* pTask) { SVersionRange verRange = pRange->range; STimeWindow win = pRange->window; - streamSetParamForStreamScannerStep2(pTask, &verRange, &win); + return streamSetParamForStreamScannerStep2(pTask, &verRange, &win); } } @@ -554,7 +573,10 @@ void doExecScanhistoryInFuture(void* param, void* tmrId) { } if (pTask->schedHistoryInfo.numOfTicks <= 0) { - streamStartScanHistoryAsync(pTask, 0); + int32_t code = streamStartScanHistoryAsync(pTask, 0); + if (code) { + stError("s-task:%s async start history task failed", pTask->id.idStr); + } int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s fill-history:%d start scan-history data, out of tmr, ref:%d", pTask->id.idStr, @@ -563,18 +585,26 @@ void doExecScanhistoryInFuture(void* param, void* tmrId) { // release the task. 
streamMetaReleaseTask(pTask->pMeta, pTask); } else { - taosTmrReset(doExecScanhistoryInFuture, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamTimer, - &pTask->schedHistoryInfo.pTimer); + streamTmrReset(doExecScanhistoryInFuture, SCANHISTORY_IDLE_TIME_SLICE, pTask, streamTimer, + &pTask->schedHistoryInfo.pTimer, pTask->pMeta->vgId, " start-history-task-tmr"); } } int32_t doStartScanHistoryTask(SStreamTask* pTask) { + int32_t code = 0; SVersionRange* pRange = &pTask->dataRange.range; + if (pTask->info.fillHistory) { - streamSetParamForScanHistory(pTask); + code = streamSetParamForScanHistory(pTask); + if (code) { + return code; + } } - streamSetParamForStreamScannerStep1(pTask, pRange, &pTask->dataRange.window); - int32_t code = streamStartScanHistoryAsync(pTask, 0); - return code; + code = streamSetParamForStreamScannerStep1(pTask, pRange, &pTask->dataRange.window); + if (code) { + return code; + } + + return streamStartScanHistoryAsync(pTask, 0); } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 7c2d0b3556..0110a9825c 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -29,20 +29,20 @@ static void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo); static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) { int32_t childId = taosArrayGetSize(pArray); pTask->info.selfChildId = childId; - taosArrayPush(pArray, &pTask); - return 0; + void* p = taosArrayPush(pArray, &pTask); + return (p == NULL)? 
TSDB_CODE_OUT_OF_MEMORY:TSDB_CODE_SUCCESS; } static int32_t doUpdateTaskEpset(SStreamTask* pTask, int32_t nodeId, SEpSet* pEpSet, bool* pUpdated) { char buf[512] = {0}; if (pTask->info.nodeId == nodeId) { // execution task should be moved away bool isEqual = isEpsetEqual(&pTask->info.epSet, pEpSet); - epsetToStr(pEpSet, buf, tListLen(buf)); + (void)epsetToStr(pEpSet, buf, tListLen(buf)); if (!isEqual) { (*pUpdated) = true; char tmp[512] = {0}; - epsetToStr(&pTask->info.epSet, tmp, tListLen(tmp)); + (void) epsetToStr(&pTask->info.epSet, tmp, tListLen(tmp)); // only for log file, ignore errors epsetAssign(&pTask->info.epSet, pEpSet); stDebug("s-task:0x%x (vgId:%d) self node epset is updated %s, old:%s", pTask->id.taskId, nodeId, buf, tmp); @@ -127,7 +127,10 @@ int32_t tNewStreamTask(int64_t streamId, int8_t taskLevel, SEpSet* pEpset, bool pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL; pTask->taskCheckInfo.pList = taosArrayInit(4, sizeof(SDownstreamStatusInfo)); - taosThreadMutexInit(&pTask->taskCheckInfo.checkInfoLock, NULL); + code = taosThreadMutexInit(&pTask->taskCheckInfo.checkInfoLock, NULL); + if (code) { + return code; + } if (fillHistory) { ASSERT(hasFillhistory); @@ -135,7 +138,7 @@ int32_t tNewStreamTask(int64_t streamId, int8_t taskLevel, SEpSet* pEpset, bool epsetAssign(&(pTask->info.mnodeEpset), pEpset); - addToTaskset(pTaskList, pTask); + code = addToTaskset(pTaskList, pTask); *p = pTask; return code; @@ -221,17 +224,17 @@ void tFreeStreamTask(SStreamTask* pTask) { } if (pTask->schedInfo.pDelayTimer != NULL) { - taosTmrStop(pTask->schedInfo.pDelayTimer); + (void) taosTmrStop(pTask->schedInfo.pDelayTimer); pTask->schedInfo.pDelayTimer = NULL; } if (pTask->hTaskInfo.pTimer != NULL) { - /*bool ret = */ taosTmrStop(pTask->hTaskInfo.pTimer); + (void) taosTmrStop(pTask->hTaskInfo.pTimer); pTask->hTaskInfo.pTimer = NULL; } if (pTask->msgInfo.pRetryTmr != NULL) { - /*bool ret = */ taosTmrStop(pTask->msgInfo.pRetryTmr); + (void) 
taosTmrStop(pTask->msgInfo.pRetryTmr); pTask->msgInfo.pRetryTmr = NULL; } @@ -394,10 +397,12 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i return terrno; } - taosThreadMutexInit(&pTask->msgInfo.lock, NULL); + code = taosThreadMutexInit(&pTask->msgInfo.lock, NULL); + if (code) { + return code; + } TdThreadMutexAttr attr = {0}; - code = taosThreadMutexAttrInit(&attr); if (code != 0) { stError("s-task:%s initElapsed mutex attr failed, code:%s", pTask->id.idStr, tstrerror(code)); @@ -410,8 +415,16 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i return code; } - taosThreadMutexInit(&pTask->lock, &attr); - taosThreadMutexAttrDestroy(&attr); + code = taosThreadMutexInit(&pTask->lock, &attr); + if (code) { + return code; + } + + code = taosThreadMutexAttrDestroy(&attr); + if (code) { + return code; + } + streamTaskOpenAllUpstreamInput(pTask); STaskOutputInfo* pOutputInfo = &pTask->outputInfo; @@ -424,7 +437,11 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i // 2MiB per second for sink task // 50 times sink operator per second - streamTaskInitTokenBucket(pOutputInfo->pTokenBucket, 35, 35, tsSinkDataRate, pTask->id.idStr); + code = streamTaskInitTokenBucket(pOutputInfo->pTokenBucket, 35, 35, tsSinkDataRate, pTask->id.idStr); + if (code) { + return code; + } + pOutputInfo->pNodeEpsetUpdateList = taosArrayInit(4, sizeof(SDownstreamTaskEpset)); if (pOutputInfo->pNodeEpsetUpdateList == NULL) { stError("s-task:%s failed to prepare downstreamUpdateList, code:%s", pTask->id.idStr, @@ -474,13 +491,13 @@ int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstre pTask->upstreamInfo.pList = taosArrayInit(4, POINTER_BYTES); } - taosArrayPush(pTask->upstreamInfo.pList, &pEpInfo); - return TSDB_CODE_SUCCESS; + void* p = taosArrayPush(pTask->upstreamInfo.pList, &pEpInfo); + return (p == NULL)? 
TSDB_CODE_OUT_OF_MEMORY:TSDB_CODE_SUCCESS; } void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated) { char buf[512] = {0}; - epsetToStr(pEpSet, buf, tListLen(buf)); + (void) epsetToStr(pEpSet, buf, tListLen(buf)); // ignore error since it is only for log file. int32_t numOfUpstream = taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < numOfUpstream; ++i) { @@ -491,7 +508,7 @@ void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpS *pUpdated = true; char tmp[512] = {0}; - epsetToStr(&pInfo->epSet, tmp, tListLen(tmp)); + (void) epsetToStr(&pInfo->epSet, tmp, tListLen(tmp)); epsetAssign(&pInfo->epSet, pEpSet); stDebug("s-task:0x%x update the upstreamInfo taskId:0x%x(nodeId:%d) newEpset:%s old:%s", pTask->id.taskId, @@ -526,7 +543,7 @@ void streamTaskSetFixedDownstreamInfo(SStreamTask* pTask, const SStreamTask* pDo void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet, bool* pUpdated) { char buf[512] = {0}; - epsetToStr(pEpSet, buf, tListLen(buf)); + (void) epsetToStr(pEpSet, buf, tListLen(buf)); // ignore the error since only for log files. 
int32_t id = pTask->id.taskId; int8_t type = pTask->outputInfo.type; @@ -542,7 +559,7 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE if (!isEqual) { *pUpdated = true; char tmp[512] = {0}; - epsetToStr(&pVgInfo->epSet, tmp, tListLen(tmp)); + (void) epsetToStr(&pVgInfo->epSet, tmp, tListLen(tmp)); epsetAssign(&pVgInfo->epSet, pEpSet); stDebug("s-task:0x%x update dispatch info, task:0x%x(nodeId:%d) newEpset:%s old:%s", id, pVgInfo->taskId, @@ -562,7 +579,7 @@ void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SE *pUpdated = true; char tmp[512] = {0}; - epsetToStr(&pDispatcher->epSet, tmp, tListLen(tmp)); + (void) epsetToStr(&pDispatcher->epSet, tmp, tListLen(tmp)); epsetAssign(&pDispatcher->epSet, pEpSet); stDebug("s-task:0x%x update dispatch info, task:0x%x(nodeId:%d) newEpset:%s old:%s", id, pDispatcher->taskId, @@ -580,8 +597,16 @@ int32_t streamTaskStop(SStreamTask* pTask) { int64_t st = taosGetTimestampMs(); const char* id = pTask->id.idStr; - streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_STOP); - qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); + int32_t code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_STOP); + if (code) { + stError("failed to handle STOP event, s-task:%s", id); + } + + code = qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS); + if (code) { + stError("s-task:%s failed to kill task related query handle", id); + } + while (!streamTaskIsIdle(pTask)) { stDebug("s-task:%s level:%d wait for task to be idle and then close, check again in 100ms", id, pTask->info.taskLevel); @@ -590,7 +615,7 @@ int32_t streamTaskStop(SStreamTask* pTask) { int64_t el = taosGetTimestampMs() - st; stDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", vgId, id, el); - return 0; + return code; } bool streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) { @@ -607,7 +632,10 @@ bool streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList) { bool updated = false; for 
(int32_t i = 0; i < taosArrayGetSize(pNodeList); ++i) { SNodeUpdateInfo* pInfo = taosArrayGet(pNodeList, i); - doUpdateTaskEpset(pTask, pInfo->nodeId, &pInfo->newEp, &updated); + int32_t code = doUpdateTaskEpset(pTask, pInfo->nodeId, &pInfo->newEp, &updated); + if (code) { + stError("s-task:0x%x failed to update the task nodeEp epset, code:%s", pTask->id.taskId, tstrerror(code)); + } } return updated; @@ -704,10 +732,11 @@ int8_t streamTaskSetSchedStatusInactive(SStreamTask* pTask) { } int32_t streamTaskClearHTaskAttr(SStreamTask* pTask, int32_t resetRelHalt) { + int32_t code = 0; SStreamMeta* pMeta = pTask->pMeta; STaskId sTaskId = {.streamId = pTask->streamTaskId.streamId, .taskId = pTask->streamTaskId.taskId}; if (pTask->info.fillHistory == 0) { - return TSDB_CODE_SUCCESS; + return code; } SStreamTask** ppStreamTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &sTaskId, sizeof(sTaskId)); @@ -725,11 +754,11 @@ int32_t streamTaskClearHTaskAttr(SStreamTask* pTask, int32_t resetRelHalt) { (*ppStreamTask)->status.taskStatus = TASK_STATUS__READY; } - streamMetaSaveTask(pMeta, *ppStreamTask); + code = streamMetaSaveTask(pMeta, *ppStreamTask); streamMutexUnlock(&(*ppStreamTask)->lock); } - return TSDB_CODE_SUCCESS; + return code; } int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId, int64_t resetRelHalt) { @@ -797,8 +826,7 @@ int32_t streamSendChkptReportMsg(SStreamTask* pTask, SCheckpointInfo* pCheckpoin initRpcMsg(&msg, TDMT_MND_STREAM_CHKPT_REPORT, buf, tlen); stDebug("s-task:%s vgId:%d build and send task checkpoint-report to mnode", id, vgId); - tmsgSendReq(&pTask->info.mnodeEpset, &msg); - return 0; + return tmsgSendReq(&pTask->info.mnodeEpset, &msg); } STaskId streamTaskGetTaskId(const SStreamTask* pTask) { @@ -880,6 +908,7 @@ STaskStatusEntry streamTaskGetStatusEntry(SStreamTask* pTask) { static int32_t taskPauseCallback(SStreamTask* pTask, void* param) { SStreamMeta* pMeta = pTask->pMeta; + int32_t code = 0; int32_t num 
= atomic_add_fetch_32(&pMeta->numOfPausedTasks, 1); stInfo("vgId:%d s-task:%s pause stream task. paused task num:%d", pMeta->vgId, pTask->id.idStr, num); @@ -887,15 +916,15 @@ static int32_t taskPauseCallback(SStreamTask* pTask, void* param) { // in case of fill-history task, stop the tsdb file scan operation. if (pTask->info.fillHistory == 1) { void* pExecutor = pTask->exec.pExecutor; - qKillTask(pExecutor, TSDB_CODE_SUCCESS); + code = qKillTask(pExecutor, TSDB_CODE_SUCCESS); } stDebug("vgId:%d s-task:%s set pause flag and pause task", pMeta->vgId, pTask->id.idStr); - return TSDB_CODE_SUCCESS; + return code; } void streamTaskPause(SStreamTask* pTask) { - streamTaskHandleEventAsync(pTask->status.pSM, TASK_EVENT_PAUSE, taskPauseCallback, NULL); + (void) streamTaskHandleEventAsync(pTask->status.pSM, TASK_EVENT_PAUSE, taskPauseCallback, NULL); } void streamTaskResume(SStreamTask* pTask) { @@ -949,8 +978,7 @@ int32_t streamTaskSendCheckpointReq(SStreamTask* pTask) { initRpcMsg(&msg, TDMT_MND_STREAM_REQ_CHKPT, buf, tlen); stDebug("s-task:%s vgId:%d build and send task checkpoint req", id, vgId); - tmsgSendReq(&pTask->info.mnodeEpset, &msg); - return 0; + return tmsgSendReq(&pTask->info.mnodeEpset, &msg); } void streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId, SStreamUpstreamEpInfo** pEpInfo) { @@ -1044,7 +1072,7 @@ int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq) { void streamTaskSetRemoveBackendFiles(SStreamTask* pTask) { pTask->status.removeBackendFiles = true; } -int32_t streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTransId, int64_t* pCheckpointId) { +void streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTransId, int64_t* pCheckpointId) { if (pTransId != NULL) { *pTransId = pTask->chkInfo.pActiveInfo->transId; } @@ -1052,8 +1080,6 @@ int32_t streamTaskGetActiveCheckpointInfo(const SStreamTask* pTask, int32_t* pTr if (pCheckpointId != NULL) { *pCheckpointId = 
pTask->chkInfo.pActiveInfo->activeId; } - - return TSDB_CODE_SUCCESS; } int32_t streamTaskSetActiveCheckpointInfo(SStreamTask* pTask, int64_t activeCheckpointId) { @@ -1084,7 +1110,7 @@ int32_t streamTaskCreateActiveChkptInfo(SActiveCheckpointInfo** pRes) { pInfo->pCheckpointReadyRecvList = taosArrayInit(4, sizeof(STaskDownstreamReadyInfo)); *pRes = pInfo; - return TSDB_CODE_SUCCESS; + return code; } void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) { @@ -1101,12 +1127,12 @@ void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) { pInfo->pCheckpointReadyRecvList = NULL; if (pInfo->pChkptTriggerTmr != NULL) { - taosTmrStop(pInfo->pChkptTriggerTmr); + (void) taosTmrStop(pInfo->pChkptTriggerTmr); pInfo->pChkptTriggerTmr = NULL; } if (pInfo->pSendReadyMsgTmr != NULL) { - taosTmrStop(pInfo->pSendReadyMsgTmr); + (void) taosTmrStop(pInfo->pSendReadyMsgTmr); pInfo->pSendReadyMsgTmr = NULL; } diff --git a/source/libs/stream/src/streamTimer.c b/source/libs/stream/src/streamTimer.c index 931de397cc..fb1740ae0a 100644 --- a/source/libs/stream/src/streamTimer.c +++ b/source/libs/stream/src/streamTimer.c @@ -35,8 +35,9 @@ void streamTimerCleanUp() { streamTimer = NULL; } -tmr_h streamTimerGetInstance() { - return streamTimer; +int32_t streamTimerGetInstance(tmr_h* pTmr) { + *pTmr = streamTimer; + return TSDB_CODE_SUCCESS; } void streamTmrReset(TAOS_TMR_CALLBACK fp, int32_t mseconds, void* param, void* handle, tmr_h* pTmrId, int32_t vgId, diff --git a/source/libs/stream/src/streamUpdate.c b/source/libs/stream/src/streamUpdate.c index 76b0d6a561..6a2c85323a 100644 --- a/source/libs/stream/src/streamUpdate.c +++ b/source/libs/stream/src/streamUpdate.c @@ -36,7 +36,6 @@ static int64_t adjustExpEntries(int64_t entries) { return TMIN(DEFAULT_EXPECTED_ int compareKeyTs(void* pTs1, void* pTs2, void* pPkVal, __compar_fn_t cmpPkFn) { return compareInt64Val(pTs1, pTs2); - ; } int compareKeyTsAndPk(void* pValue1, void* pTs, void* pPkVal, __compar_fn_t cmpPkFn) 
{ diff --git a/source/libs/tdb/CMakeLists.txt b/source/libs/tdb/CMakeLists.txt index cc50919413..bb8d33366c 100644 --- a/source/libs/tdb/CMakeLists.txt +++ b/source/libs/tdb/CMakeLists.txt @@ -10,7 +10,6 @@ target_sources(tdb "src/db/tdbTable.c" "src/db/tdbTxn.c" "src/db/tdbPage.c" - "src/db/tdbOs.c" ) target_include_directories( diff --git a/source/libs/tdb/src/db/tdbBtree.c b/source/libs/tdb/src/db/tdbBtree.c index 4cca63a966..4dd3d3298c 100644 --- a/source/libs/tdb/src/db/tdbBtree.c +++ b/source/libs/tdb/src/db/tdbBtree.c @@ -76,14 +76,14 @@ int tdbBtreeOpen(int keyLen, int valLen, SPager *pPager, char const *tbname, SPg if (keyLen == 0) { tdbError("tdb/btree-open: key len cannot be zero."); - return -1; + return TSDB_CODE_INVALID_PARA; } *ppBt = NULL; pBt = (SBTree *)tdbOsCalloc(1, sizeof(*pBt)); if (pBt == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } // pBt->keyLen diff --git a/source/libs/tdb/src/db/tdbDb.c b/source/libs/tdb/src/db/tdbDb.c index 9a1d89eeac..b9ba9de51f 100644 --- a/source/libs/tdb/src/db/tdbDb.c +++ b/source/libs/tdb/src/db/tdbDb.c @@ -15,8 +15,8 @@ #include "tdbInt.h" -int32_t tdbOpen(const char *dbname, int32_t szPage, int32_t pages, TDB **ppDb, int8_t rollback, int32_t encryptAlgorithm, - char *encryptKey) { +int32_t tdbOpen(const char *dbname, int32_t szPage, int32_t pages, TDB **ppDb, int8_t rollback, + int32_t encryptAlgorithm, char *encryptKey) { TDB *pDb; int dsize; int zsize; @@ -31,7 +31,7 @@ int32_t tdbOpen(const char *dbname, int32_t szPage, int32_t pages, TDB **ppDb, i pPtr = (uint8_t *)tdbOsCalloc(1, zsize); if (pPtr == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } pDb = (TDB *)pPtr; @@ -51,38 +51,38 @@ int32_t tdbOpen(const char *dbname, int32_t szPage, int32_t pages, TDB **ppDb, i pDb->jfd = -1; pDb->encryptAlgorithm = encryptAlgorithm; - if(encryptKey != NULL){ - strncpy(pDb->encryptKey, encryptKey, ENCRYPT_KEY_LEN); + if (encryptKey != NULL) { + strncpy(pDb->encryptKey, encryptKey, ENCRYPT_KEY_LEN); } 
ret = tdbPCacheOpen(szPage, pages, &(pDb->pCache)); if (ret < 0) { - return -1; + return ret; } pDb->nPgrHash = 8; tsize = sizeof(SPager *) * pDb->nPgrHash; pDb->pgrHash = tdbOsMalloc(tsize); if (pDb->pgrHash == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } memset(pDb->pgrHash, 0, tsize); ret = taosMulModeMkDir(dbname, 0755, false); if (ret < 0) { - return -1; + return TAOS_SYSTEM_ERROR(ret); } #ifdef USE_MAINDB // open main db ret = tdbTbOpen(TDB_MAINDB_NAME, -1, sizeof(SBtInfo), NULL, pDb, &pDb->pMainDb, rollback); if (ret < 0) { - return -1; + return ret; } ret = tdbTbOpen(TDB_FREEDB_NAME, sizeof(SPgno), 0, NULL, pDb, &pDb->pFreeDb, rollback); if (ret < 0) { - return -1; + return ret; } #endif @@ -125,12 +125,13 @@ int32_t tdbBegin(TDB *pDb, TXN **ppTxn, void *(*xMalloc)(void *, size_t), void ( TXN *pTxn = tdbOsCalloc(1, sizeof(*pTxn)); if (!pTxn) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } - if (tdbTxnOpen(pTxn, txnId, xMalloc, xFree, xArg, flags) < 0) { + ret = tdbTxnOpen(pTxn, txnId, xMalloc, xFree, xArg, flags); + if (ret < 0) { tdbOsFree(pTxn); - return -1; + return ret; } for (pPager = pDb->pgrList; pPager; pPager = pPager->pNext) { @@ -139,7 +140,7 @@ int32_t tdbBegin(TDB *pDb, TXN **ppTxn, void *(*xMalloc)(void *, size_t), void ( tdbError("failed to begin pager since %s. dbName:%s, txnId:%" PRId64, tstrerror(terrno), pDb->dbName, pTxn->txnId); tdbTxnClose(pTxn); - return -1; + return ret; } } @@ -157,7 +158,7 @@ int32_t tdbCommit(TDB *pDb, TXN *pTxn) { if (ret < 0) { tdbError("failed to commit pager since %s. dbName:%s, txnId:%" PRId64, tstrerror(terrno), pDb->dbName, pTxn->txnId); - return -1; + return ret; } } @@ -173,7 +174,7 @@ int32_t tdbPostCommit(TDB *pDb, TXN *pTxn) { if (ret < 0) { tdbError("failed to commit pager since %s. 
dbName:%s, txnId:%" PRId64, tstrerror(terrno), pDb->dbName, pTxn->txnId); - return -1; + return ret; } } @@ -191,7 +192,7 @@ int32_t tdbPrepareAsyncCommit(TDB *pDb, TXN *pTxn) { if (ret < 0) { tdbError("failed to commit pager since %s. dbName:%s, txnId:%" PRId64, tstrerror(terrno), pDb->dbName, pTxn->txnId); - return -1; + return ret; } } @@ -207,7 +208,7 @@ int32_t tdbAbort(TDB *pDb, TXN *pTxn) { if (ret < 0) { tdbError("failed to abort pager since %s. dbName:%s, txnId:%" PRId64, tstrerror(terrno), pDb->dbName, pTxn->txnId); - return -1; + return ret; } } diff --git a/source/libs/tdb/src/db/tdbOs.c b/source/libs/tdb/src/db/tdbOs.c deleted file mode 100644 index a2fce8925b..0000000000 --- a/source/libs/tdb/src/db/tdbOs.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#include "tdbOs.h" - -#ifndef TDB_FOR_TDENGINE - -// tdbOsRead -i64 tdbOsRead(tdb_fd_t fd, void *pData, i64 nBytes) { - i64 nRead = 0; - i64 iRead = 0; - u8 *pBuf = (u8 *)pData; - - while (nBytes > 0) { - iRead = read(fd, pBuf, nBytes); - if (iRead < 0) { - if (errno == EINTR) { - continue; - } else { - return -1; - } - } else if (iRead == 0) { - break; - } - - nRead += iRead; - pBuf += iRead; - nBytes -= iRead; - } - - return nRead; -} - -// tdbOsPRead -i64 tdbOsPRead(tdb_fd_t fd, void *pData, i64 nBytes, i64 offset) { - i64 nRead = 0; - i64 iRead = 0; - i64 iOffset = offset; - u8 *pBuf = (u8 *)pData; - - while (nBytes > 0) { - iRead = pread(fd, pBuf, nBytes, iOffset); - if (iRead < 0) { - if (errno == EINTR) { - continue; - } else { - return -1; - } - } else if (iRead == 0) { - break; - } - - nRead += iRead; - pBuf += iRead; - iOffset += iRead; - nBytes -= iRead; - } - - return nRead; -} - -// tdbOsWrite -i64 tdbOsWrite(tdb_fd_t fd, const void *pData, i64 nBytes) { - i64 nWrite = 0; - i64 iWrite = 0; - u8 *pBuf = (u8 *)pData; - - while (nBytes > 0) { - iWrite = write(fd, pBuf, nBytes); - if (iWrite < 0) { - if (errno == EINTR) { - continue; - } - - return -1; - } - - nWrite += iWrite; - pBuf += iWrite; - nBytes -= iWrite; - } - - return nWrite; -} - -#endif \ No newline at end of file diff --git a/source/libs/tdb/src/db/tdbPCache.c b/source/libs/tdb/src/db/tdbPCache.c index 455128e6ec..6cb5f89758 100644 --- a/source/libs/tdb/src/db/tdbPCache.c +++ b/source/libs/tdb/src/db/tdbPCache.c @@ -50,30 +50,36 @@ static void tdbPCacheLock(SPCache *pCache) { tdbMutexLock(&(pCache->mutex)); } static void tdbPCacheUnlock(SPCache *pCache) { tdbMutexUnlock(&(pCache->mutex)); } int tdbPCacheOpen(int pageSize, int cacheSize, SPCache **ppCache) { + int32_t code = 0; + int32_t lino; SPCache *pCache; void *pPtr; SPage *pPgHdr; pCache = (SPCache *)tdbOsCalloc(1, sizeof(*pCache) + sizeof(SPage *) * cacheSize); if (pCache == NULL) { - return -1; + TSDB_CHECK_CODE(code = 
TSDB_CODE_OUT_OF_MEMORY, lino, _exit); } pCache->szPage = pageSize; pCache->nPages = cacheSize; pCache->aPage = (SPage **)tdbOsCalloc(cacheSize, sizeof(SPage *)); if (pCache->aPage == NULL) { - tdbOsFree(pCache); - return -1; + TSDB_CHECK_CODE(code = TSDB_CODE_OUT_OF_MEMORY, lino, _exit); } - if (tdbPCacheOpenImpl(pCache) < 0) { - tdbOsFree(pCache); - return -1; - } + code = tdbPCacheOpenImpl(pCache); + TSDB_CHECK_CODE(code, lino, _exit); - *ppCache = pCache; - return 0; +_exit: + if (code) { + tdbError("%s failed at %s:%d since %s", __func__, __FILE__, __LINE__, tstrerror(code)); + tdbPCacheClose(pCache); + *ppCache = NULL; + } else { + *ppCache = pCache; + } + return code; } int tdbPCacheClose(SPCache *pCache) { @@ -99,14 +105,14 @@ static int tdbPCacheAlterImpl(SPCache *pCache, int32_t nPage) { } else if (pCache->nPages < nPage) { SPage **aPage = tdbOsCalloc(nPage, sizeof(SPage *)); if (aPage == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } for (int32_t iPage = pCache->nPages; iPage < nPage; iPage++) { - if (tdbPageCreate(pCache->szPage, &aPage[iPage], tdbDefaultMalloc, NULL) < 0) { - // TODO: handle error + int32_t code = tdbPageCreate(pCache->szPage, &aPage[iPage], tdbDefaultMalloc, NULL); + if (code) { tdbOsFree(aPage); - return -1; + return code; } // pPage->pgid = 0; @@ -156,15 +162,11 @@ static int tdbPCacheAlterImpl(SPCache *pCache, int32_t nPage) { } int tdbPCacheAlter(SPCache *pCache, int32_t nPage) { - int ret = 0; - + int code; tdbPCacheLock(pCache); - - ret = tdbPCacheAlterImpl(pCache, nPage); - + code = tdbPCacheAlterImpl(pCache, nPage); tdbPCacheUnlock(pCache); - - return ret; + return code; } SPage *tdbPCacheFetch(SPCache *pCache, const SPgid *pPgid, TXN *pTxn) { @@ -180,9 +182,6 @@ SPage *tdbPCacheFetch(SPCache *pCache, const SPgid *pPgid, TXN *pTxn) { tdbPCacheUnlock(pCache); - // printf("thread %" PRId64 " fetch page %d pgno %d pPage %p nRef %d\n", taosGetSelfPthreadId(), pPage->id, - // TDB_PAGE_PGNO(pPage), pPage, nRef); - if 
(pPage) { tdbTrace("pcache/fetch page %p/%d/%d/%d", pPage, TDB_PAGE_PGNO(pPage), pPage->id, nRef); } else { @@ -285,6 +284,7 @@ static SPage *tdbPCacheFetchImpl(SPCache *pCache, const SPgid *pPgid, TXN *pTxn) if (!pTxn) { tdbError("tdb/pcache: null ptr pTxn, fetch impl failed."); + terrno = TSDB_CODE_INVALID_PARA; return NULL; } @@ -327,7 +327,7 @@ static SPage *tdbPCacheFetchImpl(SPCache *pCache, const SPgid *pPgid, TXN *pTxn) ret = tdbPageCreate(pCache->szPage, &pPage, pTxn->xMalloc, pTxn->xArg); if (ret < 0 || pPage == NULL) { tdbError("tdb/pcache: ret: %" PRId32 " pPage: %p, page create failed.", ret, pPage); - // TODO: recycle other backup pages + terrno = ret; return NULL; } @@ -475,10 +475,8 @@ static int tdbPCacheOpenImpl(SPCache *pCache) { pCache->nFree = 0; pCache->pFree = NULL; for (int i = 0; i < pCache->nPages; i++) { - if (tdbPageCreate(pCache->szPage, &pPage, tdbDefaultMalloc, NULL) < 0) { - // TODO: handle error - return -1; - } + ret = tdbPageCreate(pCache->szPage, &pPage, tdbDefaultMalloc, NULL); + if (ret) return ret; // pPage->pgid = 0; pPage->isAnchor = 0; @@ -504,8 +502,7 @@ static int tdbPCacheOpenImpl(SPCache *pCache) { pCache->nHash = pCache->nPages < 8 ? 
8 : pCache->nPages; pCache->pgHash = (SPage **)tdbOsCalloc(pCache->nHash, sizeof(SPage *)); if (pCache->pgHash == NULL) { - // TODO - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } // Open LRU list diff --git a/source/libs/tdb/src/db/tdbPage.c b/source/libs/tdb/src/db/tdbPage.c index c6310f4985..322b735163 100644 --- a/source/libs/tdb/src/db/tdbPage.c +++ b/source/libs/tdb/src/db/tdbPage.c @@ -45,12 +45,12 @@ int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t) if (!xMalloc) { tdbError("tdb/page-create: null xMalloc."); - return -1; + return TSDB_CODE_INVALID_PARA; } if (!TDB_IS_PGSIZE_VLD(pageSize)) { tdbError("tdb/page-create: invalid pageSize: %d.", pageSize); - return -1; + return TSDB_CODE_INVALID_PARA; } *ppPage = NULL; @@ -58,7 +58,7 @@ int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t) ptr = (u8 *)(xMalloc(arg, size)); if (ptr == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } memset(ptr, 0, size); @@ -86,12 +86,12 @@ int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) if (pPage->isDirty) { tdbError("tdb/page-destroy: dirty page: %" PRIu8 ".", pPage->isDirty); - return -1; + return TSDB_CODE_INVALID_PARA; } if (!xFree) { tdbError("tdb/page-destroy: null xFree."); - return -1; + return TSDB_CODE_INVALID_PARA; } for (int iOvfl = 0; iOvfl < pPage->nOverflow; iOvfl++) { @@ -129,7 +129,8 @@ void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell tdbTrace("page/init: %p %" PRIu8 " %p", pPage, szAmHdr, xCellSize); pPage->pPageHdr = pPage->pData + szAmHdr; if (TDB_PAGE_NCELLS(pPage) == 0) { - return tdbPageZero(pPage, szAmHdr, xCellSize); + tdbPageZero(pPage, szAmHdr, xCellSize); + return; } pPage->pCellIdx = pPage->pPageHdr + TDB_PAGE_HDR_SIZE(pPage); pPage->pFreeStart = pPage->pCellIdx + TDB_PAGE_OFFSET_SIZE(pPage) * TDB_PAGE_NCELLS(pPage); @@ -159,7 +160,7 @@ int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl if 
(szCell > TDB_PAGE_MAX_FREE_BLOCK(pPage, pPage->pPageHdr - pPage->pData)) { tdbError("tdb/page-insert-cell: invalid page, szCell: %d, max free: %lu", szCell, TDB_PAGE_MAX_FREE_BLOCK(pPage, pPage->pPageHdr - pPage->pData)); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } nFree = TDB_PAGE_NFREE(pPage); @@ -207,7 +208,7 @@ int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl if (pPage->pFreeStart != pPage->pCellIdx + TDB_PAGE_OFFSET_SIZE(pPage) * (nCells + 1)) { tdbError("tdb/page-insert-cell: invalid page, pFreeStart: %p, pCellIdx: %p, nCells: %d", pPage->pFreeStart, pPage->pCellIdx, nCells); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } } @@ -234,7 +235,7 @@ int tdbPageDropCell(SPage *pPage, int idx, TXN *pTxn, SBTree *pBt) { if (idx < 0 || idx >= nCells + pPage->nOverflow) { tdbError("tdb/page-drop-cell: idx: %d out of range, nCells: %d, nOvfl: %d.", idx, nCells, pPage->nOverflow); - return -1; + return TSDB_CODE_INVALID_PARA; } iOvfl = 0; @@ -265,14 +266,14 @@ int tdbPageDropCell(SPage *pPage, int idx, TXN *pTxn, SBTree *pBt) { pPage->aiOvfl[iOvfl]--; if (pPage->aiOvfl[iOvfl] <= 0) { tdbError("tdb/page-drop-cell: invalid ai idx: %d", pPage->aiOvfl[iOvfl]); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } } return 0; } -void tdbPageCopy(SPage *pFromPage, SPage *pToPage, int deepCopyOvfl) { +int32_t tdbPageCopy(SPage *pFromPage, SPage *pToPage, int deepCopyOvfl) { int delta, nFree; pToPage->pFreeStart = pToPage->pPageHdr + (pFromPage->pFreeStart - pFromPage->pPageHdr); @@ -280,7 +281,7 @@ void tdbPageCopy(SPage *pFromPage, SPage *pToPage, int deepCopyOvfl) { if (pToPage->pFreeEnd < pToPage->pFreeStart) { tdbError("tdb/page-copy: invalid to page, pFreeStart: %p, pFreeEnd: %p", pToPage->pFreeStart, pToPage->pFreeEnd); - return; + return TSDB_CODE_INVALID_DATA_FMT; } memcpy(pToPage->pPageHdr, pFromPage->pPageHdr, pFromPage->pFreeStart - pFromPage->pPageHdr); @@ -289,7 +290,7 @@ void tdbPageCopy(SPage *pFromPage, SPage *pToPage, 
int deepCopyOvfl) { if (TDB_PAGE_CCELLS(pToPage) != pToPage->pFreeEnd - pToPage->pData) { tdbError("tdb/page-copy: invalid to page, cell body: %d, range: %ld", TDB_PAGE_CCELLS(pToPage), pToPage->pFreeEnd - pToPage->pData); - return; + return TSDB_CODE_INVALID_DATA_FMT; } delta = (pToPage->pPageHdr - pToPage->pData) - (pFromPage->pPageHdr - pFromPage->pData); @@ -304,6 +305,10 @@ void tdbPageCopy(SPage *pFromPage, SPage *pToPage, int deepCopyOvfl) { if (deepCopyOvfl) { int szCell = (*pFromPage->xCellSize)(pFromPage, pFromPage->apOvfl[iOvfl], 0, NULL, NULL); pNewCell = (SCell *)tdbOsMalloc(szCell); + if (pNewCell == NULL) { + tdbError("tdb/page-copy: out of memory, size: %d", szCell); + return TSDB_CODE_OUT_OF_MEMORY; + } memcpy(pNewCell, pFromPage->apOvfl[iOvfl], szCell); tdbTrace("tdbPage/copy/new ovfl cell: %p/%p/%p", pNewCell, pToPage, pFromPage); } @@ -312,6 +317,7 @@ void tdbPageCopy(SPage *pFromPage, SPage *pToPage, int deepCopyOvfl) { pToPage->aiOvfl[iOvfl] = pFromPage->aiOvfl[iOvfl]; } pToPage->nOverflow = pFromPage->nOverflow; + return 0; } int tdbPageCapacity(int pageSize, int amHdrSize) { @@ -343,12 +349,12 @@ static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell) { if (nFree < szCell + TDB_PAGE_OFFSET_SIZE(pPage)) { tdbError("tdb/page-allocate: invalid cell size, nFree: %d, szCell: %d, szOffset: %d", nFree, szCell, TDB_PAGE_OFFSET_SIZE(pPage)); - return -1; + return TSDB_CODE_INVALID_PARA; } if (TDB_PAGE_CCELLS(pPage) != pPage->pFreeEnd - pPage->pData) { tdbError("tdb/page-allocate: invalid page, cell body: %d, range: %ld", TDB_PAGE_CCELLS(pPage), pPage->pFreeEnd - pPage->pData); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } // 1. 
Try to allocate from the free space block area @@ -363,7 +369,7 @@ static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell) { cellFree = TDB_PAGE_FCELL(pPage); if (cellFree != 0 && cellFree < pPage->pFreeEnd - pPage->pData) { tdbError("tdb/page-allocate: cellFree: %d, pFreeEnd: %p, pData: %p.", cellFree, pPage->pFreeEnd, pPage->pData); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } if (cellFree && pPage->pFreeEnd - pPage->pFreeStart >= TDB_PAGE_OFFSET_SIZE(pPage)) { SCell *pPrevFreeCell = NULL; @@ -408,19 +414,19 @@ static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell) { } // 3. Try to dfragment and allocate again - tdbPageDefragment(pPage); + TAOS_CHECK_RETURN(tdbPageDefragment(pPage)); if (pPage->pFreeEnd - pPage->pFreeStart != nFree) { tdbError("tdb/page-allocate: nFree: %d, pFreeStart: %p, pFreeEnd: %p.", nFree, pPage->pFreeStart, pPage->pFreeEnd); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } if (TDB_PAGE_NFREE(pPage) != nFree) { tdbError("tdb/page-allocate: nFree: %d, page free: %d.", nFree, TDB_PAGE_NFREE(pPage)); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } if (pPage->pFreeEnd - pPage->pData != TDB_PAGE_CCELLS(pPage)) { tdbError("tdb/page-allocate: ccells: %d, pFreeStart: %p, pData: %p.", TDB_PAGE_CCELLS(pPage), pPage->pFreeStart, pPage->pData); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } pPage->pFreeEnd -= szCell; @@ -430,7 +436,7 @@ static int tdbPageAllocate(SPage *pPage, int szCell, SCell **ppCell) { _alloc_finish: if (NULL == pCell) { tdbError("tdb/page-allocate: null ptr pCell."); - return -1; + return TSDB_CODE_OUT_OF_BUFFER; } pPage->pFreeStart += TDB_PAGE_OFFSET_SIZE(pPage); @@ -447,15 +453,15 @@ static int tdbPageFree(SPage *pPage, int idx, SCell *pCell, int szCell) { if (pCell < pPage->pFreeEnd) { tdbError("tdb/page-free: invalid cell, cell: %p, free end: %p", pCell, pPage->pFreeEnd); - return -1; + return TSDB_CODE_INVALID_PARA; } if (pCell + szCell > (u8 *)(pPage->pPageFtr)) { 
tdbError("tdb/page-free: cell crosses page footer, cell: %p, size: %d footer: %p", pCell, szCell, pPage->pFreeEnd); - return -1; + return TSDB_CODE_INVALID_PARA; } if (pCell != TDB_PAGE_CELL_AT(pPage, idx)) { tdbError("tdb/page-free: cell pos incorrect, cell: %p, pos: %p", pCell, TDB_PAGE_CELL_AT(pPage, idx)); - return -1; + return TSDB_CODE_INVALID_PARA; } nFree = TDB_PAGE_NFREE(pPage); @@ -470,7 +476,7 @@ static int tdbPageFree(SPage *pPage, int idx, SCell *pCell, int szCell) { TDB_PAGE_FCELL_SET(pPage, pCell - pPage->pData); } else { tdbError("tdb/page-free: invalid cell size: %d", szCell); - return -1; + return TSDB_CODE_INVALID_PARA; } } @@ -502,7 +508,9 @@ static int tdbPageDefragment(SPage *pPage) { int32_t nCell = TDB_PAGE_NCELLS(pPage); SCellIdx *aCellIdx = (SCellIdx *)tdbOsMalloc(sizeof(SCellIdx) * nCell); - if (aCellIdx == NULL) return -1; + if (aCellIdx == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } for (int32_t iCell = 0; iCell < nCell; iCell++) { aCellIdx[iCell].iCell = iCell; aCellIdx[iCell].offset = TDB_PAGE_CELL_OFFSET_AT(pPage, iCell); diff --git a/source/libs/tdb/src/db/tdbPager.c b/source/libs/tdb/src/db/tdbPager.c index 9dbac11b02..14165045ca 100644 --- a/source/libs/tdb/src/db/tdbPager.c +++ b/source/libs/tdb/src/db/tdbPager.c @@ -13,8 +13,8 @@ * along with this program. If not, see . 
*/ -#include "tdbInt.h" #include "crypt.h" +#include "tdbInt.h" #include "tglobal.h" /* #pragma pack(push, 1) @@ -41,9 +41,10 @@ struct hashset_st { static const unsigned int prime = 39; static const unsigned int prime2 = 5009; -hashset_t hashset_create(void) { +static hashset_t hashset_create(void) { hashset_t set = tdbOsCalloc(1, sizeof(struct hashset_st)); if (!set) { + terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } @@ -52,6 +53,7 @@ hashset_t hashset_create(void) { set->items = tdbOsCalloc(set->capacity, sizeof(size_t)); if (!set->items) { tdbOsFree(set); + terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } set->mask = set->capacity - 1; @@ -69,7 +71,7 @@ void hashset_destroy(hashset_t set) { } } -int hashset_add_member(hashset_t set, void *item) { +static int hashset_add_member(hashset_t set, void *item) { size_t value = (size_t)item; size_t h; @@ -88,7 +90,7 @@ int hashset_add_member(hashset_t set, void *item) { return 1; } -int hashset_add(hashset_t set, void *item) { +static int hashset_add(hashset_t set, void *item) { int ret = hashset_add_member(set, item); size_t old_capacity = set->capacity; @@ -113,7 +115,7 @@ int hashset_add(hashset_t set, void *item) { return ret; } -int hashset_remove(hashset_t set, void *item) { +static int hashset_remove(hashset_t set, void *item) { size_t value = (size_t)item; for (size_t h = set->mask & (prime * value); set->items[h] != 0; h = set->mask & (h + prime2)) { @@ -127,7 +129,7 @@ int hashset_remove(hashset_t set, void *item) { return 0; } -int hashset_contains(hashset_t set, void *item) { +static int hashset_contains(hashset_t set, void *item) { size_t value = (size_t)item; for (size_t h = set->mask & (prime * value); set->items[h] != 0; h = set->mask & (h + prime2)) { @@ -177,7 +179,7 @@ int tdbPagerOpen(SPCache *pCache, const char *fileName, SPager **ppPager) { + fsize + 8 + 1; /* jFileName */ pPtr = (uint8_t *)tdbOsCalloc(1, zsize); if (pPtr == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } pPager = 
(SPager *)pPtr; @@ -198,12 +200,12 @@ int tdbPagerOpen(SPCache *pCache, const char *fileName, SPager **ppPager) { pPager->fd = tdbOsOpen(pPager->dbFileName, TDB_O_CREAT | TDB_O_RDWR, 0755); if (TDB_FD_INVALID(pPager->fd)) { // if (pPager->fd < 0) { - return -1; + return TAOS_SYSTEM_ERROR(errno); } ret = tdbGnrtFileID(pPager->fd, pPager->fid, false); if (ret < 0) { - return -1; + return TAOS_SYSTEM_ERROR(errno); } // pPager->jfd = -1; @@ -221,11 +223,6 @@ int tdbPagerOpen(SPCache *pCache, const char *fileName, SPager **ppPager) { int tdbPagerClose(SPager *pPager) { if (pPager) { - /* - if (pPager->inTran) { - tdbOsClose(pPager->jfd); - } - */ tdbOsClose(pPager->fd); tdbOsFree(pPager); } @@ -254,8 +251,8 @@ int tdbPagerWrite(SPager *pPager, SPage *pPage) { !hashset_contains(pPager->pActiveTxn->jPageSet, (void *)((long)TDB_PAGE_PGNO(pPage))))) { ret = tdbPagerWritePageToJournal(pPager, pPage); if (ret < 0) { - tdbError("failed to write page to journal since %s", tstrerror(terrno)); - return -1; + tdbError("failed to write page to journal since %s", tstrerror(ret)); + return ret; } if (pPager->pActiveTxn->jPageSet) { @@ -278,11 +275,13 @@ int tdbPagerBegin(SPager *pPager, TXN *pTxn) { pTxn->jfd = tdbOsOpen(jTxnFileName, TDB_O_CREAT | TDB_O_RDWR, 0755); if (TDB_FD_INVALID(pTxn->jfd)) { tdbError("failed to open file due to %s. jFileName:%s", strerror(errno), pPager->jFileName); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } pTxn->jPageSet = hashset_create(); + if (pTxn->jPageSet == NULL) { + return terrno; + } pPager->pActiveTxn = pTxn; @@ -319,8 +318,7 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) { ret = tdbOsFSync(pTxn->jfd); if (ret < 0) { tdbError("failed to fsync: %s. 
jFileName:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } // loop to write the dirty pages to file @@ -331,13 +329,13 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) { if (pPage->nOverflow != 0) { tdbError("tdb/pager-commit: %p, pPage: %p, ovfl: %d, commit page failed.", pPager, pPage, pPage->nOverflow); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } ret = tdbPagerPWritePageToDB(pPager, pPage); if (ret < 0) { tdbError("failed to write page to db since %s", tstrerror(terrno)); - return -1; + return ret; } } @@ -368,8 +366,7 @@ int tdbPagerCommit(SPager *pPager, TXN *pTxn) { // sync the db file if (tdbOsFSync(pPager->fd) < 0) { tdbError("failed to fsync fd due to %s. file:%s", strerror(errno), pPager->dbFileName); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } return 0; @@ -382,14 +379,12 @@ int tdbPagerPostCommit(SPager *pPager, TXN *pTxn) { // remove the journal file if (tdbOsClose(pTxn->jfd) < 0) { tdbError("failed to close jfd: %s. file:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } if (tdbOsRemove(jTxnFileName) < 0 && errno != ENOENT) { tdbError("failed to remove file due to %s. file:%s", strerror(errno), jTxnFileName); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } // pPager->inTran = 0; @@ -408,8 +403,7 @@ int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) { ret = tdbOsFSync(pTxn->jfd); if (ret < 0) { tdbError("failed to fsync jfd: %s. 
jfile:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } // loop to write the dirty pages to file @@ -426,7 +420,7 @@ int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) { ret = tdbPagerPWritePageToDB(pPager, pPage); if (ret < 0) { tdbError("failed to write page to db since %s", tstrerror(terrno)); - return -1; + return ret; } } @@ -445,36 +439,28 @@ int tdbPagerPrepareAsyncCommit(SPager *pPager, TXN *pTxn) { tdbPCacheRelease(pPager->pCache, pPage, pTxn); } - /* - tdbTrace("reset dirty tree: %p", &pPager->rbt); - tRBTreeCreate(&pPager->rbt, pageCmpFn); - - // sync the db file - if (tdbOsFSync(pPager->fd) < 0) { - tdbError("failed to fsync fd due to %s. file:%s", strerror(errno), pPager->dbFileName); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; - } - */ return 0; } -static char* tdbEncryptPage(SPager *pPager, char* pPageData, int32_t pageSize, const char* function, - int64_t offset){ +static char *tdbEncryptPage(SPager *pPager, char *pPageData, int32_t pageSize, const char *function, int64_t offset) { int32_t encryptAlgorithm = pPager->pEnv->encryptAlgorithm; - char* encryptKey = pPager->pEnv->encryptKey; + char *encryptKey = pPager->pEnv->encryptKey; - char* buf = pPageData; + char *buf = pPageData; - if(encryptAlgorithm == DND_CA_SM4){ - //tdbInfo("CBC_Encrypt key:%d %s %s", encryptAlgorithm, encryptKey, __FUNCTION__); - //ASSERT(strlen(encryptKey) > 0); + if (encryptAlgorithm == DND_CA_SM4) { + // tdbInfo("CBC_Encrypt key:%d %s %s", encryptAlgorithm, encryptKey, __FUNCTION__); + // ASSERT(strlen(encryptKey) > 0); - //tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d before Encrypt", offset, pPage->pData[0]); + // tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d before Encrypt", offset, pPage->pData[0]); buf = taosMemoryMalloc(pageSize); + if (buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } - unsigned char packetData[128]; + 
unsigned char packetData[128]; int32_t count = 0; while (count < pageSize) { @@ -488,19 +474,19 @@ static char* tdbEncryptPage(SPager *pPager, char* pPageData, int32_t pageSize, c int32_t newLen = CBC_Encrypt(&opts); memcpy(buf + count, packetData, newLen); - count += newLen; + count += newLen; } - //tdbInfo("CBC tdb offset:%" PRId64 ", Encrypt count:%d %s", offset, count, function); + // tdbInfo("CBC tdb offset:%" PRId64 ", Encrypt count:%d %s", offset, count, function); - //tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d after Encrypt", offset, (uint8_t)buf[0]); + // tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d after Encrypt", offset, (uint8_t)buf[0]); } return buf; } -void tdbFreeEncryptBuf(SPager *pPager, char* buf){ +void tdbFreeEncryptBuf(SPager *pPager, char *buf) { int32_t encryptAlgorithm = pPager->pEnv->encryptAlgorithm; - if(encryptAlgorithm == DND_CA_SM4) taosMemoryFreeClear(buf); + if (encryptAlgorithm == DND_CA_SM4) taosMemoryFreeClear(buf); } // recovery dirty pages int tdbPagerAbort(SPager *pPager, TXN *pTxn) { @@ -518,26 +504,24 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) { ret = tdbOsFSync(pTxn->jfd); if (ret < 0) { tdbError("failed to fsync jfd: %s. jfile:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } tdb_fd_t jfd = pTxn->jfd; ret = tdbGetFileSize(jfd, pPager->pageSize, &journalSize); if (ret < 0) { - return -1; + return ret; } if (tdbOsLSeek(jfd, 0L, SEEK_SET) < 0) { tdbError("failed to lseek jfd due to %s. 
file:%s, offset:0", strerror(errno), pPager->dbFileName); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } u8 *pageBuf = tdbOsCalloc(1, pPager->pageSize); if (pageBuf == NULL) { - return -1; + return terrno = TSDB_CODE_OUT_OF_MEMORY; } tdbDebug("pager/abort: %p, %d/%d, txnId:%" PRId64, pPager, pPager->dbOrigSize, pPager->dbFileSize, pTxn->txnId); @@ -549,7 +533,7 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) { int ret = tdbOsRead(jfd, &pgno, sizeof(pgno)); if (ret < 0) { tdbOsFree(pageBuf); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } tdbTrace("pager/abort: restore pgno:%d,", pgno); @@ -559,27 +543,28 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) { ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } i64 offset = pPager->pageSize * (pgno - 1); if (tdbOsLSeek(pPager->fd, offset, SEEK_SET) < 0) { tdbError("failed to lseek fd due to %s. file:%s, offset:%" PRId64, strerror(errno), pPager->dbFileName, offset); - terrno = TAOS_SYSTEM_ERROR(errno); tdbOsFree(pageBuf); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } - char* buf = tdbEncryptPage(pPager, pageBuf, pPager->pageSize, __FUNCTION__, offset); + char *buf = tdbEncryptPage(pPager, pageBuf, pPager->pageSize, __FUNCTION__, offset); + if (buf == NULL) { + return terrno; + } ret = tdbOsWrite(pPager->fd, buf, pPager->pageSize); if (ret < 0) { tdbError("failed to write buf due to %s. file: %s, bufsize:%d", strerror(errno), pPager->dbFileName, pPager->pageSize); tdbFreeEncryptBuf(pPager, buf); - terrno = TAOS_SYSTEM_ERROR(errno); tdbOsFree(pageBuf); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } tdbFreeEncryptBuf(pPager, buf); @@ -587,9 +572,8 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) { if (tdbOsFSync(pPager->fd) < 0) { tdbError("failed to fsync fd due to %s. 
dbfile:%s", strerror(errno), pPager->dbFileName); - terrno = TAOS_SYSTEM_ERROR(errno); tdbOsFree(pageBuf); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } tdbOsFree(pageBuf); @@ -617,8 +601,7 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) { // 4, remove the journal file if (tdbOsClose(pTxn->jfd) < 0) { tdbError("failed to close jfd: %s. file:%s, %" PRId64, strerror(errno), pPager->jFileName, pTxn->txnId); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } char jTxnFileName[TDB_FILENAME_LEN]; @@ -626,8 +609,7 @@ int tdbPagerAbort(SPager *pPager, TXN *pTxn) { if (tdbOsRemove(jTxnFileName) < 0 && errno != ENOENT) { tdbError("failed to remove file due to %s. file:%s", strerror(errno), jTxnFileName); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } // pPager->inTran = 0; @@ -658,7 +640,7 @@ int tdbPagerFlushPage(SPager *pPager, TXN *pTxn) { ret = tdbPagerPWritePageToDB(pPager, pPage); if (ret < 0) { tdbError("failed to write page to db since %s", tstrerror(terrno)); - return -1; + return ret; } tdbTrace("tdb/flush:%p, pgno:%d, %d/%d/%d", pPager, pgno, pPager->dbOrigSize, pPager->dbFileSize, maxPgno); @@ -717,13 +699,13 @@ int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPa ret = tdbPagerAllocPage(pPager, &pgno, pTxn); if (ret < 0) { tdbError("tdb/pager: %p, ret: %d pgno: %" PRIu32 ", alloc page failed.", pPager, ret, pgno); - return -1; + return ret; } } if (pgno == 0) { tdbError("tdb/pager: %p, ret: %d pgno: %" PRIu32 ", alloc page failed.", pPager, ret, pgno); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } // fetch a page container @@ -739,7 +721,7 @@ int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPa ret = tdbPagerInitPage(pPager, pPage, initPage, arg, loadPage); if (ret < 0) { tdbError("tdb/pager: %p, pPage: %p, init page failed.", pPager, pPage); - return -1; + return ret; } } @@ -748,11 +730,11 @@ 
int tdbPagerFetchPage(SPager *pPager, SPgno *ppgno, SPage **ppPage, int (*initPa if (!TDB_PAGE_INITIALIZED(pPage)) { tdbError("tdb/pager: %p, pPage: %p, fetch page uninited.", pPager, pPage); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } if (pPage->pPager != pPager) { tdbError("tdb/pager: %p/%p, fetch page failed.", pPager, pPage->pPager); - return -1; + return TSDB_CODE_INVALID_DATA_FMT; } *ppgno = pgno; @@ -771,12 +753,17 @@ int tdbPagerInsertFreePage(SPager *pPager, SPage *pPage, TXN *pTxn) { SPgno pgno = TDB_PAGE_PGNO(pPage); if (pPager->frps) { - taosArrayPush(pPager->frps, &pgno); + if (taosArrayPush(pPager->frps, &pgno) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } pPage->pPager = NULL; return code; } pPager->frps = taosArrayInit(8, sizeof(SPgno)); + if (pPager->frps == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } // memset(pPage->pData, 0, pPage->pageSize); tdbTrace("tdb/insert-free-page: tbc recycle page: %d.", pgno); // printf("tdb/insert-free-page: tbc recycle page: %d.\n", pgno); @@ -785,7 +772,7 @@ int tdbPagerInsertFreePage(SPager *pPager, SPage *pPage, TXN *pTxn) { tdbError("tdb/insert-free-page: tb insert failed with ret: %d.", code); taosArrayDestroy(pPager->frps); pPager->frps = NULL; - return -1; + return code; } while (TARRAY_SIZE(pPager->frps) > 0) { @@ -796,7 +783,7 @@ int tdbPagerInsertFreePage(SPager *pPager, SPage *pPage, TXN *pTxn) { tdbError("tdb/insert-free-page: tb insert failed with ret: %d.", code); taosArrayDestroy(pPager->frps); pPager->frps = NULL; - return -1; + return code; } } @@ -822,7 +809,7 @@ static int tdbPagerRemoveFreePage(SPager *pPager, SPgno *pPgno, TXN *pTxn) { code = tdbTbcOpen(pPager->pEnv->pFreeDb, &pCur, pTxn); if (code < 0) { - return 0; + return code; } code = tdbTbcMoveToFirst(pCur); @@ -924,20 +911,19 @@ static int tdbPagerInitPage(SPager *pPager, SPage *pPage, int (*initPage)(SPage } int32_t encryptAlgorithm = pPager->pEnv->encryptAlgorithm; - char* encryptKey = pPager->pEnv->encryptKey; + char 
*encryptKey = pPager->pEnv->encryptKey; - if(encryptAlgorithm == DND_CA_SM4){ - //tdbInfo("CBC_Decrypt key:%d %s %s", encryptAlgorithm, encryptKey, __FUNCTION__); - //ASSERT(strlen(encryptKey) > 0); + if (encryptAlgorithm == DND_CA_SM4) { + // tdbInfo("CBC_Decrypt key:%d %s %s", encryptAlgorithm, encryptKey, __FUNCTION__); + // ASSERT(strlen(encryptKey) > 0); - //uint8_t flags = pPage->pData[0]; - //tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d before Decrypt", ((i64)pPage->pageSize) * (pgno - 1), flags); + // uint8_t flags = pPage->pData[0]; + // tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d before Decrypt", ((i64)pPage->pageSize) * (pgno - 1), flags); - unsigned char packetData[128]; + unsigned char packetData[128]; int32_t count = 0; - while(count < pPage->pageSize) - { + while (count < pPage->pageSize) { SCryptOpts opts = {0}; opts.len = 128; opts.source = pPage->pData + count; @@ -945,20 +931,23 @@ static int tdbPagerInitPage(SPager *pPager, SPage *pPage, int (*initPage)(SPage opts.unitLen = 128; strncpy(opts.key, encryptKey, ENCRYPT_KEY_LEN); - int newLen = CBC_Decrypt(&opts); + int newLen = CBC_Decrypt(&opts); memcpy(pPage->pData + count, packetData, newLen); count += newLen; } - //tdbInfo("CBC tdb offset:%" PRId64 ", Decrypt count:%d %s", ((i64)pPage->pageSize) * (pgno - 1), count, __FUNCTION__); + // tdbInfo("CBC tdb offset:%" PRId64 ", Decrypt count:%d %s", ((i64)pPage->pageSize) * (pgno - 1), count, + // __FUNCTION__); - //tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d after Decrypt %s", ((i64)pPage->pageSize) * (pgno - 1), pPage->pData[0], __FUNCTION__); + // tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d after Decrypt %s", ((i64)pPage->pageSize) * (pgno - 1), + // pPage->pData[0], __FUNCTION__); } } else { init = 0; } - //tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d initPage %s", ((i64)pPage->pageSize) * (pgno - 1), pPage->pData[0], __FUNCTION__); + // tdbInfo("CBC tdb offset:%" PRId64 ", flag:%d initPage %s", ((i64)pPage->pageSize) * (pgno - 1), 
pPage->pData[0], + // __FUNCTION__); ret = (*initPage)(pPage, arg, init); if (ret < 0) { @@ -1001,16 +990,14 @@ static int tdbPagerWritePageToJournal(SPager *pPager, SPage *pPage) { if (ret < 0) { tdbError("failed to write pgno due to %s. file:%s, pgno:%u, txnId:%" PRId64, strerror(errno), pPager->jFileName, pgno, pPager->pActiveTxn->txnId); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } ret = tdbOsWrite(pPager->pActiveTxn->jfd, pPage->pData, pPage->pageSize); if (ret < 0) { tdbError("failed to write page data due to %s. file:%s, pageSize:%d, txnId:%" PRId64, strerror(errno), pPager->jFileName, pPage->pageSize, pPager->pActiveTxn->txnId); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } return 0; @@ -1044,15 +1031,14 @@ static int tdbPagerPWritePageToDB(SPager *pPager, SPage *pPage) { offset = (i64)pPage->pageSize * (TDB_PAGE_PGNO(pPage) - 1); - char* buf = tdbEncryptPage(pPager, pPage->pData, pPage->pageSize, __FUNCTION__, offset); + char *buf = tdbEncryptPage(pPager, pPage->pData, pPage->pageSize, __FUNCTION__, offset); ret = tdbOsPWrite(pPager->fd, buf, pPage->pageSize, offset); if (ret < 0) { tdbFreeEncryptBuf(pPager, buf); tdbError("failed to pwrite page data due to %s. file:%s, pageSize:%d", strerror(errno), pPager->dbFileName, pPage->pageSize); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } tdbFreeEncryptBuf(pPager, buf); @@ -1072,18 +1058,17 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { ret = tdbGetFileSize(jfd, pPager->pageSize, &journalSize); if (ret < 0) { - return -1; + return TAOS_SYSTEM_ERROR(errno); } if (tdbOsLSeek(jfd, 0L, SEEK_SET) < 0) { tdbError("failed to lseek jfd due to %s. 
file:%s, offset:0", strerror(errno), pPager->dbFileName); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } pageBuf = tdbOsCalloc(1, pPager->pageSize); if (pageBuf == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } tdbDebug("pager/restore: %p, %d/%d, txnId:%s", pPager, pPager->dbOrigSize, pPager->dbFileSize, jFileName); @@ -1095,7 +1080,7 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { int ret = tdbOsRead(jfd, &pgno, sizeof(pgno)); if (ret < 0) { tdbOsFree(pageBuf); - return -1; + return TAOS_SYSTEM_ERROR(errno); } tdbTrace("pager/restore: restore pgno:%d,", pgno); @@ -1103,27 +1088,28 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { ret = tdbOsRead(jfd, pageBuf, pPager->pageSize); if (ret < 0) { tdbOsFree(pageBuf); - return -1; + return TAOS_SYSTEM_ERROR(errno); } i64 offset = pPager->pageSize * (pgno - 1); if (tdbOsLSeek(pPager->fd, offset, SEEK_SET) < 0) { tdbError("failed to lseek fd due to %s. file:%s, offset:%" PRId64, strerror(errno), pPager->dbFileName, offset); - terrno = TAOS_SYSTEM_ERROR(errno); tdbOsFree(pageBuf); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } - char* buf = tdbEncryptPage(pPager, pageBuf, pPager->pageSize, __FUNCTION__, offset); + char *buf = tdbEncryptPage(pPager, pageBuf, pPager->pageSize, __FUNCTION__, offset); + if (buf == NULL) { + return terrno; + } ret = tdbOsWrite(pPager->fd, buf, pPager->pageSize); if (ret < 0) { tdbError("failed to write buf due to %s. file: %s, bufsize:%d", strerror(errno), pPager->dbFileName, pPager->pageSize); tdbFreeEncryptBuf(pPager, buf); - terrno = TAOS_SYSTEM_ERROR(errno); tdbOsFree(pageBuf); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } tdbFreeEncryptBuf(pPager, buf); @@ -1131,9 +1117,8 @@ static int tdbPagerRestore(SPager *pPager, const char *jFileName) { if (tdbOsFSync(pPager->fd) < 0) { tdbError("failed to fsync fd due to %s. 
dbfile:%s", strerror(errno), pPager->dbFileName); - terrno = TAOS_SYSTEM_ERROR(errno); tdbOsFree(pageBuf); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } tdbOsFree(pageBuf); @@ -1160,21 +1145,27 @@ static int32_t txnIdCompareDesc(const void *pLeft, const void *pRight) { } int tdbPagerRestoreJournals(SPager *pPager) { + int32_t code = 0; tdbDirEntryPtr pDirEntry; tdbDirPtr pDir = taosOpenDir(pPager->pEnv->dbName); if (pDir == NULL) { tdbError("failed to open %s since %s", pPager->pEnv->dbName, strerror(errno)); - return -1; + return TAOS_SYSTEM_ERROR(errno); } SArray *pTxnList = taosArrayInit(16, sizeof(int64_t)); + if (pTxnList == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } while ((pDirEntry = tdbReadDir(pDir)) != NULL) { char *name = tdbDirEntryBaseName(tdbGetDirEntryName(pDirEntry)); if (strncmp(TDB_MAINDB_NAME "-journal", name, 16) == 0) { int64_t txnId = -1; sscanf(name, TDB_MAINDB_NAME "-journal.%" PRId64, &txnId); - taosArrayPush(pTxnList, &txnId); + if (taosArrayPush(pTxnList, &txnId) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } } } taosArraySort(pTxnList, txnIdCompareDesc); @@ -1185,12 +1176,12 @@ int tdbPagerRestoreJournals(SPager *pPager) { memcpy(jname, pPager->pEnv->dbName, dirLen); jname[dirLen] = '/'; sprintf(jname + dirLen + 1, TDB_MAINDB_NAME "-journal.%" PRId64, *pTxnId); - if (tdbPagerRestore(pPager, jname) < 0) { + code = tdbPagerRestore(pPager, jname); + if (code) { taosArrayDestroy(pTxnList); tdbCloseDir(&pDir); - - tdbError("failed to restore file due to %s. jFileName:%s", strerror(errno), jname); - return -1; + tdbError("failed to restore file due to %s. 
jFileName:%s", strerror(code), jname); + return code; } } @@ -1205,7 +1196,7 @@ int tdbPagerRollback(SPager *pPager) { tdbDirPtr pDir = taosOpenDir(pPager->pEnv->dbName); if (pDir == NULL) { tdbError("failed to open %s since %s", pPager->pEnv->dbName, strerror(errno)); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } while ((pDirEntry = tdbReadDir(pDir)) != NULL) { @@ -1221,8 +1212,7 @@ int tdbPagerRollback(SPager *pPager) { tdbCloseDir(&pDir); tdbError("failed to remove file due to %s. jFileName:%s", strerror(errno), name); - terrno = TAOS_SYSTEM_ERROR(errno); - return -1; + return terrno = TAOS_SYSTEM_ERROR(errno); } } } diff --git a/source/libs/tdb/src/db/tdbTable.c b/source/libs/tdb/src/db/tdbTable.c index 3bc6f75bef..365be222ef 100644 --- a/source/libs/tdb/src/db/tdbTable.c +++ b/source/libs/tdb/src/db/tdbTable.c @@ -41,7 +41,7 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF pTb = (TTB *)tdbOsCalloc(1, sizeof(*pTb)); if (pTb == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } // pTb->pEnv @@ -54,7 +54,7 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF pPager = tdbEnvGetPager(pEnv, fFullName); if (!pPager) { tdbOsFree(pTb); - return -1; + return terrno; } ret = tdbTbGet(pPager->pEnv->pMainDb, tbname, strlen(tbname) + 1, &pData, &nData); @@ -74,7 +74,7 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF ret = tdbPagerOpen(pEnv->pCache, fFullName, &pPager); if (ret < 0) { tdbOsFree(pTb); - return -1; + return ret; } tdbEnvAddPager(pEnv, pPager); @@ -109,7 +109,7 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF ret = tdbPagerRestoreJournals(pPager); if (ret < 0) { tdbOsFree(pTb); - return -1; + return ret; } } else { tdbPagerRollback(pPager); @@ -119,7 +119,7 @@ int tdbTbOpen(const char *tbname, int keyLen, int valLen, tdb_cmpr_fn_t keyCmprF ret = tdbBtreeOpen(keyLen, valLen, pPager, tbname, pgno, keyCmprFn, 
pEnv, &(pTb->pBt)); if (ret < 0) { tdbOsFree(pTb); - return -1; + return ret; } *ppTb = pTb; diff --git a/source/libs/tdb/src/db/tdbTxn.c b/source/libs/tdb/src/db/tdbTxn.c index 0aeed3c140..24a70f62b2 100644 --- a/source/libs/tdb/src/db/tdbTxn.c +++ b/source/libs/tdb/src/db/tdbTxn.c @@ -20,7 +20,7 @@ int tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void // not support read-committed version at the moment if (flags != 0 && flags != (TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED)) { tdbError("tdb/txn: invalid txn flags: %" PRId32, flags); - return -1; + return TSDB_CODE_INVALID_PARA; } pTxn->flags = flags; @@ -39,7 +39,7 @@ int tdbTxnCloseImpl(TXN *pTxn) { } if (pTxn->jfd) { - tdbOsClose(pTxn->jfd); + TAOS_UNUSED(tdbOsClose(pTxn->jfd)); ASSERT(pTxn->jfd == NULL); } diff --git a/source/libs/tdb/src/db/tdbUtil.c b/source/libs/tdb/src/db/tdbUtil.c index 9021e08ffe..d6d33bc7d8 100644 --- a/source/libs/tdb/src/db/tdbUtil.c +++ b/source/libs/tdb/src/db/tdbUtil.c @@ -38,9 +38,8 @@ void tdbFree(void *p) { int tdbGnrtFileID(tdb_fd_t fd, uint8_t *fileid, bool unique) { int64_t stDev = 0, stIno = 0; - if (taosDevInoFile(fd, &stDev, &stIno) < 0) { - return -1; - } + int32_t code = taosDevInoFile(fd, &stDev, &stIno); + return code; memset(fileid, 0, TDB_FILE_ID_LEN); @@ -59,7 +58,7 @@ int tdbGetFileSize(tdb_fd_t fd, int szPage, SPgno *size) { ret = tdbOsFileSize(fd, &szBytes); if (ret < 0) { - return -1; + return TAOS_SYSTEM_ERROR(errno); } *size = szBytes / szPage; diff --git a/source/libs/tdb/src/inc/tdbInt.h b/source/libs/tdb/src/inc/tdbInt.h index b359f8a4cc..605fe6a1a4 100644 --- a/source/libs/tdb/src/inc/tdbInt.h +++ b/source/libs/tdb/src/inc/tdbInt.h @@ -17,6 +17,7 @@ #define _TD_TDB_INTERNAL_H_ #include "tdb.h" +#include "tutil.h" #include "tdef.h" #include "tlog.h" @@ -338,15 +339,15 @@ static inline int tdbTryLockPage(tdb_spinlock_t *pLock) { ((*(pPage)->xCellSize)(pPage, pCell, 0, NULL, NULL) + (pPage)->pPageMethods->szOffset) #define 
TDB_PAGE_OFFSET_SIZE(pPage) ((pPage)->pPageMethods->szOffset) -int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t), void *arg); -int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg); -void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, TXN *, SBTree *pBt)); -void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, TXN *, SBTree *pBt)); -int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl); -int tdbPageDropCell(SPage *pPage, int idx, TXN *pTxn, SBTree *pBt); -int tdbPageUpdateCell(SPage *pPage, int idx, SCell *pCell, int szCell, TXN *pTxn, SBTree *pBt); -void tdbPageCopy(SPage *pFromPage, SPage *pToPage, int copyOvflCells); -int tdbPageCapacity(int pageSize, int amHdrSize); +int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t), void *arg); +int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg); +void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, TXN *, SBTree *pBt)); +void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, TXN *, SBTree *pBt)); +int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl); +int tdbPageDropCell(SPage *pPage, int idx, TXN *pTxn, SBTree *pBt); +int tdbPageUpdateCell(SPage *pPage, int idx, SCell *pCell, int szCell, TXN *pTxn, SBTree *pBt); +int32_t tdbPageCopy(SPage *pFromPage, SPage *pToPage, int deepCopyOvfl); +int tdbPageCapacity(int pageSize, int amHdrSize); static inline SCell *tdbPageGetCell(SPage *pPage, int idx) { SCell *pCell; diff --git a/source/os/src/osFile.c b/source/os/src/osFile.c index f63339bcfc..79acec1211 100644 --- a/source/os/src/osFile.c +++ b/source/os/src/osFile.c @@ -67,7 +67,7 @@ typedef struct TdFile { void taosGetTmpfilePath(const char *inputTmpDir, const char *fileNamePrefix, char *dstPath) { #ifdef WINDOWS - 
char tmpPath[PATH_MAX]; + char tmpPath[PATH_MAX]; int32_t len = (int32_t)strlen(inputTmpDir); memcpy(tmpPath, inputTmpDir, len); @@ -269,13 +269,13 @@ int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) { #else if (pFile == NULL || pFile->fd < 0) { - return -1; + return TSDB_CODE_INVALID_PARA; } struct stat fileStat; int32_t code = fstat(pFile->fd, &fileStat); if (code < 0) { printf("taosFStatFile run fstat fail."); - return code; + return TAOS_SYSTEM_ERROR(errno); } if (stDev != NULL) { @@ -1239,7 +1239,7 @@ int64_t taosGetLineFile(TdFilePtr pFile, char **__restrict ptrBuf) { #ifdef WINDOWS size_t bufferSize = 512; *ptrBuf = taosMemoryMalloc(bufferSize); - if (*ptrBuf == NULL) goto END; + if (*ptrBuf == NULL) goto END; size_t bytesRead = 0; size_t totalBytesRead = 0; @@ -1274,7 +1274,7 @@ int64_t taosGetLineFile(TdFilePtr pFile, char **__restrict ptrBuf) { ret = getline(ptrBuf, &len, pFile->fp); #endif - END: +END: #if FILE_WITH_LOCK taosThreadRwlockUnlock(&(pFile->rwlock)); #endif @@ -1413,34 +1413,30 @@ int32_t taosLinkFile(char *src, char *dst) { return 0; } -FILE* taosOpenCFile(const char* filename, const char* mode) { - return fopen(filename, mode); -} +FILE *taosOpenCFile(const char *filename, const char *mode) { return fopen(filename, mode); } -int taosSeekCFile(FILE* file, int64_t offset, int whence) { +int taosSeekCFile(FILE *file, int64_t offset, int whence) { #ifdef WINDOWS return _fseeki64(file, offset, whence); #else return fseeko(file, offset, whence); -#endif +#endif } -size_t taosReadFromCFile(void *buffer, size_t size, size_t count, FILE *stream ) { +size_t taosReadFromCFile(void *buffer, size_t size, size_t count, FILE *stream) { return fread(buffer, size, count, stream); } -size_t taosWriteToCFile(const void* ptr, size_t size, size_t nitems, FILE* stream) { +size_t taosWriteToCFile(const void *ptr, size_t size, size_t nitems, FILE *stream) { return fwrite(ptr, size, nitems, stream); } -int taosCloseCFile(FILE *f) { - return 
fclose(f); -} +int taosCloseCFile(FILE *f) { return fclose(f); } -int taosSetAutoDelFile(char* path) { +int taosSetAutoDelFile(char *path) { #ifdef WINDOWS return SetFileAttributes(path, FILE_ATTRIBUTE_TEMPORARY); #else return unlink(path); -#endif +#endif } \ No newline at end of file