diff --git a/include/common/tdatablock.h b/include/common/tdatablock.h index ec998e9365..c271f118ae 100644 --- a/include/common/tdatablock.h +++ b/include/common/tdatablock.h @@ -253,7 +253,7 @@ int32_t assignOneDataBlock(SSDataBlock* dst, const SSDataBlock* src); int32_t copyDataBlock(SSDataBlock* pDst, const SSDataBlock* pSrc); SSDataBlock* createDataBlock(); -void* blockDataDestroy(SSDataBlock* pBlock); +void blockDataDestroy(SSDataBlock* pBlock); void blockDataFreeRes(SSDataBlock* pBlock); SSDataBlock* createOneDataBlock(const SSDataBlock* pDataBlock, bool copyData); SSDataBlock* createSpecialDataBlock(EStreamType type); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 5327428f5b..dff212b15c 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -629,7 +629,7 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask); int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg); int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code); -int32_t streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId, SStreamUpstreamEpInfo** pEpInfo); +void streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId, SStreamUpstreamEpInfo** pEpInfo); #if 0 SEpSet* streamTaskGetDownstreamEpInfo(SStreamTask* pTask, int32_t taskId); #endif @@ -659,10 +659,10 @@ int8_t streamTaskSetSchedStatusInactive(SStreamTask* pTask); int32_t streamTaskClearHTaskAttr(SStreamTask* pTask, int32_t clearRelHalt); int32_t streamExecTask(SStreamTask* pTask); -int32_t streamResumeTask(SStreamTask* pTask); +void streamResumeTask(SStreamTask* pTask); int32_t streamTrySchedExec(SStreamTask* pTask); int32_t streamTaskSchedTask(SMsgCb* pMsgCb, int32_t vgId, int64_t streamId, int32_t taskId, int32_t execType); -int32_t streamTaskResumeInFuture(SStreamTask* pTask); +void streamTaskResumeInFuture(SStreamTask* pTask); void streamTaskClearSchedIdleInfo(SStreamTask* 
pTask); void streamTaskSetIdleInfo(SStreamTask* pTask, int32_t idleTime); @@ -675,8 +675,8 @@ int32_t streamTaskSendCheckRsp(const SStreamMeta* pMeta, int32_t vgId, SStreamTa int32_t streamTaskProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp); // check downstream status -int32_t streamTaskStartMonitorCheckRsp(SStreamTask* pTask); -int32_t streamTaskStopMonitorCheckRsp(STaskCheckInfo* pInfo, const char* id); +void streamTaskStartMonitorCheckRsp(SStreamTask* pTask); +void streamTaskStopMonitorCheckRsp(STaskCheckInfo* pInfo, const char* id); void streamTaskCleanupCheckInfo(STaskCheckInfo* pInfo); // fill-history task diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 965246326a..43bac56f93 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1523,9 +1523,9 @@ void blockDataFreeRes(SSDataBlock* pBlock) { memset(&pBlock->info, 0, sizeof(SDataBlockInfo)); } -void* blockDataDestroy(SSDataBlock* pBlock) { +void blockDataDestroy(SSDataBlock* pBlock) { if (pBlock == NULL) { - return NULL; + return; } if (IS_VAR_DATA_TYPE(pBlock->info.pks[0].type)) { @@ -1535,7 +1535,6 @@ void* blockDataDestroy(SSDataBlock* pBlock) { blockDataFreeRes(pBlock); taosMemoryFreeClear(pBlock); - return NULL; } // todo remove it @@ -2468,19 +2467,18 @@ char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) { int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, char* cname) { if (stbFullName[0] == 0) { - terrno = TSDB_CODE_INVALID_PARA; - return TSDB_CODE_FAILED; + return TSDB_CODE_INVALID_PARA; } SArray* tags = taosArrayInit(0, sizeof(SSmlKv)); if (tags == NULL) { - return TSDB_CODE_FAILED; + return TSDB_CODE_OUT_OF_MEMORY; } if (cname == NULL) { terrno = TSDB_CODE_INVALID_PARA; taosArrayDestroy(tags); - return TSDB_CODE_FAILED; + return terrno; } int8_t type = TSDB_DATA_TYPE_UBIGINT; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c 
index 2474666e93..23e396b6a4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -4648,7 +4648,8 @@ void tsdbReaderClose2(STsdbReader* pReader) { } if (pReader->resBlockInfo.freeBlock) { - pReader->resBlockInfo.pResBlock = blockDataDestroy(pReader->resBlockInfo.pResBlock); + blockDataDestroy(pReader->resBlockInfo.pResBlock); + pReader->resBlockInfo.pResBlock = NULL; } taosMemoryFree(pSupInfo->colId); diff --git a/source/libs/executor/src/exchangeoperator.c b/source/libs/executor/src/exchangeoperator.c index 059e1f2663..a6a273cce8 100644 --- a/source/libs/executor/src/exchangeoperator.c +++ b/source/libs/executor/src/exchangeoperator.c @@ -607,7 +607,7 @@ int32_t extractDataBlockFromFetchRsp(SSDataBlock* pRes, char* pData, SArray* pCo blockDataAppendColInfo(pBlock, &idata); } - blockDecode(pBlock, pStart); + (void) blockDecode(pBlock, pStart); blockDataEnsureCapacity(pRes, pBlock->info.rows); // data from mnode diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index fad2b263b2..0476a7981c 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -970,7 +970,7 @@ void cleanupExprSupp(SExprSupp* pSupp) { taosMemoryFree(pSupp->rowEntryInfoOffset); } -void cleanupBasicInfo(SOptrBasicInfo* pInfo) { pInfo->pRes = blockDataDestroy(pInfo->pRes); } +void cleanupBasicInfo(SOptrBasicInfo* pInfo) { blockDataDestroy(pInfo->pRes); pInfo->pRes = NULL;} bool groupbyTbname(SNodeList* pGroupList) { bool bytbname = false; diff --git a/source/libs/executor/src/filloperator.c b/source/libs/executor/src/filloperator.c index e1aa75d828..ad1c6d67d4 100644 --- a/source/libs/executor/src/filloperator.c +++ b/source/libs/executor/src/filloperator.c @@ -310,8 +310,10 @@ static SSDataBlock* doFill(SOperatorInfo* pOperator) { void destroyFillOperatorInfo(void* param) { SFillOperatorInfo* pInfo = (SFillOperatorInfo*)param; pInfo->pFillInfo = 
taosDestroyFillInfo(pInfo->pFillInfo); - pInfo->pRes = blockDataDestroy(pInfo->pRes); - pInfo->pFinalRes = blockDataDestroy(pInfo->pFinalRes); + blockDataDestroy(pInfo->pRes); + pInfo->pRes = NULL; + blockDataDestroy(pInfo->pFinalRes); + pInfo->pFinalRes = NULL; cleanupExprSupp(&pInfo->noFillExprSupp); diff --git a/source/libs/executor/src/hashjoinoperator.c b/source/libs/executor/src/hashjoinoperator.c index 2fe2ccc56f..adc1055a6b 100755 --- a/source/libs/executor/src/hashjoinoperator.c +++ b/source/libs/executor/src/hashjoinoperator.c @@ -1065,7 +1065,8 @@ static void destroyHashJoinOperator(void* param) { hJoinFreeTableInfo(&pJoinOperator->tbs[0]); hJoinFreeTableInfo(&pJoinOperator->tbs[1]); - pJoinOperator->finBlk = blockDataDestroy(pJoinOperator->finBlk); + blockDataDestroy(pJoinOperator->finBlk); + pJoinOperator->finBlk = NULL; taosMemoryFreeClear(pJoinOperator->pResColMap); taosArrayDestroyEx(pJoinOperator->pRowBufs, hJoinFreeBufPage); diff --git a/source/libs/executor/src/mergejoin.c b/source/libs/executor/src/mergejoin.c index 5f0a2eadfb..50ce604a7c 100755 --- a/source/libs/executor/src/mergejoin.c +++ b/source/libs/executor/src/mergejoin.c @@ -3304,9 +3304,11 @@ void mJoinDestroyWindowCtx(SMJoinOperatorInfo* pJoin) { SMJoinWindowCtx* pCtx = &pJoin->ctx.windowCtx; mWinJoinResetWindowCache(pCtx, &pCtx->cache); - - pCtx->finBlk = blockDataDestroy(pCtx->finBlk); - pCtx->cache.outBlk = blockDataDestroy(pCtx->cache.outBlk); + + blockDataDestroy(pCtx->finBlk); + pCtx->finBlk = NULL; + blockDataDestroy(pCtx->cache.outBlk); + pCtx->cache.outBlk = NULL; taosArrayDestroy(pCtx->cache.grps); } @@ -3378,9 +3380,11 @@ int32_t mJoinInitWindowCtx(SMJoinOperatorInfo* pJoin, SSortMergeJoinPhysiNode* p void mJoinDestroyMergeCtx(SMJoinOperatorInfo* pJoin) { SMJoinMergeCtx* pCtx = &pJoin->ctx.mergeCtx; + blockDataDestroy(pCtx->finBlk); + blockDataDestroy(pCtx->midBlk); - pCtx->finBlk = blockDataDestroy(pCtx->finBlk); - pCtx->midBlk = blockDataDestroy(pCtx->midBlk); + 
pCtx->finBlk = NULL; + pCtx->midBlk = NULL; } diff --git a/source/libs/executor/src/mergeoperator.c b/source/libs/executor/src/mergeoperator.c index 993e8c72fd..2816bae03c 100755 --- a/source/libs/executor/src/mergeoperator.c +++ b/source/libs/executor/src/mergeoperator.c @@ -232,8 +232,11 @@ int32_t getSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, u void destroySortMergeOperatorInfo(void* param) { SSortMergeInfo* pSortMergeInfo = param; - pSortMergeInfo->pInputBlock = blockDataDestroy(pSortMergeInfo->pInputBlock); - pSortMergeInfo->pIntermediateBlock = blockDataDestroy(pSortMergeInfo->pIntermediateBlock); + blockDataDestroy(pSortMergeInfo->pInputBlock); + pSortMergeInfo->pInputBlock = NULL; + + blockDataDestroy(pSortMergeInfo->pIntermediateBlock); + pSortMergeInfo->pIntermediateBlock = NULL; taosArrayDestroy(pSortMergeInfo->matchInfo.pList); @@ -429,7 +432,8 @@ SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { void destroyMultiwayMergeOperatorInfo(void* param) { SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param; - pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); + blockDataDestroy(pInfo->binfo.pRes); + pInfo->binfo.pRes = NULL; if (NULL != gMultiwayMergeFps[pInfo->type].closeFn) { (*gMultiwayMergeFps[pInfo->type].closeFn)(&pInfo->sortMergeInfo); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 486f8b3ce8..39b1cce600 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3789,7 +3789,8 @@ static void destroyTagScanOperatorInfo(void* param) { taosArrayDestroy(pInfo->aFilterIdxs); taosArrayDestroyEx(pInfo->aUidTags, tagScanFreeUidTag); - pInfo->pRes = blockDataDestroy(pInfo->pRes); + blockDataDestroy(pInfo->pRes); + pInfo->pRes = NULL; taosArrayDestroy(pInfo->matchInfo.pList); pInfo->pTableListInfo = tableListDestroy(pInfo->pTableListInfo); taosMemoryFreeClear(param); @@ -4742,15 +4743,19 @@ void 
destroyTableMergeScanOperatorInfo(void* param) { pTableScanInfo->pSortHandle = NULL; taosHashCleanup(pTableScanInfo->mSkipTables); pTableScanInfo->mSkipTables = NULL; - pTableScanInfo->pSortInputBlock = blockDataDestroy(pTableScanInfo->pSortInputBlock); + blockDataDestroy(pTableScanInfo->pSortInputBlock); + pTableScanInfo->pSortInputBlock = NULL; // end one reader variable cleanupQueryTableDataCond(&pTableScanInfo->base.cond); destroyTableScanBase(&pTableScanInfo->base, &pTableScanInfo->base.readerAPI); - pTableScanInfo->pResBlock = blockDataDestroy(pTableScanInfo->pResBlock); + blockDataDestroy(pTableScanInfo->pResBlock); + pTableScanInfo->pResBlock = NULL; + // remove it from the task->result list - pTableScanInfo->pReaderBlock = blockDataDestroy(pTableScanInfo->pReaderBlock); + blockDataDestroy(pTableScanInfo->pReaderBlock); + pTableScanInfo->pReaderBlock = NULL; taosArrayDestroy(pTableScanInfo->pSortInfo); stopSubTablesTableMergeScan(pTableScanInfo); diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 507dbe7ee2..82eebf5310 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -368,7 +368,8 @@ SSDataBlock* doSort(SOperatorInfo* pOperator) { void destroySortOperatorInfo(void* param) { SSortOperatorInfo* pInfo = (SSortOperatorInfo*)param; - pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); + blockDataDestroy(pInfo->binfo.pRes); + pInfo->binfo.pRes = NULL; tsortDestroySortHandle(pInfo->pSortHandle); taosArrayDestroy(pInfo->pSortInfo); @@ -611,7 +612,8 @@ int32_t getGroupSortExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, u void destroyGroupSortOperatorInfo(void* param) { SGroupSortOperatorInfo* pInfo = (SGroupSortOperatorInfo*)param; - pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); + blockDataDestroy(pInfo->binfo.pRes); + pInfo->binfo.pRes = NULL; taosArrayDestroy(pInfo->pSortInfo); taosArrayDestroy(pInfo->matchInfo.pList); diff --git 
a/source/libs/executor/src/streamfilloperator.c b/source/libs/executor/src/streamfilloperator.c index 5f188c2c8a..384c763063 100644 --- a/source/libs/executor/src/streamfilloperator.c +++ b/source/libs/executor/src/streamfilloperator.c @@ -126,9 +126,12 @@ static void destroyStreamFillOperatorInfo(void* param) { SStreamFillOperatorInfo* pInfo = (SStreamFillOperatorInfo*)param; pInfo->pFillInfo = destroyStreamFillInfo(pInfo->pFillInfo); pInfo->pFillSup = destroyStreamFillSupporter(pInfo->pFillSup); - pInfo->pRes = blockDataDestroy(pInfo->pRes); - pInfo->pSrcBlock = blockDataDestroy(pInfo->pSrcBlock); - pInfo->pDelRes = blockDataDestroy(pInfo->pDelRes); + blockDataDestroy(pInfo->pRes); + pInfo->pRes = NULL; + blockDataDestroy(pInfo->pSrcBlock); + pInfo->pSrcBlock = NULL; + blockDataDestroy(pInfo->pDelRes); + pInfo->pDelRes = NULL; taosArrayDestroy(pInfo->matchInfo.pList); pInfo->matchInfo.pList = NULL; taosMemoryFree(pInfo); diff --git a/source/libs/executor/src/timesliceoperator.c b/source/libs/executor/src/timesliceoperator.c index cda22fa320..cdcc702629 100644 --- a/source/libs/executor/src/timesliceoperator.c +++ b/source/libs/executor/src/timesliceoperator.c @@ -1087,7 +1087,8 @@ _error: void destroyTimeSliceOperatorInfo(void* param) { STimeSliceOperatorInfo* pInfo = (STimeSliceOperatorInfo*)param; - pInfo->pRes = blockDataDestroy(pInfo->pRes); + blockDataDestroy(pInfo->pRes); + pInfo->pRes = NULL; for (int32_t i = 0; i < taosArrayGetSize(pInfo->pPrevRow); ++i) { SGroupKeys* pKey = taosArrayGet(pInfo->pPrevRow, i); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index d9bcc954a4..6d88eaef99 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -555,7 +555,8 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT (*numOfCompleted) += 1; pSource->src.rowIndex = -1; pSource->pageIndex = -1; - pSource->src.pBlock = blockDataDestroy(pSource->src.pBlock); + 
blockDataDestroy(pSource->src.pBlock); + pSource->src.pBlock = NULL; } else { if (pSource->pageIndex % 512 == 0) { qDebug("begin source %p page %d", pSource, pSource->pageIndex); diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index fd248861e3..2fe86817e3 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -208,7 +208,7 @@ int32_t streamQueueOpen(int64_t cap, SStreamQueue** pQ); void streamQueueClose(SStreamQueue* pQueue, int32_t taskId); void streamQueueProcessSuccess(SStreamQueue* queue); void streamQueueProcessFail(SStreamQueue* queue); -void* streamQueueNextItem(SStreamQueue* pQueue); +void streamQueueNextItem(SStreamQueue* pQueue, SStreamQueueItem** pItem); void streamFreeQitem(SStreamQueueItem* data); int32_t streamQueueGetItemSize(const SStreamQueue* pQueue); @@ -226,7 +226,7 @@ int32_t streamTaskDownloadCheckpointData(const char* id, char* path); int32_t streamTaskOnNormalTaskReady(SStreamTask* pTask); int32_t streamTaskOnScanHistoryTaskReady(SStreamTask* pTask); -int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t upstreamNodeId, int32_t upstreamTaskId, +void initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t upstreamNodeId, int32_t upstreamTaskId, int32_t childId, SEpSet* pEpset, int64_t checkpointId); int32_t initCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamNodeId, int32_t upstreamTaskId, int32_t childId, int64_t checkpointId, SRpcMsg* pMsg); @@ -236,6 +236,10 @@ typedef int32_t (*__stream_async_exec_fn_t)(void* param); int32_t streamMetaAsyncExec(SStreamMeta* pMeta, __stream_async_exec_fn_t fn, void* param, int32_t* code); void flushStateDataInExecutor(SStreamTask* pTask, SStreamQueueItem* pCheckpointBlock); +void streamMutexLock(TdThreadMutex *pMutex); +void streamMutexUnlock(TdThreadMutex *pMutex); +void streamMutexDestroy(TdThreadMutex *pMutex); + #ifdef __cplusplus } #endif diff --git 
a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 8b87019ee0..86144a3099 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -887,8 +887,8 @@ _EXIT: rocksdb_options_destroy(opts); rocksdb_cache_destroy(cache); rocksdb_env_destroy(env); - taosThreadMutexDestroy(&pHandle->mutex); - taosThreadMutexDestroy(&pHandle->cfMutex); + streamMutexDestroy(&pHandle->mutex); + streamMutexDestroy(&pHandle->cfMutex); taosHashCleanup(pHandle->cfInst); tdListFree(pHandle->list); taosMemoryFree(pHandle); @@ -923,9 +923,9 @@ void streamBackendCleanup(void* arg) { } tdListFree(pHandle->list); - taosThreadMutexDestroy(&pHandle->mutex); + streamMutexDestroy(&pHandle->mutex); - taosThreadMutexDestroy(&pHandle->cfMutex); + streamMutexDestroy(&pHandle->cfMutex); stDebug("destroy stream backend :%p", pHandle); taosMemoryFree(pHandle); return; @@ -1393,7 +1393,7 @@ int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { // vnode task->db SStreamMeta* pMeta = arg; - taosThreadMutexLock(&pMeta->backendMutex); + streamMutexLock(&pMeta->backendMutex); void* pIter = taosHashIterate(pMeta->pTaskDbUnique, NULL); int32_t code = 0; @@ -1434,14 +1434,14 @@ int32_t taskDbBuildSnap(void* arg, SArray* pSnap) { pIter = taosHashIterate(pMeta->pTaskDbUnique, pIter); } - taosThreadMutexUnlock(&pMeta->backendMutex); + streamMutexUnlock(&pMeta->backendMutex); return code; } int32_t taskDbDestroySnap(void* arg, SArray* pSnapInfo) { if (pSnapInfo == NULL) return 0; SStreamMeta* pMeta = arg; int32_t code = 0; - taosThreadMutexLock(&pMeta->backendMutex); + streamMutexLock(&pMeta->backendMutex); char buf[128] = {0}; for (int i = 0; i < taosArrayGetSize(pSnapInfo); i++) { @@ -1457,7 +1457,7 @@ int32_t taskDbDestroySnap(void* arg, SArray* pSnapInfo) { taskDbUnRefChkp(*pTaskDb, pSnap->chkpId); } - taosThreadMutexUnlock(&pMeta->backendMutex); + streamMutexUnlock(&pMeta->backendMutex); return 0; } #ifdef 
BUILD_NO_CALL @@ -1697,17 +1697,17 @@ int32_t streamBackendDoCheckpoint(void* arg, int64_t chkpId, int64_t processVer) SListNode* streamBackendAddCompare(void* backend, void* arg) { SBackendWrapper* pHandle = (SBackendWrapper*)backend; SListNode* node = NULL; - taosThreadMutexLock(&pHandle->mutex); + streamMutexLock(&pHandle->mutex); node = tdListAdd(pHandle->list, arg); - taosThreadMutexUnlock(&pHandle->mutex); + streamMutexUnlock(&pHandle->mutex); return node; } void streamBackendDelCompare(void* backend, void* arg) { SBackendWrapper* pHandle = (SBackendWrapper*)backend; SListNode* node = NULL; - taosThreadMutexLock(&pHandle->mutex); + streamMutexLock(&pHandle->mutex); node = tdListPopNode(pHandle->list, arg); - taosThreadMutexUnlock(&pHandle->mutex); + streamMutexUnlock(&pHandle->mutex); if (node) { streamStateDestroyCompar(node->data); taosMemoryFree(node); @@ -2461,9 +2461,9 @@ int32_t taskDbBuildFullPath(char* path, char* key, char** dbFullPath, char** sta void taskDbUpdateChkpId(void* pTaskDb, int64_t chkpId) { STaskDbWrapper* p = pTaskDb; - taosThreadMutexLock(&p->mutex); + streamMutexLock(&p->mutex); p->chkpId = chkpId; - taosThreadMutexUnlock(&p->mutex); + streamMutexUnlock(&p->mutex); } STaskDbWrapper* taskDbOpenImpl(const char* key, char* statePath, char* dbPath) { @@ -2622,7 +2622,7 @@ void taskDbDestroy(void* pDb, bool flush) { taosMemoryFree(wrapper->pCfOpts); taosMemoryFree(wrapper->pCfParams); - taosThreadMutexDestroy(&wrapper->mutex); + streamMutexDestroy(&wrapper->mutex); taskDbDestroyChkpOpt(wrapper); @@ -2957,7 +2957,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { SBackendWrapper* handle = backend; SBackendCfWrapper* pBackendCfWrapper = taosMemoryCalloc(1, sizeof(SBackendCfWrapper)); - taosThreadMutexLock(&handle->cfMutex); + streamMutexLock(&handle->cfMutex); RocksdbCfInst** ppInst = taosHashGet(handle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); if (ppInst != NULL && *ppInst != NULL) { 
RocksdbCfInst* inst = *ppInst; @@ -2970,7 +2970,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { pBackendCfWrapper->param = inst->param; pBackendCfWrapper->pBackend = handle; pBackendCfWrapper->pComparNode = inst->pCompareNode; - taosThreadMutexUnlock(&handle->cfMutex); + streamMutexUnlock(&handle->cfMutex); pBackendCfWrapper->backendId = pState->streamBackendRid; memcpy(pBackendCfWrapper->idstr, pState->pTdbState->idstr, sizeof(pState->pTdbState->idstr)); @@ -2987,7 +2987,7 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) { inst->rOpt = NULL; return 0; } - taosThreadMutexUnlock(&handle->cfMutex); + streamMutexUnlock(&handle->cfMutex); char* err = NULL; int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]); @@ -3046,14 +3046,14 @@ void streamStateCloseBackend(SStreamState* pState, bool remove) { stInfo("start to close state on backend: %p", pHandle); - taosThreadMutexLock(&pHandle->cfMutex); + streamMutexLock(&pHandle->cfMutex); RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, wrapper->idstr, strlen(pState->pTdbState->idstr) + 1); if (ppInst != NULL && *ppInst != NULL) { RocksdbCfInst* inst = *ppInst; taosMemoryFree(inst); taosHashRemove(pHandle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1); } - taosThreadMutexUnlock(&pHandle->cfMutex); + streamMutexUnlock(&pHandle->cfMutex); char* status[] = {"close", "drop"}; stInfo("start to %s state %p on backendWrapper %p %s", status[remove == false ? 
0 : 1], pState, wrapper, @@ -3085,7 +3085,7 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) { return -1; } - taosThreadMutexLock(&wrapper->mutex); + streamMutexLock(&wrapper->mutex); rocksdb_column_family_handle_t* cf = wrapper->pCf[idx]; if (cf == NULL) { @@ -3100,7 +3100,7 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) { wrapper->pCf[idx] = cf; } } - taosThreadMutexUnlock(&wrapper->mutex); + streamMutexUnlock(&wrapper->mutex); } return idx; diff --git a/source/libs/stream/src/streamCheckStatus.c b/source/libs/stream/src/streamCheckStatus.c index e588be0784..5e67f1766f 100644 --- a/source/libs/stream/src/streamCheckStatus.c +++ b/source/libs/stream/src/streamCheckStatus.c @@ -23,10 +23,10 @@ static void processDownstreamReadyRsp(SStreamTask* pTask); static void addIntoNodeUpdateList(SStreamTask* pTask, int32_t nodeId); static void rspMonitorFn(void* param, void* tmrId); -static int32_t streamTaskInitTaskCheckInfo(STaskCheckInfo* pInfo, STaskOutputInfo* pOutputInfo, int64_t startTs); +static void streamTaskInitTaskCheckInfo(STaskCheckInfo* pInfo, STaskOutputInfo* pOutputInfo, int64_t startTs); static int32_t streamTaskStartCheckDownstream(STaskCheckInfo* pInfo, const char* id); -static int32_t streamTaskCompleteCheckRsp(STaskCheckInfo* pInfo, bool lock, const char* id); -static int32_t streamTaskAddReqInfo(STaskCheckInfo* pInfo, int64_t reqId, int32_t taskId, int32_t vgId, const char* id); +static void streamTaskCompleteCheckRsp(STaskCheckInfo* pInfo, bool lock, const char* id); +static void streamTaskAddReqInfo(STaskCheckInfo* pInfo, int64_t reqId, int32_t taskId, int32_t vgId, const char* id); static void doSendCheckMsg(SStreamTask* pTask, SDownstreamStatusInfo* p); static void handleTimeoutDownstreamTasks(SStreamTask* pTask, SArray* pTimeoutList); static void handleNotReadyDownstreamTask(SStreamTask* pTask, SArray* pNotReadyList); @@ -41,9 +41,9 @@ static SDownstreamStatusInfo* findCheckRspStatus(STaskCheckInfo* 
pInfo, int32_t int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage, int64_t* oldStage) { SStreamUpstreamEpInfo* pInfo = NULL; - int32_t code = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId, &pInfo); - if (code != TSDB_CODE_SUCCESS) { - return code; + streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId, &pInfo); + if (pInfo == NULL) { + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } *oldStage = pInfo->stage; @@ -65,21 +65,21 @@ int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_ ", prev:%" PRId64, id, upstreamTaskId, vgId, stage, pInfo->stage); // record the checkpoint failure id and sent to mnode - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); ETaskStatus status = streamTaskGetStatus(pTask).state; if (status == TASK_STATUS__CK) { streamTaskSetFailedCheckpointId(pTask); } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); } if (pInfo->stage != stage) { - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); ETaskStatus status = streamTaskGetStatus(pTask).state; if (status == TASK_STATUS__CK) { streamTaskSetFailedCheckpointId(pTask); } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return TASK_UPSTREAM_NEW_STAGE; } else if (pTask->status.downstreamReady != 1) { @@ -120,7 +120,7 @@ void streamTaskSendCheckMsg(SStreamTask* pTask) { idstr, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, pRange->range.minVer, pRange->range.maxVer, pWindow->skey, pWindow->ekey, req.reqId); - streamSendCheckMsg(pTask, &req, pTask->outputInfo.fixedDispatcher.nodeId, &pTask->outputInfo.fixedDispatcher.epSet); + (void) streamSendCheckMsg(pTask, &req, pTask->outputInfo.fixedDispatcher.nodeId, &pTask->outputInfo.fixedDispatcher.epSet); } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { streamTaskStartMonitorCheckRsp(pTask); @@ -140,7 +140,7 @@ void 
streamTaskSendCheckMsg(SStreamTask* pTask) { stDebug("s-task:%s (vgId:%d) stage:%" PRId64 " check downstream task:0x%x (vgId:%d) (shuffle), idx:%d, reqId:0x%" PRIx64, idstr, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, i, req.reqId); - streamSendCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); + (void) streamSendCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { // for sink task, set it ready directly. stDebug("s-task:%s (vgId:%d) set downstream ready, since no downstream", idstr, pTask->info.nodeId); @@ -265,7 +265,7 @@ int32_t streamTaskSendCheckRsp(const SStreamMeta* pMeta, int32_t vgId, SStreamTa void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); tEncoderInit(&encoder, (uint8_t*)abuf, len); - tEncodeStreamTaskCheckRsp(&encoder, pRsp); + (void) tEncodeStreamTaskCheckRsp(&encoder, pRsp); tEncoderClear(&encoder); SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = *pRpcInfo}; @@ -274,14 +274,16 @@ int32_t streamTaskSendCheckRsp(const SStreamMeta* pMeta, int32_t vgId, SStreamTa return 0; } -int32_t streamTaskStartMonitorCheckRsp(SStreamTask* pTask) { +void streamTaskStartMonitorCheckRsp(SStreamTask* pTask) { + int32_t vgId = pTask->pMeta->vgId; STaskCheckInfo* pInfo = &pTask->taskCheckInfo; - taosThreadMutexLock(&pInfo->checkInfoLock); + + streamMutexLock(&pInfo->checkInfoLock); int32_t code = streamTaskStartCheckDownstream(pInfo, pTask->id.idStr); if (code != TSDB_CODE_SUCCESS) { - taosThreadMutexUnlock(&pInfo->checkInfoLock); - return TSDB_CODE_FAILED; + streamMutexUnlock(&pInfo->checkInfoLock); + return; } /*SStreamTask* p = */ streamMetaAcquireOneTask(pTask); // add task ref here @@ -293,20 +295,18 @@ int32_t streamTaskStartMonitorCheckRsp(SStreamTask* pTask) { if (pInfo->checkRspTmr == NULL) { pInfo->checkRspTmr = taosTmrStart(rspMonitorFn, CHECK_RSP_CHECK_INTERVAL, pTask, streamTimer); } else { - taosTmrReset(rspMonitorFn, CHECK_RSP_CHECK_INTERVAL, pTask, streamTimer, 
&pInfo->checkRspTmr); + streamTmrReset(rspMonitorFn, CHECK_RSP_CHECK_INTERVAL, pTask, streamTimer, &pInfo->checkRspTmr, vgId, "check-status-monitor"); } - taosThreadMutexUnlock(&pInfo->checkInfoLock); - return 0; + streamMutexUnlock(&pInfo->checkInfoLock); } -int32_t streamTaskStopMonitorCheckRsp(STaskCheckInfo* pInfo, const char* id) { - taosThreadMutexLock(&pInfo->checkInfoLock); +void streamTaskStopMonitorCheckRsp(STaskCheckInfo* pInfo, const char* id) { + streamMutexLock(&pInfo->checkInfoLock); pInfo->stopCheckProcess = 1; - taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamMutexUnlock(&pInfo->checkInfoLock); stDebug("s-task:%s set stop check-rsp monitor flag", id); - return TSDB_CODE_SUCCESS; } void streamTaskCleanupCheckInfo(STaskCheckInfo* pInfo) { @@ -316,21 +316,21 @@ void streamTaskCleanupCheckInfo(STaskCheckInfo* pInfo) { pInfo->pList = NULL; if (pInfo->checkRspTmr != NULL) { - /*bool ret = */ taosTmrStop(pInfo->checkRspTmr); + (void) taosTmrStop(pInfo->checkRspTmr); pInfo->checkRspTmr = NULL; } - taosThreadMutexDestroy(&pInfo->checkInfoLock); + streamMutexDestroy(&pInfo->checkInfoLock); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void processDownstreamReadyRsp(SStreamTask* pTask) { EStreamTaskEvent event = (pTask->info.fillHistory == 0) ? 
TASK_EVENT_INIT : TASK_EVENT_INIT_SCANHIST; - streamTaskOnHandleEventSuccess(pTask->status.pSM, event, NULL, NULL); + (void) streamTaskOnHandleEventSuccess(pTask->status.pSM, event, NULL, NULL); int64_t checkTs = pTask->execInfo.checkTs; int64_t readyTs = pTask->execInfo.readyTs; - streamMetaAddTaskLaunchResult(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, checkTs, readyTs, true); + (void) streamMetaAddTaskLaunchResult(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, checkTs, readyTs, true); if (pTask->status.taskStatus == TASK_STATUS__HALT) { ASSERT(HAS_RELATED_FILLHISTORY_TASK(pTask) && (pTask->info.fillHistory == 0)); @@ -338,21 +338,25 @@ void processDownstreamReadyRsp(SStreamTask* pTask) { // halt it self for count window stream task until the related fill history task completed. stDebug("s-task:%s level:%d initial status is %s from mnode, set it to be halt", pTask->id.idStr, pTask->info.taskLevel, streamTaskGetStatusStr(pTask->status.taskStatus)); - streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_HALT); + int32_t code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_HALT); + if (code != 0) { + // todo: handle error + } } // start the related fill-history task, when current task is ready // not invoke in success callback due to the deadlock. 
+ // todo: let's retry if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { stDebug("s-task:%s try to launch related fill-history task", pTask->id.idStr); - streamLaunchFillHistoryTask(pTask); + (void) streamLaunchFillHistoryTask(pTask); } } void addIntoNodeUpdateList(SStreamTask* pTask, int32_t nodeId) { int32_t vgId = pTask->pMeta->vgId; - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); int32_t num = taosArrayGetSize(pTask->outputInfo.pNodeEpsetUpdateList); bool existed = false; for (int i = 0; i < num; ++i) { @@ -365,16 +369,18 @@ void addIntoNodeUpdateList(SStreamTask* pTask, int32_t nodeId) { if (!existed) { SDownstreamTaskEpset t = {.nodeId = nodeId}; - taosArrayPush(pTask->outputInfo.pNodeEpsetUpdateList, &t); - + void* p = taosArrayPush(pTask->outputInfo.pNodeEpsetUpdateList, &t); + if (p == NULL) { + // todo let's retry + } stInfo("s-task:%s vgId:%d downstream nodeId:%d needs to be updated, total needs updated:%d", pTask->id.idStr, vgId, t.nodeId, (num + 1)); } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); } -int32_t streamTaskInitTaskCheckInfo(STaskCheckInfo* pInfo, STaskOutputInfo* pOutputInfo, int64_t startTs) { +void streamTaskInitTaskCheckInfo(STaskCheckInfo* pInfo, STaskOutputInfo* pOutputInfo, int64_t startTs) { taosArrayClear(pInfo->pList); if (pOutputInfo->type == TASK_OUTPUT__FIXED_DISPATCH) { @@ -387,7 +393,6 @@ int32_t streamTaskInitTaskCheckInfo(STaskCheckInfo* pInfo, STaskOutputInfo* pOut pInfo->startTs = startTs; pInfo->timeoutStartTs = startTs; pInfo->stopCheckProcess = 0; - return TSDB_CODE_SUCCESS; } SDownstreamStatusInfo* findCheckRspStatus(STaskCheckInfo* pInfo, int32_t taskId) { @@ -403,7 +408,7 @@ SDownstreamStatusInfo* findCheckRspStatus(STaskCheckInfo* pInfo, int32_t taskId) int32_t streamTaskUpdateCheckInfo(STaskCheckInfo* pInfo, int32_t taskId, int32_t status, int64_t rspTs, int64_t reqId, int32_t* pNotReady, const char* id) { - taosThreadMutexLock(&pInfo->checkInfoLock); + 
streamMutexLock(&pInfo->checkInfoLock); SDownstreamStatusInfo* p = findCheckRspStatus(pInfo, taskId); if (p != NULL) { @@ -411,7 +416,7 @@ int32_t streamTaskUpdateCheckInfo(STaskCheckInfo* pInfo, int32_t taskId, int32_t stError("s-task:%s reqId:0x%" PRIx64 " expected:0x%" PRIx64 " expired check-rsp recv from downstream task:0x%x, discarded", id, reqId, p->reqId, taskId); - taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamMutexUnlock(&pInfo->checkInfoLock); return TSDB_CODE_FAILED; } @@ -425,11 +430,11 @@ int32_t streamTaskUpdateCheckInfo(STaskCheckInfo* pInfo, int32_t taskId, int32_t p->status = status; p->rspTs = rspTs; - taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamMutexUnlock(&pInfo->checkInfoLock); return TSDB_CODE_SUCCESS; } - taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamMutexUnlock(&pInfo->checkInfoLock); stError("s-task:%s unexpected check rsp msg, invalid downstream task:0x%x, reqId:%" PRIx64 " discarded", id, taskId, reqId); return TSDB_CODE_FAILED; @@ -450,9 +455,9 @@ int32_t streamTaskStartCheckDownstream(STaskCheckInfo* pInfo, const char* id) { return TSDB_CODE_SUCCESS; } -int32_t streamTaskCompleteCheckRsp(STaskCheckInfo* pInfo, bool lock, const char* id) { +void streamTaskCompleteCheckRsp(STaskCheckInfo* pInfo, bool lock, const char* id) { if (lock) { - taosThreadMutexLock(&pInfo->checkInfoLock); + streamMutexLock(&pInfo->checkInfoLock); } if (pInfo->inCheckProcess) { @@ -474,27 +479,28 @@ int32_t streamTaskCompleteCheckRsp(STaskCheckInfo* pInfo, bool lock, const char* } if (lock) { - taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamMutexUnlock(&pInfo->checkInfoLock); } - - return 0; } -int32_t streamTaskAddReqInfo(STaskCheckInfo* pInfo, int64_t reqId, int32_t taskId, int32_t vgId, const char* id) { +// todo: retry until success +void streamTaskAddReqInfo(STaskCheckInfo* pInfo, int64_t reqId, int32_t taskId, int32_t vgId, const char* id) { SDownstreamStatusInfo info = {.taskId = taskId, .status = -1, .vgId = vgId, .reqId = 
reqId, .rspTs = 0}; - taosThreadMutexLock(&pInfo->checkInfoLock); + streamMutexLock(&pInfo->checkInfoLock); SDownstreamStatusInfo* p = findCheckRspStatus(pInfo, taskId); if (p != NULL) { stDebug("s-task:%s check info to task:0x%x already sent", id, taskId); - taosThreadMutexUnlock(&pInfo->checkInfoLock); - return TSDB_CODE_SUCCESS; + streamMutexUnlock(&pInfo->checkInfoLock); + return; } - taosArrayPush(pInfo->pList, &info); + void* px = taosArrayPush(pInfo->pList, &info); + if (px == NULL) { + // todo: retry + } - taosThreadMutexUnlock(&pInfo->checkInfoLock); - return TSDB_CODE_SUCCESS; + streamMutexUnlock(&pInfo->checkInfoLock); } void doSendCheckMsg(SStreamTask* pTask, SDownstreamStatusInfo* p) { @@ -519,7 +525,7 @@ void doSendCheckMsg(SStreamTask* pTask, SDownstreamStatusInfo* p) { stDebug("s-task:%s (vgId:%d) stage:%" PRId64 " re-send check downstream task:0x%x(vgId:%d) reqId:0x%" PRIx64, id, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, req.reqId); - streamSendCheckMsg(pTask, &req, pOutputInfo->fixedDispatcher.nodeId, &pOutputInfo->fixedDispatcher.epSet); + (void) streamSendCheckMsg(pTask, &req, pOutputInfo->fixedDispatcher.nodeId, &pOutputInfo->fixedDispatcher.epSet); } else if (pOutputInfo->type == TASK_OUTPUT__SHUFFLE_DISPATCH) { SArray* vgInfo = pOutputInfo->shuffleDispatcher.dbInfo.pVgroupInfos; int32_t numOfVgs = taosArrayGetSize(vgInfo); @@ -533,7 +539,7 @@ void doSendCheckMsg(SStreamTask* pTask, SDownstreamStatusInfo* p) { stDebug("s-task:%s (vgId:%d) stage:%" PRId64 " re-send check downstream task:0x%x(vgId:%d) (shuffle), idx:%d reqId:0x%" PRIx64, id, pTask->info.nodeId, req.stage, req.downstreamTaskId, req.downstreamNodeId, i, p->reqId); - streamSendCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); + (void) streamSendCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); break; } } @@ -556,12 +562,12 @@ void getCheckRspStatus(STaskCheckInfo* pInfo, int64_t el, int32_t* numOfReady, i if (p->rspTs == 0) { // not 
response yet ASSERT(p->status == -1); if (el >= CHECK_NOT_RSP_DURATION) { // not receive info for 10 sec. - taosArrayPush(pTimeoutList, &p->taskId); + (void) taosArrayPush(pTimeoutList, &p->taskId); } else { // el < CHECK_NOT_RSP_DURATION (*numOfNotRsp) += 1; // do nothing and continue waiting for their rsp } } else { - taosArrayPush(pNotReadyList, &p->taskId); + (void) taosArrayPush(pNotReadyList, &p->taskId); } } } @@ -676,7 +682,7 @@ void rspMonitorFn(void* param, void* tmrId) { // not record the failed of the current task if try to close current vnode // otherwise, the put of message operation may incur invalid read of message queue. if (!pMeta->closeFlag) { - addDownstreamFailedStatusResultAsync(pTask->pMsgCb, vgId, pTask->id.streamId, pTask->id.taskId); + (void) addDownstreamFailedStatusResultAsync(pTask->pMsgCb, vgId, pTask->id.streamId, pTask->id.taskId); } streamMetaReleaseTask(pMeta, pTask); @@ -692,14 +698,14 @@ void rspMonitorFn(void* param, void* tmrId) { return; } - taosThreadMutexLock(&pInfo->checkInfoLock); + streamMutexLock(&pInfo->checkInfoLock); if (pInfo->notReadyTasks == 0) { int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s status:%s vgId:%d all downstream ready, quit from monitor rsp tmr, ref:%d", id, pStat.name, vgId, ref); streamTaskCompleteCheckRsp(pInfo, false, id); - taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamMutexUnlock(&pInfo->checkInfoLock); streamMetaReleaseTask(pMeta, pTask); return; } @@ -723,7 +729,7 @@ void rspMonitorFn(void* param, void* tmrId) { id, pStat.name, vgId, total, numOfNotRsp, numOfNotReady, numOfFault, numOfTimeout, numOfReady, ref); streamTaskCompleteCheckRsp(pInfo, false, id); - taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamMutexUnlock(&pInfo->checkInfoLock); streamMetaReleaseTask(pMeta, pTask); taosArrayDestroy(pNotReadyList); @@ -743,9 +749,9 @@ void rspMonitorFn(void* param, void* tmrId) { id, pStat.name, vgId, total, numOfNotRsp, numOfNotReady, numOfFault, 
numOfTimeout, numOfReady, ref); streamTaskCompleteCheckRsp(pInfo, false, id); - taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamMutexUnlock(&pInfo->checkInfoLock); - addDownstreamFailedStatusResultAsync(pTask->pMsgCb, vgId, pTask->id.streamId, pTask->id.taskId); + (void) addDownstreamFailedStatusResultAsync(pTask->pMsgCb, vgId, pTask->id.streamId, pTask->id.taskId); streamMetaReleaseTask(pMeta, pTask); taosArrayDestroy(pNotReadyList); @@ -761,8 +767,8 @@ void rspMonitorFn(void* param, void* tmrId) { handleTimeoutDownstreamTasks(pTask, pTimeoutList); } - taosTmrReset(rspMonitorFn, CHECK_RSP_CHECK_INTERVAL, pTask, streamTimer, &pInfo->checkRspTmr); - taosThreadMutexUnlock(&pInfo->checkInfoLock); + streamTmrReset(rspMonitorFn, CHECK_RSP_CHECK_INTERVAL, pTask, streamTimer, &pInfo->checkRspTmr, vgId, "check-status-monitor"); + streamMutexUnlock(&pInfo->checkInfoLock); stDebug( "s-task:%s vgId:%d continue checking rsp in 300ms, total:%d, notRsp:%d, notReady:%d, fault:%d, timeout:%d, " diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 8de923e900..60019977cc 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -77,6 +77,7 @@ int32_t createChkptTriggerBlock(SStreamTask* pTask, int32_t checkpointType, int6 } // this message must be put into inputq successfully, continue retrying until it succeeds +// todo must be success int32_t appendCheckpointIntoInputQ(SStreamTask* pTask, int32_t checkpointType, int64_t checkpointId, int32_t transId, int32_t srcTaskId) { SStreamDataBlock* pCheckpoint = NULL; @@ -119,8 +120,8 @@ int32_t streamTaskProcessCheckpointTriggerRsp(SStreamTask* pTask, SCheckpointTri return TSDB_CODE_SUCCESS; } - appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER, pRsp->checkpointId, pRsp->transId, - pRsp->upstreamTaskId); + (void)appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT_TRIGGER, pRsp->checkpointId, pRsp->transId, + 
pRsp->upstreamTaskId); return TSDB_CODE_SUCCESS; } @@ -164,7 +165,7 @@ int32_t continueDispatchCheckpointTriggerBlock(SStreamDataBlock* pBlock, SStream int32_t code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock); if (code == 0) { ASSERT(pTask->chkInfo.pActiveInfo->dispatchTrigger == false); - streamDispatchStreamBlock(pTask); + code = streamDispatchStreamBlock(pTask); } else { stError("s-task:%s failed to put checkpoint into outputQ, code:%s", pTask->id.idStr, tstrerror(code)); streamFreeQitem((SStreamQueueItem*)pBlock); @@ -184,12 +185,12 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); if (pTask->chkInfo.checkpointId > checkpointId) { stError("s-task:%s vgId:%d current checkpointId:%" PRId64 " recv expired checkpoint-trigger block, checkpointId:%" PRId64 " transId:%d, discard", id, vgId, pTask->chkInfo.checkpointId, checkpointId, transId); - code = taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamFreeQitem((SStreamQueueItem*)pBlock); return code; @@ -199,13 +200,16 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock { // send checkpoint-ready msg to upstream SRpcMsg msg = {0}; SStreamUpstreamEpInfo* pInfo = NULL; - code = streamTaskGetUpstreamTaskEpInfo(pTask, pBlock->srcTaskId, &pInfo); - if (code != TSDB_CODE_SUCCESS) { - return code; + streamTaskGetUpstreamTaskEpInfo(pTask, pBlock->srcTaskId, &pInfo); + if (pInfo == NULL) { + streamMutexUnlock(&pTask->lock); + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } - initCheckpointReadyMsg(pTask, pInfo->nodeId, pBlock->srcTaskId, pInfo->childId, checkpointId, &msg); - tmsgSendReq(&pInfo->epSet, &msg); + code = initCheckpointReadyMsg(pTask, pInfo->nodeId, pBlock->srcTaskId, pInfo->childId, checkpointId, &msg); + if (code == TSDB_CODE_SUCCESS) { + (void)tmsgSendReq(&pInfo->epSet, 
&msg); + } } stWarn( @@ -214,7 +218,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock id, vgId, pBlock->srcTaskId); streamTaskOpenUpstreamInput(pTask, pBlock->srcTaskId); - code = taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamFreeQitem((SStreamQueueItem*)pBlock); return code; @@ -225,7 +229,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock stError("s-task:%s vgId:%d active checkpointId:%" PRId64 ", recv invalid checkpoint-trigger checkpointId:%" PRId64 " discard", id, vgId, pActiveInfo->activeId, checkpointId); - code = taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamFreeQitem((SStreamQueueItem*)pBlock); return code; @@ -235,7 +239,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock "s-task:%s vgId:%d all upstream checkpoint-trigger recv, discard this checkpoint-trigger, " "checkpointId:%" PRId64 " transId:%d", id, vgId, checkpointId, transId); - code = taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamFreeQitem((SStreamQueueItem*)pBlock); return code; } @@ -250,7 +254,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock ", prev recvTs:%" PRId64 " discard", pTask->id.idStr, p->upstreamTaskId, p->upstreamNodeId, p->checkpointId, p->recvTs); - code = taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamFreeQitem((SStreamQueueItem*)pBlock); return code; } @@ -259,7 +263,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock } } - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); stDebug("s-task:%s vgId:%d start to handle the checkpoint-trigger block, checkpointId:%" PRId64 " ver:%" PRId64 ", transId:%d current active checkpointId:%" PRId64, @@ -284,7 +288,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock if 
(pActiveInfo->pChkptTriggerTmr == NULL) { pActiveInfo->pChkptTriggerTmr = taosTmrStart(checkpointTriggerMonitorFn, 100, pTask, streamTimer); } else { - taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr); + streamTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr, vgId, "trigger-recv-monitor"); } } @@ -300,9 +304,10 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) { stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); - continueDispatchCheckpointTriggerBlock(pBlock, pTask); + (void)continueDispatchCheckpointTriggerBlock(pBlock, pTask); // todo handle this failure } else { // only one task exists, no need to dispatch downstream info - appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pActiveInfo->activeId, pActiveInfo->transId, -1); + (void)appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, pActiveInfo->activeId, pActiveInfo->transId, + -1); streamFreeQitem((SStreamQueueItem*)pBlock); } } else if (taskLevel == TASK_LEVEL__SINK || taskLevel == TASK_LEVEL__AGG) { @@ -312,8 +317,9 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock pTask->execInfo.checkpoint += 1; } + // todo: handle this // update the child Id for downstream tasks - streamAddCheckpointReadyMsg(pTask, pBlock->srcTaskId, pTask->info.selfChildId, checkpointId); + (void) streamAddCheckpointReadyMsg(pTask, pBlock->srcTaskId, pTask->info.selfChildId, checkpointId); // there are still some upstream tasks not send checkpoint request, do nothing and wait for then if (pActiveInfo->allUpstreamTriggerRecv != 1) { @@ -325,7 +331,7 @@ int32_t streamProcessCheckpointTriggerBlock(SStreamTask* pTask, SStreamDataBlock if (taskLevel == TASK_LEVEL__SINK) { stDebug("s-task:%s process checkpoint-trigger block, 
all %d upstreams sent, send ready msg to upstream", id, num); streamFreeQitem((SStreamQueueItem*)pBlock); - streamTaskBuildCheckpoint(pTask); + (void)streamTaskBuildCheckpoint(pTask); // todo: not handle error yet } else { // source & agg tasks need to forward the checkpoint msg downwards stDebug("s-task:%s process checkpoint-trigger block, all %d upstreams sent, forwards to downstream", id, num); @@ -369,7 +375,7 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId return -1; } - (void) taosThreadMutexLock(&pInfo->lock); + streamMutexLock(&pInfo->lock); // only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task int32_t size = taosArrayGetSize(pInfo->pCheckpointReadyRecvList); @@ -391,16 +397,16 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask, int64_t checkpointId .transId = pInfo->transId, .streamId = pTask->id.streamId, .downstreamNodeId = downstreamNodeId}; - taosArrayPush(pInfo->pCheckpointReadyRecvList, &info); + (void)taosArrayPush(pInfo->pCheckpointReadyRecvList, &info); } int32_t notReady = total - taosArrayGetSize(pInfo->pCheckpointReadyRecvList); int32_t transId = pInfo->transId; - (void) taosThreadMutexUnlock(&pInfo->lock); + streamMutexUnlock(&pInfo->lock); if (notReady == 0) { stDebug("s-task:%s all downstream tasks have completed build checkpoint, do checkpoint for current task", id); - appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, checkpointId, transId, -1); + (void)appendCheckpointIntoInputQ(pTask, STREAM_INPUT__CHECKPOINT, checkpointId, transId, -1); } return 0; @@ -411,7 +417,7 @@ int32_t streamTaskProcessCheckpointReadyRsp(SStreamTask* pTask, int32_t upstream int64_t now = taosGetTimestampMs(); int32_t numOfConfirmed = 0; - (void) taosThreadMutexLock(&pInfo->lock); + streamMutexLock(&pInfo->lock); for (int32_t i = 0; i < taosArrayGetSize(pInfo->pReadyMsgList); ++i) { STaskCheckpointReadyInfo* pReadyInfo = 
taosArrayGet(pInfo->pReadyMsgList, i); if (pReadyInfo->upstreamTaskId == upstreamTaskId && pReadyInfo->checkpointId == checkpointId) { @@ -432,7 +438,7 @@ int32_t streamTaskProcessCheckpointReadyRsp(SStreamTask* pTask, int32_t upstream stDebug("s-task:%s send checkpoint-ready msg to %d upstream confirmed, checkpointId:%" PRId64, pTask->id.idStr, numOfConfirmed, checkpointId); - (void) taosThreadMutexUnlock(&pInfo->lock); + streamMutexUnlock(&pInfo->lock); return TSDB_CODE_SUCCESS; } @@ -440,12 +446,12 @@ void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { pTask->chkInfo.startTs = 0; // clear the recorded start time streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks - (void) taosThreadMutexLock(&pTask->chkInfo.pActiveInfo->lock); + streamMutexLock(&pTask->chkInfo.pActiveInfo->lock); streamTaskClearActiveInfo(pTask->chkInfo.pActiveInfo); if (clearChkpReadyMsg) { streamClearChkptReadyMsg(pTask->chkInfo.pActiveInfo); } - (void) taosThreadMutexUnlock(&pTask->chkInfo.pActiveInfo->lock); + streamMutexUnlock(&pTask->chkInfo.pActiveInfo->lock); } int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, bool restored, SVUpdateCheckpointInfoReq* pReq) { @@ -455,7 +461,7 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, bool restored, SV const char* id = pTask->id.idStr; SCheckpointInfo* pInfo = &pTask->chkInfo; - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); if (pReq->checkpointId <= pInfo->checkpointId) { stDebug("s-task:%s vgId:%d latest checkpointId:%" PRId64 " checkpointVer:%" PRId64 @@ -463,7 +469,7 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, bool restored, SV " transId:%d ignored", id, vgId, pInfo->checkpointId, pInfo->checkpointVer, pReq->checkpointId, pReq->checkpointVer, pReq->transId); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); { // destroy the related fill-history tasks // drop task should not in the 
meta-lock, and drop the related fill-history task now @@ -525,13 +531,14 @@ int32_t streamTaskUpdateTaskCheckpointInfo(SStreamTask* pTask, bool restored, SV pTask->status.taskStatus = TASK_STATUS__READY; code = streamMetaSaveTask(pMeta, pTask); + streamMutexUnlock(&pTask->lock); + if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s vgId:%d failed to save task info after do checkpoint, checkpointId:%" PRId64 ", since %s", id, vgId, pReq->checkpointId, terrstr()); return code; } - (void) taosThreadMutexUnlock(&pTask->lock); streamMetaWUnLock(pMeta); // drop task should not in the meta-lock, and drop the related fill-history task now @@ -730,9 +737,9 @@ int32_t streamTaskBuildCheckpoint(SStreamTask* pTask) { code = streamSendChkptReportMsg(pTask, &pTask->chkInfo, dropRelHTask); } } else { // clear the checkpoint info if failed - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); streamTaskSetFailedCheckpointId(pTask); // set failed checkpoint id before clear the checkpoint info - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); code = streamTaskHandleEvent(pTask->status.pSM, TASK_EVENT_CHECKPOINT_DONE); stDebug("s-task:%s clear checkpoint flag since gen checkpoint failed, checkpointId:%" PRId64, id, ckId); @@ -763,20 +770,20 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { } if (++pActiveInfo->checkCounter < 100) { - taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr); + streamTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr, vgId, "trigger-recv-monitor"); return; } pActiveInfo->checkCounter = 0; stDebug("s-task:%s vgId:%d checkpoint-trigger monitor in tmr, ts:%" PRId64, id, vgId, now); - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); SStreamTaskState pState = streamTaskGetStatus(pTask); if (pState.state != TASK_STATUS__CK) { int32_t ref = 
atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s vgId:%d not in checkpoint status, quit from monitor checkpoint-trigger, ref:%d", id, vgId, ref); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamMetaReleaseTask(pTask->pMeta, pTask); return; } @@ -787,14 +794,14 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { stDebug("s-task:%s vgId:%d all checkpoint-trigger recv, quit from monitor checkpoint-trigger, ref:%d", id, vgId, ref); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamMetaReleaseTask(pTask->pMeta, pTask); return; } - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); - (void) taosThreadMutexLock(&pActiveInfo->lock); + streamMutexLock(&pActiveInfo->lock); // send msg to retrieve checkpoint trigger msg SArray* pList = pTask->upstreamInfo.pList; @@ -820,19 +827,19 @@ void checkpointTriggerMonitorFn(void* param, void* tmrId) { if (!recved) { // make sure the inputQ is opened for not recv upstream checkpoint-trigger message streamTaskOpenUpstreamInput(pTask, pInfo->taskId); - taosArrayPush(pNotSendList, pInfo); + (void)taosArrayPush(pNotSendList, pInfo); } } // do send retrieve checkpoint trigger msg to upstream int32_t size = taosArrayGetSize(pNotSendList); - doSendRetrieveTriggerMsg(pTask, pNotSendList); - (void) taosThreadMutexUnlock(&pActiveInfo->lock); + (void)doSendRetrieveTriggerMsg(pTask, pNotSendList); + streamMutexUnlock(&pActiveInfo->lock); // check every 100ms if (size > 0) { stDebug("s-task:%s start to monitor checkpoint-trigger in 10s", id); - taosTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr); + streamTmrReset(checkpointTriggerMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pChkptTriggerTmr, vgId, "trigger-recv-monitor"); } else { int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s all checkpoint-trigger recved, quit from monitor 
checkpoint-trigger tmr, ref:%d", id, ref); @@ -863,7 +870,7 @@ int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList) { SRetrieveChkptTriggerReq* pReq = rpcMallocCont(sizeof(SRetrieveChkptTriggerReq)); if (pReq == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; stError("vgId:%d failed to create msg to retrieve trigger msg for task:%s exec, code:out of memory", vgId, pId); continue; } @@ -880,11 +887,16 @@ int32_t doSendRetrieveTriggerMsg(SStreamTask* pTask, SArray* pNotSendList) { initRpcMsg(&rpcMsg, TDMT_STREAM_RETRIEVE_TRIGGER, pReq, sizeof(SRetrieveChkptTriggerReq)); code = tmsgSendReq(&pUpstreamTask->epSet, &rpcMsg); - stDebug("s-task:%s vgId:%d send checkpoint-trigger retrieve msg to 0x%x(vgId:%d) checkpointId:%" PRId64, pId, vgId, - pUpstreamTask->taskId, pUpstreamTask->nodeId, checkpointId); + if (code == TSDB_CODE_SUCCESS) { + stDebug("s-task:%s vgId:%d send checkpoint-trigger retrieve msg to 0x%x(vgId:%d) checkpointId:%" PRId64, pId, + vgId, pUpstreamTask->taskId, pUpstreamTask->nodeId, checkpointId); + } else { + stError("s-task:%s vgId:%d failed to send checkpoint-trigger retrieve msg to 0x%x(vgId:%d) checkpointId:%" PRId64, + pId, vgId, pUpstreamTask->taskId, pUpstreamTask->nodeId, checkpointId); + } } - return TSDB_CODE_SUCCESS; + return code; } bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId) { @@ -897,9 +909,9 @@ bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId) return false; } - (void) taosThreadMutexLock(&pInfo->lock); + streamMutexLock(&pInfo->lock); if (!pInfo->dispatchTrigger) { - (void) taosThreadMutexUnlock(&pInfo->lock); + streamMutexUnlock(&pInfo->lock); /* release the active-info lock acquired above */ return false; } @@ -921,7 +933,7 @@ bool streamTaskAlreadySendTrigger(SStreamTask* pTask, int32_t downstreamNodeId) id, pSendInfo->sendTs, before, pInfo->activeId, pInfo->transId); } - (void) taosThreadMutexUnlock(&pInfo->lock); + streamMutexUnlock(&pInfo->lock); /* release the active-info lock acquired above */ return true; } @@
-945,7 +957,7 @@ void streamTaskInitTriggerDispatchInfo(SStreamTask* pTask) { SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; int64_t now = taosGetTimestampMs(); - (void) taosThreadMutexLock(&pInfo->lock); + streamMutexLock(&pInfo->lock); // outputQ should be empty here ASSERT(streamQueueGetNumOfUnAccessedItems(pTask->outputq.queue) == 0); @@ -971,21 +983,21 @@ void streamTaskInitTriggerDispatchInfo(SStreamTask* pTask) { } } - (void) taosThreadMutexUnlock(&pInfo->lock); + streamMutexUnlock(&pInfo->lock); } int32_t streamTaskGetNumOfConfirmed(SStreamTask* pTask) { SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; int32_t num = 0; - (void) taosThreadMutexLock(&pInfo->lock); + streamMutexLock(&pInfo->lock); for (int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { STaskTriggerSendInfo* p = taosArrayGet(pInfo->pDispatchTriggerList, i); if (p->recved) { num++; } } - (void) taosThreadMutexUnlock(&pInfo->lock); + streamMutexUnlock(&pInfo->lock); return num; } @@ -993,7 +1005,7 @@ void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId) { SActiveCheckpointInfo* pInfo = pTask->chkInfo.pActiveInfo; int32_t taskId = 0; - (void) taosThreadMutexLock(&pInfo->lock); + streamMutexLock(&pInfo->lock); for (int32_t i = 0; i < taosArrayGetSize(pInfo->pDispatchTriggerList); ++i) { STaskTriggerSendInfo* p = taosArrayGet(pInfo->pDispatchTriggerList, i); @@ -1007,7 +1019,7 @@ void streamTaskSetTriggerDispatchConfirmed(SStreamTask* pTask, int32_t vgId) { } } - (void) taosThreadMutexUnlock(&pInfo->lock); + streamMutexUnlock(&pInfo->lock); int32_t numOfConfirmed = streamTaskGetNumOfConfirmed(pTask); int32_t total = streamTaskGetNumOfDownstream(pTask); @@ -1184,16 +1196,16 @@ int32_t deleteCheckpointFile(const char* id, const char* name) { int32_t streamTaskSendRestoreChkptMsg(SStreamTask* pTask) { const char* id = pTask->id.idStr; - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); if 
(pTask->status.sendConsensusChkptId == true) { stDebug("s-task:%s already start to consensus-checkpointId, not start again before it completed", id); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return TSDB_CODE_SUCCESS; } else { pTask->status.sendConsensusChkptId = true; } - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); ASSERT(pTask->pBackend == NULL); pTask->status.requireConsensusChkptId = true; @@ -1207,12 +1219,12 @@ int32_t streamTaskSendCheckpointsourceRsp(SStreamTask* pTask) { return code; } - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); SStreamTaskState p = streamTaskGetStatus(pTask); if (p.state == TASK_STATUS__CK) { code = streamTaskSendCheckpointSourceRsp(pTask); } - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return code; } \ No newline at end of file diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index af4946cf81..c0ee503f77 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -49,7 +49,7 @@ int32_t createStreamBlockFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t pInput = p; } - blockDecode(pDataBlock, pInput); + (void) blockDecode(pDataBlock, pInput); if (pRetrieve->compressed && compLen < fullLen) { taosMemoryFree(pInput); @@ -116,11 +116,11 @@ int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock return terrno; } - taosArrayPush(pArray, &(SSDataBlock){0}); + (void) taosArrayPush(pArray, &(SSDataBlock){0}); SRetrieveTableRsp* pRetrieve = pReq->pRetrieve; SSDataBlock* pDataBlock = taosArrayGet(pArray, 0); - blockDecode(pDataBlock, pRetrieve->data + PAYLOAD_PREFIX_LEN); + (void) blockDecode(pDataBlock, pRetrieve->data + PAYLOAD_PREFIX_LEN); // TODO: refactor pDataBlock->info.window.skey = be64toh(pRetrieve->skey); @@ -156,28 +156,31 @@ void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit) { 
taosFreeQitem(pDataSubmit); } -SStreamMergedSubmit* streamMergedSubmitNew() { - SStreamMergedSubmit* pMerged; +int32_t streamMergedSubmitNew(SStreamMergedSubmit** pSubmit) { + *pSubmit = NULL; - int32_t code = taosAllocateQitem(sizeof(SStreamMergedSubmit), DEF_QITEM, 0, (void**)&pMerged); + int32_t code = taosAllocateQitem(sizeof(SStreamMergedSubmit), DEF_QITEM, 0, (void**)pSubmit); if (code) { - terrno = code; - return NULL; + return TSDB_CODE_OUT_OF_MEMORY; } - pMerged->submits = taosArrayInit(0, sizeof(SPackedData)); - if (pMerged->submits == NULL) { - taosArrayDestroy(pMerged->submits); - taosFreeQitem(pMerged); - return NULL; + (*pSubmit)->submits = taosArrayInit(0, sizeof(SPackedData)); + if ((*pSubmit)->submits == NULL) { + taosFreeQitem(*pSubmit); + *pSubmit = NULL; + return TSDB_CODE_OUT_OF_MEMORY; } - pMerged->type = STREAM_INPUT__MERGED_SUBMIT; - return pMerged; + (*pSubmit)->type = STREAM_INPUT__MERGED_SUBMIT; + return TSDB_CODE_SUCCESS; } int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubmit) { - taosArrayPush(pMerged->submits, &pSubmit->submit); + void* p = taosArrayPush(pMerged->submits, &pSubmit->submit); + if (p == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + if (pSubmit->ver > pMerged->ver) { pMerged->ver = pSubmit->ver; } @@ -187,11 +190,12 @@ int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubm // todo handle memory error int32_t streamQueueMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem, SStreamQueueItem** pRes) { *pRes = NULL; + int32_t code = 0; if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) { SStreamDataBlock* pBlock = (SStreamDataBlock*)dst; SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)pElem; - taosArrayAddAll(pBlock->blocks, pBlockSrc->blocks); + (void) taosArrayAddAll(pBlock->blocks, pBlockSrc->blocks); taosArrayDestroy(pBlockSrc->blocks); streamQueueItemIncSize(dst, streamQueueItemGetSize(pElem)); @@ -201,33 +205,38 @@ 
int32_t streamQueueMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem } else if (dst->type == STREAM_INPUT__MERGED_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)dst; SStreamDataSubmit* pBlockSrc = (SStreamDataSubmit*)pElem; - streamMergeSubmit(pMerged, pBlockSrc); + + code = streamMergeSubmit(pMerged, pBlockSrc); streamQueueItemIncSize(dst, streamQueueItemGetSize(pElem)); taosFreeQitem(pElem); *pRes = dst; *pRes = dst; - return TSDB_CODE_SUCCESS; + return code; } else if (dst->type == STREAM_INPUT__DATA_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { - SStreamMergedSubmit* pMerged = streamMergedSubmitNew(); - if (pMerged == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; + SStreamMergedSubmit* pMerged = NULL; + code = streamMergedSubmitNew(&pMerged); + if (code != 0) { + return code; } streamQueueItemIncSize((SStreamQueueItem*)pMerged, streamQueueItemGetSize(pElem)); - streamMergeSubmit(pMerged, (SStreamDataSubmit*)dst); - streamMergeSubmit(pMerged, (SStreamDataSubmit*)pElem); + code = streamMergeSubmit(pMerged, (SStreamDataSubmit*)dst); + if (code == 0) { + code = streamMergeSubmit(pMerged, (SStreamDataSubmit*)pElem); + } taosFreeQitem(dst); taosFreeQitem(pElem); *pRes = (SStreamQueueItem*)pMerged; - return TSDB_CODE_SUCCESS; + return code; } else { + code = TSDB_CODE_FAILED; stDebug("block type:%s not merged with existed blocks list, type:%d", streamQueueItemGetTypeStr(pElem->type), dst->type); - return TSDB_CODE_FAILED; + return code; } } diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 9e4b6bc09d..4e128ace54 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -121,7 +121,7 @@ int32_t streamTaskBroadcastRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* r void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); SEncoder encoder; tEncoderInit(&encoder, abuf, len); - 
tEncodeStreamRetrieveReq(&encoder, req); + (void) tEncodeStreamRetrieveReq(&encoder, req); tEncoderClear(&encoder); SRpcMsg rpcMsg = {0}; @@ -189,6 +189,7 @@ int32_t streamBroadcastToUpTasks(SStreamTask* pTask, const SSDataBlock* pBlock) return code; } +// no need to do anything if failed int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet) { void* buf = NULL; int32_t code = -1; @@ -202,7 +203,7 @@ int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, buf = rpcMallocCont(sizeof(SMsgHead) + tlen); if (buf == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } ((SMsgHead*)buf)->vgId = htonl(nodeId); @@ -220,8 +221,7 @@ int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, stDebug("s-task:%s (level:%d) send check msg to s-task:0x%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr, pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId); - tmsgSendReq(pEpSet, &msg); - return 0; + return tmsgSendReq(pEpSet, &msg); } void destroyDispatchMsg(SStreamDispatchReq* pReq, int32_t numOfVgroups) { @@ -239,7 +239,7 @@ void clearBufferedDispatchMsg(SStreamTask* pTask) { destroyDispatchMsg(pMsgInfo->pData, streamTaskGetNumOfDownstream(pTask)); } - taosThreadMutexLock(&pMsgInfo->lock); + streamMutexLock(&pMsgInfo->lock); pMsgInfo->checkpointId = -1; pMsgInfo->transId = -1; @@ -249,7 +249,7 @@ void clearBufferedDispatchMsg(SStreamTask* pTask) { clearDispatchInfo(pMsgInfo); taosArrayClear(pTask->msgInfo.pSendInfo); - taosThreadMutexUnlock(&pMsgInfo->lock); + streamMutexUnlock(&pMsgInfo->lock); } static SStreamDispatchReq* createDispatchDataReq(SStreamTask* pTask, const SStreamDataBlock* pData) { @@ -424,9 +424,9 @@ static int32_t sendDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pDispatch } static void setNotInDispatchMonitor(SDispatchMsgInfo* pMsgInfo) { - taosThreadMutexLock(&pMsgInfo->lock); + streamMutexLock(&pMsgInfo->lock); pMsgInfo->inMonitor = 0; - 
taosThreadMutexUnlock(&pMsgInfo->lock); + streamMutexUnlock(&pMsgInfo->lock); } static void setResendInfo(SDispatchEntry* pEntry, int64_t now) { @@ -440,13 +440,13 @@ static void addDispatchEntry(SDispatchMsgInfo* pMsgInfo, int32_t nodeId, int64_t SDispatchEntry entry = {.nodeId = nodeId, .rspTs = -1, .status = 0, .sendTs = now}; if (lock) { - taosThreadMutexLock(&pMsgInfo->lock); + streamMutexLock(&pMsgInfo->lock); } - taosArrayPush(pMsgInfo->pSendInfo, &entry); + (void) taosArrayPush(pMsgInfo->pSendInfo, &entry); if (lock) { - taosThreadMutexUnlock(&pMsgInfo->lock); + streamMutexUnlock(&pMsgInfo->lock); } } @@ -496,16 +496,16 @@ static void doMonitorDispatchData(void* param, void* tmrId) { return; } - taosThreadMutexLock(&pMsgInfo->lock); + streamMutexLock(&pMsgInfo->lock); if (pTask->outputq.status == TASK_OUTPUT_STATUS__NORMAL) { int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s not in dispatch procedure, abort from timer, ref:%d", pTask->id.idStr, ref); pTask->msgInfo.inMonitor = 0; - taosThreadMutexUnlock(&pMsgInfo->lock); + streamMutexUnlock(&pMsgInfo->lock); return; } - taosThreadMutexUnlock(&pMsgInfo->lock); + streamMutexUnlock(&pMsgInfo->lock); int32_t numOfFailed = getFailedDispatchInfo(pMsgInfo, now); if (numOfFailed == 0) { @@ -577,8 +577,9 @@ static void doMonitorDispatchData(void* param, void* tmrId) { } void streamStartMonitorDispatchData(SStreamTask* pTask, int64_t waitDuration) { + int32_t vgId = pTask->pMeta->vgId; if (pTask->msgInfo.pRetryTmr != NULL) { - taosTmrReset(doMonitorDispatchData, waitDuration, pTask, streamTimer, &pTask->msgInfo.pRetryTmr); + streamTmrReset(doMonitorDispatchData, waitDuration, pTask, streamTimer, &pTask->msgInfo.pRetryTmr, vgId, "dispatch-monitor-tmr"); } else { pTask->msgInfo.pRetryTmr = taosTmrStart(doMonitorDispatchData, waitDuration, pTask, streamTimer); } @@ -612,7 +613,7 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S } } } else { - 
buildCtbNameByGroupIdImpl(pTask->outputInfo.shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName); + (void) buildCtbNameByGroupIdImpl(pTask->outputInfo.shuffleDispatcher.stbFullName, groupId, pDataBlock->info.parTbName); } snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->outputInfo.shuffleDispatcher.dbInfo.db, @@ -624,8 +625,10 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S SBlockName bln = {0}; bln.hashValue = hashValue; memcpy(bln.parTbName, pDataBlock->info.parTbName, strlen(pDataBlock->info.parTbName)); + + // failed to put into name buffer, no need to do anything if (tSimpleHashGetSize(pTask->pNameMap) < MAX_BLOCK_NAME_NUM) { - tSimpleHashPut(pTask->pNameMap, &groupId, sizeof(int64_t), &bln, sizeof(SBlockName)); + (void) tSimpleHashPut(pTask->pNameMap, &groupId, sizeof(int64_t), &bln, sizeof(SBlockName)); } } @@ -633,14 +636,14 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S int32_t numOfVgroups = taosArrayGetSize(vgInfo); // TODO: optimize search - taosThreadMutexLock(&pTask->msgInfo.lock); + streamMutexLock(&pTask->msgInfo.lock); for (int32_t j = 0; j < numOfVgroups; j++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j); if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) { if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { - taosThreadMutexUnlock(&pTask->msgInfo.lock); + streamMutexUnlock(&pTask->msgInfo.lock); return -1; } @@ -655,7 +658,7 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S } } - taosThreadMutexUnlock(&pTask->msgInfo.lock); + streamMutexUnlock(&pTask->msgInfo.lock); ASSERT(found); return 0; } @@ -690,7 +693,8 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { ASSERT(pTask->msgInfo.pData == NULL); stDebug("s-task:%s start to dispatch msg, set output status:%d", id, pTask->outputq.status); - SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputq.queue); + SStreamDataBlock* 
pBlock = NULL; + streamQueueNextItem(pTask->outputq.queue, (SStreamQueueItem**)&pBlock); if (pBlock == NULL) { atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); stDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", id, pTask->outputq.status); @@ -703,9 +707,9 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { pTask->execInfo.dispatch += 1; - taosThreadMutexLock(&pTask->msgInfo.lock); + streamMutexLock(&pTask->msgInfo.lock); initDispatchInfo(&pTask->msgInfo, pTask->execInfo.dispatch); - taosThreadMutexUnlock(&pTask->msgInfo.lock); + streamMutexUnlock(&pTask->msgInfo.lock); int32_t code = doBuildDispatchMsg(pTask, pBlock); if (code == 0) { @@ -719,7 +723,7 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { code = sendDispatchMsg(pTask, pTask->msgInfo.pData); - taosThreadMutexLock(&pTask->msgInfo.lock); + streamMutexLock(&pTask->msgInfo.lock); if (pTask->msgInfo.inMonitor == 0) { int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s start dispatch monitor tmr in %dms, ref:%d, dispatch code:%s", id, DISPATCH_RETRY_INTERVAL_MS, @@ -730,7 +734,7 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { stDebug("s-task:%s already in dispatch monitor tmr", id); } - taosThreadMutexUnlock(&pTask->msgInfo.lock); + streamMutexUnlock(&pTask->msgInfo.lock); // this block can not be deleted until it has been sent to downstream task successfully. 
return TSDB_CODE_SUCCESS; @@ -758,7 +762,7 @@ int32_t initCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamNodeId, int32 buf = rpcMallocCont(sizeof(SMsgHead) + tlen); if (buf == NULL) { - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } ((SMsgHead*)buf)->vgId = htonl(req.upstreamNodeId); @@ -791,33 +795,33 @@ static void checkpointReadyMsgSendMonitorFn(void* param, void* tmrId) { SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; if (++pActiveInfo->sendReadyCheckCounter < 100) { - taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + streamTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr, vgId, "chkpt-ready-monitor"); return; } pActiveInfo->sendReadyCheckCounter = 0; stDebug("s-task:%s in sending checkpoint-ready msg monitor timer", id); - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); SStreamTaskState pState = streamTaskGetStatus(pTask); if (pState.state != TASK_STATUS__CK) { int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s vgId:%d status:%s not in checkpoint, quit from monitor checkpoint-ready send, ref:%d", id, vgId, pState.name, ref); - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamMetaReleaseTask(pTask->pMeta, pTask); return; } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); - taosThreadMutexLock(&pActiveInfo->lock); + streamMutexLock(&pActiveInfo->lock); SArray* pList = pActiveInfo->pReadyMsgList; int32_t num = taosArrayGetSize(pList); // active checkpoint info is cleared for now if ((pActiveInfo->activeId == 0) && (pActiveInfo->transId == 0) && (num == 0) && (pTask->chkInfo.startTs == 0)) { - taosThreadMutexUnlock(&pActiveInfo->lock); + streamMutexUnlock(&pActiveInfo->lock); int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stWarn("s-task:%s vgId:%d active checkpoint may be cleared, quit from readyMsg send tmr, 
ref:%d", id, vgId, ref); @@ -835,7 +839,7 @@ static void checkpointReadyMsgSendMonitorFn(void* param, void* tmrId) { continue; } - taosArrayPush(pNotRspList, &pInfo->upstreamTaskId); + (void) taosArrayPush(pNotRspList, &pInfo->upstreamTaskId); stDebug("s-task:%s vgId:%d level:%d checkpoint-ready rsp from upstream:0x%x not confirmed yet", id, vgId, pTask->info.taskLevel, pInfo->upstreamTaskId); } @@ -852,17 +856,25 @@ static void checkpointReadyMsgSendMonitorFn(void* param, void* tmrId) { if (taskId == pReadyInfo->upstreamTaskId) { // send msg again SRpcMsg msg = {0}; - initCheckpointReadyMsg(pTask, pReadyInfo->upstreamNodeId, pReadyInfo->upstreamTaskId, pReadyInfo->childId, + int32_t code = initCheckpointReadyMsg(pTask, pReadyInfo->upstreamNodeId, pReadyInfo->upstreamTaskId, pReadyInfo->childId, checkpointId, &msg); - tmsgSendReq(&pReadyInfo->upstreamNodeEpset, &msg); - stDebug("s-task:%s level:%d checkpoint-ready msg sent to upstream:0x%x again", id, pTask->info.taskLevel, - pReadyInfo->upstreamTaskId); + if (code == TSDB_CODE_SUCCESS) { + code = tmsgSendReq(&pReadyInfo->upstreamNodeEpset, &msg); + if (code == TSDB_CODE_SUCCESS) { + stDebug("s-task:%s level:%d checkpoint-ready msg sent to upstream:0x%x again", id, pTask->info.taskLevel, + pReadyInfo->upstreamTaskId); + } else { + stError("s-task:%s failed to send checkpoint-ready msg, try nex time in 10s", id); + } + } else { + stError("s-task:%s failed to prepare the checkpoint-ready msg, try nex time in 10s", id); + } } } } - taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); - taosThreadMutexUnlock(&pActiveInfo->lock); + streamTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr, vgId, "chkpt-ready-monitor"); + streamMutexUnlock(&pActiveInfo->lock); } else { int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug( @@ -871,7 +883,7 @@ static void checkpointReadyMsgSendMonitorFn(void* param, void* 
tmrId) { id, vgId, ref); streamClearChkptReadyMsg(pActiveInfo); - taosThreadMutexUnlock(&pActiveInfo->lock); + streamMutexUnlock(&pActiveInfo->lock); streamMetaReleaseTask(pTask->pMeta, pTask); } @@ -883,9 +895,10 @@ int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; const char* id = pTask->id.idStr; + int32_t vgId = pTask->pMeta->vgId; SArray* pList = pActiveInfo->pReadyMsgList; - taosThreadMutexLock(&pActiveInfo->lock); + streamMutexLock(&pActiveInfo->lock); int32_t num = taosArrayGetSize(pList); ASSERT(taosArrayGetSize(pTask->upstreamInfo.pList) == num); @@ -894,15 +907,22 @@ int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { STaskCheckpointReadyInfo* pInfo = taosArrayGet(pList, i); SRpcMsg msg = {0}; - initCheckpointReadyMsg(pTask, pInfo->upstreamNodeId, pInfo->upstreamTaskId, pInfo->childId, pInfo->checkpointId, + int32_t code = initCheckpointReadyMsg(pTask, pInfo->upstreamNodeId, pInfo->upstreamTaskId, pInfo->childId, pInfo->checkpointId, &msg); - tmsgSendReq(&pInfo->upstreamNodeEpset, &msg); - - stDebug("s-task:%s level:%d checkpoint-ready msg sent to upstream:0x%x", id, pTask->info.taskLevel, - pInfo->upstreamTaskId); + if (code == TSDB_CODE_SUCCESS) { + code = tmsgSendReq(&pInfo->upstreamNodeEpset, &msg); + if (code == TSDB_CODE_SUCCESS) { + stDebug("s-task:%s level:%d checkpoint-ready msg sent to upstream:0x%x", id, pTask->info.taskLevel, + pInfo->upstreamTaskId); + } else { + stError("s-task:%s failed to send checkpoint-ready msg, try nex time in 10s", id); + } + } else { + stError("s-task:%s failed to prepare the checkpoint-ready msg, try nex time in 10s", id); + } } - taosThreadMutexUnlock(&pActiveInfo->lock); + streamMutexUnlock(&pActiveInfo->lock); stDebug("s-task:%s level:%d checkpoint-ready msg sent to all %d upstreams", id, pTask->info.taskLevel, num); // start to check if checkpoint ready msg has successfully received by upstream tasks. 
@@ -914,7 +934,7 @@ int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { if (pActiveInfo->pSendReadyMsgTmr == NULL) { pActiveInfo->pSendReadyMsgTmr = taosTmrStart(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer); } else { - taosTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr); + streamTmrReset(checkpointReadyMsgSendMonitorFn, 100, pTask, streamTimer, &pActiveInfo->pSendReadyMsgTmr, vgId, "chkpt-ready-monitor"); } } @@ -925,7 +945,7 @@ int32_t streamTaskSendCheckpointReadyMsg(SStreamTask* pTask) { int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) { SArray* pList = pTask->chkInfo.pActiveInfo->pReadyMsgList; - taosThreadMutexLock(&pTask->chkInfo.pActiveInfo->lock); + streamMutexLock(&pTask->chkInfo.pActiveInfo->lock); ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); if (taosArrayGetSize(pList) == 1) { @@ -940,7 +960,7 @@ int32_t streamTaskSendCheckpointSourceRsp(SStreamTask* pTask) { pTask->info.taskLevel); } - taosThreadMutexUnlock(&pTask->chkInfo.pActiveInfo->lock); + streamMutexUnlock(&pTask->chkInfo.pActiveInfo->lock); return TSDB_CODE_SUCCESS; } @@ -978,8 +998,8 @@ int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatch payloadLen += sizeof(SRetrieveTableRsp); - taosArrayPush(pReq->dataLen, &payloadLen); - taosArrayPush(pReq->data, &buf); + (void) taosArrayPush(pReq->dataLen, &payloadLen); + (void) taosArrayPush(pReq->data, &buf); pReq->totalLen += dataStrLen; return 0; @@ -1056,7 +1076,7 @@ int32_t streamTaskBuildCheckpointSourceRsp(SStreamCheckpointSourceReq* pReq, SRp void* abuf = POINTER_SHIFT(pBuf, sizeof(SMsgHead)); tEncoderInit(&encoder, (uint8_t*)abuf, len); - tEncodeStreamCheckpointSourceRsp(&encoder, &rsp); + (void) tEncodeStreamCheckpointSourceRsp(&encoder, &rsp); tEncoderClear(&encoder); initRpcMsg(pMsg, 0, pBuf, sizeof(SMsgHead) + len); @@ -1071,10 +1091,11 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa 
STaskCheckpointReadyInfo info = { .recvTs = taosGetTimestampMs(), .transId = pReq->transId, .checkpointId = pReq->checkpointId}; - streamTaskBuildCheckpointSourceRsp(pReq, pRpcInfo, &info.msg, TSDB_CODE_SUCCESS); + // todo retry until it success + (void) streamTaskBuildCheckpointSourceRsp(pReq, pRpcInfo, &info.msg, TSDB_CODE_SUCCESS); SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; - taosThreadMutexLock(&pActiveInfo->lock); + streamMutexLock(&pActiveInfo->lock); int32_t size = taosArrayGetSize(pActiveInfo->pReadyMsgList); if (size > 0) { @@ -1091,15 +1112,15 @@ int32_t streamAddCheckpointSourceRspMsg(SStreamCheckpointSourceReq* pReq, SRpcHa ASSERT(0); // failed to handle it } } else { - taosArrayPush(pActiveInfo->pReadyMsgList, &info); + (void) taosArrayPush(pActiveInfo->pReadyMsgList, &info); stDebug("s-task:%s add checkpoint source rsp msg, total:%d", pTask->id.idStr, size + 1); } - taosThreadMutexUnlock(&pActiveInfo->lock); + streamMutexUnlock(&pActiveInfo->lock); return TSDB_CODE_SUCCESS; } -int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t upstreamNodeId, int32_t upstreamTaskId, +void initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t upstreamNodeId, int32_t upstreamTaskId, int32_t childId, SEpSet* pEpset, int64_t checkpointId) { ASSERT(upstreamTaskId != 0); @@ -1109,8 +1130,6 @@ int32_t initCheckpointReadyInfo(STaskCheckpointReadyInfo* pReadyInfo, int32_t up pReadyInfo->recvTs = taosGetTimestampMs(); pReadyInfo->checkpointId = checkpointId; pReadyInfo->childId = childId; - - return TSDB_CODE_SUCCESS; } int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, int32_t index, int64_t checkpointId) { @@ -1119,9 +1138,9 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, } SStreamUpstreamEpInfo* pInfo = NULL; - int32_t code = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId, &pInfo); - if (code != TSDB_CODE_SUCCESS) { - return code; + 
streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId, &pInfo); + if (pInfo == NULL) { + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } STaskCheckpointReadyInfo info = {0}; @@ -1132,8 +1151,8 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, SActiveCheckpointInfo* pActiveInfo = pTask->chkInfo.pActiveInfo; - taosThreadMutexLock(&pActiveInfo->lock); - taosArrayPush(pActiveInfo->pReadyMsgList, &info); + streamMutexLock(&pActiveInfo->lock); + (void) taosArrayPush(pActiveInfo->pReadyMsgList, &info); int32_t numOfRecv = taosArrayGetSize(pActiveInfo->pReadyMsgList); int32_t total = streamTaskGetNumOfUpstream(pTask); @@ -1145,7 +1164,7 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, stDebug("s-task:%s %d/%d checkpoint-trigger recv", pTask->id.idStr, numOfRecv, total); } - taosThreadMutexUnlock(&pActiveInfo->lock); + streamMutexUnlock(&pActiveInfo->lock); return 0; } @@ -1187,10 +1206,8 @@ static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId if (delayDispatch) { return 0; } else { - streamDispatchStreamBlock(pTask); + return streamDispatchStreamBlock(pTask); } - - return 0; } static bool setDispatchRspInfo(SDispatchMsgInfo* pMsgInfo, int32_t vgId, int32_t code, int64_t now, int32_t* pNotRsp, const char* id) { @@ -1200,7 +1217,7 @@ static bool setDispatchRspInfo(SDispatchMsgInfo* pMsgInfo, int32_t vgId, int32_t bool allRsp = false; *pNotRsp = 0; - taosThreadMutexLock(&pMsgInfo->lock); + streamMutexLock(&pMsgInfo->lock); int32_t numOfDispatchBranch = taosArrayGetSize(pMsgInfo->pSendInfo); for(int32_t i = 0; i < numOfDispatchBranch; ++i) { @@ -1228,7 +1245,7 @@ static bool setDispatchRspInfo(SDispatchMsgInfo* pMsgInfo, int32_t vgId, int32_t *pNotRsp = numOfDispatchBranch - numOfRsp; allRsp = (numOfRsp == numOfDispatchBranch); - taosThreadMutexUnlock(&pMsgInfo->lock); + streamMutexUnlock(&pMsgInfo->lock); ASSERT(updated); return allRsp; @@ -1240,7 +1257,7 @@ bool 
isDispatchRspTimeout(SDispatchEntry* pEntry, int64_t now) { int32_t getFailedDispatchInfo(SDispatchMsgInfo* pMsgInfo, int64_t now) { int32_t numOfFailed = 0; - taosThreadMutexLock(&pMsgInfo->lock); + streamMutexLock(&pMsgInfo->lock); for (int32_t j = 0; j < taosArrayGetSize(pMsgInfo->pSendInfo); ++j) { SDispatchEntry* pEntry = taosArrayGet(pMsgInfo->pSendInfo, j); @@ -1248,7 +1265,7 @@ int32_t getFailedDispatchInfo(SDispatchMsgInfo* pMsgInfo, int64_t now) { numOfFailed += 1; } } - taosThreadMutexUnlock(&pMsgInfo->lock); + streamMutexUnlock(&pMsgInfo->lock); return numOfFailed; } @@ -1260,9 +1277,9 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i bool allRsp = false; int32_t notRsp = 0; - taosThreadMutexLock(&pMsgInfo->lock); + streamMutexLock(&pMsgInfo->lock); int32_t msgId = pMsgInfo->msgId; - taosThreadMutexUnlock(&pMsgInfo->lock); + streamMutexUnlock(&pMsgInfo->lock); // follower not handle the dispatch rsp if ((pTask->pMeta->role == NODE_ROLE_FOLLOWER) || (pTask->status.downstreamReady != 1)) { @@ -1315,7 +1332,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i { bool delayDispatch = (pMsgInfo->dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER); if (delayDispatch) { - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); // we only set the dispatch msg info for current checkpoint trans if (streamTaskGetStatus(pTask).state == TASK_STATUS__CK && pTask->chkInfo.pActiveInfo->activeId == pMsgInfo->checkpointId) { @@ -1329,7 +1346,7 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i " transId:%d discard, since expired", pTask->id.idStr, pMsgInfo->checkpointId, pMsgInfo->transId); } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); } } } @@ -1374,12 +1391,12 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i // now ready for next data output atomic_store_8(&pTask->outputq.status, 
TASK_OUTPUT_STATUS__NORMAL); } else { - handleDispatchSuccessRsp(pTask, pRsp->downstreamTaskId, pRsp->downstreamNodeId); + code = handleDispatchSuccessRsp(pTask, pRsp->downstreamTaskId, pRsp->downstreamNodeId); } } } - return 0; + return code; } static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void** pBuf) { @@ -1438,9 +1455,9 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen, pReq->msgId); SStreamUpstreamEpInfo* pInfo = NULL; - int32_t code = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId, &pInfo); - if (code != TSDB_CODE_SUCCESS) { - return code; + streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId, &pInfo); + if (pInfo == NULL) { + return TSDB_CODE_STREAM_TASK_NOT_EXIST; } if (pMeta->role == NODE_ROLE_FOLLOWER) { @@ -1474,7 +1491,7 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S { // do send response with the input status - code = buildDispatchRsp(pTask, pReq, status, &pRsp->pCont); + int32_t code = buildDispatchRsp(pTask, pReq, status, &pRsp->pCont); if (code != TSDB_CODE_SUCCESS) { stError("s-task:%s failed to build dispatch rsp, msgId:%d, code:%s", id, pReq->msgId, tstrerror(code)); return code; @@ -1484,6 +1501,5 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S tmsgSendRsp(pRsp); } - streamTrySchedExec(pTask); - return code; + return streamTrySchedExec(pTask); } diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 238db27d60..d222004fb7 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -24,7 +24,7 @@ #define FILL_HISTORY_TASK_EXEC_INTERVAL 5000 // 5 sec static int32_t streamTransferStateDoPrepare(SStreamTask* pTask); -static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* totalSize, int32_t* totalBlocks); 
+static void streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* totalSize, int32_t* totalBlocks); bool streamTaskShouldStop(const SStreamTask* pTask) { SStreamTaskState pState = streamTaskGetStatus(pTask); @@ -52,8 +52,9 @@ static int32_t doOutputResultBlockImpl(SStreamTask* pTask, SStreamDataBlock* pBl return code; } + // not handle error, if dispatch failed, try next time. // checkpoint trigger will be checked - streamDispatchStreamBlock(pTask); + (void) streamDispatchStreamBlock(pTask); } return code; @@ -90,7 +91,7 @@ static int32_t doDumpResult(SStreamTask* pTask, SStreamQueueItem* pItem, SArray* return code; } -int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* totalSize, int32_t* totalBlocks) { +void streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* totalSize, int32_t* totalBlocks) { int32_t code = TSDB_CODE_SUCCESS; void* pExecutor = pTask->exec.pExecutor; @@ -108,7 +109,7 @@ int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* if (streamTaskShouldStop(pTask)) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - return 0; + return; } SSDataBlock* output = NULL; @@ -128,11 +129,16 @@ int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* const SStreamDataBlock* pRetrieveBlock = (const SStreamDataBlock*)pItem; ASSERT(taosArrayGetSize(pRetrieveBlock->blocks) == 1); - assignOneDataBlock(&block, taosArrayGet(pRetrieveBlock->blocks, 0)); + (void) assignOneDataBlock(&block, taosArrayGet(pRetrieveBlock->blocks, 0)); block.info.type = STREAM_PULL_OVER; block.info.childId = pTask->info.selfChildId; - taosArrayPush(pRes, &block); - numOfBlocks += 1; + + void* p = taosArrayPush(pRes, &block); + if (p != NULL) { + numOfBlocks += 1; + } else { + stError("s-task:%s failed to add retrieve block", pTask->id.idStr); + } stDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64 " dump results", pTask->id.idStr, 
pTask->info.selfChildId, pRetrieveBlock->reqId); @@ -151,13 +157,21 @@ int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* } SSDataBlock block = {0}; - assignOneDataBlock(&block, output); + code = assignOneDataBlock(&block, output); + if (code) { + stError("s-task:%s failed to build result block due to out of memory", pTask->id.idStr); + continue; + } + block.info.childId = pTask->info.selfChildId; size += blockDataGetSize(output) + sizeof(SSDataBlock) + sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block); numOfBlocks += 1; - taosArrayPush(pRes, &block); + void* p = taosArrayPush(pRes, &block); + if (p == NULL) { + stError("s-task:%s failed to add computing results, the final res may be incorrect", pTask->id.idStr); + } stDebug("s-task:%s (child %d) executed and get %d result blocks, size:%.2fMiB", pTask->id.idStr, pTask->info.selfChildId, numOfBlocks, SIZE_IN_MiB(size)); @@ -166,8 +180,9 @@ int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* if (numOfBlocks >= STREAM_RESULT_DUMP_THRESHOLD || size >= STREAM_RESULT_DUMP_SIZE_THRESHOLD) { ASSERT(numOfBlocks == taosArrayGetSize(pRes)); code = doDumpResult(pTask, pItem, pRes, size, totalSize, totalBlocks); + // todo: here we need continue retry to put it into output buffer if (code != TSDB_CODE_SUCCESS) { - return code; + return; } pRes = NULL; @@ -182,16 +197,16 @@ int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, int64_t* } else { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); } - - return code; } -static int32_t handleSanhistoryResultBlocks(SStreamTask* pTask, SArray* pRes, int32_t size) { +// todo contiuous try to create result blocks +static int32_t handleScanhistoryResultBlocks(SStreamTask* pTask, SArray* pRes, int32_t size) { int32_t code = TSDB_CODE_SUCCESS; if (taosArrayGetSize(pRes) > 0) { SStreamDataBlock* pStreamBlocks = NULL; code = createStreamBlockFromResults(NULL, pTask, size, pRes, &pStreamBlocks); if (code) { + 
stError("s-task:%s failed to build history result blocks", pTask->id.idStr); return code; } @@ -236,9 +251,13 @@ static void streamScanHistoryDataImpl(SStreamTask* pTask, SArray* pRes, int32_t* } SSDataBlock block = {0}; - assignOneDataBlock(&block, output); + (void) assignOneDataBlock(&block, output); block.info.childId = pTask->info.selfChildId; - taosArrayPush(pRes, &block); + + void* p = taosArrayPush(pRes, &block); + if (p == NULL) { + stError("s-task:%s failed to add computing results, the final res may be incorrect", pTask->id.idStr); + } (*pSize) += blockDataGetSize(output) + sizeof(SSDataBlock) + sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block); numOfBlocks += 1; @@ -264,7 +283,7 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { const char* id = pTask->id.idStr; if (!pTask->hTaskInfo.operatorOpen) { - qSetStreamOpOpen(exec); + (void) qSetStreamOpOpen(exec); pTask->hTaskInfo.operatorOpen = true; } @@ -301,8 +320,8 @@ SScanhistoryDataInfo streamScanHistoryData(SStreamTask* pTask, int64_t st) { return buildScanhistoryExecRet(TASK_SCANHISTORY_QUIT, 0); } - // dispatch the generated results - /*int32_t code = */handleSanhistoryResultBlocks(pTask, pRes, size); + // dispatch the generated results, todo fix error + (void) handleScanhistoryResultBlocks(pTask, pRes, size); if (finished) { return buildScanhistoryExecRet(TASK_SCANHISTORY_CONT, 0); @@ -323,14 +342,15 @@ int32_t streamTransferStateDoPrepare(SStreamTask* pTask) { SStreamTask* pStreamTask = NULL; int32_t code = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId, &pStreamTask); - if (pStreamTask == NULL) { + if (pStreamTask == NULL || code != TSDB_CODE_SUCCESS) { stError( "s-task:%s failed to find related stream task:0x%x, it may have been destroyed or closed, destroy the related " "fill-history task", id, (int32_t) pTask->streamTaskId.taskId); // 1. 
free it and remove fill-history task from disk meta-store - streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id, 0); + // todo: this function should never be failed. + (void) streamBuildAndSendDropTaskMsg(pTask->pMsgCb, pMeta->vgId, &pTask->id, 0); // 2. save to disk streamMetaWLock(pMeta); @@ -388,14 +408,14 @@ int32_t streamTransferStateDoPrepare(SStreamTask* pTask) { pStreamTask->id.idStr, TASK_LEVEL__SOURCE, pTimeWindow->skey, pTimeWindow->ekey, INT64_MIN, pTimeWindow->ekey, p, pStreamTask->status.schedStatus); - streamTaskResetTimewindowFilter(pStreamTask); + (void) streamTaskResetTimewindowFilter(pStreamTask); } else { stDebug("s-task:%s no need to update/reset filter time window for non-source tasks", pStreamTask->id.idStr); } // NOTE: transfer the ownership of executor state before handle the checkpoint block during stream exec // 2. send msg to mnode to launch a checkpoint to keep the state for current stream - streamTaskSendCheckpointReq(pStreamTask); + (void) streamTaskSendCheckpointReq(pStreamTask); // 3. 
assign the status to the value that will be kept in disk pStreamTask->status.taskStatus = streamTaskGetStatus(pStreamTask).state; @@ -409,7 +429,7 @@ int32_t streamTransferStateDoPrepare(SStreamTask* pTask) { static int32_t haltCallback(SStreamTask* pTask, void* param) { streamTaskOpenAllUpstreamInput(pTask); - streamTaskSendCheckpointReq(pTask); + (void) streamTaskSendCheckpointReq(pTask); return TSDB_CODE_SUCCESS; } @@ -445,18 +465,19 @@ int32_t streamTransferStatePrepare(SStreamTask* pTask) { } // set input -static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_t* pVer, const char* id) { +static int32_t doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_t* pVer, const char* id) { void* pExecutor = pTask->exec.pExecutor; + int32_t code = 0; const SStreamQueueItem* pItem = pInput; if (pItem->type == STREAM_INPUT__GET_RES) { const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput; - qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); + code = qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE); const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput; - qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); + code = qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); stDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit, pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver); ASSERT((*pVer) <= pSubmit->submit.ver); @@ -468,7 +489,7 @@ static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_ SArray* pBlockList = pBlock->blocks; int32_t numOfBlocks = taosArrayGetSize(pBlockList); stDebug("s-task:%s set sdata blocks as input num:%d, ver:%" PRId64, id, numOfBlocks, pBlock->sourceVer); - 
qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK); + code = qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK); } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { const SStreamMergedSubmit* pMerged = (const SStreamMergedSubmit*)pInput; @@ -477,24 +498,26 @@ static void doSetStreamInputBlock(SStreamTask* pTask, const void* pInput, int64_ int32_t numOfBlocks = taosArrayGetSize(pBlockList); stDebug("s-task:%s %p set (merged) submit blocks as a batch, numOfBlocks:%d, ver:%" PRId64, id, pTask, numOfBlocks, pMerged->ver); - qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT); + code = qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT); ASSERT((*pVer) <= pMerged->ver); (*pVer) = pMerged->ver; } else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)pInput; - qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); + code = qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); } else if (pItem->type == STREAM_INPUT__CHECKPOINT || pItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { const SStreamDataBlock* pCheckpoint = (const SStreamDataBlock*)pInput; - qSetMultiStreamInput(pExecutor, pCheckpoint->blocks, 1, pItem->type); + code = qSetMultiStreamInput(pExecutor, pCheckpoint->blocks, 1, pItem->type); } else { ASSERT(0); } + + return code; } -int32_t streamProcessTransstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { +void streamProcessTransstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { const char* id = pTask->id.idStr; int32_t code = TSDB_CODE_SUCCESS; int32_t level = pTask->info.taskLevel; @@ -505,7 +528,7 @@ int32_t streamProcessTransstateBlock(SStreamTask* pTask, SStreamDataBlock* pBloc if (remain > 0) { streamFreeQitem((SStreamQueueItem*)pBlock); stDebug("s-task:%s 
receive upstream trans-state msg, not sent remain:%d", id, remain); - return 0; + return; } } @@ -526,7 +549,7 @@ int32_t streamProcessTransstateBlock(SStreamTask* pTask, SStreamDataBlock* pBloc pBlock->srcVgId = pTask->pMeta->vgId; code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock); if (code == 0) { - streamDispatchStreamBlock(pTask); + (void) streamDispatchStreamBlock(pTask); } else { // todo put into queue failed, retry streamFreeQitem((SStreamQueueItem*)pBlock); } @@ -540,11 +563,9 @@ int32_t streamProcessTransstateBlock(SStreamTask* pTask, SStreamDataBlock* pBloc code = streamTransferStatePrepare(pTask); if (code != TSDB_CODE_SUCCESS) { - /*int8_t status = */ streamTaskSetSchedStatusInactive(pTask); + (void)streamTaskSetSchedStatusInactive(pTask); } } - - return code; } //static void streamTaskSetIdleInfo(SStreamTask* pTask, int32_t idleTime) { pTask->status.schedIdleTime = idleTime; } @@ -559,7 +580,11 @@ static void doStreamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pBlock, i stDebug("s-task:%s start to process batch blocks, num:%d, type:%s", id, num, streamQueueItemGetTypeStr(pBlock->type)); - doSetStreamInputBlock(pTask, pBlock, &ver, id); + int32_t code = doSetStreamInputBlock(pTask, pBlock, &ver, id); + if(code) { + stError("s-task:%s failed to set input block, not exec for these blocks", id); + return; + } int64_t totalSize = 0; int32_t totalBlocks = 0; @@ -601,9 +626,9 @@ void flushStateDataInExecutor(SStreamTask* pTask, SStreamQueueItem* pCheckpointB STaskId* pHTaskId = &pTask->hTaskInfo.id; SStreamTask* pHTask = NULL; int32_t code = streamMetaAcquireTask(pTask->pMeta, pHTaskId->streamId, pHTaskId->taskId, &pHTask); - if (pHTask != NULL) { - streamTaskReleaseState(pHTask); - streamTaskReloadState(pTask); + if (code == TSDB_CODE_SUCCESS) { // ignore the error code. 
+ (void) streamTaskReleaseState(pHTask); + (void) streamTaskReloadState(pTask); stDebug("s-task:%s transfer state from fill-history task:%s, status:%s completed", id, pHTask->id.idStr, streamTaskGetStatus(pHTask).name); @@ -676,7 +701,7 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { // dispatch checkpoint msg to all downstream tasks int32_t type = pInput->type; if (type == STREAM_INPUT__CHECKPOINT_TRIGGER) { - streamProcessCheckpointTriggerBlock(pTask, (SStreamDataBlock*)pInput); + (void) streamProcessCheckpointTriggerBlock(pTask, (SStreamDataBlock*)pInput); continue; } @@ -694,7 +719,10 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { if (type == STREAM_INPUT__DATA_BLOCK) { pTask->execInfo.sink.dataSize += blockSize; stDebug("s-task:%s sink task start to sink %d blocks, size:%.2fKiB", id, numOfBlocks, SIZE_IN_KiB(blockSize)); - doOutputResultBlockImpl(pTask, (SStreamDataBlock*)pInput); + int32_t code = doOutputResultBlockImpl(pTask, (SStreamDataBlock*)pInput); + if (code != TSDB_CODE_SUCCESS) { + // todo handle error. + } double el = (taosGetTimestampMs() - st) / 1000.0; if (fabs(el - 0.0) <= DBL_EPSILON) { @@ -712,11 +740,11 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { streamFreeQitem(pInput); } else { // todo other thread may change the status // do nothing after sync executor state to storage backend, untill the vnode-level checkpoint is completed. 
- taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); SStreamTaskState pState = streamTaskGetStatus(pTask); if (pState.state == TASK_STATUS__CK) { stDebug("s-task:%s checkpoint block received, set status:%s", id, pState.name); - streamTaskBuildCheckpoint(pTask); + (void) streamTaskBuildCheckpoint(pTask); // ignore this error msg, and continue } else { // todo refactor int32_t code = 0; if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { @@ -732,7 +760,7 @@ static int32_t doStreamExecTask(SStreamTask* pTask) { } } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamFreeQitem(pInput); return 0; } @@ -765,21 +793,21 @@ bool streamTaskReadyToRun(const SStreamTask* pTask, char** pStatus) { } } -int32_t streamResumeTask(SStreamTask* pTask) { +void streamResumeTask(SStreamTask* pTask) { ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__ACTIVE); const char* id = pTask->id.idStr; while (1) { - /*int32_t code = */ doStreamExecTask(pTask); + (void) doStreamExecTask(pTask); // check if continue - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); int32_t numOfItems = streamQueueGetNumOfItems(pTask->inputq.queue); if ((numOfItems == 0) || streamTaskShouldStop(pTask) || streamTaskShouldPause(pTask)) { atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); streamTaskClearSchedIdleInfo(pTask); - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); setLastExecTs(pTask, taosGetTimestampMs()); @@ -787,19 +815,19 @@ int32_t streamResumeTask(SStreamTask* pTask) { stDebug("s-task:%s exec completed, status:%s, sched-status:%d, lastExecTs:%" PRId64, id, p, pTask->status.schedStatus, pTask->status.lastExecTs); - return 0; + return; } else { // check if this task needs to be idle for a while if (pTask->status.schedIdleTime > 0) { streamTaskResumeInFuture(pTask); - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); setLastExecTs(pTask, taosGetTimestampMs()); - return 0; 
+ return; } } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); } } diff --git a/source/libs/stream/src/streamHb.c b/source/libs/stream/src/streamHb.c index 4aaaed615c..9804943ec2 100644 --- a/source/libs/stream/src/streamHb.c +++ b/source/libs/stream/src/streamHb.c @@ -54,7 +54,7 @@ static bool existInHbMsg(SStreamHbMsg* pMsg, SDownstreamTaskEpset* pTaskEpset) { static void addUpdateNodeIntoHbMsg(SStreamTask* pTask, SStreamHbMsg* pMsg) { SStreamMeta* pMeta = pTask->pMeta; - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); int32_t num = taosArrayGetSize(pTask->outputInfo.pNodeEpsetUpdateList); for (int j = 0; j < num; ++j) { @@ -73,7 +73,7 @@ static void addUpdateNodeIntoHbMsg(SStreamTask* pTask, SStreamHbMsg* pMsg) { } taosArrayClear(pTask->outputInfo.pNodeEpsetUpdateList); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); } static int32_t doSendHbMsgInfo(SStreamHbMsg* pMsg, SStreamMeta* pMeta, SEpSet* pEpset) { @@ -170,9 +170,9 @@ int32_t streamMetaSendHbHelper(SStreamMeta* pMeta) { continue; } - (void) taosThreadMutexLock(&(*pTask)->lock); + streamMutexLock(&(*pTask)->lock); STaskStatusEntry entry = streamTaskGetStatusEntry(*pTask); - (void) taosThreadMutexUnlock(&(*pTask)->lock); + streamMutexUnlock(&(*pTask)->lock); entry.inputRate = entry.inputQUsed * 100.0 / (2 * STREAM_TASK_QUEUE_CAPACITY_IN_SIZE); if ((*pTask)->info.taskLevel == TASK_LEVEL__SINK) { @@ -190,9 +190,9 @@ int32_t streamMetaSendHbHelper(SStreamMeta* pMeta) { stInfo("s-task:%s set kill checkpoint trans in hbMsg, transId:%d, clear the active checkpointInfo", (*pTask)->id.idStr, p->transId); - (void) taosThreadMutexLock(&(*pTask)->lock); + streamMutexLock(&(*pTask)->lock); streamTaskClearCheckInfo((*pTask), true); - (void) taosThreadMutexUnlock(&(*pTask)->lock); + streamMutexUnlock(&(*pTask)->lock); } } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index b48c90be16..42d2f86dac 
100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -57,10 +57,13 @@ static void streamMetaEnvInit() { streamMetaId = taosOpenRef(64, streamMetaCloseImpl); metaRefMgtInit(); - streamTimerInit(); + int32_t code = streamTimerInit(); + if (code != 0) { + stError("failed to init stream meta env, start failed"); + } } -void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } +void streamMetaInit() { (void) taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); } void streamMetaCleanup() { taosCloseRef(streamBackendId); @@ -89,11 +92,11 @@ void metaRefMgtCleanup() { } taosHashCleanup(gMetaRefMgt.pTable); - taosThreadMutexDestroy(&gMetaRefMgt.mutex); + streamMutexDestroy(&gMetaRefMgt.mutex); } int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid) { - taosThreadMutexLock(&gMetaRefMgt.mutex); + streamMutexLock(&gMetaRefMgt.mutex); void* p = taosHashGet(gMetaRefMgt.pTable, &vgId, sizeof(vgId)); if (p == NULL) { @@ -105,7 +108,7 @@ int32_t metaRefMgtAdd(int64_t vgId, int64_t* rid) { taosArrayPush(list, &rid); } - taosThreadMutexUnlock(&gMetaRefMgt.mutex); + streamMutexUnlock(&gMetaRefMgt.mutex); return 0; } @@ -238,7 +241,7 @@ int32_t streamMetaMayCvtDbFormat(SStreamMeta* pMeta) { int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) { int64_t chkpId = pTask->chkInfo.checkpointId; - taosThreadMutexLock(&pMeta->backendMutex); + streamMutexLock(&pMeta->backendMutex); void** ppBackend = taosHashGet(pMeta->pTaskDbUnique, key, strlen(key)); if ((ppBackend != NULL) && (*ppBackend != NULL)) { taskDbAddRef(*ppBackend); @@ -247,7 +250,7 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) pBackend->pMeta = pMeta; pTask->pBackend = pBackend; - taosThreadMutexUnlock(&pMeta->backendMutex); + streamMutexUnlock(&pMeta->backendMutex); stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pBackend); return 0; } @@ -260,11 +263,11 @@ int32_t 
streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) break; } - taosThreadMutexUnlock(&pMeta->backendMutex); + streamMutexUnlock(&pMeta->backendMutex); taosMsleep(1000); stDebug("backend held by other task, restart later, path:%s, key:%s", pMeta->path, key); - taosThreadMutexLock(&pMeta->backendMutex); + streamMutexLock(&pMeta->backendMutex); } int64_t tref = taosAddRef(taskDbWrapperId, pBackend); @@ -276,7 +279,7 @@ int32_t streamTaskSetDb(SStreamMeta* pMeta, SStreamTask* pTask, const char* key) if (processVer != -1) pTask->chkInfo.processedVer = processVer; taosHashPut(pMeta->pTaskDbUnique, key, strlen(key), &pBackend, sizeof(void*)); - taosThreadMutexUnlock(&pMeta->backendMutex); + streamMutexUnlock(&pMeta->backendMutex); stDebug("s-task:0x%x set backend %p", pTask->id.taskId, pBackend); return 0; @@ -286,10 +289,10 @@ void streamMetaRemoveDB(void* arg, char* key) { if (arg == NULL || key == NULL) return; SStreamMeta* pMeta = arg; - taosThreadMutexLock(&pMeta->backendMutex); + streamMutexLock(&pMeta->backendMutex); taosHashRemove(pMeta->pTaskDbUnique, key, strlen(key)); - taosThreadMutexUnlock(&pMeta->backendMutex); + streamMutexUnlock(&pMeta->backendMutex); } int32_t streamMetaOpen(const char* path, void* ahandle, FTaskBuild buildTaskFn, FTaskExpand expandTaskFn, int32_t vgId, @@ -541,7 +544,7 @@ void streamMetaCloseImpl(void* arg) { pMeta->pHbInfo = NULL; taosMemoryFree(pMeta->path); - taosThreadMutexDestroy(&pMeta->backendMutex); + streamMutexDestroy(&pMeta->backendMutex); taosCleanUpScheduler(pMeta->qHandle); taosMemoryFree(pMeta->qHandle); @@ -1132,7 +1135,7 @@ int32_t streamMetaSendMsgBeforeCloseTasks(SStreamMeta* pMeta, SArray** pList) { continue; } - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); SStreamTaskState pState = streamTaskGetStatus(pTask); if (pState.state == TASK_STATUS__CK) { @@ -1141,7 +1144,7 @@ int32_t streamMetaSendMsgBeforeCloseTasks(SStreamMeta* pMeta, SArray** pList) { stDebug("s-task:%s 
status:%s not reset the checkpoint", pTask->id.idStr, pState.name); } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); } @@ -1271,7 +1274,7 @@ int32_t streamMetaStartAllTasks(SStreamMeta* pMeta) { streamLaunchFillHistoryTask(pTask); } - streamMetaAddTaskLaunchResult(pMeta, pTaskId->streamId, pTaskId->taskId, pInfo->checkTs, pInfo->readyTs, true); + (void) streamMetaAddTaskLaunchResult(pMeta, pTaskId->streamId, pTaskId->taskId, pInfo->checkTs, pInfo->readyTs, true); streamMetaReleaseTask(pMeta, pTask); continue; } @@ -1388,16 +1391,16 @@ int32_t streamMetaStartOneTask(SStreamMeta* pMeta, int64_t streamId, int32_t tas ASSERT(pTask->status.downstreamReady == 0); // avoid initialization and destroy running concurrently. - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); if (pTask->pBackend == NULL) { code = pMeta->expandTaskFn(pTask); - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); if (code != TSDB_CODE_SUCCESS) { streamMetaAddFailedTaskSelf(pTask, pInfo->readyTs); } } else { - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); } if (code == TSDB_CODE_SUCCESS) { @@ -1440,7 +1443,6 @@ int32_t streamMetaAddTaskLaunchResult(SStreamMeta* pMeta, int64_t streamId, int3 STaskId id = {.streamId = streamId, .taskId = taskId}; streamMetaWLock(pMeta); - SStreamTask** p = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); if (p == NULL) { // task does not exists in current vnode, not record the complete info stError("vgId:%d s-task:0x%x not exists discard the check downstream info", pMeta->vgId, taskId); @@ -1449,9 +1451,9 @@ int32_t streamMetaAddTaskLaunchResult(SStreamMeta* pMeta, int64_t streamId, int3 } // clear the send consensus-checkpointId flag - taosThreadMutexLock(&(*p)->lock); + streamMutexLock(&(*p)->lock); (*p)->status.sendConsensusChkptId = false; - taosThreadMutexUnlock(&(*p)->lock); + streamMutexUnlock(&(*p)->lock); if 
(pStartInfo->startAllTasks != 1) { int64_t el = endTs - startTs; @@ -1537,9 +1539,9 @@ int32_t streamMetaAddFailedTask(SStreamMeta* pMeta, int64_t streamId, int32_t ta streamMetaRUnLock(pMeta); // add the failed task info, along with the related fill-history task info into tasks list. - streamMetaAddTaskLaunchResult(pMeta, streamId, taskId, startTs, now, false); + (void) streamMetaAddTaskLaunchResult(pMeta, streamId, taskId, startTs, now, false); if (hasFillhistoryTask) { - streamMetaAddTaskLaunchResult(pMeta, hId.streamId, hId.taskId, startTs, now, false); + (void) streamMetaAddTaskLaunchResult(pMeta, hId.streamId, hId.taskId, startTs, now, false); } } else { streamMetaRUnLock(pMeta); @@ -1554,12 +1556,12 @@ int32_t streamMetaAddFailedTask(SStreamMeta* pMeta, int64_t streamId, int32_t ta void streamMetaAddFailedTaskSelf(SStreamTask* pTask, int64_t failedTs) { int32_t startTs = pTask->execInfo.checkTs; - streamMetaAddTaskLaunchResult(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, startTs, failedTs, false); + (void) streamMetaAddTaskLaunchResult(pTask->pMeta, pTask->id.streamId, pTask->id.taskId, startTs, failedTs, false); // automatically set the related fill-history task to be failed. 
if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { STaskId* pId = &pTask->hTaskInfo.id; - streamMetaAddTaskLaunchResult(pTask->pMeta, pId->streamId, pId->taskId, startTs, failedTs, false); + (void) streamMetaAddTaskLaunchResult(pTask->pMeta, pId->streamId, pId->taskId, startTs, failedTs, false); } } diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index b8cdcd4cf5..537062b04e 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -27,20 +27,24 @@ typedef struct SQueueReader { int32_t waitDuration; // maximum wait time to format several block into a batch to process, unit: ms } SQueueReader; +#define streamQueueCurItem(_q) ((_q)->qItem) + static bool streamTaskExtractAvailableToken(STokenBucket* pBucket, const char* id); static void streamTaskPutbackToken(STokenBucket* pBucket); static void streamTaskConsumeQuota(STokenBucket* pBucket, int32_t bytes); static void streamQueueCleanup(SStreamQueue* pQueue) { - void* qItem = NULL; - while ((qItem = streamQueueNextItem(pQueue)) != NULL) { + SStreamQueueItem* qItem = NULL; + while (1) { + streamQueueNextItem(pQueue, &qItem); + if (qItem == NULL) { + break; + } streamFreeQitem(qItem); } pQueue->status = STREAM_QUEUE__SUCESS; } -static void* streamQueueCurItem(SStreamQueue* queue) { return queue->qItem; } - int32_t streamQueueOpen(int64_t cap, SStreamQueue** pQ) { *pQ = NULL; int32_t code = 0; @@ -81,21 +85,22 @@ void streamQueueClose(SStreamQueue* pQueue, int32_t taskId) { taosMemoryFree(pQueue); } -void* streamQueueNextItem(SStreamQueue* pQueue) { +void streamQueueNextItem(SStreamQueue* pQueue, SStreamQueueItem** pItem) { + *pItem = NULL; int8_t flag = atomic_exchange_8(&pQueue->status, STREAM_QUEUE__PROCESSING); if (flag == STREAM_QUEUE__FAILED) { ASSERT(pQueue->qItem != NULL); - return streamQueueCurItem(pQueue); + *pItem = streamQueueCurItem(pQueue); } else { pQueue->qItem = NULL; - taosGetQitem(pQueue->qall, &pQueue->qItem); + (void) 
taosGetQitem(pQueue->qall, &pQueue->qItem); if (pQueue->qItem == NULL) { - taosReadAllQitems(pQueue->pQueue, pQueue->qall); - taosGetQitem(pQueue->qall, &pQueue->qItem); + (void) taosReadAllQitems(pQueue->pQueue, pQueue->qall); + (void) taosGetQitem(pQueue->qall, &pQueue->qItem); } - return streamQueueCurItem(pQueue); + *pItem = streamQueueCurItem(pQueue); } } @@ -181,7 +186,8 @@ EExtractDataCode streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueIte return EXEC_CONTINUE; } - SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputq.queue); + SStreamQueueItem* qItem = NULL; + streamQueueNextItem(pTask->inputq.queue, (SStreamQueueItem**)&qItem); if (qItem == NULL) { // restore the token to bucket if (*numOfBlocks > 0) { @@ -338,7 +344,8 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && type != STREAM_INPUT__CHECKPOINT_TRIGGER && (pTask->info.delaySchedParam != 0)) { - atomic_val_compare_exchange_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE, TASK_TRIGGER_STATUS__ACTIVE); + (void)atomic_val_compare_exchange_8(&pTask->schedInfo.status, TASK_TRIGGER_STATUS__INACTIVE, + TASK_TRIGGER_STATUS__ACTIVE); stDebug("s-task:%s new data arrived, active the sched-trigger, triggerStatus:%d", pTask->id.idStr, pTask->schedInfo.status); } @@ -347,18 +354,19 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) } int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask) { - int32_t code; - SStreamDataBlock* pTranstate; + int32_t code = 0; + SStreamDataBlock* pTranstate = NULL; + SSDataBlock* pBlock = NULL; code = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock), (void**)&pTranstate); if (code) { return code; } - SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); + pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); if (pBlock == NULL) { - taosFreeQitem(pTranstate); - return 
TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; } pTranstate->type = STREAM_INPUT__TRANS_STATE; @@ -368,15 +376,30 @@ int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask) { pBlock->info.childId = pTask->info.selfChildId; pTranstate->blocks = taosArrayInit(4, sizeof(SSDataBlock)); // pBlock; - taosArrayPush(pTranstate->blocks, pBlock); + if (pTranstate->blocks == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + void* p = taosArrayPush(pTranstate->blocks, pBlock); + if (p == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } taosMemoryFree(pBlock); if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTranstate) < 0) { - return TSDB_CODE_OUT_OF_MEMORY; + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; } pTask->status.appendTranstateBlock = true; return TSDB_CODE_SUCCESS; + +_err: + taosMemoryFree(pBlock); + taosFreeQitem(pTranstate); + return code; } // the result should be put into the outputQ in any cases, the result may be lost otherwise. 
diff --git a/source/libs/stream/src/streamSched.c b/source/libs/stream/src/streamSched.c index f11b135168..74f370d199 100644 --- a/source/libs/stream/src/streamSched.c +++ b/source/libs/stream/src/streamSched.c @@ -36,7 +36,7 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask) { int32_t streamTrySchedExec(SStreamTask* pTask) { if (streamTaskSetSchedStatusWait(pTask)) { - streamTaskSchedTask(pTask->pMsgCb, pTask->info.nodeId, pTask->id.streamId, pTask->id.taskId, 0); + return streamTaskSchedTask(pTask->pMsgCb, pTask->info.nodeId, pTask->id.streamId, pTask->id.taskId, 0); } else { stTrace("s-task:%s not launch task since sched status:%d", pTask->id.idStr, pTask->status.schedStatus); } @@ -47,10 +47,9 @@ int32_t streamTrySchedExec(SStreamTask* pTask) { int32_t streamTaskSchedTask(SMsgCb* pMsgCb, int32_t vgId, int64_t streamId, int32_t taskId, int32_t execType) { SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); if (pRunReq == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; stError("vgId:%d failed to create msg to start stream task:0x%x exec, type:%d, code:%s", vgId, taskId, execType, terrstr()); - return -1; + return TSDB_CODE_OUT_OF_MEMORY; } if (streamId != 0) { @@ -65,29 +64,27 @@ int32_t streamTaskSchedTask(SMsgCb* pMsgCb, int32_t vgId, int64_t streamId, int3 pRunReq->reqType = execType; SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; - tmsgPutToQueue(pMsgCb, STREAM_QUEUE, &msg); - return TSDB_CODE_SUCCESS; + return tmsgPutToQueue(pMsgCb, STREAM_QUEUE, &msg); } void streamTaskClearSchedIdleInfo(SStreamTask* pTask) { pTask->status.schedIdleTime = 0; } void streamTaskSetIdleInfo(SStreamTask* pTask, int32_t idleTime) { pTask->status.schedIdleTime = idleTime; } -int32_t streamTaskResumeInFuture(SStreamTask* pTask) { +void streamTaskResumeInFuture(SStreamTask* pTask) { int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s task should idle, add into timer to 
retry in %dms, ref:%d", pTask->id.idStr, pTask->status.schedIdleTime, ref); // add one ref count for task - /*SStreamTask* pAddRefTask = */ streamMetaAcquireOneTask(pTask); + streamMetaAcquireOneTask(pTask); if (pTask->schedInfo.pIdleTimer == NULL) { pTask->schedInfo.pIdleTimer = taosTmrStart(streamTaskResumeHelper, pTask->status.schedIdleTime, pTask, streamTimer); } else { - taosTmrReset(streamTaskResumeHelper, pTask->status.schedIdleTime, pTask, streamTimer, &pTask->schedInfo.pIdleTimer); + streamTmrReset(streamTaskResumeHelper, pTask->status.schedIdleTime, pTask, streamTimer, + &pTask->schedInfo.pIdleTimer, pTask->pMeta->vgId, "resume-task-tmr"); } - - return TSDB_CODE_SUCCESS; } ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -97,7 +94,7 @@ void streamTaskResumeHelper(void* param, void* tmrId) { SStreamTaskState p = streamTaskGetStatus(pTask); if (p.state == TASK_STATUS__DROPPING || p.state == TASK_STATUS__STOP) { - streamTaskSetSchedStatusInactive(pTask); + (void) streamTaskSetSchedStatusInactive(pTask); int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); stDebug("s-task:%s status:%s not resume task, ref:%d", pId->idStr, p.name, ref); @@ -106,21 +103,25 @@ void streamTaskResumeHelper(void* param, void* tmrId) { return; } - streamTaskSchedTask(pTask->pMsgCb, pTask->info.nodeId, pId->streamId, pId->taskId, STREAM_EXEC_T_RESUME_TASK); + int32_t code = streamTaskSchedTask(pTask->pMsgCb, pTask->info.nodeId, pId->streamId, pId->taskId, STREAM_EXEC_T_RESUME_TASK); + if (code) { + stError("s-task:%s sched task failed, code:%s", pTask->id.idStr, strerror(code)); + } else { + int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); + stDebug("trigger to resume s-task:%s after being idled for %dms, ref:%d", pId->idStr, pTask->status.schedIdleTime, + ref); - int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); - stDebug("trigger to resume s-task:%s after being 
idled for %dms, ref:%d", pId->idStr, pTask->status.schedIdleTime, - ref); - - // release the task ref count - streamTaskClearSchedIdleInfo(pTask); - streamMetaReleaseTask(pTask->pMeta, pTask); + // release the task ref count + streamTaskClearSchedIdleInfo(pTask); + streamMetaReleaseTask(pTask->pMeta, pTask); + } } void streamTaskSchedHelper(void* param, void* tmrId) { SStreamTask* pTask = (void*)param; const char* id = pTask->id.idStr; int32_t nextTrigger = (int32_t)pTask->info.delaySchedParam; + int32_t vgId = pTask->pMeta->vgId; int8_t status = atomic_load_8(&pTask->schedInfo.status); stTrace("s-task:%s in scheduler, trigger status:%d, next:%dms", id, status, nextTrigger); @@ -140,7 +141,7 @@ void streamTaskSchedHelper(void* param, void* tmrId) { if (code) { stError("s-task:%s failed to prepare retrieve data trigger, code:%s, try again in %dms", id, "out of memory", nextTrigger); - taosTmrReset(streamTaskSchedHelper, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer); + streamTmrReset(streamTaskSchedHelper, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer, vgId, "sched-run-tmr"); terrno = code; return; } @@ -152,7 +153,7 @@ void streamTaskSchedHelper(void* param, void* tmrId) { stError("s-task:%s failed to prepare retrieve data trigger, code:%s, try again in %dms", id, "out of memory", nextTrigger); - taosTmrReset(streamTaskSchedHelper, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer); + streamTmrReset(streamTaskSchedHelper, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer, vgId, "sched-run-tmr"); return; } @@ -161,13 +162,16 @@ void streamTaskSchedHelper(void* param, void* tmrId) { code = streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTrigger); if (code != TSDB_CODE_SUCCESS) { - taosTmrReset(streamTaskSchedHelper, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer); + streamTmrReset(streamTaskSchedHelper, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer, vgId, 
"sched-run-tmr"); return; } - streamTrySchedExec(pTask); + code = streamTrySchedExec(pTask); + if (code != TSDB_CODE_SUCCESS) { + stError("s-task:%s failed to sched to run, wait for next time", pTask->id.idStr); + } } } - taosTmrReset(streamTaskSchedHelper, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer); + streamTmrReset(streamTaskSchedHelper, nextTrigger, pTask, streamTimer, &pTask->schedInfo.pDelayTimer, vgId, "sched-run-tmr"); } diff --git a/source/libs/stream/src/streamStartHistory.c b/source/libs/stream/src/streamStartHistory.c index 3c7ad2639a..1efb2af381 100644 --- a/source/libs/stream/src/streamStartHistory.c +++ b/source/libs/stream/src/streamStartHistory.c @@ -212,7 +212,7 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { stDebug("s-task:%s not launch related fill-history task:0x%" PRIx64 "-0x%x, status:%s", idStr, hStreamId, hTaskId, pStatus.name); - streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); + (void) streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); return -1; // todo set the correct error code } @@ -228,11 +228,11 @@ int32_t streamLaunchFillHistoryTask(SStreamTask* pTask) { code = streamMetaAcquireTask(pMeta, hStreamId, hTaskId, &pHisTask); if (pHisTask == NULL) { stDebug("s-task:%s failed acquire and start fill-history task, it may have been dropped/stopped", idStr); - streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); + (void) streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); } else { if (pHisTask->status.downstreamReady == 1) { // it's ready now, do nothing stDebug("s-task:%s fill-history task is ready, no need to check downstream", pHisTask->id.idStr); - streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, true); + (void) 
streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, true); } else { // exist, but not ready, continue check downstream task status if (pHisTask->pBackend == NULL) { code = pMeta->expandTaskFn(pHisTask); @@ -289,7 +289,7 @@ void notRetryLaunchFillHistoryTask(SStreamTask* pTask, SLaunchHTaskInfo* pInfo, SHistoryTaskInfo* pHTaskInfo = &pTask->hTaskInfo; int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); - streamMetaAddTaskLaunchResult(pMeta, pInfo->hTaskId.streamId, pInfo->hTaskId.taskId, 0, now, false); + (void) streamMetaAddTaskLaunchResult(pMeta, pInfo->hTaskId.streamId, pInfo->hTaskId.taskId, 0, now, false); stError("s-task:%s max retry:%d reached, quit from retrying launch related fill-history task:0x%x, ref:%d", pTask->id.idStr, MAX_RETRY_LAUNCH_HISTORY_TASK, (int32_t)pHTaskInfo->id.taskId, ref); @@ -307,7 +307,7 @@ void doRetryLaunchFillHistoryTask(SStreamTask* pTask, SLaunchHTaskInfo* pInfo, i stDebug("s-task:0x%" PRIx64 " stopped, not launch rel history task:0x%" PRIx64 ", ref:%d", pInfo->id.taskId, pInfo->hTaskId.taskId, ref); - streamMetaAddTaskLaunchResult(pMeta, pInfo->hTaskId.streamId, pInfo->hTaskId.taskId, 0, now, false); + (void) streamMetaAddTaskLaunchResult(pMeta, pInfo->hTaskId.streamId, pInfo->hTaskId.taskId, 0, now, false); taosMemoryFree(pInfo); } else { char* p = streamTaskGetStatus(pTask).name; @@ -350,7 +350,7 @@ void tryLaunchHistoryTask(void* param, void* tmrId) { streamMetaWUnLock(pMeta); // record the related fill-history task failed - streamMetaAddTaskLaunchResult(pMeta, pInfo->hTaskId.streamId, pInfo->hTaskId.taskId, 0, now, false); + (void) streamMetaAddTaskLaunchResult(pMeta, pInfo->hTaskId.streamId, pInfo->hTaskId.taskId, 0, now, false); taosMemoryFree(pInfo); return; } @@ -407,7 +407,7 @@ void tryLaunchHistoryTask(void* param, void* tmrId) { streamMetaReleaseTask(pMeta, pTask); } else { - streamMetaAddTaskLaunchResult(pMeta, pInfo->hTaskId.streamId, pInfo->hTaskId.taskId, 
0, now, false); + (void) streamMetaAddTaskLaunchResult(pMeta, pInfo->hTaskId.streamId, pInfo->hTaskId.taskId, 0, now, false); int32_t ref = atomic_sub_fetch_32(&(*ppTask)->status.timerActive, 1); stError("s-task:0x%x rel fill-history task:0x%" PRIx64 " may have been destroyed, not launch, ref:%d", @@ -448,7 +448,7 @@ int32_t launchNotBuiltFillHistoryTask(SStreamTask* pTask) { SLaunchHTaskInfo* pInfo = createHTaskLaunchInfo(pMeta, &id, hStreamId, hTaskId); if (pInfo == NULL) { stError("s-task:%s failed to launch related fill-history task, since Out Of Memory", idStr); - streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); + (void) streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); return terrno; } @@ -465,7 +465,7 @@ int32_t launchNotBuiltFillHistoryTask(SStreamTask* pTask) { stError("s-task:%s failed to start timer, related fill-history task not launched, ref:%d", idStr, ref); taosMemoryFree(pInfo); - streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); + (void) streamMetaAddTaskLaunchResult(pMeta, hStreamId, hTaskId, pExecInfo->checkTs, pExecInfo->readyTs, false); return terrno; } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 39e12a9da7..7c2d0b3556 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -197,13 +197,13 @@ void tFreeStreamTask(SStreamTask* pTask) { STaskExecStatisInfo* pStatis = &pTask->execInfo; ETaskStatus status1 = TASK_STATUS__UNINIT; - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); if (pTask->status.pSM != NULL) { SStreamTaskState pStatus = streamTaskGetStatus(pTask); p = pStatus.name; status1 = pStatus.state; } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); stDebug("start to free s-task:0x%x %p, state:%s", taskId, pTask, p); @@ -284,11 +284,11 @@ void 
tFreeStreamTask(SStreamTask* pTask) { streamTaskDestroyUpstreamInfo(&pTask->upstreamInfo); taosMemoryFree(pTask->outputInfo.pTokenBucket); - taosThreadMutexDestroy(&pTask->lock); + streamMutexDestroy(&pTask->lock); taosArrayDestroy(pTask->msgInfo.pSendInfo); pTask->msgInfo.pSendInfo = NULL; - taosThreadMutexDestroy(&pTask->msgInfo.lock); + streamMutexDestroy(&pTask->msgInfo.lock); taosArrayDestroy(pTask->outputInfo.pNodeEpsetUpdateList); pTask->outputInfo.pNodeEpsetUpdateList = NULL; @@ -644,11 +644,7 @@ void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { SStreamUpstreamEpInfo* pInfo = NULL; - - int32_t code = streamTaskGetUpstreamTaskEpInfo(pTask, taskId, &pInfo); - if (code != TSDB_CODE_SUCCESS) { - return; - } + streamTaskGetUpstreamTaskEpInfo(pTask, taskId, &pInfo); if ((pInfo != NULL) && pInfo->dataAllowed) { pInfo->dataAllowed = false; @@ -659,11 +655,7 @@ void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { void streamTaskOpenUpstreamInput(SStreamTask* pTask, int32_t taskId) { SStreamUpstreamEpInfo* pInfo = NULL; - - int32_t code = streamTaskGetUpstreamTaskEpInfo(pTask, taskId, &pInfo); - if (code != TSDB_CODE_SUCCESS) { - return; - } + streamTaskGetUpstreamTaskEpInfo(pTask, taskId, &pInfo); if (pInfo != NULL && (!pInfo->dataAllowed)) { int32_t t = atomic_sub_fetch_32(&pTask->upstreamInfo.numOfClosed, 1); @@ -679,34 +671,34 @@ bool streamTaskIsAllUpstreamClosed(SStreamTask* pTask) { bool streamTaskSetSchedStatusWait(SStreamTask* pTask) { bool ret = false; - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); if (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE) { pTask->status.schedStatus = TASK_SCHED_STATUS__WAITING; ret = true; } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return ret; } int8_t streamTaskSetSchedStatusActive(SStreamTask* pTask) { - taosThreadMutexLock(&pTask->lock); + 
streamMutexLock(&pTask->lock); int8_t status = pTask->status.schedStatus; if (status == TASK_SCHED_STATUS__WAITING) { pTask->status.schedStatus = TASK_SCHED_STATUS__ACTIVE; } - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return status; } int8_t streamTaskSetSchedStatusInactive(SStreamTask* pTask) { - taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); int8_t status = pTask->status.schedStatus; ASSERT(status == TASK_SCHED_STATUS__WAITING || status == TASK_SCHED_STATUS__ACTIVE || status == TASK_SCHED_STATUS__INACTIVE); pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; - taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return status; } @@ -723,7 +715,7 @@ int32_t streamTaskClearHTaskAttr(SStreamTask* pTask, int32_t resetRelHalt) { stDebug("s-task:%s clear the related stream task:0x%x attr to fill-history task", pTask->id.idStr, (int32_t)sTaskId.taskId); - taosThreadMutexLock(&(*ppStreamTask)->lock); + streamMutexLock(&(*ppStreamTask)->lock); CLEAR_RELATED_FILLHISTORY_TASK((*ppStreamTask)); if (resetRelHalt) { @@ -734,7 +726,7 @@ int32_t streamTaskClearHTaskAttr(SStreamTask* pTask, int32_t resetRelHalt) { } streamMetaSaveTask(pMeta, *ppStreamTask); - taosThreadMutexUnlock(&(*ppStreamTask)->lock); + streamMutexUnlock(&(*ppStreamTask)->lock); } return TSDB_CODE_SUCCESS; @@ -923,6 +915,7 @@ void streamTaskResume(SStreamTask* pTask) { bool streamTaskIsSinkTask(const SStreamTask* pTask) { return pTask->info.taskLevel == TASK_LEVEL__SINK; } +// this task must success int32_t streamTaskSendCheckpointReq(SStreamTask* pTask) { int32_t code; int32_t tlen = 0; @@ -960,24 +953,23 @@ int32_t streamTaskSendCheckpointReq(SStreamTask* pTask) { return 0; } -int32_t streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId, SStreamUpstreamEpInfo** pEpInfo) { +void streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId, SStreamUpstreamEpInfo** pEpInfo) { *pEpInfo = NULL; int32_t num = 
taosArrayGetSize(pTask->upstreamInfo.pList); for (int32_t i = 0; i < num; ++i) { SStreamUpstreamEpInfo* pInfo = taosArrayGetP(pTask->upstreamInfo.pList, i); if (pInfo == NULL) { - return TSDB_CODE_FAILED; + return; } if (pInfo->taskId == taskId) { *pEpInfo = pInfo; - return TSDB_CODE_SUCCESS; + return; } } stError("s-task:%s failed to find upstream task:0x%x", pTask->id.idStr, taskId); - return TSDB_CODE_FAILED; } SEpSet* streamTaskGetDownstreamEpInfo(SStreamTask* pTask, int32_t taskId) { @@ -1100,7 +1092,7 @@ void streamTaskDestroyActiveChkptInfo(SActiveCheckpointInfo* pInfo) { return; } - taosThreadMutexDestroy(&pInfo->lock); + streamMutexDestroy(&pInfo->lock); taosArrayDestroy(pInfo->pDispatchTriggerList); pInfo->pDispatchTriggerList = NULL; taosArrayDestroy(pInfo->pReadyMsgList); diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 7e47857a39..c3e0df52d4 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -201,7 +201,7 @@ static int32_t doHandleWaitingEvent(SStreamTaskSM* pSM, const char* pEventName, pSM->pActiveTrans = pNextTrans; pSM->startTs = taosGetTimestampMs(); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); code = pNextTrans->pAction(pSM->pTask); if (pNextTrans->autoInvokeEndFn) { @@ -210,7 +210,7 @@ static int32_t doHandleWaitingEvent(SStreamTaskSM* pSM, const char* pEventName, return code; } } else { - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); stDebug("s-task:%s state:%s event:%s in waiting list, req state:%s not fulfilled, put it back", pTask->id.idStr, pSM->current.name, GET_EVT_NAME(pEvtInfo->event), StreamTaskStatusList[pEvtInfo->status].name); @@ -247,7 +247,7 @@ int32_t streamTaskRestoreStatus(SStreamTask* pTask) { SStreamTaskSM* pSM = pTask->status.pSM; int32_t code = 0; - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); if (pSM->current.state == 
TASK_STATUS__PAUSE && pSM->pActiveTrans == NULL) { SStreamTaskState state = pSM->current; @@ -326,13 +326,13 @@ static int32_t doHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event, STaskSt return code; } - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); while (1) { // wait for the task to be here - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); ETaskStatus s = streamTaskGetStatus(pTask).state; - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); if ((s == pTrans->next.state) && (pSM->prev.evt == pTrans->event)) {// this event has been handled already stDebug("s-task:%s attached event:%s handled", id, GET_EVT_NAME(pTrans->event)); @@ -349,7 +349,7 @@ static int32_t doHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event, STaskSt } else { // override current active trans pSM->pActiveTrans = pTrans; pSM->startTs = taosGetTimestampMs(); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); code = pTrans->pAction(pTask); @@ -374,11 +374,11 @@ static int32_t doHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, ST info.callBackFn = callbackFn; code = attachWaitedEvent(pTask, &info); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); } else { // override current active trans pSM->pActiveTrans = pTrans; pSM->startTs = taosGetTimestampMs(); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); code = pTrans->pAction(pTask); // todo handle error code; @@ -400,11 +400,11 @@ int32_t streamTaskHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event) { STaskStateTrans* pTrans = NULL; while (1) { - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); if (pSM->pActiveTrans != NULL && pSM->pActiveTrans->autoInvokeEndFn) { EStreamTaskEvent evt = pSM->pActiveTrans->event; - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); stDebug("s-task:%s status:%s 
handling event:%s by some other thread, wait for 100ms and check if completed", pTask->id.idStr, pSM->current.name, GET_EVT_NAME(evt)); @@ -414,7 +414,7 @@ int32_t streamTaskHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event) { pTrans = streamTaskFindTransform(pSM->current.state, event); if (pTrans == NULL) { stDebug("s-task:%s failed to handle event:%s", pTask->id.idStr, GET_EVT_NAME(event)); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return TSDB_CODE_STREAM_INVALID_STATETRANS; } @@ -439,11 +439,11 @@ int32_t streamTaskHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, _ STaskStateTrans* pTrans = NULL; while (1) { - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); if (pSM->pActiveTrans != NULL && pSM->pActiveTrans->autoInvokeEndFn) { EStreamTaskEvent evt = pSM->pActiveTrans->event; - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); stDebug("s-task:%s status:%s handling event:%s by some other thread, wait for 100ms and check if completed", pTask->id.idStr, pSM->current.name, GET_EVT_NAME(evt)); @@ -453,7 +453,7 @@ int32_t streamTaskHandleEventAsync(SStreamTaskSM* pSM, EStreamTaskEvent event, _ pTrans = streamTaskFindTransform(pSM->current.state, event); if (pTrans == NULL) { stDebug("s-task:%s failed to handle event:%s, status:%s", pTask->id.idStr, GET_EVT_NAME(event), pSM->current.name); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return TSDB_CODE_STREAM_INVALID_STATETRANS; } @@ -485,7 +485,7 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even int32_t code = 0; // do update the task status - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); STaskStateTrans* pTrans = pSM->pActiveTrans; if (pTrans == NULL) { @@ -497,14 +497,14 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even stDebug("s-task:%s event:%s handled failed, current 
status:%s, trigger event:%s", id, GET_EVT_NAME(event), pSM->current.name, GET_EVT_NAME(pSM->prev.evt)); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return TSDB_CODE_STREAM_INVALID_STATETRANS; } if (pTrans->event != event) { stWarn("s-task:%s handle event:%s failed, current status:%s, active trans evt:%s", id, GET_EVT_NAME(event), pSM->current.name, GET_EVT_NAME(pTrans->event)); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); return TSDB_CODE_STREAM_INVALID_STATETRANS; } @@ -518,7 +518,7 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even // on success callback, add into lock if necessary, or maybe we should add an option for this? code = pTrans->pSuccAction(pTask); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); // todo: add parameter to control lock // after handling the callback function assigned by invoker, go on handling the waiting tasks @@ -532,13 +532,13 @@ int32_t streamTaskOnHandleEventSuccess(SStreamTaskSM* pSM, EStreamTaskEvent even stDebug("s-task:%s handle user-specified callback fn for event:%s completed", id, GET_EVT_NAME(pTrans->event)); } - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); // tasks in waiting list if (taosArrayGetSize(pSM->pWaitingEventList) > 0) { code = doHandleWaitingEvent(pSM, GET_EVT_NAME(pTrans->event), pTask); } else { - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); int64_t el = (taosGetTimestampMs() - pSM->startTs); stDebug("s-task:%s handle event:%s completed, elapsed time:%" PRId64 "ms state:%s -> %s", id, @@ -563,14 +563,14 @@ const char* streamTaskGetStatusStr(ETaskStatus status) { void streamTaskResetStatus(SStreamTask* pTask) { SStreamTaskSM* pSM = pTask->status.pSM; - (void) taosThreadMutexLock(&pTask->lock); + streamMutexLock(&pTask->lock); stDebug("s-task:%s level:%d fill-history:%d vgId:%d set uninit, prev status:%s", 
pTask->id.idStr, pTask->info.taskLevel, pTask->info.fillHistory, pTask->pMeta->vgId, pSM->current.name); pSM->current = StreamTaskStatusList[TASK_STATUS__UNINIT]; pSM->pActiveTrans = NULL; taosArrayClear(pSM->pWaitingEventList); - (void) taosThreadMutexUnlock(&pTask->lock); + streamMutexUnlock(&pTask->lock); // clear the downstream ready status pTask->status.downstreamReady = 0; diff --git a/source/libs/stream/src/streamTimer.c b/source/libs/stream/src/streamTimer.c index c76ec92e33..931de397cc 100644 --- a/source/libs/stream/src/streamTimer.c +++ b/source/libs/stream/src/streamTimer.c @@ -41,11 +41,11 @@ tmr_h streamTimerGetInstance() { void streamTmrReset(TAOS_TMR_CALLBACK fp, int32_t mseconds, void* param, void* handle, tmr_h* pTmrId, int32_t vgId, const char* pMsg) { - while (1) { +// while (1) { bool ret = taosTmrReset(fp, mseconds, param, handle, pTmrId); if (ret) { - break; +// break; } - stError("vgId:%d failed to reset %s, try again", vgId, pMsg); - } +// stError("vgId:%d failed to reset tmr: %s, try again", vgId, pMsg); +// } } diff --git a/source/libs/stream/src/streamUtil.c b/source/libs/stream/src/streamUtil.c new file mode 100644 index 0000000000..44c6adce5f --- /dev/null +++ b/source/libs/stream/src/streamUtil.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "streamInt.h" + +void streamMutexLock(TdThreadMutex *pMutex) { + (void) taosThreadMutexLock(pMutex); +} + +void streamMutexUnlock(TdThreadMutex *pMutex) { + (void) taosThreadMutexUnlock(pMutex); +} + +void streamMutexDestroy(TdThreadMutex *pMutex) { (void) taosThreadMutexDestroy(pMutex); }