From f5b52749c288eff98799dd0f8db5dd677a84f688 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 9 Apr 2023 01:39:09 +0800 Subject: [PATCH 01/25] refactor: do some internal refactor. --- include/libs/executor/executor.h | 16 +- include/libs/qcom/query.h | 1 + include/libs/stream/tstream.h | 44 +-- include/libs/wal/wal.h | 1 + source/client/inc/clientInt.h | 36 +-- source/client/src/clientImpl.c | 4 +- source/client/src/clientTmq.c | 47 ++- source/dnode/mnode/impl/src/mndDef.c | 10 +- source/dnode/mnode/impl/src/mndScheduler.c | 8 +- source/dnode/mnode/impl/src/mndStream.c | 4 +- source/dnode/snode/src/snode.c | 11 +- source/dnode/vnode/inc/vnode.h | 6 +- source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/meta/metaCache.c | 11 +- source/dnode/vnode/src/sma/smaRollup.c | 2 +- source/dnode/vnode/src/tq/tq.c | 284 +++++++++++++----- source/dnode/vnode/src/tq/tqMeta.c | 10 +- source/dnode/vnode/src/tq/tqOffset.c | 25 +- source/dnode/vnode/src/tq/tqPush.c | 3 +- source/dnode/vnode/src/tq/tqRead.c | 85 ++++-- source/dnode/vnode/src/tq/tqScan.c | 18 +- source/dnode/vnode/src/tq/tqSink.c | 12 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 - source/libs/executor/src/executor.c | 54 ++-- source/libs/executor/src/executorimpl.c | 6 +- source/libs/executor/src/scanoperator.c | 11 +- source/libs/executor/src/timewindowoperator.c | 7 +- source/libs/stream/inc/streamInc.h | 2 +- source/libs/stream/src/stream.c | 43 ++- source/libs/stream/src/streamData.c | 25 +- source/libs/stream/src/streamDispatch.c | 26 +- source/libs/stream/src/streamExec.c | 120 ++++---- source/libs/stream/src/streamMeta.c | 19 +- source/libs/stream/src/streamRecover.c | 36 +-- source/libs/stream/src/streamState.c | 2 +- source/libs/stream/src/streamTask.c | 51 +++- 36 files changed, 625 insertions(+), 419 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 33172a4f86..ee8ee1050d 100644 --- a/include/libs/executor/executor.h +++ 
b/include/libs/executor/executor.h @@ -26,6 +26,7 @@ extern "C" { typedef void* qTaskInfo_t; typedef void* DataSinkHandle; + struct SRpcMsg; struct SSubplan; @@ -118,7 +119,7 @@ int32_t qSetSMAInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, * @param isAdd * @return */ -int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd); +int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd, SArray* pList); /** * Create the exec task object according to task json @@ -162,6 +163,7 @@ void qCleanExecTaskBlockBuf(qTaskInfo_t tinfo); * @return */ int32_t qAsyncKillTask(qTaskInfo_t tinfo, int32_t rspCode); + int32_t qKillTask(qTaskInfo_t tinfo, int32_t rspCode); bool qTaskIsExecuting(qTaskInfo_t qinfo); @@ -181,21 +183,11 @@ int32_t qSerializeTaskStatus(qTaskInfo_t tinfo, char** pOutput, int32_t* len); int32_t qDeserializeTaskStatus(qTaskInfo_t tinfo, const char* pInput, int32_t len); STimeWindow getAlignQueryTimeWindow(SInterval* pInterval, int32_t precision, int64_t key); -/** - * return the scan info, in the form of tuple of two items, including table uid and current timestamp - * @param tinfo - * @param uid - * @param ts - * @return - */ -int32_t qGetStreamScanStatus(qTaskInfo_t tinfo, uint64_t* uid, int64_t* ts); -int32_t qStreamPrepareTsdbScan(qTaskInfo_t tinfo, uint64_t uid, int64_t ts); +SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo); int32_t qStreamPrepareScan(qTaskInfo_t tinfo, STqOffsetVal* pOffset, int8_t subType); -// int32_t qStreamScanMemData(qTaskInfo_t tinfo, const SSubmitReq* pReq, int64_t ver); -// int32_t qStreamSetScanMemData(qTaskInfo_t tinfo, SPackedData submit); void qStreamSetOpen(qTaskInfo_t tinfo); diff --git a/include/libs/qcom/query.h b/include/libs/qcom/query.h index b6ada5a0c7..cfc6ef2025 100644 --- a/include/libs/qcom/query.h +++ b/include/libs/qcom/query.h @@ -194,6 +194,7 @@ typedef struct SRequestConnInfo { typedef void (*__freeFunc)(void* param); 
+// todo add creator/destroyer function typedef struct SMsgSendInfo { __async_send_cb_fn_t fp; // async callback function STargetInfo target; // for update epset diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 5b1d1fa1bc..d9b82c8c59 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -225,15 +225,15 @@ static FORCE_INLINE void* streamQueueCurItem(SStreamQueue* queue) { void* streamQueueNextItem(SStreamQueue* queue); -SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit); +SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit, int32_t type); void streamDataSubmitDestroy(SStreamDataSubmit2* pDataSubmit); SStreamDataSubmit2* streamSubmitBlockClone(SStreamDataSubmit2* pSubmit); typedef struct { - char* qmsg; - // followings are not applicable to encoder and decoder - void* executor; + char* qmsg; + void* pExecutor; // not applicable to encoder and decoder + struct STqReader* pTqReader; // not applicable to encoder and decoder } STaskExec; typedef struct { @@ -280,16 +280,20 @@ typedef struct { SEpSet epSet; } SStreamChildEpInfo; -struct SStreamTask { - int64_t streamId; - int32_t taskId; - int32_t totalLevel; - int8_t taskLevel; - int8_t outputType; - int16_t dispatchMsgType; +typedef struct SStreamId { + int64_t streamId; + int32_t taskId; + const char* idStr; +} SStreamId; - int8_t taskStatus; - int8_t schedStatus; +struct SStreamTask { + SStreamId id; + int32_t totalLevel; + int8_t taskLevel; + int8_t outputType; + int16_t dispatchMsgType; + int8_t taskStatus; + int8_t schedStatus; // node info int32_t selfChildId; @@ -319,11 +323,8 @@ struct SStreamTask { STaskSinkFetch fetchSink; }; - int8_t inputStatus; - int8_t outputStatus; - - // STaosQueue* inputQueue1; - // STaosQall* inputQall; + int8_t inputStatus; + int8_t outputStatus; SStreamQueue* inputQueue; SStreamQueue* outputQueue; @@ -345,8 +346,8 @@ struct SStreamTask { SArray* checkReqIds; // shuffle int32_t refCnt; - int64_t 
checkpointingId; - int32_t checkpointAlignCnt; + int64_t checkpointingId; + int32_t checkpointAlignCnt; }; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -355,8 +356,9 @@ int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo); SStreamTask* tNewSStreamTask(int64_t streamId); int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask); -void tFreeSStreamTask(SStreamTask* pTask); +void tFreeStreamTask(SStreamTask* pTask); int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem); +bool tInputQueueIsFull(const SStreamTask* pTask); static FORCE_INLINE void streamTaskInputFail(SStreamTask* pTask) { atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED); diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index ccbc53fa5d..fdd21c7092 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -138,6 +138,7 @@ typedef struct { int8_t enableRef; } SWalFilterCond; +// todo hide this struct typedef struct { SWal *pWal; int64_t readerId; diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index 86db35b412..41f87379a9 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -36,14 +36,6 @@ extern "C" { #include "tconfig.h" -#define CHECK_CODE_GOTO(expr, label) \ - do { \ - code = expr; \ - if (TSDB_CODE_SUCCESS != code) { \ - goto label; \ - } \ - } while (0) - #define ERROR_MSG_BUF_DEFAULT_SIZE 512 #define HEARTBEAT_INTERVAL 1500 // ms @@ -286,28 +278,7 @@ static FORCE_INLINE SReqResultInfo* tmqGetCurResInfo(TAOS_RES* res) { return (SReqResultInfo*)&msg->resInfo; } -static FORCE_INLINE SReqResultInfo* tmqGetNextResInfo(TAOS_RES* res, bool convertUcs4) { - SMqRspObj* pRspObj = (SMqRspObj*)res; - pRspObj->resIter++; - - if (pRspObj->resIter < pRspObj->rsp.blockNum) { - SRetrieveTableRsp* pRetrieve = 
(SRetrieveTableRsp*)taosArrayGetP(pRspObj->rsp.blockData, pRspObj->resIter); - if (pRspObj->rsp.withSchema) { - SSchemaWrapper* pSW = (SSchemaWrapper*)taosArrayGetP(pRspObj->rsp.blockSchema, pRspObj->resIter); - setResSchemaInfo(&pRspObj->resInfo, pSW->pSchema, pSW->nCols); - taosMemoryFreeClear(pRspObj->resInfo.row); - taosMemoryFreeClear(pRspObj->resInfo.pCol); - taosMemoryFreeClear(pRspObj->resInfo.length); - taosMemoryFreeClear(pRspObj->resInfo.convertBuf); - taosMemoryFreeClear(pRspObj->resInfo.convertJson); - } - - setQueryResultFromRsp(&pRspObj->resInfo, pRetrieve, convertUcs4, false); - return &pRspObj->resInfo; - } - - return NULL; -} +SReqResultInfo* tmqGetNextResInfo(TAOS_RES* res, bool convertUcs4); static FORCE_INLINE SReqResultInfo* tscGetCurResInfo(TAOS_RES* res) { if (TD_RES_QUERY(res)) return &(((SRequestObj*)res)->body.resInfo); @@ -320,7 +291,6 @@ extern int32_t clientConnRefPool; extern int32_t timestampDeltaLimit; extern int64_t lastClusterId; - __async_send_cb_fn_t getMsgRspHandle(int32_t msgType); SMsgSendInfo* buildMsgInfoImpl(SRequestObj* pReqObj); @@ -373,7 +343,6 @@ void taos_close_internal(void* taos); // global, called by mgmt int hbMgrInit(); void hbMgrCleanUp(); -int hbHandleRsp(SClientHbBatchRsp* hbRsp); // cluster level SAppHbMgr* appHbMgrInit(SAppInstInfo* pAppInstInfo, char* key); @@ -386,9 +355,6 @@ void stopAllRequests(SHashObj* pRequests); int hbRegisterConn(SAppHbMgr* pAppHbMgr, int64_t tscRefId, int64_t clusterId, int8_t connType); void hbDeregisterConn(SAppHbMgr* pAppHbMgr, SClientHbKey connKey); -// --- mq -void hbMgrInitMqHbRspHandle(); - typedef struct SSqlCallbackWrapper { SParseContext* pParseCtx; SCatalogReq* pCatalogReq; diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 9e5d9080b4..8ced67c9b2 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1039,8 +1039,7 @@ static int32_t asyncExecSchQuery(SRequestObj* pRequest, SQuery* pQuery, SMetaDat .sysInfo = 
pRequest->pTscObj->sysInfo, .allocatorId = pRequest->allocatorRefId}; - SAppInstInfo* pAppInfo = getAppInfo(pRequest); - SQueryPlan* pDag = NULL; + SQueryPlan* pDag = NULL; int64_t st = taosGetTimestampUs(); int32_t code = qCreateQueryPlan(&cxt, &pDag, pMnodeList); @@ -1052,7 +1051,6 @@ static int32_t asyncExecSchQuery(SRequestObj* pRequest, SQuery* pQuery, SMetaDat } pRequest->metric.execStart = taosGetTimestampUs(); - pRequest->metric.planCostUs = pRequest->metric.execStart - st; if (TSDB_CODE_SUCCESS == code && !pRequest->validateOnly) { diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index e295ec93af..f90c004eec 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -154,7 +154,7 @@ typedef struct { typedef struct { int8_t tmqRspType; - int32_t epoch; // epoch can be used to guard the vgHandle + int32_t epoch; // epoch can be used to guard the vgHandle int32_t vgId; SMqClientVg* vgHandle; SMqClientTopic* topicHandle; @@ -210,6 +210,11 @@ typedef struct { tmq_t* pTmq; } SMqCommitCbParam; +typedef struct SSyncCommitInfo { + tsem_t sem; + int32_t code; +} SSyncCommitInfo; + static int32_t doAskEp(tmq_t* tmq); static int32_t makeTopicVgroupKey(char* dst, const char* topicName, int32_t vg); static int32_t tmqCommitDone(SMqCommitCbParamSet* pParamSet); @@ -521,11 +526,7 @@ static int32_t doSendCommitMsg(tmq_t* tmq, SMqClientVg* pVg, const char* pTopicN return TSDB_CODE_OUT_OF_MEMORY; } - pMsgSendInfo->msgInfo = (SDataBuf){ - .pData = buf, - .len = sizeof(SMsgHead) + len, - .handle = NULL, - }; + pMsgSendInfo->msgInfo = (SDataBuf) { .pData = buf, .len = sizeof(SMsgHead) + len, .handle = NULL }; pMsgSendInfo->requestId = generateRequestId(); pMsgSendInfo->requestObjRefId = 0; @@ -786,11 +787,7 @@ void tmqSendHbReq(void* param, void* tmrId) { goto OVER; } - sendInfo->msgInfo = (SDataBuf){ - .pData = pReq, - .len = tlen, - .handle = NULL, - }; + sendInfo->msgInfo = (SDataBuf){ .pData = pReq, .len = tlen, .handle = 
NULL }; sendInfo->requestId = generateRequestId(); sendInfo->requestObjRefId = 0; @@ -2115,11 +2112,6 @@ void tmq_commit_async(tmq_t* tmq, const TAOS_RES* pRes, tmq_commit_cb* cb, void* } } -typedef struct SSyncCommitInfo { - tsem_t sem; - int32_t code; -} SSyncCommitInfo; - static void commitCallBackFn(tmq_t *pTmq, int32_t code, void* param) { SSyncCommitInfo* pInfo = (SSyncCommitInfo*) param; pInfo->code = code; @@ -2298,3 +2290,26 @@ void commitRspCountDown(SMqCommitCbParamSet* pParamSet, int64_t consumerId, cons waitingRspNum); } } + +SReqResultInfo* tmqGetNextResInfo(TAOS_RES* res, bool convertUcs4) { + SMqRspObj* pRspObj = (SMqRspObj*)res; + pRspObj->resIter++; + + if (pRspObj->resIter < pRspObj->rsp.blockNum) { + SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)taosArrayGetP(pRspObj->rsp.blockData, pRspObj->resIter); + if (pRspObj->rsp.withSchema) { + SSchemaWrapper* pSW = (SSchemaWrapper*)taosArrayGetP(pRspObj->rsp.blockSchema, pRspObj->resIter); + setResSchemaInfo(&pRspObj->resInfo, pSW->pSchema, pSW->nCols); + taosMemoryFreeClear(pRspObj->resInfo.row); + taosMemoryFreeClear(pRspObj->resInfo.pCol); + taosMemoryFreeClear(pRspObj->resInfo.length); + taosMemoryFreeClear(pRspObj->resInfo.convertBuf); + taosMemoryFreeClear(pRspObj->resInfo.convertJson); + } + + setQueryResultFromRsp(&pRspObj->resInfo, pRetrieve, convertUcs4, false); + return &pRspObj->resInfo; + } + + return NULL; +} \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index fb81a764f1..e221a64619 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -158,7 +158,10 @@ void tFreeStreamObj(SStreamObj *pStream) { taosMemoryFree(pStream->sql); taosMemoryFree(pStream->ast); taosMemoryFree(pStream->physicalPlan); - if (pStream->outputSchema.nCols) taosMemoryFree(pStream->outputSchema.pSchema); + + if (pStream->outputSchema.nCols) { + taosMemoryFree(pStream->outputSchema.pSchema); + } int32_t sz 
= taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < sz; i++) { @@ -166,11 +169,14 @@ void tFreeStreamObj(SStreamObj *pStream) { int32_t taskSz = taosArrayGetSize(pLevel); for (int32_t j = 0; j < taskSz; j++) { SStreamTask *pTask = taosArrayGetP(pLevel, j); - tFreeSStreamTask(pTask); + tFreeStreamTask(pTask); } + taosArrayDestroy(pLevel); } + taosArrayDestroy(pStream->tasks); + // tagSchema.pSchema if (pStream->tagSchema.nCols > 0) { taosMemoryFree(pStream->tagSchema.pSchema); diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index d1671aa12a..504749df49 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -138,7 +138,7 @@ int32_t mndAddDispatcherToInnerTask(SMnode* pMnode, SStreamObj* pStream, SStream for (int32_t j = 0; j < sinkLvSize; j++) { SStreamTask* pLastLevelTask = taosArrayGetP(sinkLv, j); if (pLastLevelTask->nodeId == pVgInfo->vgId) { - pVgInfo->taskId = pLastLevelTask->taskId; + pVgInfo->taskId = pLastLevelTask->id.taskId; break; } } @@ -149,7 +149,7 @@ int32_t mndAddDispatcherToInnerTask(SMnode* pMnode, SStreamObj* pStream, SStream SArray* pArray = taosArrayGetP(pStream->tasks, 0); // one sink only SStreamTask* lastLevelTask = taosArrayGetP(pArray, 0); - pTask->fixedEpDispatcher.taskId = lastLevelTask->taskId; + pTask->fixedEpDispatcher.taskId = lastLevelTask->id.taskId; pTask->fixedEpDispatcher.nodeId = lastLevelTask->nodeId; pTask->fixedEpDispatcher.epSet = lastLevelTask->epSet; } @@ -440,7 +440,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; pTask->outputType = TASK_OUTPUT__FIXED_DISPATCH; - pTask->fixedEpDispatcher.taskId = pInnerTask->taskId; + pTask->fixedEpDispatcher.taskId = pInnerTask->id.taskId; pTask->fixedEpDispatcher.nodeId = pInnerTask->nodeId; pTask->fixedEpDispatcher.epSet = pInnerTask->epSet; @@ -460,7 +460,7 @@ int32_t mndScheduleStream(SMnode* 
pMnode, SStreamObj* pStream) { pEpInfo->childId = pTask->selfChildId; pEpInfo->epSet = pTask->epSet; pEpInfo->nodeId = pTask->nodeId; - pEpInfo->taskId = pTask->taskId; + pEpInfo->taskId = pTask->id.taskId; taosArrayPush(pInnerTask->childEpInfo, &pEpInfo); sdbRelease(pSdb, pVgroup); } diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index b7f80f6b0e..ab83f29ef9 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -600,7 +600,7 @@ static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) { return -1; } pReq->head.vgId = htonl(pTask->nodeId); - pReq->taskId = pTask->taskId; + pReq->taskId = pTask->id.taskId; STransAction action = {0}; memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet)); action.pCont = pReq; @@ -1208,7 +1208,7 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock // task id pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); - colDataSetVal(pColInfo, numOfRows, (const char *)&pTask->taskId, false); + colDataSetVal(pColInfo, numOfRows, (const char *)&pTask->id.taskId, false); // node type char nodeType[20 + VARSTR_HEADER_SIZE] = {0}; diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 3d1b356f8c..447c90eb58 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -83,14 +83,11 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { return -1; } - SReadHandle mgHandle = { - .vnode = NULL, - .numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo), - .pStateBackend = pTask->pState, - }; + int32_t numOfChildEp = taosArrayGetSize(pTask->childEpInfo); + SReadHandle mgHandle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState }; - pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, 0); - ASSERT(pTask->exec.executor); + pTask->exec.pExecutor = 
qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, 0); + ASSERT(pTask->exec.pExecutor); streamSetupTrigger(pTask); return 0; diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index a9e5fe628b..d62eebd2e1 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -256,15 +256,15 @@ void tqCloseReader(STqReader *); void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList); int32_t tqReaderSetTbUidList(STqReader *pReader, const SArray *tbUidList); -int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *tbUidList); +int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList); int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList); int32_t tqSeekVer(STqReader *pReader, int64_t ver, const char *id); void tqNextBlock(STqReader *pReader, SFetchRet *ret); -int32_t tqReaderSetSubmitReq2(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); +int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); // int32_t tqReaderSetDataMsg(STqReader *pReader, const SSubmitReq *pMsg, int64_t ver); -bool tqNextDataBlock2(STqReader *pReader); +bool tqNextDataBlock(STqReader *pReader); bool tqNextDataBlockFilterOut2(STqReader *pReader, SHashObj *filterOutUids); int32_t tqRetrieveDataBlock2(SSDataBlock *pBlock, STqReader *pReader, SSubmitTbData **pSubmitTbDataRet); int32_t tqRetrieveTaosxBlock2(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 9037644602..884c01d397 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -80,7 +80,7 @@ typedef struct { typedef struct { int8_t subType; - STqReader* pExecReader; + STqReader* pTqReader; qTaskInfo_t task; union { STqExecCol execCol; diff --git a/source/dnode/vnode/src/meta/metaCache.c b/source/dnode/vnode/src/meta/metaCache.c index 
9501bf4b8e..795f281ab2 100644 --- a/source/dnode/vnode/src/meta/metaCache.c +++ b/source/dnode/vnode/src/meta/metaCache.c @@ -531,10 +531,11 @@ static void freePayload(const void* key, size_t keyLen, void* value) { return; } - SHashObj* pHashObj = (SHashObj*)p[0]; + SHashObj* pHashObj = (SHashObj*)p[0]; + STagFilterResEntry** pEntry = taosHashGet(pHashObj, &p[1], sizeof(uint64_t)); - { + if (pEntry != NULL && (*pEntry) != NULL) { int64_t st = taosGetTimestampUs(); SListIter iter = {0}; @@ -547,9 +548,9 @@ static void freePayload(const void* key, size_t keyLen, void* value) { void* tmp = tdListPopNode(&((*pEntry)->list), pNode); taosMemoryFree(tmp); - int64_t et = taosGetTimestampUs(); - metaInfo("clear items in cache, remain cached item:%d, elapsed time:%.2fms", listNEles(&((*pEntry)->list)), - (et - st) / 1000.0); + double el = (taosGetTimestampUs() - st) / 1000.0; + metaInfo("clear items in meta-cache, remain cached item:%d, elapsed time:%.2fms", listNEles(&((*pEntry)->list)), + el); break; } } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index c75c675ec3..8aeb705d90 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -168,7 +168,7 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pRSmaInfo->taskInfo[i]) { - if ((terrno = qUpdateQualifiedTableId(pRSmaInfo->taskInfo[i], tbUids, isAdd)) < 0) { + if ((terrno = qUpdateTableListForStreamScanner(pRSmaInfo->taskInfo[i], tbUids, isAdd, NULL)) < 0) { tdReleaseRSmaInfo(pSma, pRSmaInfo); smaError("vgId:%d, update tbUidList failed for uid:%" PRIi64 " level %d since %s", SMA_VID(pSma), *suid, i, terrstr()); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 66e6ac4cd8..44d5e26603 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -15,6 +15,8 @@ #include "tq.h" +#define 
IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) + int32_t tqInit() { int8_t old; while (1) { @@ -57,12 +59,12 @@ static void destroyTqHandle(void* data) { if (pData->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { taosMemoryFreeClear(pData->execHandle.execCol.qmsg); } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__DB) { - tqCloseReader(pData->execHandle.pExecReader); + tqCloseReader(pData->execHandle.pTqReader); walCloseReader(pData->pWalReader); taosHashCleanup(pData->execHandle.execDb.pFilterOutTbUid); } else if (pData->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { walCloseReader(pData->pWalReader); - tqCloseReader(pData->execHandle.pExecReader); + tqCloseReader(pData->execHandle.pTqReader); } } @@ -78,12 +80,33 @@ static void tqPushEntryFree(void* data) { taosMemoryFree(p); } +static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) { + return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG && + pLeft->val.version <= pRight->val.version; +} + +// stream_task:stream_id:task_id +static void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId) { + int32_t n = 12; + char* p = dst; + + memcpy(p, "stream_task:", n); + p += n; + + int32_t inc = tintToHex(streamId, p); + p += inc; + + *(p++) = ':'; + tintToHex(taskId, p); +} + STQ* tqOpen(const char* path, SVnode* pVnode) { STQ* pTq = taosMemoryCalloc(1, sizeof(STQ)); if (pTq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } + pTq->path = taosStrdup(path); pTq->pVnode = pVnode; pTq->walLogLastVer = pVnode->pWal->vers.lastVer; @@ -249,11 +272,6 @@ int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, con return 0; } -static FORCE_INLINE bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) { - return pLeft->val.type == TMQ_OFFSET__LOG && pRight->val.type == TMQ_OFFSET__LOG && - pLeft->val.version <= pRight->val.version; -} - int32_t tqProcessOffsetCommitReq(STQ* pTq, int64_t sversion, char* msg, int32_t 
msgLen) { STqOffset offset = {0}; int32_t vgId = TD_VID(pTq->pVnode); @@ -432,8 +450,6 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand return 0; } -#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) - static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg, STqOffsetVal* pOffset) { uint64_t consumerId = pRequest->consumerId; @@ -805,10 +821,10 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg qCreateQueueExecTaskInfo(pHandle->execHandle.execCol.qmsg, &handle, vgId, &pHandle->execHandle.numOfCols, req.newConsumerId); void* scanner = NULL; qExtractStreamScanner(pHandle->execHandle.task, &scanner); - pHandle->execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner); + pHandle->execHandle.pTqReader = qExtractReaderFromStreamScanner(scanner); } else if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) { pHandle->pWalReader = walOpenReader(pVnode->pWal, NULL); - pHandle->execHandle.pExecReader = tqOpenReader(pVnode); + pHandle->execHandle.pTqReader = tqOpenReader(pVnode); pHandle->execHandle.execDb.pFilterOutTbUid = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); @@ -827,8 +843,8 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i); tqDebug("vgId:%d, idx %d, uid:%" PRId64, vgId, i, tbUid); } - pHandle->execHandle.pExecReader = tqOpenReader(pVnode); - tqReaderSetTbUidList(pHandle->execHandle.pExecReader, tbUidList); + pHandle->execHandle.pTqReader = tqOpenReader(pVnode); + tqReaderSetTbUidList(pHandle->execHandle.pTqReader, tbUidList); taosArrayDestroy(tbUidList); buildSnapContext(handle.meta, handle.version, req.suid, pHandle->execHandle.subType, pHandle->fetchMeta, @@ -886,16 +902,14 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg } int32_t tqExpandTask(STQ* pTq, 
SStreamTask* pTask, int64_t ver) { -#if 0 - if (pTask->taskLevel == TASK_LEVEL__AGG) { - A(taosArrayGetSize(pTask->childEpInfo) != 0); - } -#endif + // todo extract method + char buf[128] = {0}; + sprintf(buf, "0x%"PRIx64"-%d", pTask->id.streamId, pTask->id.taskId); int32_t vgId = TD_VID(pTq->pVnode); + pTask->id.idStr = taosStrdup(buf); pTask->refCnt = 1; pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE; - pTask->inputQueue = streamQueueOpen(); pTask->outputQueue = streamQueueOpen(); @@ -920,14 +934,10 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { } SReadHandle handle = { - .meta = pTq->pVnode->pMeta, - .vnode = pTq->pVnode, - .initTqReader = 1, - .pStateBackend = pTask->pState, - }; + .meta = pTq->pVnode->pMeta, .vnode = pTq->pVnode, .initTqReader = 1, .pStateBackend = pTask->pState}; - pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId); - if (pTask->exec.executor == NULL) { + pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId); + if (pTask->exec.pExecutor == NULL) { return -1; } @@ -936,14 +946,12 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { if (pTask->pState == NULL) { return -1; } - SReadHandle mgHandle = { - .vnode = NULL, - .numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo), - .pStateBackend = pTask->pState, - }; - pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, vgId); - if (pTask->exec.executor == NULL) { + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo); + SReadHandle mgHandle = { .vnode = NULL, .numOfVgroups = numOfVgroups, .pStateBackend = pTask->pState}; + + pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &mgHandle, vgId); + if (pTask->exec.pExecutor == NULL) { return -1; } } @@ -964,15 +972,26 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { ver1 = info.skmVer; } - pTask->tbSink.pTSchema = - tBuildTSchema(pTask->tbSink.pSchemaWrapper->pSchema, 
pTask->tbSink.pSchemaWrapper->nCols, ver1); + SSchemaWrapper* pschemaWrapper = pTask->tbSink.pSchemaWrapper; + pTask->tbSink.pTSchema = tBuildTSchema(pschemaWrapper->pSchema, pschemaWrapper->nCols, ver1); if(pTask->tbSink.pTSchema == NULL) { return -1; } } + if (pTask->taskLevel == TASK_LEVEL__SOURCE) { + pTask->exec.pTqReader = tqOpenReader(pTq->pVnode); + if (pTask->exec.pTqReader == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + SArray* pList = qGetQueriedTableListInfo(pTask->exec.pExecutor); + tqReaderAddTbUidList(pTask->exec.pTqReader, pList); + } + streamSetupTrigger(pTask); - tqInfo("expand stream task on vg %d, task id %d, child id %d, level %d", vgId, pTask->taskId, pTask->selfChildId, pTask->taskLevel); + tqInfo("vgId:%d expand stream task, s-task:%s, child id %d, level %d", vgId, pTask->id.idStr, pTask->selfChildId, pTask->taskLevel); return 0; } @@ -995,6 +1014,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { .upstreamNodeId = req.upstreamNodeId, .upstreamTaskId = req.upstreamTaskId, }; + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask && atomic_load_8(&pTask->taskStatus) == TASK_STATUS__NORMAL) { rsp.status = 1; @@ -1085,6 +1105,7 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms taosMemoryFree(pTask); return -1; } + tDecoderClear(&decoder); // 2.save task @@ -1298,7 +1319,7 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { SStreamTask* pTask = *(SStreamTask**)pIter; if (pTask->taskLevel != TASK_LEVEL__SOURCE) continue; - qDebug("delete req enqueue stream task: %d, ver: %" PRId64, pTask->taskId, ver); + qDebug("delete req enqueue stream task: %d, ver: %" PRId64, pTask->id.taskId, ver); if (!failed) { SStreamRefDataBlock* pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); @@ -1308,7 +1329,7 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { 
atomic_add_fetch_32(pRefBlock->dataRef, 1); if (tAppendDataForStream(pTask, (SStreamQueueItem*)pRefBlock) < 0) { - qError("stream task input del failed, task id %d", pTask->taskId); + qError("stream task input del failed, task id %d", pTask->id.taskId); atomic_sub_fetch_32(pRef, 1); taosFreeQitem(pRefBlock); @@ -1316,7 +1337,7 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { } if (streamSchedExec(pTask) < 0) { - qError("stream task launch failed, task id %d", pTask->taskId); + qError("stream task launch failed, task id %d", pTask->id.taskId); continue; } @@ -1343,12 +1364,12 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { if (!failed) { if (tAppendDataForStream(pTask, (SStreamQueueItem*)pStreamBlock) < 0) { - qError("stream task input del failed, task id %d", pTask->taskId); + qError("stream task input del failed, task id %d", pTask->id.taskId); continue; } if (streamSchedExec(pTask) < 0) { - qError("stream task launch failed, task id %d", pTask->taskId); + qError("stream task launch failed, task id %d", pTask->id.taskId); continue; } } else { @@ -1361,15 +1382,82 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { return 0; } +static int32_t doAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver) { + int32_t code = tAppendDataForStream(pTask, pQueueItem); + if (code < 0) { + tqError("s-task:%s failed to put into queue, too many, next start ver:%" PRId64, pTask->id.idStr, ver); + return -1; + } + + if (streamSchedExec(pTask) < 0) { + tqError("stream task:%d failed to be launched, code:%s", pTask->id.taskId, tstrerror(terrno)); + return -1; + } + + return TSDB_CODE_SUCCESS; +} + +static void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver) { + STqOffset offset = {0}; + tqOffsetResetToLog(&offset.val, ver); + + tstrncpy(offset.subKey, pKey, tListLen(offset.subKey)); + + // keep the offset info in the offset store + 
tqOffsetWrite(pOffsetStore, &offset); +} + +static int32_t addSubmitBlockNLaunchTask(STqOffsetStore* pOffsetStore, SStreamTask* pTask, SStreamDataSubmit2* pSubmit, + const char* key, int64_t ver) { + doSaveTaskOffset(pOffsetStore, key, ver); + int32_t code = doAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pSubmit, ver); + + // remove the offset, if all functions are completed successfully. + if (code == TSDB_CODE_SUCCESS) { + tqOffsetDelete(pOffsetStore, key); + } + return TSDB_CODE_SUCCESS; +} + +static void saveOffsetForAllTasks(STQ* pTq, SPackedData submit) { + void* pIter = NULL; + + while(1) { + pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->taskLevel != TASK_LEVEL__SOURCE) { + continue; + } + + if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, + pTask->taskStatus); + continue; + } + + char key[128] = {0}; + createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); + + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); + if (pOffset == NULL) { + doSaveTaskOffset(pTq->pOffsetStore, key, submit.ver); + } + } +} + int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { void* pIter = NULL; - bool succ = true; - SStreamDataSubmit2* pSubmit = streamDataSubmitNew(submit); + SStreamDataSubmit2* pSubmit = streamDataSubmitNew(submit, STREAM_INPUT__DATA_SUBMIT); if (pSubmit == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; tqError("failed to create data submit for stream since out of memory"); - succ = false; + saveOffsetForAllTasks(pTq, submit); + return -1; } while (1) { @@ -1384,35 +1472,85 @@ int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { } if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { - tqDebug("stream 
task:%d skip push data, not ready for processing, status %d", pTask->taskId, pTask->taskStatus); + tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, + pTask->taskStatus); continue; } - tqDebug("data submit enqueue stream task:%d, ver: %" PRId64, pTask->taskId, submit.ver); - if (succ) { - int32_t code = tAppendDataForStream(pTask, (SStreamQueueItem*)pSubmit); - if (code < 0) { - // let's handle the back pressure + // check if offset value exists + char key[128] = {0}; + createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); - tqError("stream task:%d failed to put into queue for, too many", pTask->taskId); - continue; + if (tInputQueueIsFull(pTask)) { + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); + + int64_t ver = submit.ver; + if (pOffset == NULL) { + doSaveTaskOffset(pTq->pOffsetStore, key, submit.ver); + } else { + ver = pOffset->val.version; } - if (streamSchedExec(pTask) < 0) { - tqError("stream task:%d launch failed, code:%s", pTask->taskId, tstrerror(terrno)); - continue; + tqDebug("s-task:%s input queue is full, do nothing, start ver:%" PRId64, pTask->id.idStr, ver); + continue; + } + + // check if offset value exists + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); + if (pOffset != NULL) { + // seek the stored version and extract data from WAL + int32_t code = tqSeekVer(pTask->exec.pTqReader, pOffset->val.version, ""); + + // all data has been retrieved from WAL, let's try submit block directly. 
+ if (code == TSDB_CODE_SUCCESS) { // all data retrieved, abort + // append the data for the stream + SFetchRet ret = {0}; + terrno = 0; + + tqNextBlock(pTask->exec.pTqReader, &ret); + if (ret.fetchType == FETCH_TYPE__DATA) { + SStreamDataBlock* pBlocks = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); + if (pBlocks == NULL) { // failed, do nothing + terrno = TSDB_CODE_OUT_OF_MEMORY; + continue; + } + + ret.data.info.type = STREAM_NORMAL; + pBlocks->type = STREAM_INPUT__DATA_BLOCK; + pBlocks->sourceVer = pOffset->val.version; + pBlocks->blocks = taosArrayInit(0, sizeof(SSDataBlock)); + taosArrayPush(pBlocks->blocks, &ret.data); + + int64_t* ts = (int64_t*)(((SColumnInfoData*)ret.data.pDataBlock->pData)->pData); +// tqDebug("-----------%ld\n", ts[0]); + + code = doAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pBlocks, pBlocks->sourceVer); + if (code == TSDB_CODE_SUCCESS) { + pOffset->val.version = pTask->exec.pTqReader->pWalReader->curVersion; + tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, + pOffset->val.version); + } + + } else { // FETCH_TYPE__NONE, let's try submit block directly + tqDebug("s-task:%s data in WAL are all consumed, try data in submit message", pTask->id.idStr); + addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); + } + + // do nothing if failed, since the offset value is kept already + } else { // failed to seek to the WAL version + // todo handle the case where offset has been deleted in WAL, due to stream computing too slow + tqDebug("s-task:%s data in WAL are all consumed, try data in submit msg", pTask->id.idStr); + addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); } } else { - streamTaskInputFail(pTask); + addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); } } - if (pSubmit != NULL) { - streamDataSubmitDestroy(pSubmit); - taosFreeQitem(pSubmit); - } + streamDataSubmitDestroy(pSubmit); 
+ taosFreeQitem(pSubmit); - return succ ? 0 : -1; + return 0; } int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { @@ -1420,6 +1558,7 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { int32_t taskId = pReq->taskId; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask) { + tqDebug("stream task:%d start to process run req", pTask->id.taskId); streamProcessRunReq(pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; @@ -1456,7 +1595,7 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t taskId = ntohl(pRsp->upstreamTaskId); SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); - tqDebug("recv dispatch rsp, code: %x", pMsg->code); + tqDebug("recv dispatch rsp, code:%x", pMsg->code); if (pTask) { streamProcessDispatchRsp(pTask, pRsp, pMsg->code); streamMetaReleaseTask(pTq->pStreamMeta, pTask); @@ -1484,10 +1623,7 @@ int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) { int32_t taskId = req.dstTaskId; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask) { - SRpcMsg rsp = { - .info = pMsg->info, - .code = 0, - }; + SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; streamProcessRetrieveReq(pTask, &req, &rsp); streamMetaReleaseTask(pTq->pStreamMeta, pTask); tDeleteStreamRetrieveReq(&req); @@ -1523,10 +1659,7 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask) { - SRpcMsg rsp = { - .info = pMsg->info, - .code = 0, - }; + SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; streamProcessDispatchReq(pTask, &req, &rsp, false); streamMetaReleaseTask(pTq->pStreamMeta, pTask); rpcFreeCont(pMsg->pCont); @@ -1543,10 +1676,7 @@ FAIL: SMsgHead* pRspHead = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); if (pRspHead == NULL) { - SRpcMsg rsp = { - .code = TSDB_CODE_OUT_OF_MEMORY, - .info = 
pMsg->info, - }; + SRpcMsg rsp = { .code = TSDB_CODE_OUT_OF_MEMORY, .info = pMsg->info }; tqDebug("send dispatch error rsp, code: %x", code); tmsgSendRsp(&rsp); rpcFreeCont(pMsg->pCont); @@ -1564,11 +1694,7 @@ FAIL: pRsp->inputStatus = TASK_OUTPUT_STATUS__NORMAL; SRpcMsg rsp = { - .code = code, - .info = pMsg->info, - .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), - .pCont = pRspHead, - }; + .code = code, .info = pMsg->info, .contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp), .pCont = pRspHead}; tqDebug("send dispatch error rsp, code: %x", code); tmsgSendRsp(&rsp); rpcFreeCont(pMsg->pCont); diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index a273f2edec..8f8ee78e97 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -320,15 +320,15 @@ int32_t tqMetaRestoreHandle(STQ* pTq) { code = -1; goto end; } - handle.execHandle.pExecReader = qExtractReaderFromStreamScanner(scanner); - if (handle.execHandle.pExecReader == NULL) { + handle.execHandle.pTqReader = qExtractReaderFromStreamScanner(scanner); + if (handle.execHandle.pTqReader == NULL) { tqError("cannot extract exec reader for %s", handle.subKey); code = -1; goto end; } } else if (handle.execHandle.subType == TOPIC_SUB_TYPE__DB) { handle.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); - handle.execHandle.pExecReader = tqOpenReader(pTq->pVnode); + handle.execHandle.pTqReader = tqOpenReader(pTq->pVnode); buildSnapContext(reader.meta, reader.version, 0, handle.execHandle.subType, handle.fetchMeta, (SSnapContext**)(&reader.sContext)); @@ -343,8 +343,8 @@ int32_t tqMetaRestoreHandle(STQ* pTq) { int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i); tqDebug("vgId:%d, idx %d, uid:%" PRId64, vgId, i, tbUid); } - handle.execHandle.pExecReader = tqOpenReader(pTq->pVnode); - tqReaderSetTbUidList(handle.execHandle.pExecReader, tbUidList); + handle.execHandle.pTqReader = tqOpenReader(pTq->pVnode); + 
tqReaderSetTbUidList(handle.execHandle.pTqReader, tbUidList); taosArrayDestroy(tbUidList); buildSnapContext(reader.meta, reader.version, handle.execHandle.execTb.suid, handle.execHandle.subType, diff --git a/source/dnode/vnode/src/tq/tqOffset.c b/source/dnode/vnode/src/tq/tqOffset.c index 66d1ac2c7e..e8051a1406 100644 --- a/source/dnode/vnode/src/tq/tqOffset.c +++ b/source/dnode/vnode/src/tq/tqOffset.c @@ -128,31 +128,35 @@ int32_t tqOffsetDelete(STqOffsetStore* pStore, const char* subscribeKey) { } int32_t tqOffsetCommitFile(STqOffsetStore* pStore) { - if (!pStore->needCommit) return 0; + if (!pStore->needCommit) { + return 0; + } + // TODO file name should be with a newer version char* fname = tqOffsetBuildFName(pStore->pTq->path, 0); TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); - - int32_t err = terrno; - const char* errStr = tstrerror(err); - int32_t sysErr = errno; - const char* sysErrStr = strerror(errno); - tqError("vgId:%d, cannot open file %s when commit offset since %s", pStore->pTq->pVnode->config.vgId, fname, - sysErrStr); + const char* err = strerror(errno); + tqError("vgId:%d, failed to open offset file %s, since %s", TD_VID(pStore->pTq->pVnode), fname, err); taosMemoryFree(fname); return -1; } + taosMemoryFree(fname); + void* pIter = NULL; while (1) { pIter = taosHashIterate(pStore->pHash, pIter); - if (pIter == NULL) break; + if (pIter == NULL) { + break; + } + STqOffset* pOffset = (STqOffset*)pIter; int32_t bodyLen; int32_t code; tEncodeSize(tEncodeSTqOffset, pOffset, bodyLen, code); + if (code < 0) { taosHashCancelIterate(pStore->pHash, pIter); return -1; @@ -166,6 +170,7 @@ int32_t tqOffsetCommitFile(STqOffsetStore* pStore) { SEncoder encoder; tEncoderInit(&encoder, abuf, bodyLen); tEncodeSTqOffset(&encoder, pOffset); + // write file int64_t writeLen; if ((writeLen = taosWriteFile(pFile, buf, totLen)) != totLen) { @@ -174,8 +179,10 @@ int32_t 
tqOffsetCommitFile(STqOffsetStore* pStore) { taosMemoryFree(buf); return -1; } + taosMemoryFree(buf); } + // close and rename file taosCloseFile(&pFile); pStore->needCommit = 0; diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index 9dfcf43e4d..dd003aec98 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -331,6 +331,7 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v if (msgType == TDMT_VND_SUBMIT) { void* data = taosMemoryMalloc(len); if (data == NULL) { + // todo: for all stream in this vnode, keep this offset in the offset files, and wait for a moment, and then retry terrno = TSDB_CODE_OUT_OF_MEMORY; tqError("vgId:%d, failed to copy submit data for stream processing, since out of memory", vgId); return -1; @@ -339,7 +340,7 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v memcpy(data, pReq, len); SPackedData submit = {.msgStr = data, .msgLen = len, .ver = ver}; - tqDebug("tq copy write msg %p %d %" PRId64 " from %p", data, len, ver, pReq); + tqDebug("tq copy submit msg:%p len:%d ver:%" PRId64 " from %p for stream", data, len, ver, pReq); tqProcessSubmitReq(pTq, submit); } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 54e4e393ec..24546168e5 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -113,7 +113,7 @@ bool isValValidForTable(STqHandle* pHandle, SWalCont* pHead) { } SMetaReader mr = {0}; - metaReaderInit(&mr, pHandle->execHandle.pExecReader->pVnodeMeta, 0); + metaReaderInit(&mr, pHandle->execHandle.pTqReader->pVnodeMeta, 0); if (metaGetTableEntryByName(&mr, req.tbName) < 0) { metaReaderClear(&mr); @@ -262,8 +262,6 @@ STqReader* tqOpenReader(SVnode* pVnode) { } pReader->pVnodeMeta = pVnode->pMeta; - /*pReader->pMsg = NULL;*/ -// pReader->ver = -1; pReader->pColIdList = NULL; pReader->cachedSchemaVer = 0; pReader->cachedSchemaSuid = 
0; @@ -296,10 +294,10 @@ void tqCloseReader(STqReader* pReader) { int32_t tqSeekVer(STqReader* pReader, int64_t ver, const char* id) { if (walReadSeekVer(pReader->pWalReader, ver) < 0) { - tqDebug("tmq poll: wal reader failed to seek to ver:%"PRId64" code:%s, %s", ver, tstrerror(terrno), id); + tqDebug("wal reader failed to seek to ver:%"PRId64" code:%s, %s", ver, tstrerror(terrno), id); return -1; } - tqDebug("tmq poll: wal reader seek to ver:%"PRId64" %s", ver, id); + tqDebug("wal reader seek to ver:%"PRId64" %s", ver, id); return 0; } @@ -310,26 +308,28 @@ void tqNextBlock(STqReader* pReader, SFetchRet* ret) { ret->fetchType = FETCH_TYPE__NONE; return; } - void* body = POINTER_SHIFT(pReader->pWalReader->pHead->head.body, sizeof(SSubmitReq2Msg)); + + void* pBody = POINTER_SHIFT(pReader->pWalReader->pHead->head.body, sizeof(SSubmitReq2Msg)); int32_t bodyLen = pReader->pWalReader->pHead->head.bodyLen - sizeof(SSubmitReq2Msg); int64_t ver = pReader->pWalReader->pHead->head.version; - tqReaderSetSubmitReq2(pReader, body, bodyLen, ver); + tqReaderSetSubmitMsg(pReader, pBody, bodyLen, ver); } - while (tqNextDataBlock2(pReader)) { + while (tqNextDataBlock(pReader)) { memset(&ret->data, 0, sizeof(SSDataBlock)); int32_t code = tqRetrieveDataBlock2(&ret->data, pReader, NULL); if (code != 0 || ret->data.info.rows == 0) { continue; } + ret->fetchType = FETCH_TYPE__DATA; return; } } } -int32_t tqReaderSetSubmitReq2(STqReader* pReader, void* msgStr, int32_t msgLen, int64_t ver) { +int32_t tqReaderSetSubmitMsg(STqReader* pReader, void* msgStr, int32_t msgLen, int64_t ver) { pReader->msg2.msgStr = msgStr; pReader->msg2.msgLen = msgLen; pReader->msg2.ver = ver; @@ -346,7 +346,7 @@ int32_t tqReaderSetSubmitReq2(STqReader* pReader, void* msgStr, int32_t msgLen, return 0; } -bool tqNextDataBlock2(STqReader* pReader) { +bool tqNextDataBlock(STqReader* pReader) { if (pReader->msg2.msgStr == NULL) { return false; } @@ -355,13 +355,20 @@ bool tqNextDataBlock2(STqReader* pReader) { while 
(pReader->nextBlk < blockSz) { tqDebug("tq reader next data block %p, %d %" PRId64 " %d", pReader->msg2.msgStr, pReader->msg2.msgLen, pReader->msg2.ver, pReader->nextBlk); + SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk); - if (pReader->tbIdHash == NULL) return true; + if (pReader->tbIdHash == NULL) { + return true; + } void* ret = taosHashGet(pReader->tbIdHash, &pSubmitTbData->uid, sizeof(int64_t)); if (ret != NULL) { + tqDebug("tq reader block found, ver:%"PRId64", uid:%"PRId64, pReader->msg2.ver, pSubmitTbData->uid); return true; + } else { + tqDebug("tq reader discard block, uid:%"PRId64", continue", pSubmitTbData->uid); } + pReader->nextBlk++; } @@ -901,7 +908,7 @@ int tqReaderSetTbUidList(STqReader* pReader, const SArray* tbUidList) { return 0; } -int tqReaderAddTbUidList(STqReader* pReader, const SArray* tbUidList) { +int tqReaderAddTbUidList(STqReader* pReader, const SArray* pTableUidList) { if (pReader->tbIdHash == NULL) { pReader->tbIdHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), true, HASH_ENTRY_LOCK); if (pReader->tbIdHash == NULL) { @@ -910,8 +917,9 @@ int tqReaderAddTbUidList(STqReader* pReader, const SArray* tbUidList) { } } - for (int i = 0; i < taosArrayGetSize(tbUidList); i++) { - int64_t* pKey = (int64_t*)taosArrayGet(tbUidList, i); + int32_t numOfTables = taosArrayGetSize(pTableUidList); + for (int i = 0; i < numOfTables; i++) { + int64_t* pKey = (int64_t*)taosArrayGet(pTableUidList, i); taosHashPut(pReader->tbIdHash, pKey, sizeof(int64_t), NULL, 0); } @@ -927,30 +935,34 @@ int tqReaderRemoveTbUidList(STqReader* pReader, const SArray* tbUidList) { return 0; } +// todo update the table list in wal reader int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { - void* pIter = NULL; + void* pIter = NULL; + int32_t vgId = TD_VID(pTq->pVnode); + + // update the table list for each consumer handle while (1) { pIter = taosHashIterate(pTq->pHandle, pIter); if 
(pIter == NULL) { break; } - STqHandle* pExec = (STqHandle*)pIter; - if (pExec->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - int32_t code = qUpdateQualifiedTableId(pExec->execHandle.task, tbUidList, isAdd); + STqHandle* pTqHandle = (STqHandle*)pIter; + if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + int32_t code = qUpdateTableListForStreamScanner(pTqHandle->execHandle.task, tbUidList, isAdd, NULL); if (code != 0) { - tqError("update qualified table error for %s", pExec->subKey); + tqError("update qualified table error for %s", pTqHandle->subKey); continue; } - } else if (pExec->execHandle.subType == TOPIC_SUB_TYPE__DB) { + } else if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__DB) { if (!isAdd) { int32_t sz = taosArrayGetSize(tbUidList); for (int32_t i = 0; i < sz; i++) { int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i); - taosHashPut(pExec->execHandle.execDb.pFilterOutTbUid, &tbUid, sizeof(int64_t), NULL, 0); + taosHashPut(pTqHandle->execHandle.execDb.pFilterOutTbUid, &tbUid, sizeof(int64_t), NULL, 0); } } - } else if (pExec->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { + } else if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__TABLE) { if (isAdd) { SArray* qa = taosArrayInit(4, sizeof(tb_uid_t)); SMetaReader mr = {0}; @@ -965,35 +977,50 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { } tDecoderClear(&mr.coder); - - if (mr.me.type != TSDB_CHILD_TABLE || mr.me.ctbEntry.suid != pExec->execHandle.execTb.suid) { + if (mr.me.type != TSDB_CHILD_TABLE || mr.me.ctbEntry.suid != pTqHandle->execHandle.execTb.suid) { tqDebug("table uid %" PRId64 " does not add to tq handle", *id); continue; } + tqDebug("table uid %" PRId64 " add to tq handle", *id); taosArrayPush(qa, id); } + metaReaderClear(&mr); if (taosArrayGetSize(qa) > 0) { - tqReaderAddTbUidList(pExec->execHandle.pExecReader, qa); + tqReaderAddTbUidList(pTqHandle->execHandle.pTqReader, qa); } + taosArrayDestroy(qa); } else { - 
tqReaderRemoveTbUidList(pExec->execHandle.pExecReader, tbUidList); + tqReaderRemoveTbUidList(pTqHandle->execHandle.pTqReader, tbUidList); } } } + + // update the table list handle for each stream scanner/wal reader while (1) { pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); - if (pIter == NULL) break; + if (pIter == NULL) { + break; + } + SStreamTask* pTask = *(SStreamTask**)pIter; if (pTask->taskLevel == TASK_LEVEL__SOURCE) { - int32_t code = qUpdateQualifiedTableId(pTask->exec.executor, tbUidList, isAdd); + SArray* pList = NULL; + int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd, pList); if (code != 0) { - tqError("update qualified table error for stream task %d", pTask->taskId); + tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr); continue; } + + if (isAdd) { // only add qualified tables + tqReaderAddTbUidList(pTask->exec.pTqReader, pList); + } else { + tqReaderRemoveTbUidList(pTask->exec.pTqReader, tbUidList); + } } } + return 0; } diff --git a/source/dnode/vnode/src/tq/tqScan.c b/source/dnode/vnode/src/tq/tqScan.c index b4e50312fd..5633dc37a3 100644 --- a/source/dnode/vnode/src/tq/tqScan.c +++ b/source/dnode/vnode/src/tq/tqScan.c @@ -38,7 +38,7 @@ int32_t tqAddBlockDataToRsp(const SSDataBlock* pBlock, SMqDataRsp* pRsp, int32_t } static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, STaosxRsp* pRsp) { - SSchemaWrapper* pSW = tCloneSSchemaWrapper(pExec->pExecReader->pSchemaWrapper); + SSchemaWrapper* pSW = tCloneSSchemaWrapper(pExec->pTqReader->pSchemaWrapper); if (pSW == NULL) { return -1; } @@ -135,7 +135,7 @@ int32_t tqScanTaosx(STQ* pTq, const STqHandle* pHandle, STaosxRsp* pRsp, SMqMeta if (pDataBlock != NULL && pDataBlock->info.rows > 0) { if (pRsp->withTbName) { if (pOffset->type == TMQ_OFFSET__LOG) { - int64_t uid = pExec->pExecReader->lastBlkUid; + int64_t uid = pExec->pTqReader->lastBlkUid; if (tqAddTbNameToRsp(pTq, uid, pRsp, 1) < 0) { continue; 
} @@ -200,9 +200,9 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR SArray* pSchemas = taosArrayInit(0, sizeof(void*)); if (pExec->subType == TOPIC_SUB_TYPE__TABLE) { - STqReader* pReader = pExec->pExecReader; - tqReaderSetSubmitReq2(pReader, submit.msgStr, submit.msgLen, submit.ver); - while (tqNextDataBlock2(pReader)) { + STqReader* pReader = pExec->pTqReader; + tqReaderSetSubmitMsg(pReader, submit.msgStr, submit.msgLen, submit.ver); + while (tqNextDataBlock(pReader)) { taosArrayClear(pBlocks); taosArrayClear(pSchemas); SSubmitTbData* pSubmitTbDataRet = NULL; @@ -210,7 +210,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue; } if (pRsp->withTbName) { - int64_t uid = pExec->pExecReader->lastBlkUid; + int64_t uid = pExec->pTqReader->lastBlkUid; if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 0) { taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes); taosArrayDestroyP(pSchemas, (FDelete)tDeleteSSchemaWrapper); @@ -259,8 +259,8 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR } } } else if (pExec->subType == TOPIC_SUB_TYPE__DB) { - STqReader* pReader = pExec->pExecReader; - tqReaderSetSubmitReq2(pReader, submit.msgStr, submit.msgLen, submit.ver); + STqReader* pReader = pExec->pTqReader; + tqReaderSetSubmitMsg(pReader, submit.msgStr, submit.msgLen, submit.ver); while (tqNextDataBlockFilterOut2(pReader, pExec->execDb.pFilterOutTbUid)) { taosArrayClear(pBlocks); taosArrayClear(pSchemas); @@ -269,7 +269,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue; } if (pRsp->withTbName) { - int64_t uid = pExec->pExecReader->lastBlkUid; + int64_t uid = pExec->pTqReader->lastBlkUid; if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 0) { taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes); 
taosArrayDestroyP(pSchemas, (FDelete)tDeleteSSchemaWrapper); diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 8a89cb6bd7..22f0387d4e 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -87,7 +87,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d return; } - tqDebug("vgId:%d, task %d write into table, block num: %d", TD_VID(pVnode), pTask->taskId, blockSz); + tqDebug("vgId:%d, s-task:%s write into table, block num: %d", TD_VID(pVnode), pTask->id.idStr, blockSz); for (int32_t i = 0; i < blockSz; i++) { bool createTb = true; SSDataBlock* pDataBlock = taosArrayGet(pBlocks, i); @@ -382,7 +382,7 @@ void tqSinkToTablePipeline2(SStreamTask* pTask, void* vnode, int64_t ver, void* int32_t blockSz = taosArrayGetSize(pBlocks); - tqDebug("vgId:%d, task %d write into table, block num: %d", TD_VID(pVnode), pTask->taskId, blockSz); + tqDebug("vgId:%d, s-task:%s write results blocks:%d into table", TD_VID(pVnode), pTask->id.idStr, blockSz); void* pBuf = NULL; SArray* tagArray = NULL; @@ -475,11 +475,9 @@ void tqSinkToTablePipeline2(SStreamTask* pTask, void* vnode, int64_t ver, void* } for (int32_t tagId = UD_TAG_COLUMN_INDEX, step = 1; tagId < size; tagId++, step++) { SColumnInfoData* pTagData = taosArrayGet(pDataBlock->pDataBlock, tagId); - STagVal tagVal = { - .cid = pTSchema->numOfCols + step, - .type = pTagData->info.type, - }; - void* pData = colDataGetData(pTagData, rowId); + + STagVal tagVal = {.cid = pTSchema->numOfCols + step, .type = pTagData->info.type}; + void* pData = colDataGetData(pTagData, rowId); if (colDataIsNull_s(pTagData, rowId)) { continue; } else if (IS_VAR_DATA_TYPE(pTagData->info.type)) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 3d2b032156..e353988d4c 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -447,13 +447,11 @@ int32_t 
vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp walApplyVer(pVnode->pWal, version); - /*vInfo("vgId:%d, push msg begin", pVnode->config.vgId);*/ if (tqPushMsg(pVnode->pTq, pMsg->pCont, pMsg->contLen, pMsg->msgType, version) < 0) { /*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/ vError("vgId:%d, failed to push msg to TQ since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } - /*vInfo("vgId:%d, push msg end", pVnode->config.vgId);*/ // commit if need if (needCommit) { diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index b4431a7c3b..4389cc7fdf 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -127,7 +127,7 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu pOperator->status = OP_NOT_OPENED; SStreamScanInfo* pInfo = pOperator->info; - qDebug("task stream set total blocks:%d %s", (int32_t)numOfBlocks, id); + qDebug("s-task set source blocks:%d %s", (int32_t)numOfBlocks, id); ASSERT(pInfo->validBlockIndex == 0 && taosArrayGetSize(pInfo->pBlockLists) == 0); if (type == STREAM_INPUT__MERGED_SUBMIT) { @@ -363,27 +363,28 @@ static SArray* filterUnqualifiedTables(const SStreamScanInfo* pScanInfo, const S return qa; } -int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd) { +int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd, SArray* pList) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; + const char* id = GET_TASKID(pTaskInfo); + int32_t code = 0; if (isAdd) { - qDebug("add %d tables id into query list, %s", (int32_t)taosArrayGetSize(tableIdList), pTaskInfo->id.str); + qDebug("add %d tables id into query list, %s", (int32_t)taosArrayGetSize(tableIdList), id); } // traverse to the stream scanner node to add this table id - SOperatorInfo* pInfo = pTaskInfo->pRoot; - while (pInfo->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { - 
pInfo = pInfo->pDownstream[0]; - } - - int32_t code = 0; + SOperatorInfo* pInfo = extractOperatorInTree(pTaskInfo->pRoot, QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN, id); SStreamScanInfo* pScanInfo = pInfo->info; + if (isAdd) { // add new table id SArray* qa = filterUnqualifiedTables(pScanInfo, tableIdList, GET_TASKID(pTaskInfo)); int32_t numOfQualifiedTables = taosArrayGetSize(qa); - qDebug(" %d qualified child tables added into stream scanner", numOfQualifiedTables); + if (pList != NULL) { + taosArrayAddAll(pList, qa); + } + qDebug("%d qualified child tables added into stream scanner, %s", numOfQualifiedTables, id); code = tqReaderAddTbUidList(pScanInfo->tqReader, qa); if (code != TSDB_CODE_SUCCESS) { taosArrayDestroy(qa); @@ -424,19 +425,6 @@ int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bo } } -#if 0 - bool exists = false; - for (int32_t k = 0; k < taosArrayGetSize(pListInfo->pTableList); ++k) { - STableKeyInfo* pKeyInfo = taosArrayGet(pListInfo->pTableList, k); - if (pKeyInfo->uid == keyInfo.uid) { - qWarn("ignore duplicated query table uid:%" PRIu64 " added, %s", pKeyInfo->uid, pTaskInfo->id.str); - exists = true; - } - } - - if (!exists) { -#endif - tableListAddTableInfo(pTableListInfo, keyInfo.uid, keyInfo.groupId); } @@ -447,7 +435,7 @@ int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bo taosArrayDestroy(qa); } else { // remove the table id in current list - qDebug(" %d remove child tables from the stream scanner", (int32_t)taosArrayGetSize(tableIdList)); + qDebug("%d remove child tables from the stream scanner, %s", (int32_t)taosArrayGetSize(tableIdList), id); taosWLockLatch(&pTaskInfo->lock); code = tqReaderRemoveTbUidList(pScanInfo->tqReader, tableIdList); taosWUnLockLatch(&pTaskInfo->lock); @@ -1263,3 +1251,21 @@ void qProcessRspMsg(void* parent, SRpcMsg* pMsg, SEpSet* pEpSet) { destroySendMsgInfo(pSendInfo); } +SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = 
tinfo; + SArray* plist = getTableListInfo(pTaskInfo); + + // only extract table in the first elements + STableListInfo* pTableListInfo = taosArrayGetP(plist, 0); + + SArray* pUidList = taosArrayInit(10, sizeof(uint64_t)); + + int32_t numOfTables = tableListGetSize(pTableListInfo); + for(int32_t i = 0; i < numOfTables; ++i) { + STableKeyInfo* pKeyInfo = tableListGetInfo(pTableListInfo, i); + taosArrayPush(pUidList, &pKeyInfo->uid); + } + + taosArrayDestroy(plist); + return pUidList; +} diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 11753c181c..52b749efee 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2001,7 +2001,11 @@ void qStreamCloseTsdbReader(void* task) { } static void extractTableList(SArray* pList, const SOperatorInfo* pOperator) { - if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { + if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { + SStreamScanInfo* pScanInfo = pOperator->info; + STableScanInfo* pTableScanInfo = pScanInfo->pTableScanOp->info; + taosArrayPush(pList, &pTableScanInfo->base.pTableListInfo); + } else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { STableScanInfo* pScanInfo = pOperator->info; taosArrayPush(pList, &pScanInfo->base.pTableListInfo); } else { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 29990f2d06..21e3c75924 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1635,7 +1635,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { if (pTaskInfo->streamInfo.submit.msgStr != NULL) { if (pInfo->tqReader->msg2.msgStr == NULL) { SPackedData submit = pTaskInfo->streamInfo.submit; - if (tqReaderSetSubmitReq2(pInfo->tqReader, submit.msgStr, submit.msgLen, submit.ver) < 0) { + if (tqReaderSetSubmitMsg(pInfo->tqReader, submit.msgStr, submit.msgLen, submit.ver) < 
0) { qError("submit msg messed up when initing stream submit block %p", submit.msgStr); return NULL; } @@ -1644,7 +1644,7 @@ static SSDataBlock* doQueueScan(SOperatorInfo* pOperator) { blockDataCleanup(pInfo->pRes); SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; - while (tqNextDataBlock2(pInfo->tqReader)) { + while (tqNextDataBlock(pInfo->tqReader)) { SSDataBlock block = {0}; int32_t code = tqRetrieveDataBlock2(&block, pInfo->tqReader, NULL); @@ -1805,7 +1805,6 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { /*resetTableScanInfo(pTSInfo, pWin);*/ tsdbReaderClose(pTSInfo->base.dataReader); - qDebug("4"); pTSInfo->base.dataReader = NULL; pInfo->pTableScanOp->status = OP_OPENED; @@ -1888,7 +1887,6 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__NONE; STableScanInfo* pTSInfo = pInfo->pTableScanOp->info; tsdbReaderClose(pTSInfo->base.dataReader); - qDebug("5"); pTSInfo->base.dataReader = NULL; @@ -1915,6 +1913,7 @@ FETCH_NEXT_BLOCK: if (pBlock->info.parTbName[0]) { streamStatePutParName(pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, pBlock->info.parTbName); } + // TODO move into scan pBlock->info.calWin.skey = INT64_MIN; pBlock->info.calWin.ekey = INT64_MAX; @@ -2057,7 +2056,7 @@ FETCH_NEXT_BLOCK: int32_t current = pInfo->validBlockIndex++; SPackedData* pSubmit = taosArrayGet(pInfo->pBlockLists, current); - if (tqReaderSetSubmitReq2(pInfo->tqReader, pSubmit->msgStr, pSubmit->msgLen, pSubmit->ver) < 0) { + if (tqReaderSetSubmitMsg(pInfo->tqReader, pSubmit->msgStr, pSubmit->msgLen, pSubmit->ver) < 0) { qError("submit msg messed up when initing stream submit block %p, current %d, total %d", pSubmit, current, totBlockNum); continue; @@ -2066,7 +2065,7 @@ FETCH_NEXT_BLOCK: blockDataCleanup(pInfo->pRes); - while (tqNextDataBlock2(pInfo->tqReader)) { + while (tqNextDataBlock(pInfo->tqReader)) { SSDataBlock block = {0}; int32_t code = tqRetrieveDataBlock2(&block, pInfo->tqReader, 
NULL); diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 1a1fb6208d..880de7d6bf 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2432,10 +2432,8 @@ static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* p updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, true); applyAggFunctionOnPartialTuples(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, pSDataBlock->info.rows, numOfOutput); - SWinKey key = { - .ts = nextWin.skey, - .groupId = groupId, - }; + + SWinKey key = { .ts = nextWin.skey, .groupId = groupId }; saveOutputBuf(pInfo->pState, &key, pResult, pInfo->aggSup.resultRowSize); releaseOutputBuf(pInfo->pState, &key, pResult); if (pInfo->delKey.ts > key.ts) { @@ -4771,6 +4769,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { pInfo->numOfDatapack = 0; break; } + pInfo->numOfDatapack++; printDataBlock(pBlock, "single interval recv"); diff --git a/source/libs/stream/inc/streamInc.h b/source/libs/stream/inc/streamInc.h index 66496f11f8..876b80697a 100644 --- a/source/libs/stream/inc/streamInc.h +++ b/source/libs/stream/inc/streamInc.h @@ -44,7 +44,7 @@ int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId, SEpSet* pEpSet); -SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* elem); +SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem); #ifdef __cplusplus } diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 361cd2cacc..59ac8a61d6 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -16,6 +16,8 @@ #include "streamInc.h" #include "ttimer.h" +#define STREAM_TASK_INPUT_QUEUEU_CAPACITY 2 + int32_t 
streamInit() { int8_t old; while (1) { @@ -96,13 +98,14 @@ int32_t streamSchedExec(SStreamTask* pTask) { if (schedStatus == TASK_SCHED_STATUS__INACTIVE) { SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE); return -1; } pRunReq->head.vgId = pTask->nodeId; - pRunReq->streamId = pTask->streamId; - pRunReq->taskId = pTask->taskId; + pRunReq->streamId = pTask->id.streamId; + pRunReq->taskId = pTask->id.taskId; SRpcMsg msg = { .msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq) }; tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg); @@ -142,7 +145,7 @@ int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SR pCont->upstreamNodeId = htonl(pReq->upstreamNodeId); pCont->upstreamTaskId = htonl(pReq->upstreamTaskId); pCont->downstreamNodeId = htonl(pTask->nodeId); - pCont->downstreamTaskId = htonl(pTask->taskId); + pCont->downstreamTaskId = htonl(pTask->id.taskId); pRsp->pCont = buf; pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); tmsgSendRsp(pRsp); @@ -155,7 +158,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, // enqueue if (pData != NULL) { - qDebug("task %d(child %d) recv retrieve req from task %d, reqId %" PRId64, pTask->taskId, pTask->selfChildId, + qDebug("task %d(child %d) recv retrieve req from task %d, reqId %" PRId64, pTask->id.taskId, pTask->selfChildId, pReq->srcTaskId, pReq->reqId); pData->type = STREAM_INPUT__DATA_RETRIEVE; @@ -205,7 +208,7 @@ int32_t streamTaskOutput(SStreamTask* pTask, SStreamDataBlock* pBlock) { } int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { - qDebug("task %d receive dispatch req from node %d task %d", pTask->taskId, pReq->upstreamNodeId, + qDebug("task %d receive dispatch req from node %d task %d", pTask->id.taskId, 
pReq->upstreamNodeId, pReq->upstreamTaskId); streamTaskEnqueue(pTask, pReq, pRsp); @@ -228,12 +231,11 @@ int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, S int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) { ASSERT(pRsp->inputStatus == TASK_OUTPUT_STATUS__NORMAL || pRsp->inputStatus == TASK_OUTPUT_STATUS__BLOCKED); - - qDebug("task %d receive dispatch rsp, code: %x", pTask->taskId, code); + qDebug("task %d receive dispatch rsp, code: %x", pTask->id.taskId, code); if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); - qDebug("task %d is shuffle, left waiting rsp %d", pTask->taskId, leftRsp); + qDebug("task %d is shuffle, left waiting rsp %d", pTask->id.taskId, leftRsp); if (leftRsp > 0) return 0; } @@ -261,7 +263,7 @@ int32_t streamProcessRunReq(SStreamTask* pTask) { } int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) { - qDebug("task %d receive retrieve req from node %d task %d", pTask->taskId, pReq->srcNodeId, pReq->srcTaskId); + qDebug("task %d receive retrieve req from node %d task %d", pTask->id.taskId, pReq->srcNodeId, pReq->srcTaskId); streamTaskEnqueueRetrieve(pTask, pReq, pRsp); @@ -275,26 +277,43 @@ int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, S return 0; } +bool tInputQueueIsFull(const SStreamTask* pTask) { + return taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUEU_CAPACITY; +} + int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem) { int8_t type = pItem->type; if (type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit2* pSubmitBlock = streamSubmitBlockClone((SStreamDataSubmit2*)pItem); if (pSubmitBlock == NULL) { - qDebug("task %d %p submit enqueue failed since out of memory", pTask->taskId, pTask); + qDebug("task %d %p submit enqueue failed since out of memory", 
pTask->id.taskId, pTask); terrno = TSDB_CODE_OUT_OF_MEMORY; atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__FAILED); return -1; } int32_t total = taosQueueItemSize(pTask->inputQueue->queue) + 1; - qDebug("stream task:%d %p submit enqueue %p %p %p msgLen:%d ver:%" PRId64 ", total in queue:%d", pTask->taskId, - pTask, pItem, pSubmitBlock, pSubmitBlock->submit.msgStr, pSubmitBlock->submit.msgLen, + qDebug("s-task:%s submit enqueue %p %p %p msgLen:%d ver:%" PRId64 ", total in queue:%d", pTask->id.idStr, + pItem, pSubmitBlock, pSubmitBlock->submit.msgStr, pSubmitBlock->submit.msgLen, pSubmitBlock->submit.ver, total); + if (total > STREAM_TASK_INPUT_QUEUEU_CAPACITY) { + qDebug("s-task:%s input queue is full, capacity:%d, abort", pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY); + streamDataSubmitDestroy(pSubmitBlock); + return -1; + } + taosWriteQitem(pTask->inputQueue->queue, pSubmitBlock); } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__REF_DATA_BLOCK) { + int32_t total = taosQueueItemSize(pTask->inputQueue->queue) + 1; + if (total > 2) { + qDebug("stream task input queue is full, abort"); + return -1; + } + + qDebug("s-task:%s data block enqueue, total in queue:%d", pTask->id.idStr, total); taosWriteQitem(pTask->inputQueue->queue, pItem); } else if (type == STREAM_INPUT__CHECKPOINT) { taosWriteQitem(pTask->inputQueue->queue, pItem); diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 3fba1cb556..63d15f134d 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -67,7 +67,7 @@ int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock return 0; } -SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit) { +SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit, int32_t type) { SStreamDataSubmit2* pDataSubmit = (SStreamDataSubmit2*)taosAllocateQitem(sizeof(SStreamDataSubmit2), DEF_QITEM, 0); if 
(pDataSubmit == NULL) { @@ -82,7 +82,7 @@ SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit) { pDataSubmit->submit = submit; *pDataSubmit->dataRef = 1; // initialize the reference count to be 1 - pDataSubmit->type = STREAM_INPUT__DATA_SUBMIT; + pDataSubmit->type = type; return pDataSubmit; } @@ -139,28 +139,27 @@ SStreamDataSubmit2* streamSubmitBlockClone(SStreamDataSubmit2* pSubmit) { return pSubmitClone; } -SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* elem) { - ASSERT(elem); - if (dst->type == STREAM_INPUT__DATA_BLOCK && elem->type == STREAM_INPUT__DATA_BLOCK) { +SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) { + if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) { SStreamDataBlock* pBlock = (SStreamDataBlock*)dst; - SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)elem; + SStreamDataBlock* pBlockSrc = (SStreamDataBlock*)pElem; taosArrayAddAll(pBlock->blocks, pBlockSrc->blocks); taosArrayDestroy(pBlockSrc->blocks); - taosFreeQitem(elem); + taosFreeQitem(pElem); return dst; - } else if (dst->type == STREAM_INPUT__MERGED_SUBMIT && elem->type == STREAM_INPUT__DATA_SUBMIT) { + } else if (dst->type == STREAM_INPUT__MERGED_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamMergedSubmit2* pMerged = (SStreamMergedSubmit2*)dst; - SStreamDataSubmit2* pBlockSrc = (SStreamDataSubmit2*)elem; + SStreamDataSubmit2* pBlockSrc = (SStreamDataSubmit2*)pElem; streamMergeSubmit(pMerged, pBlockSrc); - taosFreeQitem(elem); + taosFreeQitem(pElem); return dst; - } else if (dst->type == STREAM_INPUT__DATA_SUBMIT && elem->type == STREAM_INPUT__DATA_SUBMIT) { + } else if (dst->type == STREAM_INPUT__DATA_SUBMIT && pElem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamMergedSubmit2* pMerged = streamMergedSubmitNew(); ASSERT(pMerged); streamMergeSubmit(pMerged, (SStreamDataSubmit2*)dst); - streamMergeSubmit(pMerged, (SStreamDataSubmit2*)elem); + 
streamMergeSubmit(pMerged, (SStreamDataSubmit2*)pElem); taosFreeQitem(dst); - taosFreeQitem(elem); + taosFreeQitem(pElem); return (SStreamQueueItem*)pMerged; } else { return NULL; diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 7e7c23f98a..4e491f906a 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -121,9 +121,9 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols); SStreamRetrieveReq req = { - .streamId = pTask->streamId, + .streamId = pTask->id.streamId, .srcNodeId = pTask->nodeId, - .srcTaskId = pTask->taskId, + .srcTaskId = pTask->id.taskId, .pRetrieve = pRetrieve, .retrieveLen = dataStrLen, }; @@ -168,7 +168,7 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) } buf = NULL; - qDebug("task %d(child %d) send retrieve req to task %d at node %d, reqId %" PRId64, pTask->taskId, + qDebug("s-task:%s (child %d) send retrieve req to task %d at node %d, reqId %" PRId64, pTask->id.idStr, pTask->selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId); } code = 0; @@ -238,7 +238,7 @@ int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* msg.pCont = buf; msg.msgType = TDMT_STREAM_TASK_CHECK; - qDebug("dispatch from task %d to task %d node %d: check msg", pTask->taskId, pReq->downstreamTaskId, nodeId); + qDebug("dispatch from task %d to task %d node %d: check msg", pTask->id.taskId, pReq->downstreamTaskId, nodeId); tmsgSendReq(pEpSet, &msg); @@ -282,7 +282,7 @@ int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecov tmsgSendReq(pEpSet, &msg); - qDebug("dispatch from task %d to task %d node %d: recover finish msg", pTask->taskId, pReq->taskId, vgId); + qDebug("dispatch from task %d to task %d node %d: recover finish msg", pTask->id.taskId, pReq->taskId, vgId); return 0; FAIL: @@ -319,7 
+319,7 @@ int32_t streamDispatchOneDataReq(SStreamTask* pTask, const SStreamDispatchReq* p msg.pCont = buf; msg.msgType = pTask->dispatchMsgType; - qDebug("dispatch from task %d to task %d node %d: data msg", pTask->taskId, pReq->taskId, vgId); + qDebug("dispatch from task %d to task %d node %d: data msg", pTask->id.taskId, pReq->taskId, vgId); tmsgSendReq(pEpSet, &msg); @@ -382,9 +382,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { SStreamDispatchReq req = { - .streamId = pTask->streamId, + .streamId = pTask->id.streamId, .dataSrcVgId = pData->srcVgId, - .upstreamTaskId = pTask->taskId, + .upstreamTaskId = pTask->id.taskId, .upstreamChildId = pTask->selfChildId, .upstreamNodeId = pTask->nodeId, .blockNum = blockNum, @@ -408,7 +408,7 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat req.taskId = downstreamTaskId; - qDebug("dispatch from task %d (child id %d) to down stream task %d in vnode %d", pTask->taskId, pTask->selfChildId, + qDebug("dispatch from task %d (child id %d) to down stream task %d in vnode %d", pTask->id.taskId, pTask->selfChildId, downstreamTaskId, vgId); if (streamDispatchOneDataReq(pTask, &req, vgId, pEpSet) < 0) { @@ -432,9 +432,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat } for (int32_t i = 0; i < vgSz; i++) { - pReqs[i].streamId = pTask->streamId; + pReqs[i].streamId = pTask->id.streamId; pReqs[i].dataSrcVgId = pData->srcVgId; - pReqs[i].upstreamTaskId = pTask->taskId; + pReqs[i].upstreamTaskId = pTask->id.taskId; pReqs[i].upstreamChildId = pTask->selfChildId; pReqs[i].upstreamNodeId = pTask->nodeId; pReqs[i].blockNum = 0; @@ -503,13 +503,13 @@ int32_t streamDispatch(SStreamTask* pTask) { SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputQueue); if (pBlock == NULL) { - qDebug("stream stop dispatching since no output: task %d", pTask->taskId); + qDebug("stream stop 
dispatching since no output: task %d", pTask->id.taskId); atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); return 0; } ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK); - qDebug("stream dispatching: task %d", pTask->taskId); + qDebug("stream dispatching: task %d", pTask->id.taskId); int32_t code = 0; if (streamDispatchAllBlocks(pTask, pBlock) < 0) { diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 6ef327049c..d23590a08b 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -18,8 +18,9 @@ #define STREAM_EXEC_MAX_BATCH_NUM 100 static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* pRes) { - int32_t code; - void* exec = pTask->exec.executor; + int32_t code = TSDB_CODE_SUCCESS; + void* pExecutor = pTask->exec.pExecutor; + while(pTask->taskLevel == TASK_LEVEL__SOURCE && atomic_load_8(&pTask->taskStatus) != TASK_STATUS__NORMAL) { qError("stream task wait for the end of fill history"); taosMsleep(2); @@ -30,31 +31,35 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* const SStreamQueueItem* pItem = (const SStreamQueueItem*)data; if (pItem->type == STREAM_INPUT__GET_RES) { const SStreamTrigger* pTrigger = (const SStreamTrigger*)data; - qSetMultiStreamInput(exec, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); + qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK); } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE); const SStreamDataSubmit2* pSubmit = (const SStreamDataSubmit2*)data; - qDebug("stream task:%d %p set submit input %p %p %d %" PRId64, pTask->taskId, pTask, pSubmit, pSubmit->submit.msgStr, + qDebug("s-task:%s set submit blocks as input %p %p %d ver:%" PRId64, pTask->id.idStr, pSubmit, pSubmit->submit.msgStr, pSubmit->submit.msgLen, pSubmit->submit.ver); - qSetMultiStreamInput(exec, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); + 
qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); } else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) { const SStreamDataBlock* pBlock = (const SStreamDataBlock*)data; - SArray* blocks = pBlock->blocks; - qDebug("task %d %p set ssdata input", pTask->taskId, pTask); - qSetMultiStreamInput(exec, blocks->pData, blocks->size, STREAM_INPUT__DATA_BLOCK); + + SArray* pBlockList = pBlock->blocks; + int32_t numOfBlocks = taosArrayGetSize(pBlockList); + qDebug("s-task:%s set sdata blocks as input num:%d, ver:%"PRId64, pTask->id.idStr, numOfBlocks, pBlock->sourceVer); + qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__DATA_BLOCK); } else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) { const SStreamMergedSubmit2* pMerged = (const SStreamMergedSubmit2*)data; - SArray* blocks = pMerged->submits; - qDebug("task %d %p set submit input (merged), batch num: %d", pTask->taskId, pTask, (int32_t)blocks->size); - qSetMultiStreamInput(exec, blocks->pData, blocks->size, STREAM_INPUT__MERGED_SUBMIT); + + SArray* pBlockList = pMerged->submits; + int32_t numOfBlocks = taosArrayGetSize(pBlockList); + qDebug("st-task:%s %p set submit input (merged), batch num:%d", pTask->id.idStr, pTask, numOfBlocks); + qSetMultiStreamInput(pExecutor, pBlockList->pData, numOfBlocks, STREAM_INPUT__MERGED_SUBMIT); } else if (pItem->type == STREAM_INPUT__REF_DATA_BLOCK) { const SStreamRefDataBlock* pRefBlock = (const SStreamRefDataBlock*)data; - qSetMultiStreamInput(exec, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); + qSetMultiStreamInput(pExecutor, pRefBlock->pBlock, 1, STREAM_INPUT__DATA_BLOCK); } else { ASSERT(0); } - // exec + // pExecutor while (1) { if (pTask->taskStatus == TASK_STATUS__DROPPING) { return 0; @@ -62,26 +67,28 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* SSDataBlock* output = NULL; uint64_t ts = 0; - if ((code = qExecTask(exec, &output, &ts)) < 
0) { + if ((code = qExecTask(pExecutor, &output, &ts)) < 0) { if (code == TSDB_CODE_QRY_IN_EXEC) { - resetTaskInfo(exec); + resetTaskInfo(pExecutor); } - /*ASSERT(false);*/ - qError("unexpected stream execution, stream %" PRId64 " task: %d, since %s", pTask->streamId, pTask->taskId, - terrstr()); + + qError("unexpected stream execution, s-task:%s since %s", pTask->id.idStr, terrstr()); continue; } + if (output == NULL) { if (pItem->type == STREAM_INPUT__DATA_RETRIEVE) { - SSDataBlock block = {0}; + SSDataBlock block = {0}; + const SStreamDataBlock* pRetrieveBlock = (const SStreamDataBlock*)data; ASSERT(taosArrayGetSize(pRetrieveBlock->blocks) == 1); + assignOneDataBlock(&block, taosArrayGet(pRetrieveBlock->blocks, 0)); block.info.type = STREAM_PULL_OVER; block.info.childId = pTask->selfChildId; taosArrayPush(pRes, &block); - qDebug("task %d(child %d) processed retrieve, reqId %" PRId64, pTask->taskId, pTask->selfChildId, + qDebug("task %d(child %d) processed retrieve, reqId %" PRId64, pTask->id.taskId, pTask->selfChildId, pRetrieveBlock->reqId); } break; @@ -94,20 +101,21 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* continue; } - qDebug("task %d(child %d) executed and get block", pTask->taskId, pTask->selfChildId); + qDebug("task %d(child %d) executed and get block", pTask->id.taskId, pTask->selfChildId); SSDataBlock block = {0}; assignOneDataBlock(&block, output); block.info.childId = pTask->selfChildId; taosArrayPush(pRes, &block); } + return 0; } int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE); - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; qSetStreamOpOpen(exec); bool finished = false; @@ -147,17 +155,17 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { batchCnt++; - qDebug("task %d scan exec block num %d, block limit %d", pTask->taskId, batchCnt, batchSz); + qDebug("task %d scan exec block num %d, block limit %d", 
pTask->id.taskId, batchCnt, batchSz); if (batchCnt >= batchSz) break; } if (taosArrayGetSize(pRes) == 0) { if (finished) { taosArrayDestroy(pRes); - qDebug("task %d finish recover exec task ", pTask->taskId); + qDebug("task %d finish recover exec task ", pTask->id.taskId); break; } else { - qDebug("task %d continue recover exec task ", pTask->taskId); + qDebug("task %d continue recover exec task ", pTask->id.taskId); continue; } } @@ -173,7 +181,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { streamTaskOutput(pTask, qRes); if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { - qDebug("task %d scan exec dispatch block num %d", pTask->taskId, batchCnt); + qDebug("task %d scan exec dispatch block num %d", pTask->id.taskId, batchCnt); streamDispatch(pTask); } if (finished) break; @@ -186,7 +194,7 @@ int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) { // fetch all queue item, merge according to batchLimit int32_t numOfItems = taosReadAllQitems(pTask->inputQueue1, pTask->inputQall); if (numOfItems == 0) { - qDebug("task: %d, stream task exec over, queue empty", pTask->taskId); + qDebug("task: %d, stream task exec over, queue empty", pTask->id.taskId); return 0; } SStreamQueueItem* pMerged = NULL; @@ -221,30 +229,33 @@ int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) { int32_t streamExecForAll(SStreamTask* pTask) { while (1) { - int32_t batchCnt = 1; - void* input = NULL; + int32_t batchSize = 1; + void* pInput = NULL; + + // merge multiple input data if possible in the input queue. 
while (1) { SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); if (qItem == NULL) { - qDebug("stream task exec over, queue empty, task: %d", pTask->taskId); + qDebug("stream task exec over, queue empty, task: %d", pTask->id.taskId); break; } - if (input == NULL) { - input = qItem; + + if (pInput == NULL) { + pInput = qItem; streamQueueProcessSuccess(pTask->inputQueue); if (pTask->taskLevel == TASK_LEVEL__SINK) { break; } } else { - void* newRet; - if ((newRet = streamMergeQueueItem(input, qItem)) == NULL) { + void* newRet = NULL; + if ((newRet = streamMergeQueueItem(pInput, qItem)) == NULL) { streamQueueProcessFail(pTask->inputQueue); break; } else { - batchCnt++; - input = newRet; + batchSize++; + pInput = newRet; streamQueueProcessSuccess(pTask->inputQueue); - if (batchCnt > STREAM_EXEC_MAX_BATCH_NUM) { + if (batchSize > STREAM_EXEC_MAX_BATCH_NUM) { break; } } @@ -252,75 +263,82 @@ int32_t streamExecForAll(SStreamTask* pTask) { } if (pTask->taskStatus == TASK_STATUS__DROPPING) { - if (input) streamFreeQitem(input); + if (pInput) { + streamFreeQitem(pInput); + } return 0; } - if (input == NULL) { + if (pInput == NULL) { break; } if (pTask->taskLevel == TASK_LEVEL__SINK) { - ASSERT(((SStreamQueueItem*)input)->type == STREAM_INPUT__DATA_BLOCK); - streamTaskOutput(pTask, input); + ASSERT(((SStreamQueueItem*)pInput)->type == STREAM_INPUT__DATA_BLOCK); + streamTaskOutput(pTask, pInput); continue; } SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); + qDebug("s-task:%s exec begin, msg batch: %d", pTask->id.idStr, batchSize); - qDebug("stream task:%d exec begin, msg batch: %d", pTask->taskId, batchCnt); - streamTaskExecImpl(pTask, input, pRes); + streamTaskExecImpl(pTask, pInput, pRes); - qDebug("stream task:%d exec end", pTask->taskId); + qDebug("s-task:%s exec end", pTask->id.idStr); if (taosArrayGetSize(pRes) != 0) { SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); if (qRes == NULL) { taosArrayDestroyEx(pRes, 
(FDelete)blockDataFreeRes); - streamFreeQitem(input); + streamFreeQitem(pInput); return -1; } + qRes->type = STREAM_INPUT__DATA_BLOCK; qRes->blocks = pRes; - if (((SStreamQueueItem*)input)->type == STREAM_INPUT__DATA_SUBMIT) { - SStreamDataSubmit2* pSubmit = (SStreamDataSubmit2*)input; + if (((SStreamQueueItem*)pInput)->type == STREAM_INPUT__DATA_SUBMIT) { + SStreamDataSubmit2* pSubmit = (SStreamDataSubmit2*)pInput; qRes->childId = pTask->selfChildId; qRes->sourceVer = pSubmit->ver; - } else if (((SStreamQueueItem*)input)->type == STREAM_INPUT__MERGED_SUBMIT) { - SStreamMergedSubmit2* pMerged = (SStreamMergedSubmit2*)input; + } else if (((SStreamQueueItem*)pInput)->type == STREAM_INPUT__MERGED_SUBMIT) { + SStreamMergedSubmit2* pMerged = (SStreamMergedSubmit2*)pInput; qRes->childId = pTask->selfChildId; qRes->sourceVer = pMerged->ver; } if (streamTaskOutput(pTask, qRes) < 0) { taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes); - streamFreeQitem(input); + streamFreeQitem(pInput); taosFreeQitem(qRes); return -1; } } else { taosArrayDestroy(pRes); } - streamFreeQitem(input); + streamFreeQitem(pInput); } return 0; } int32_t streamTryExec(SStreamTask* pTask) { + // this function may be executed by multi-threads, so status check is required. 
int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE); + if (schedStatus == TASK_SCHED_STATUS__WAITING) { int32_t code = streamExecForAll(pTask); if (code < 0) { atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__FAILED); return -1; } + atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE); if (!taosQueueEmpty(pTask->inputQueue->queue)) { streamSchedExec(pTask); } } + return 0; } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 66d98e90bf..577f6d6e00 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -23,6 +23,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } + int32_t len = strlen(path) + 20; char* streamPath = taosMemoryCalloc(1, len); sprintf(streamPath, "%s/%s", path, "stream"); @@ -83,7 +84,7 @@ void streamMetaClose(SStreamMeta* pMeta) { taosTmrStop(pTask->timer); pTask->timer = NULL; } - tFreeSStreamTask(pTask); + tFreeStreamTask(pTask); /*streamMetaReleaseTask(pMeta, pTask);*/ } taosHashCleanup(pMeta->pTasks); @@ -111,12 +112,12 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg, goto FAIL; } - if (taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { + if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { goto FAIL; } - if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), msg, msgLen, pMeta->txn) < 0) { - taosHashRemove(pMeta->pTasks, &pTask->taskId, sizeof(int32_t)); + if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), msg, msgLen, pMeta->txn) < 0) { + taosHashRemove(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t)); ASSERT(0); goto FAIL; } @@ -124,7 +125,7 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg, return 0; FAIL: - if 
(pTask) tFreeSStreamTask(pTask); + if (pTask) tFreeStreamTask(pTask); return -1; } #endif @@ -147,7 +148,7 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { tEncodeSStreamTask(&encoder, pTask); tEncoderClear(&encoder); - if (tdbTbUpsert(pMeta->pTaskDb, &pTask->taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) { + if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) { return -1; } @@ -165,7 +166,7 @@ int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { return -1; } - taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)); + taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)); return 0; } @@ -207,7 +208,7 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { ASSERT(left >= 0); if (left == 0) { ASSERT(atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING); - tFreeSStreamTask(pTask); + tFreeStreamTask(pTask); } } @@ -297,7 +298,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } - if (taosHashPut(pMeta->pTasks, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { + if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 87058bf490..3e7a02b8d5 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -16,7 +16,7 @@ #include "streamInc.h" int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { - qDebug("task %d at node %d launch recover", pTask->taskId, pTask->nodeId); + qDebug("s-task:%s at node %d launch recover", pTask->id.idStr, pTask->nodeId); if (pTask->taskLevel == TASK_LEVEL__SOURCE) { atomic_store_8(&pTask->taskStatus, TASK_STATUS__RECOVER_PREPARE); streamSetParamForRecover(pTask); @@ -56,8 +56,8 @@ int32_t 
streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { // checkstatus int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { SStreamTaskCheckReq req = { - .streamId = pTask->streamId, - .upstreamTaskId = pTask->taskId, + .streamId = pTask->id.streamId, + .upstreamTaskId = pTask->id.taskId, .upstreamNodeId = pTask->nodeId, .childId = pTask->selfChildId, }; @@ -68,7 +68,7 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; pTask->checkReqId = req.reqId; - qDebug("task %d at node %d check downstream task %d at node %d", pTask->taskId, pTask->nodeId, req.downstreamTaskId, + qDebug("task %d at node %d check downstream task %d at node %d", pTask->id.taskId, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); streamDispatchOneCheckReq(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); } else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { @@ -83,12 +83,12 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { taosArrayPush(pTask->checkReqIds, &req.reqId); req.downstreamNodeId = pVgInfo->vgId; req.downstreamTaskId = pVgInfo->taskId; - qDebug("task %d at node %d check downstream task %d at node %d (shuffle)", pTask->taskId, pTask->nodeId, + qDebug("task %d at node %d check downstream task %d at node %d (shuffle)", pTask->id.taskId, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); streamDispatchOneCheckReq(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); } } else { - qDebug("task %d at node %d direct launch recover since no downstream", pTask->taskId, pTask->nodeId); + qDebug("task %d at node %d direct launch recover since no downstream", pTask->id.taskId, pTask->nodeId); streamTaskLaunchRecover(pTask, version); } return 0; @@ -104,7 +104,7 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp .downstreamNodeId = pRsp->downstreamNodeId, .childId = pRsp->childId, }; - 
qDebug("task %d at node %d check downstream task %d at node %d (recheck)", pTask->taskId, pTask->nodeId, + qDebug("task %d at node %d check downstream task %d at node %d (recheck)", pTask->id.taskId, pTask->nodeId, req.downstreamTaskId, req.downstreamNodeId); if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { streamDispatchOneCheckReq(pTask, &req, pRsp->downstreamNodeId, &pTask->fixedEpDispatcher.epSet); @@ -160,11 +160,11 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* // common int32_t streamSetParamForRecover(SStreamTask* pTask) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; return qStreamSetParamForRecover(exec); } int32_t streamRestoreParam(SStreamTask* pTask) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; return qStreamRestoreParam(exec); } int32_t streamSetStatusNormal(SStreamTask* pTask) { @@ -174,14 +174,14 @@ int32_t streamSetStatusNormal(SStreamTask* pTask) { // source int32_t streamSourceRecoverPrepareStep1(SStreamTask* pTask, int64_t ver) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; return qStreamSourceRecoverStep1(exec, ver); } int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* pReq) { pReq->msgHead.vgId = pTask->nodeId; - pReq->streamId = pTask->streamId; - pReq->taskId = pTask->taskId; + pReq->streamId = pTask->id.streamId; + pReq->taskId = pTask->id.taskId; return 0; } @@ -192,13 +192,13 @@ int32_t streamSourceRecoverScanStep1(SStreamTask* pTask) { int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* pReq) { pReq->msgHead.vgId = pTask->nodeId; - pReq->streamId = pTask->streamId; - pReq->taskId = pTask->taskId; + pReq->streamId = pTask->id.streamId; + pReq->taskId = pTask->id.taskId; return 0; } int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; if 
(qStreamSourceRecoverStep2(exec, ver) < 0) { } return streamScanExec(pTask, 100); @@ -206,7 +206,7 @@ int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) { int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) { SStreamRecoverFinishReq req = { - .streamId = pTask->streamId, + .streamId = pTask->id.streamId, .childId = pTask->selfChildId, }; // serialize @@ -227,13 +227,13 @@ int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) { // agg int32_t streamAggRecoverPrepare(SStreamTask* pTask) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; pTask->recoverWaitingUpstream = taosArrayGetSize(pTask->childEpInfo); return 0; } int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) { - void* exec = pTask->exec.executor; + void* exec = pTask->exec.pExecutor; if (qStreamRestoreParam(exec) < 0) { return -1; } diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 411726075e..04a1414438 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -121,7 +121,7 @@ SStreamState* streamStateOpen(char* path, SStreamTask* pTask, bool specPath, int char statePath[1024]; if (!specPath) { - sprintf(statePath, "%s/%d", path, pTask->taskId); + sprintf(statePath, "%s/%d", path, pTask->id.taskId); } else { memset(statePath, 0, 1024); tstrncpy(statePath, path, 1024); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index e9aba0bc39..c1f50178dd 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -21,8 +21,14 @@ SStreamTask* tNewSStreamTask(int64_t streamId) { if (pTask == NULL) { return NULL; } - pTask->taskId = tGenIdPI32(); - pTask->streamId = streamId; + + pTask->id.taskId = tGenIdPI32(); + pTask->id.streamId = streamId; + + char buf[128] = {0}; + sprintf(buf, "0x%"PRIx64"-%d", pTask->id.streamId, pTask->id.taskId); + + pTask->id.idStr = taosStrdup(buf); pTask->schedStatus = 
TASK_SCHED_STATUS__INACTIVE; pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; @@ -50,8 +56,8 @@ int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo) { int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tStartEncode(pEncoder) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->streamId) < 0) return -1; - if (tEncodeI32(pEncoder, pTask->taskId) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->id.streamId) < 0) return -1; + if (tEncodeI32(pEncoder, pTask->id.taskId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->totalLevel) < 0) return -1; if (tEncodeI8(pEncoder, pTask->taskLevel) < 0) return -1; if (tEncodeI8(pEncoder, pTask->outputType) < 0) return -1; @@ -103,8 +109,8 @@ int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tStartDecode(pDecoder) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->streamId) < 0) return -1; - if (tDecodeI32(pDecoder, &pTask->taskId) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->id.streamId) < 0) return -1; + if (tDecodeI32(pDecoder, &pTask->id.taskId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->totalLevel) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->taskLevel) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->outputType) < 0) return -1; @@ -162,24 +168,43 @@ int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { return 0; } -void tFreeSStreamTask(SStreamTask* pTask) { - qDebug("free stream task %d", pTask->taskId); - if (pTask->inputQueue) streamQueueClose(pTask->inputQueue); - if (pTask->outputQueue) streamQueueClose(pTask->outputQueue); - if (pTask->exec.qmsg) taosMemoryFree(pTask->exec.qmsg); - if (pTask->exec.executor) qDestroyTask(pTask->exec.executor); +void tFreeStreamTask(SStreamTask* pTask) { + qDebug("free s-task:%s", pTask->id.idStr); + + if (pTask->inputQueue) { + streamQueueClose(pTask->inputQueue); + } + 
if (pTask->outputQueue) { + streamQueueClose(pTask->outputQueue); + } + if (pTask->exec.qmsg) { + taosMemoryFree(pTask->exec.qmsg); + } + + if (pTask->exec.pExecutor) { + qDestroyTask(pTask->exec.pExecutor); + pTask->exec.pExecutor = NULL; + } + + if (pTask->exec.pTqReader != NULL) { + pTask->exec.pTqReader = NULL; + } + taosArrayDestroyP(pTask->childEpInfo, taosMemoryFree); if (pTask->outputType == TASK_OUTPUT__TABLE) { tDeleteSSchemaWrapper(pTask->tbSink.pSchemaWrapper); taosMemoryFree(pTask->tbSink.pTSchema); } + if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { taosArrayDestroy(pTask->shuffleDispatcher.dbInfo.pVgroupInfos); taosArrayDestroy(pTask->checkReqIds); pTask->checkReqIds = NULL; } - if (pTask->pState) streamStateClose(pTask->pState); + if (pTask->pState) { + streamStateClose(pTask->pState); + } taosMemoryFree(pTask); } From 7419c0bfe99cc6802ebe9ef9e94ad6829755f09a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 10 Apr 2023 16:56:14 +0800 Subject: [PATCH 02/25] enh(stream): the stream will start after vnode restore being completed. 
--- include/libs/stream/tstream.h | 3 +- include/libs/wal/wal.h | 1 + source/dnode/mnode/impl/src/mndStream.c | 2 +- source/dnode/vnode/CMakeLists.txt | 2 + source/dnode/vnode/src/inc/tq.h | 14 ++- source/dnode/vnode/src/inc/vnodeInt.h | 5 +- source/dnode/vnode/src/tq/tq.c | 113 ++++++++++---------- source/dnode/vnode/src/tq/tqPush.c | 11 +- source/dnode/vnode/src/tq/tqRestore.c | 132 ++++++++++++++++++++++++ source/dnode/vnode/src/tq/tqUtil.c | 72 +++++++++++++ source/dnode/vnode/src/vnd/vnodeSync.c | 3 + source/libs/stream/src/streamMeta.c | 18 +++- source/libs/wal/src/walRead.c | 2 + 13 files changed, 300 insertions(+), 78 deletions(-) create mode 100644 source/dnode/vnode/src/tq/tqRestore.c create mode 100644 source/dnode/vnode/src/tq/tqUtil.c diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index d9b82c8c59..a338413502 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -50,6 +50,7 @@ enum { TASK_STATUS__RECOVER_PREPARE, TASK_STATUS__RECOVER1, TASK_STATUS__RECOVER2, + TASK_STATUS_RESTORE, // only available for source task to replay WAL from the checkpoint }; enum { @@ -576,7 +577,7 @@ typedef struct SStreamMeta { TTB* pTaskDb; TTB* pCheckpointDb; SHashObj* pTasks; - SHashObj* pRecoverStatus; + SHashObj* pRestoreTasks; void* ahandle; TXN* txn; FTaskExpand* expandFunc; diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index fdd21c7092..0a359bfd42 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -197,6 +197,7 @@ void walReadReset(SWalReader *pReader); int32_t walReadVer(SWalReader *pRead, int64_t ver); int32_t walReadSeekVer(SWalReader *pRead, int64_t ver); int32_t walNextValidMsg(SWalReader *pRead); +int64_t walReaderGetCurrentVer(const SWalReader* pReader); // only for tq usage void walSetReaderCapacity(SWalReader *pRead, int32_t capacity); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index ab83f29ef9..a0118ee749 100644 --- 
a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -35,7 +35,7 @@ static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); -static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pStream, SStreamObj *pNewStream); +static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStreamObj *pNewStream); static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq); static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index 9911752f8e..c713d1e247 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -57,6 +57,7 @@ target_sources( # tq "src/tq/tq.c" + "src/tq/tqUtil.c" "src/tq/tqScan.c" "src/tq/tqMeta.c" "src/tq/tqRead.c" @@ -64,6 +65,7 @@ target_sources( "src/tq/tqPush.c" "src/tq/tqSink.c" "src/tq/tqCommit.c" + "src/tq/tqRestore.c" "src/tq/tqSnapshot.c" "src/tq/tqOffsetSnapshot.c" ) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 884c01d397..d4af9ac481 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -128,6 +128,10 @@ typedef struct { tmr_h timer; } STqMgmt; +typedef struct { + int32_t size; +} STqOffsetHead; + static STqMgmt tqMgmt = {0}; int32_t tEncodeSTqHandle(SEncoder* pEncoder, const STqHandle* pHandle); @@ -154,10 +158,6 @@ int32_t tqMetaSaveCheckInfo(STQ* pTq, const char* key, const void* value, int32_ int32_t tqMetaDeleteCheckInfo(STQ* pTq, const char* key); int32_t tqMetaRestoreCheckInfo(STQ* pTq); -typedef struct { - int32_t size; -} STqOffsetHead; - STqOffsetStore* tqOffsetOpen(STQ* pTq); void tqOffsetClose(STqOffsetStore*); STqOffset* tqOffsetRead(STqOffsetStore* pStore, const char* subscribeKey); @@ -176,6 +176,12 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, 
const char* fname); // tqStream int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); +int32_t tqDoRestoreSourceStreamTasks(STQ* pTq); + +// tq util +void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId); +int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver); +int32_t launchTaskForWalBlock(SStreamTask* pTask, SFetchRet* pRet, STqOffset* pOffset); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 8b01ba237f..412c2549b5 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -192,8 +192,9 @@ void tqCleanUp(); STQ* tqOpen(const char* path, SVnode* pVnode); void tqClose(STQ*); int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); -int tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, int32_t type); -int tqUnregisterPushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); +int tqRegisterPushHandle(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, int32_t type); +int tqUnregisterPushHandle(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); +int tqRestoreStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. 
int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 44d5e26603..aa8960e977 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -16,6 +16,7 @@ #include "tq.h" #define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) +#define ALL_STREAM_TASKS_ID (-1) int32_t tqInit() { int8_t old; @@ -85,21 +86,6 @@ static bool tqOffsetLessOrEqual(const STqOffset* pLeft, const STqOffset* pRight) pLeft->val.version <= pRight->val.version; } -// stream_task:stream_id:task_id -static void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId) { - int32_t n = 12; - char* p = dst; - - memcpy(p, "stream_task:", n); - p += n; - - int32_t inc = tintToHex(streamId, p); - p += inc; - - *(p++) = ':'; - tintToHex(taskId, p); -} - STQ* tqOpen(const char* path, SVnode* pVnode) { STQ* pTq = taosMemoryCalloc(1, sizeof(STQ)); if (pTq == NULL) { @@ -470,7 +456,7 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, // till now, all data has been transferred to consumer, new data needs to push client once arrived. 
if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { - code = tqRegisterPushEntry(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); + code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); taosWUnLockLatch(&pTq->lock); return code; } @@ -880,7 +866,7 @@ int32_t tqProcessSubscribeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg atomic_store_32(&pHandle->epoch, -1); // remove if it has been register in the push manager, and return one empty block to consumer - tqUnregisterPushEntry(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); + tqUnregisterPushHandle(pTq, req.subKey, (int32_t)strlen(req.subKey), pHandle->consumerId, true); atomic_store_64(&pHandle->consumerId, req.newConsumerId); atomic_add_fetch_32(&pHandle->epoch, 1); @@ -925,6 +911,8 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { // expand executor if (pTask->fillHistory) { pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; + } else { + pTask->taskStatus = TASK_STATUS_RESTORE; } if (pTask->taskLevel == TASK_LEVEL__SOURCE) { @@ -1382,21 +1370,6 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { return 0; } -static int32_t doAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver) { - int32_t code = tAppendDataForStream(pTask, pQueueItem); - if (code < 0) { - tqError("s-task:%s failed to put into queue, too many, next start ver:%" PRId64, pTask->id.idStr, ver); - return -1; - } - - if (streamSchedExec(pTask) < 0) { - tqError("stream task:%d failed to be launched, code:%s", pTask->id.taskId, tstrerror(terrno)); - return -1; - } - - return TSDB_CODE_SUCCESS; -} - static void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver) { STqOffset offset = {0}; tqOffsetResetToLog(&offset.val, ver); @@ -1410,7 
+1383,7 @@ static void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int static int32_t addSubmitBlockNLaunchTask(STqOffsetStore* pOffsetStore, SStreamTask* pTask, SStreamDataSubmit2* pSubmit, const char* key, int64_t ver) { doSaveTaskOffset(pOffsetStore, key, ver); - int32_t code = doAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pSubmit, ver); + int32_t code = tqAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pSubmit, ver); // remove the offset, if all functions are completed successfully. if (code == TSDB_CODE_SUCCESS) { @@ -1504,33 +1477,15 @@ int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { // all data has been retrieved from WAL, let's try submit block directly. if (code == TSDB_CODE_SUCCESS) { // all data retrieved, abort // append the data for the stream - SFetchRet ret = {0}; + SFetchRet ret = {.data.info.type = STREAM_NORMAL}; terrno = 0; tqNextBlock(pTask->exec.pTqReader, &ret); if (ret.fetchType == FETCH_TYPE__DATA) { - SStreamDataBlock* pBlocks = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); - if (pBlocks == NULL) { // failed, do nothing - terrno = TSDB_CODE_OUT_OF_MEMORY; + code = launchTaskForWalBlock(pTask, &ret, pOffset); + if (code != TSDB_CODE_SUCCESS) { continue; } - - ret.data.info.type = STREAM_NORMAL; - pBlocks->type = STREAM_INPUT__DATA_BLOCK; - pBlocks->sourceVer = pOffset->val.version; - pBlocks->blocks = taosArrayInit(0, sizeof(SSDataBlock)); - taosArrayPush(pBlocks->blocks, &ret.data); - - int64_t* ts = (int64_t*)(((SColumnInfoData*)ret.data.pDataBlock->pData)->pData); -// tqDebug("-----------%ld\n", ts[0]); - - code = doAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pBlocks, pBlocks->sourceVer); - if (code == TSDB_CODE_SUCCESS) { - pOffset->val.version = pTask->exec.pTqReader->pWalReader->curVersion; - tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, - pOffset->val.version); - } - } else { // FETCH_TYPE__NONE, let's try submit 
block directly tqDebug("s-task:%s data in WAL are all consumed, try data in submit message", pTask->id.idStr); addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); @@ -1555,15 +1510,29 @@ int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTaskRunReq* pReq = pMsg->pCont; - int32_t taskId = pReq->taskId; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); - if (pTask) { - tqDebug("stream task:%d start to process run req", pTask->id.taskId); - streamProcessRunReq(pTask); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); + + int32_t taskId = pReq->taskId; + int32_t vgId = TD_VID(pTq->pVnode); + + if (taskId == ALL_STREAM_TASKS_ID) { // all tasks are restored from the wal + tqDoRestoreSourceStreamTasks(pTq); return 0; } else { - return -1; + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); + if (pTask != NULL) { + if (pTask->taskStatus == TASK_STATUS__NORMAL) { + tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr); + streamProcessRunReq(pTask); + } else { + tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr); + } + + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + return 0; + } else { + tqError("vgId:%d failed to found s-task, taskId:%d", vgId, taskId); + return -1; + } } } @@ -1703,3 +1672,25 @@ FAIL: } int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; } + +int32_t tqRestoreStreamTasks(STQ* pTq) { + int32_t vgId = TD_VID(pTq->pVnode); + + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d failed restore stream tasks, code:%s", vgId, terrstr(terrno)); + return -1; + } + + tqInfo("vgId:%d start to restore all stream tasks", vgId); + + pRunReq->head.vgId = vgId; + pRunReq->streamId = 0; + pRunReq->taskId = ALL_STREAM_TASKS_ID; + + 
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); + + return 0; +} diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index dd003aec98..d651e945b5 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -322,8 +322,11 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v taosWUnLockLatch(&pTq->lock); } - // push data for stream processing - if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode)) { + // push data for stream processing: + // 1. the vnode isn't in the restore procedure. + // 2. the vnode should be the leader. + // 3. the stream is not suspended yet. + if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode) && (!pTq->pVnode->restored)) { if (taosHashGetSize(pTq->pStreamMeta->pTasks) == 0) { return 0; } @@ -352,7 +355,7 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v return 0; } -int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, +int32_t tqRegisterPushHandle(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, int32_t type) { uint64_t consumerId = pRequest->consumerId; int32_t vgId = TD_VID(pTq->pVnode); @@ -389,7 +392,7 @@ int32_t tqRegisterPushEntry(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, return 0; } -int32_t tqUnregisterPushEntry(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) { +int32_t tqUnregisterPushHandle(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer) { int32_t vgId = TD_VID(pTq->pVnode); STqPushEntry** pEntry = taosHashGet(pTq->pPushMgr, pKey, keyLen); diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c new file mode 100644 index 0000000000..50fcea2b54 --- /dev/null +++ 
b/source/dnode/vnode/src/tq/tqRestore.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tq.h" + +static int32_t restoreStreamTask(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, SArray* pTaskList); +static int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList); + +// this function should be executed by stream threads. +// there is a case that the WAL increases more fast than the restore procedure, and this restore procedure +// will not stop eventually. 
+int tqDoRestoreSourceStreamTasks(STQ* pTq) { + + // todo set the offset value from the previous check point offset + int64_t st = taosGetTimestampMs(); + int32_t vgId = TD_VID(pTq->pVnode); + int32_t numOfTasks = taosHashGetSize(pTq->pStreamMeta->pRestoreTasks); + tqInfo("vgId:%d start restoring stream tasks, total tasks:%d", vgId, numOfTasks); + + while (1) { + SArray* pTaskList = taosArrayInit(4, POINTER_BYTES); + + // check all restore tasks + restoreStreamTask(pTq->pStreamMeta, pTq->pOffsetStore, pTaskList); + transferToNormalTask(pTq->pStreamMeta, pTaskList); + taosArrayDestroy(pTaskList); + + int32_t numOfRestored = taosHashGetSize(pTq->pStreamMeta->pRestoreTasks); + if (numOfRestored <= 0) { + break; + } + } + + int64_t et = taosGetTimestampMs(); + tqInfo("vgId:%d restoring task completed, elapsed time:%" PRId64 " sec.", vgId, (et - st)); + return 0; +} + +int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList) { + int32_t numOfTask = taosArrayGetSize(pTaskList); + if (numOfTask <= 0) { + return TSDB_CODE_SUCCESS; + } + + // todo: add lock + for(int32_t i = 0; i < numOfTask; ++i){ + SStreamTask* pTask = taosArrayGetP(pTaskList, i); + tqDebug("vgId:%d transfer s-task:%s state restore -> ready", pStreamMeta->vgId, pTask->id.idStr); + taosHashRemove(pStreamMeta->pRestoreTasks, &pTask->id.taskId, sizeof(pTask->id.taskId)); + + // NOTE: do not change the following order + atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL); + taosHashPut(pStreamMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, POINTER_BYTES); + } + + return TSDB_CODE_SUCCESS; +} + +int32_t restoreStreamTask(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, SArray* pTaskList) { + // check all restore tasks + void* pIter = NULL; + + while (1) { + pIter = taosHashIterate(pStreamMeta->pRestoreTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->taskLevel != TASK_LEVEL__SOURCE) { + continue; + } + 
+ if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr, pTask->taskStatus); + continue; + } + + // check if offset value exists + char key[128] = {0}; + createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); + + if (tInputQueueIsFull(pTask)) { + tqDebug("s-task:%s input queue is full, do nothing" PRId64, pTask->id.idStr); + continue; + } + + // check if offset value exists + STqOffset* pOffset = tqOffsetRead(pOffsetStore, key); + if (pOffset != NULL) { + // seek the stored version and extract data from WAL + int32_t code = tqSeekVer(pTask->exec.pTqReader, pOffset->val.version, ""); + if (code == TSDB_CODE_SUCCESS) { // all data retrieved, abort + // append the data for the stream + SFetchRet ret = {.data.info.type = STREAM_NORMAL}; + terrno = 0; + + tqNextBlock(pTask->exec.pTqReader, &ret); + if (ret.fetchType == FETCH_TYPE__DATA) { + code = launchTaskForWalBlock(pTask, &ret, pOffset); + if (code != TSDB_CODE_SUCCESS) { + continue; + } + } else { + // FETCH_TYPE__NONE: all data has been retrieved from WAL, let's try submit block directly. + tqDebug("s-task:%s data in WAL are all consumed, transfer this task to be normal state", pTask->id.idStr); + taosArrayPush(pTaskList, &pTask); + } + } else { // failed to seek to the WAL version + tqDebug("s-task:%s data in WAL are all consumed, transfer this task to be normal state", pTask->id.idStr); + taosArrayPush(pTaskList, &pTask); + } + } else { + ASSERT(0); + } + } + + return 0; +} + diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c new file mode 100644 index 0000000000..ac88cf1916 --- /dev/null +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tq.h" + +// stream_task:stream_id:task_id +void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId) { + int32_t n = 12; + char* p = dst; + + memcpy(p, "stream_task:", n); + p += n; + + int32_t inc = tintToHex(streamId, p); + p += inc; + + *(p++) = ':'; + tintToHex(taskId, p); +} + +int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver) { + int32_t code = tAppendDataForStream(pTask, pQueueItem); + if (code < 0) { + tqError("s-task:%s failed to put into queue, too many, next start ver:%" PRId64, pTask->id.idStr, ver); + return -1; + } + + if (streamSchedExec(pTask) < 0) { + tqError("stream task:%d failed to be launched, code:%s", pTask->id.taskId, tstrerror(terrno)); + return -1; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t launchTaskForWalBlock(SStreamTask* pTask, SFetchRet* pRet, STqOffset* pOffset) { + SStreamDataBlock* pBlocks = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); + if (pBlocks == NULL) { // failed, do nothing + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + pRet->data.info.type = STREAM_NORMAL; + pBlocks->type = STREAM_INPUT__DATA_BLOCK; + pBlocks->sourceVer = pOffset->val.version; + pBlocks->blocks = taosArrayInit(0, sizeof(SSDataBlock)); + taosArrayPush(pBlocks->blocks, &pRet->data); + +// int64_t* ts = (int64_t*)(((SColumnInfoData*)ret.data.pDataBlock->pData)->pData); +// tqDebug("-----------%ld\n", ts[0]); + + 
int32_t code = tqAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pBlocks, pBlocks->sourceVer); + if (code == TSDB_CODE_SUCCESS) { + pOffset->val.version = walReaderGetCurrentVer(pTask->exec.pTqReader->pWalReader); + tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, + pOffset->val.version); + } + + return 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index d681f5b65e..eb3c5d1f64 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -549,6 +549,9 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) pVnode->restored = true; vInfo("vgId:%d, sync restore finished", pVnode->config.vgId); + + // start to restore all stream tasks + tqRestoreStreamTasks(pVnode->pTq); } static void vnodeBecomeFollower(const SSyncFSM *pFsm) { diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 577f6d6e00..8693915c46 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -45,11 +45,17 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } - pMeta->pTasks = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_ENTRY_LOCK); + _hash_fn_t fp = taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT); + pMeta->pTasks = taosHashInit(64, fp, true, HASH_ENTRY_LOCK); if (pMeta->pTasks == NULL) { goto _err; } + pMeta->pRestoreTasks = taosHashInit(64, fp, true, HASH_ENTRY_LOCK); + if (pMeta->pRestoreTasks == NULL) { + goto _err; + } + if (streamMetaBegin(pMeta) < 0) { goto _err; } @@ -62,6 +68,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF _err: taosMemoryFree(pMeta->path); if (pMeta->pTasks) taosHashCleanup(pMeta->pTasks); + if (pMeta->pRestoreTasks) taosHashCleanup(pMeta->pRestoreTasks); if (pMeta->pTaskDb) 
tdbTbClose(pMeta->pTaskDb); if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb); if (pMeta->db) tdbClose(pMeta->db); @@ -87,8 +94,9 @@ void streamMetaClose(SStreamMeta* pMeta) { tFreeStreamTask(pTask); /*streamMetaReleaseTask(pMeta, pTask);*/ } + taosHashCleanup(pMeta->pTasks); - taosHashCleanup(pMeta->pRecoverStatus); + taosHashCleanup(pMeta->pRestoreTasks); taosMemoryFree(pMeta->path); taosMemoryFree(pMeta); } @@ -166,8 +174,7 @@ int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { return -1; } - taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)); - + taosHashPut(pMeta->pRestoreTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, POINTER_BYTES); return 0; } #endif @@ -298,12 +305,13 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } - if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { + if (taosHashPut(pMeta->pRestoreTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); return -1; } + /*pTask->taskStatus = TASK_STATUS__NORMAL;*/ if (pTask->fillHistory) { pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index ad6127ead2..e20299be38 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -100,6 +100,8 @@ int32_t walNextValidMsg(SWalReader *pReader) { return -1; } +int64_t walReaderGetCurrentVer(const SWalReader *pReader) { return pReader->curVersion; } + static int64_t walReadSeekFilePos(SWalReader *pReader, int64_t fileFirstVer, int64_t ver) { int64_t ret = 0; From a4ba5401f96b505c6edf6cb4f70c7af57a2bb76d Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 11 Apr 2023 10:01:36 +0800 Subject: [PATCH 03/25] enh(stream): set the start version of all operators. 
--- include/libs/executor/executor.h | 3 + include/libs/stream/tstream.h | 33 ++++----- source/dnode/vnode/src/inc/tq.h | 4 + source/dnode/vnode/src/tq/tq.c | 57 +++----------- source/dnode/vnode/src/tq/tqCommit.c | 3 + source/dnode/vnode/src/tq/tqRestore.c | 17 ++--- source/dnode/vnode/src/tq/tqUtil.c | 74 ++++++++++++++++++- source/libs/executor/inc/executorimpl.h | 16 ++-- source/libs/executor/src/executor.c | 6 ++ source/libs/executor/src/timewindowoperator.c | 15 ++++ source/libs/stream/src/streamExec.c | 22 +++++- source/libs/stream/src/streamMeta.c | 46 +++++++++--- source/util/src/tworker.c | 2 +- 13 files changed, 200 insertions(+), 98 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index ee8ee1050d..fd66194143 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -91,6 +91,9 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId); int32_t qSetStreamOpOpen(qTaskInfo_t tinfo); +// todo refactor +int64_t qGetCheckpointVersion(qTaskInfo_t tinfo); + /** * Set multiple input data blocks for the stream scan. 
* @param tinfo diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index a338413502..22e9356b3d 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -295,14 +295,11 @@ struct SStreamTask { int16_t dispatchMsgType; int8_t taskStatus; int8_t schedStatus; - - // node info - int32_t selfChildId; - int32_t nodeId; - SEpSet epSet; - - int64_t recoverSnapVer; - int64_t startVer; + int32_t selfChildId; + int32_t nodeId; + SEpSet epSet; + int64_t recoverSnapVer; + int64_t startVer; // fill history int8_t fillHistory; @@ -340,15 +337,15 @@ struct SStreamTask { // state backend SStreamState* pState; - // do not serialize - int32_t recoverTryingDownstream; - int32_t recoverWaitingUpstream; - int64_t checkReqId; - SArray* checkReqIds; // shuffle - int32_t refCnt; - - int64_t checkpointingId; - int32_t checkpointAlignCnt; + // the followings attributes don't be serialized + int32_t recoverTryingDownstream; + int32_t recoverWaitingUpstream; + int64_t checkReqId; + SArray* checkReqIds; // shuffle + int32_t refCnt; + int64_t checkpointingId; + int32_t checkpointAlignCnt; + struct SStreamMeta* pMeta; }; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -597,6 +594,8 @@ SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); +SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId); + int32_t streamMetaBegin(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaRollBack(SStreamMeta* pMeta); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index d4af9ac481..50a09229fa 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -183,6 +183,10 @@ void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId) int32_t 
tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver); int32_t launchTaskForWalBlock(SStreamTask* pTask, SFetchRet* pRet, STqOffset* pOffset); +void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver); +void saveOffsetForAllTasks(STQ* pTq, int64_t ver); +void initOffsetForAllRestoreTasks(STQ* pTq); + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index aa8960e977..91e2569a8c 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -907,6 +907,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pTq->pVnode->msgCb; pTask->startVer = ver; + pTask->pMeta = pTq->pStreamMeta; // expand executor if (pTask->fillHistory) { @@ -979,7 +980,8 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { } streamSetupTrigger(pTask); - tqInfo("vgId:%d expand stream task, s-task:%s, child id %d, level %d", vgId, pTask->id.idStr, pTask->selfChildId, pTask->taskLevel); + tqInfo("vgId:%d expand stream task, s-task:%s, ver:%" PRId64 " child id:%d, level:%d", vgId, pTask->id.idStr, + pTask->startVer, pTask->selfChildId, pTask->taskLevel); return 0; } @@ -1370,16 +1372,6 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { return 0; } -static void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver) { - STqOffset offset = {0}; - tqOffsetResetToLog(&offset.val, ver); - - tstrncpy(offset.subKey, pKey, tListLen(offset.subKey)); - - // keep the offset info in the offset store - tqOffsetWrite(pOffsetStore, &offset); -} - static int32_t addSubmitBlockNLaunchTask(STqOffsetStore* pOffsetStore, SStreamTask* pTask, SStreamDataSubmit2* pSubmit, const char* key, int64_t ver) { doSaveTaskOffset(pOffsetStore, key, ver); @@ -1392,36 +1384,6 @@ static int32_t addSubmitBlockNLaunchTask(STqOffsetStore* pOffsetStore, 
SStreamTa return TSDB_CODE_SUCCESS; } -static void saveOffsetForAllTasks(STQ* pTq, SPackedData submit) { - void* pIter = NULL; - - while(1) { - pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); - if (pIter == NULL) { - break; - } - - SStreamTask* pTask = *(SStreamTask**)pIter; - if (pTask->taskLevel != TASK_LEVEL__SOURCE) { - continue; - } - - if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { - tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, - pTask->taskStatus); - continue; - } - - char key[128] = {0}; - createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); - - STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); - if (pOffset == NULL) { - doSaveTaskOffset(pTq->pOffsetStore, key, submit.ver); - } - } -} - int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { void* pIter = NULL; @@ -1429,7 +1391,7 @@ int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { if (pSubmit == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; tqError("failed to create data submit for stream since out of memory"); - saveOffsetForAllTasks(pTq, submit); + saveOffsetForAllTasks(pTq, submit.ver); return -1; } @@ -1518,11 +1480,14 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { tqDoRestoreSourceStreamTasks(pTq); return 0; } else { - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); + SStreamTask* pTask = streamMetaAcquireTaskEx(pTq->pStreamMeta, taskId); if (pTask != NULL) { if (pTask->taskStatus == TASK_STATUS__NORMAL) { tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr); streamProcessRunReq(pTask); + } else if (pTask->taskStatus == TASK_STATUS_RESTORE) { + tqDebug("vgId:%d s-task:%s start to restore from last ck", vgId, pTask->id.idStr); + streamProcessRunReq(pTask); } else { tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr); } @@ -1683,8 +1648,10 @@ int32_t 
tqRestoreStreamTasks(STQ* pTq) { return -1; } - tqInfo("vgId:%d start to restore all stream tasks", vgId); - + int32_t numOfTasks = taosHashGetSize(pTq->pStreamMeta->pRestoreTasks); + tqInfo("vgId:%d start restoring stream tasks, total tasks:%d", vgId, numOfTasks); + initOffsetForAllRestoreTasks(pTq); + pRunReq->head.vgId = vgId; pRunReq->streamId = 0; pRunReq->taskId = ALL_STREAM_TASKS_ID; diff --git a/source/dnode/vnode/src/tq/tqCommit.c b/source/dnode/vnode/src/tq/tqCommit.c index 7fc66c4919..0f5daa31ad 100644 --- a/source/dnode/vnode/src/tq/tqCommit.c +++ b/source/dnode/vnode/src/tq/tqCommit.c @@ -16,10 +16,13 @@ #include "tq.h" int tqCommit(STQ* pTq) { +#if 0 + // stream meta commit does not be aligned to the vnode commit if (streamMetaCommit(pTq->pStreamMeta) < 0) { tqError("vgId:%d, failed to commit stream meta since %s", TD_VID(pTq->pVnode), terrstr()); return -1; } +#endif return tqOffsetCommitFile(pTq->pOffsetStore); } diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index 50fcea2b54..9377e3d58f 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -15,25 +15,19 @@ #include "tq.h" -static int32_t restoreStreamTask(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, SArray* pTaskList); +static int32_t restoreStreamTaskImpl(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, SArray* pTaskList); static int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList); // this function should be executed by stream threads. // there is a case that the WAL increases more fast than the restore procedure, and this restore procedure // will not stop eventually. 
int tqDoRestoreSourceStreamTasks(STQ* pTq) { - - // todo set the offset value from the previous check point offset int64_t st = taosGetTimestampMs(); - int32_t vgId = TD_VID(pTq->pVnode); - int32_t numOfTasks = taosHashGetSize(pTq->pStreamMeta->pRestoreTasks); - tqInfo("vgId:%d start restoring stream tasks, total tasks:%d", vgId, numOfTasks); - while (1) { SArray* pTaskList = taosArrayInit(4, POINTER_BYTES); // check all restore tasks - restoreStreamTask(pTq->pStreamMeta, pTq->pOffsetStore, pTaskList); + restoreStreamTaskImpl(pTq->pStreamMeta, pTq->pOffsetStore, pTaskList); transferToNormalTask(pTq->pStreamMeta, pTaskList); taosArrayDestroy(pTaskList); @@ -44,7 +38,7 @@ int tqDoRestoreSourceStreamTasks(STQ* pTq) { } int64_t et = taosGetTimestampMs(); - tqInfo("vgId:%d restoring task completed, elapsed time:%" PRId64 " sec.", vgId, (et - st)); + tqInfo("vgId:%d restoring task completed, elapsed time:%" PRId64 " sec.", TD_VID(pTq->pVnode), (et - st)); return 0; } @@ -68,7 +62,7 @@ int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList) { return TSDB_CODE_SUCCESS; } -int32_t restoreStreamTask(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, SArray* pTaskList) { +int32_t restoreStreamTaskImpl(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, SArray* pTaskList) { // check all restore tasks void* pIter = NULL; @@ -93,7 +87,8 @@ int32_t restoreStreamTask(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); if (tInputQueueIsFull(pTask)) { - tqDebug("s-task:%s input queue is full, do nothing" PRId64, pTask->id.idStr); + tqDebug("s-task:%s input queue is full, do nothing", pTask->id.idStr); + taosMsleep(10); continue; } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index ac88cf1916..14054ad998 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -69,4 +69,76 @@ int32_t 
launchTaskForWalBlock(SStreamTask* pTask, SFetchRet* pRet, STqOffset* pO } return 0; -} \ No newline at end of file +} + +void initOffsetForAllRestoreTasks(STQ* pTq) { + void* pIter = NULL; + + while(1) { + pIter = taosHashIterate(pTq->pStreamMeta->pRestoreTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->taskLevel != TASK_LEVEL__SOURCE) { + continue; + } + + if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, + pTask->taskStatus); + continue; + } + + char key[128] = {0}; + createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); + + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); + if (pOffset == NULL) { + doSaveTaskOffset(pTq->pOffsetStore, key, pTask->startVer); + } + } + +} + +void saveOffsetForAllTasks(STQ* pTq, int64_t ver) { + void* pIter = NULL; + + while(1) { + pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); + if (pIter == NULL) { + break; + } + + SStreamTask* pTask = *(SStreamTask**)pIter; + if (pTask->taskLevel != TASK_LEVEL__SOURCE) { + continue; + } + + if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, + pTask->taskStatus); + continue; + } + + char key[128] = {0}; + createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); + + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); + if (pOffset == NULL) { + doSaveTaskOffset(pTq->pOffsetStore, key, ver); + } + } +} + +void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver) { + STqOffset offset = {0}; + tqOffsetResetToLog(&offset.val, ver); + + tstrncpy(offset.subKey, pKey, tListLen(offset.subKey)); + + // keep the offset info in the offset store + 
tqOffsetWrite(pOffsetStore, &offset); +} + diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 16433dc34e..759502e40f 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -127,14 +127,9 @@ enum { }; typedef struct { - // TODO remove prepareStatus -// STqOffsetVal prepareStatus; // for tmq - STqOffsetVal currentOffset; // for tmq - SMqMetaRsp metaRsp; // for tmq fetching meta -// int8_t returned; - int64_t snapshotVer; - // const SSubmitReq* pReq; - + STqOffsetVal currentOffset; // for tmq + SMqMetaRsp metaRsp; // for tmq fetching meta + int64_t snapshotVer; SPackedData submit; SSchemaWrapper* schema; char tbName[TSDB_TABLE_NAME_LEN]; @@ -144,6 +139,7 @@ typedef struct { int64_t fillHistoryVer1; int64_t fillHistoryVer2; SStreamState* pState; + int64_t dataVersion; } SStreamTaskInfo; typedef struct { @@ -191,7 +187,6 @@ enum { OP_OPENED = 0x1, OP_RES_TO_RETURN = 0x5, OP_EXEC_DONE = 0x9, -// OP_EXEC_RECV = 0x11, }; typedef struct SOperatorFpSet { @@ -560,6 +555,7 @@ typedef struct SStreamIntervalOperatorInfo { uint64_t numOfDatapack; SArray* pUpdated; SSHashObj* pUpdatedMap; + int64_t dataVersion; } SStreamIntervalOperatorInfo; typedef struct SDataGroupInfo { @@ -609,6 +605,7 @@ typedef struct SStreamSessionAggOperatorInfo { bool ignoreExpiredDataSaved; SArray* pUpdated; SSHashObj* pStUpdated; + int64_t dataVersion; } SStreamSessionAggOperatorInfo; typedef struct SStreamStateAggOperatorInfo { @@ -627,6 +624,7 @@ typedef struct SStreamStateAggOperatorInfo { bool ignoreExpiredDataSaved; SArray* pUpdated; SSHashObj* pSeUpdated; + int64_t dataVersion; } SStreamStateAggOperatorInfo; typedef struct SStreamPartitionOperatorInfo { diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index d9868f59b9..caaeaa76c2 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -198,6 +198,12 @@ int32_t 
qSetStreamOpOpen(qTaskInfo_t tinfo) { return code; } +int64_t qGetCheckpointVersion(qTaskInfo_t tinfo) { + SExecTaskInfo* pTaskInfo = tinfo; + return pTaskInfo->streamInfo.dataVersion; +} + + int32_t qSetMultiStreamInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, int32_t type) { if (tinfo == NULL) { return TSDB_CODE_APP_ERROR; diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 880de7d6bf..f122323109 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2333,9 +2333,14 @@ static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pN return startPos; } +static void setStreamDataVersion(SExecTaskInfo* pTaskInfo, int64_t version) { + pTaskInfo->streamInfo.dataVersion = version; +} + static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, uint64_t groupId, SSHashObj* pUpdatedMap) { SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperatorInfo->info; + pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); SResultRowInfo* pResultRowInfo = &(pInfo->binfo.resultRowInfo); SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; @@ -2501,6 +2506,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { clearFunctionContext(&pOperator->exprSupp); // semi interval operator clear disk buffer clearStreamIntervalOperator(pInfo); + setStreamDataVersion(pTaskInfo, pInfo->dataVersion); qDebug("===stream===clear semi operator"); } else { deleteIntervalDiscBuf(pInfo->pState, pInfo->pPullDataMap, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark, @@ -2774,6 +2780,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, pInfo->numOfDatapack = 0; pInfo->pUpdated = NULL; pInfo->pUpdatedMap = NULL; + pInfo->dataVersion = 0; pOperator->operatorType = pPhyNode->type; pOperator->blocking = true; @@ -3124,6 +3131,8 @@ 
static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData int32_t rows = pSDataBlock->info.rows; int32_t winRows = 0; + pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + SColumnInfoData* pStartTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); TSKEY* startTsCols = (int64_t*)pStartTsCol->pData; SColumnInfoData* pEndTsCol = NULL; @@ -3587,6 +3596,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh pInfo->ignoreExpiredDataSaved = false; pInfo->pUpdated = NULL; pInfo->pStUpdated = NULL; + pInfo->dataVersion = 0; setOperatorInfo(pOperator, "StreamSessionWindowAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true, OP_NOT_OPENED, pInfo, pTaskInfo); @@ -3897,6 +3907,9 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl TSKEY* tsCols = NULL; SResultRow* pResult = NULL; int32_t winRows = 0; + + pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + if (pSDataBlock->pDataBlock != NULL) { SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); tsCols = (int64_t*)pColDataInfo->pData; @@ -4113,6 +4126,7 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys pInfo->ignoreExpiredDataSaved = false; pInfo->pUpdated = NULL; pInfo->pSeUpdated = NULL; + pInfo->dataVersion = 0; setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED, pInfo, pTaskInfo); @@ -4748,6 +4762,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { &pInfo->delKey); setOperatorCompleted(pOperator); streamStateCommit(pTaskInfo->streamInfo.pState); + setStreamDataVersion(pTaskInfo, pInfo->dataVersion); return NULL; } diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index d23590a08b..f2db8113d3 100644 --- a/source/libs/stream/src/streamExec.c +++ 
b/source/libs/stream/src/streamExec.c @@ -21,8 +21,9 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* int32_t code = TSDB_CODE_SUCCESS; void* pExecutor = pTask->exec.pExecutor; - while(pTask->taskLevel == TASK_LEVEL__SOURCE && atomic_load_8(&pTask->taskStatus) != TASK_STATUS__NORMAL) { - qError("stream task wait for the end of fill history"); + while (pTask->taskLevel == TASK_LEVEL__SOURCE && atomic_load_8(&pTask->taskStatus) != TASK_STATUS__NORMAL) { + qError("stream task wait for the end of fill history, s-task:%s, status:%d", pTask->id.idStr, + atomic_load_8(&pTask->taskStatus)); taosMsleep(2); continue; } @@ -236,7 +237,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { while (1) { SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); if (qItem == NULL) { - qDebug("stream task exec over, queue empty, task: %d", pTask->id.taskId); + qDebug("s-task:%s stream task exec over, queue empty", pTask->id.idStr); break; } @@ -284,7 +285,19 @@ int32_t streamExecForAll(SStreamTask* pTask) { streamTaskExecImpl(pTask, pInput, pRes); - qDebug("s-task:%s exec end", pTask->id.idStr); + int64_t ckVer = qGetCheckpointVersion(pTask->exec.pExecutor); + if (ckVer > pTask->startVer) { // save it since the checkpoint is updated + qDebug("s-task:%s exec end, checkpoint ver from %"PRId64" to %"PRId64, pTask->id.idStr, pTask->startVer, ckVer); + pTask->startVer = ckVer; + streamMetaSaveTask(pTask->pMeta, pTask); + + if (streamMetaCommit(pTask->pMeta) < 0) { + qError("failed to commit stream meta, since %s", terrstr()); + return -1; + } + } else { + qDebug("s-task:%s exec end", pTask->id.idStr); + } if (taosArrayGetSize(pRes) != 0) { SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); @@ -333,6 +346,7 @@ int32_t streamTryExec(SStreamTask* pTask) { return -1; } + // todo the task should be commit here atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE); if (!taosQueueEmpty(pTask->inputQueue->queue)) 
{ diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 8693915c46..a22d768a89 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -195,17 +195,12 @@ SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId) { taosRLockLatch(&pMeta->lock); SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); - if (ppTask) { - SStreamTask* pTask = *ppTask; - if (atomic_load_8(&pTask->taskStatus) != TASK_STATUS__DROPPING) { - atomic_add_fetch_32(&pTask->refCnt, 1); - taosRUnLockLatch(&pMeta->lock); - return pTask; - } else { - taosRUnLockLatch(&pMeta->lock); - return NULL; - } + if (ppTask != NULL && (atomic_load_8(&((*ppTask)->taskStatus)) != TASK_STATUS__DROPPING)) { + atomic_add_fetch_32(&(*ppTask)->refCnt, 1); + taosRUnLockLatch(&pMeta->lock); + return *ppTask; } + taosRUnLockLatch(&pMeta->lock); return NULL; } @@ -219,6 +214,37 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { } } +SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId) { + taosRLockLatch(&pMeta->lock); + + SStreamTask* pTask = NULL; + int32_t numOfRestored = taosHashGetSize(pMeta->pRestoreTasks); + if (numOfRestored > 0) { + SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pRestoreTasks, &taskId, sizeof(int32_t)); + if (p != NULL) { + pTask = *p; + if (pTask != NULL && (atomic_load_8(&(pTask->taskStatus)) != TASK_STATUS__DROPPING)) { + atomic_add_fetch_32(&pTask->refCnt, 1); + taosRUnLockLatch(&pMeta->lock); + return pTask; + } + } + } else { + SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); + if (p != NULL) { + pTask = *p; + if (pTask != NULL && atomic_load_8(&(pTask->taskStatus)) != TASK_STATUS__DROPPING) { + atomic_add_fetch_32(&pTask->refCnt, 1); + taosRUnLockLatch(&pMeta->lock); + return pTask; + } + } + } + + taosRUnLockLatch(&pMeta->lock); + return NULL; +} + void 
streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); if (ppTask) { diff --git a/source/util/src/tworker.c b/source/util/src/tworker.c index 631bcb443e..d57104dd78 100644 --- a/source/util/src/tworker.c +++ b/source/util/src/tworker.c @@ -218,7 +218,7 @@ STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem int32_t queueNum = taosGetQueueNumber(pool->qset); int32_t curWorkerNum = taosArrayGetSize(pool->workers); int32_t dstWorkerNum = ceil(queueNum * pool->ratio); - if (dstWorkerNum < 1) dstWorkerNum = 1; + if (dstWorkerNum < 2) dstWorkerNum = 2; // spawn a thread to process queue while (curWorkerNum < dstWorkerNum) { From 74da3c05bcad0ceef999f09c1dc63719c76ed985 Mon Sep 17 00:00:00 2001 From: liuyao <54liuyao@163.com> Date: Tue, 11 Apr 2023 10:33:10 +0800 Subject: [PATCH 04/25] feat:set check point id --- include/libs/executor/executor.h | 2 +- include/libs/stream/streamState.h | 1 + source/libs/executor/inc/executorimpl.h | 1 + source/libs/executor/src/executor.c | 5 +++-- source/libs/executor/src/timewindowoperator.c | 7 ++++--- source/libs/stream/src/streamExec.c | 10 ++++++---- source/libs/stream/src/streamState.c | 2 ++ 7 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index fd66194143..9d05d1fdc9 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -92,7 +92,7 @@ void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId); int32_t qSetStreamOpOpen(qTaskInfo_t tinfo); // todo refactor -int64_t qGetCheckpointVersion(qTaskInfo_t tinfo); +void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId); /** * Set multiple input data blocks for the stream scan. 
diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index fd5cec2931..42a7261f38 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -42,6 +42,7 @@ typedef struct STdbState { typedef struct { STdbState* pTdbState; int32_t number; + int64_t checkPointId; } SStreamState; SStreamState* streamStateOpen(char* path, struct SStreamTask* pTask, bool specPath, int32_t szPage, int32_t pages); diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 759502e40f..7fee4f9b83 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -140,6 +140,7 @@ typedef struct { int64_t fillHistoryVer2; SStreamState* pState; int64_t dataVersion; + int64_t checkPointId; } SStreamTaskInfo; typedef struct { diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index caaeaa76c2..2244847faa 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -198,9 +198,10 @@ int32_t qSetStreamOpOpen(qTaskInfo_t tinfo) { return code; } -int64_t qGetCheckpointVersion(qTaskInfo_t tinfo) { +void qGetCheckpointVersion(qTaskInfo_t tinfo, int64_t* dataVer, int64_t* ckId) { SExecTaskInfo* pTaskInfo = tinfo; - return pTaskInfo->streamInfo.dataVersion; + *dataVer = pTaskInfo->streamInfo.dataVersion; + *ckId = pTaskInfo->streamInfo.checkPointId; } diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index f122323109..658dd51978 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2333,8 +2333,9 @@ static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pN return startPos; } -static void setStreamDataVersion(SExecTaskInfo* pTaskInfo, int64_t version) { +static void setStreamDataVersion(SExecTaskInfo* pTaskInfo, int64_t version, int64_t ckId) { 
pTaskInfo->streamInfo.dataVersion = version; + pTaskInfo->streamInfo.checkPointId = ckId; } static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, uint64_t groupId, @@ -2506,7 +2507,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { clearFunctionContext(&pOperator->exprSupp); // semi interval operator clear disk buffer clearStreamIntervalOperator(pInfo); - setStreamDataVersion(pTaskInfo, pInfo->dataVersion); + setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); qDebug("===stream===clear semi operator"); } else { deleteIntervalDiscBuf(pInfo->pState, pInfo->pPullDataMap, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark, @@ -4762,7 +4763,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { &pInfo->delKey); setOperatorCompleted(pOperator); streamStateCommit(pTaskInfo->streamInfo.pState); - setStreamDataVersion(pTaskInfo, pInfo->dataVersion); + setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); return NULL; } diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index f2db8113d3..8dafafcc5f 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -285,10 +285,12 @@ int32_t streamExecForAll(SStreamTask* pTask) { streamTaskExecImpl(pTask, pInput, pRes); - int64_t ckVer = qGetCheckpointVersion(pTask->exec.pExecutor); - if (ckVer > pTask->startVer) { // save it since the checkpoint is updated - qDebug("s-task:%s exec end, checkpoint ver from %"PRId64" to %"PRId64, pTask->id.idStr, pTask->startVer, ckVer); - pTask->startVer = ckVer; + int64_t ckId = 0; + int64_t dataVer = 0; + qGetCheckpointVersion(pTask->exec.pExecutor, &dataVer, &ckId); + if (dataVer > pTask->startVer) { // save it since the checkpoint is updated + qDebug("s-task:%s exec end, checkpoint ver from %"PRId64" to %"PRId64, pTask->id.idStr, pTask->startVer, dataVer); + pTask->startVer = dataVer; 
streamMetaSaveTask(pTask->pMeta, pTask); if (streamMetaCommit(pTask->pMeta) < 0) { diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 04a1414438..7bea989e3a 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -193,6 +193,7 @@ SStreamState* streamStateOpen(char* path, SStreamTask* pTask, bool specPath, int } pState->pTdbState->pOwner = pTask; + pState->checkPointId = 0; return pState; @@ -243,6 +244,7 @@ int32_t streamStateCommit(SStreamState* pState) { TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } + pState->checkPointId++; return 0; } From 15cceb5a5aa234bdb124287670abb74852af8923 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 11 Apr 2023 11:53:10 +0800 Subject: [PATCH 05/25] refactor: do some internal refactors. --- include/libs/stream/tstream.h | 34 ++++++++++++++------------ source/dnode/snode/src/snode.c | 2 +- source/dnode/vnode/src/tq/tq.c | 6 ++--- source/dnode/vnode/src/tq/tqUtil.c | 2 +- source/dnode/vnode/src/tsdb/tsdbRead.c | 16 ++++++------ source/libs/stream/src/streamExec.c | 6 ++--- source/libs/stream/src/streamTask.c | 8 +++--- 7 files changed, 38 insertions(+), 36 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 22e9356b3d..7dcb2e1796 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -287,19 +287,24 @@ typedef struct SStreamId { const char* idStr; } SStreamId; +typedef struct SCheckpointInfo { + int64_t id; + int64_t version; // offset in WAL +} SCheckpointInfo; + struct SStreamTask { - SStreamId id; - int32_t totalLevel; - int8_t taskLevel; - int8_t outputType; - int16_t dispatchMsgType; - int8_t taskStatus; - int8_t schedStatus; - int32_t selfChildId; - int32_t nodeId; - SEpSet epSet; - int64_t recoverSnapVer; - int64_t startVer; + SStreamId id; + int32_t totalLevel; + int8_t taskLevel; + int8_t outputType; + int16_t dispatchMsgType; + int8_t taskStatus; + int8_t 
schedStatus; + int32_t selfChildId; + int32_t nodeId; + SEpSet epSet; + SCheckpointInfo chkInfo; + STaskExec exec; // fill history int8_t fillHistory; @@ -309,9 +314,6 @@ struct SStreamTask { int32_t nextCheckId; SArray* checkpointInfo; // SArray - // exec - STaskExec exec; - // output union { STaskDispatcherFixedEp fixedEpDispatcher; @@ -587,7 +589,7 @@ void streamMetaClose(SStreamMeta* streamMeta); int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask); -int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t startVer, char* msg, int32_t msgLen); +int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t checkpointVer, char* msg, int32_t msgLen); // SStreamTask* streamMetaGetTask(SStreamMeta* pMeta, int32_t taskId); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId); diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 447c90eb58..ee6f649d52 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -76,7 +76,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pSnode->msgCb; - pTask->startVer = ver; + pTask->chkInfo.version = ver; pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1); if (pTask->pState == NULL) { diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 91e2569a8c..afdbf4e7c8 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -906,7 +906,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pTq->pVnode->msgCb; - pTask->startVer = ver; + pTask->chkInfo.version = ver; pTask->pMeta = pTq->pStreamMeta; // expand executor @@ -981,7 
+981,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { streamSetupTrigger(pTask); tqInfo("vgId:%d expand stream task, s-task:%s, ver:%" PRId64 " child id:%d, level:%d", vgId, pTask->id.idStr, - pTask->startVer, pTask->selfChildId, pTask->taskLevel); + pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel); return 0; } @@ -1124,7 +1124,7 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { } // check param - int64_t fillVer1 = pTask->startVer; + int64_t fillVer1 = pTask->chkInfo.version; if (fillVer1 <= 0) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return -1; diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 14054ad998..5f4e0ced11 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -96,7 +96,7 @@ void initOffsetForAllRestoreTasks(STQ* pTq) { STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); if (pOffset == NULL) { - doSaveTaskOffset(pTq->pOffsetStore, key, pTask->startVer); + doSaveTaskOffset(pTq->pOffsetStore, key, pTask->chkInfo.version); } } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index b80c952ee0..61f7747ae4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2088,7 +2088,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, pBlockScanInfo->lastKey = tsLastBlock; return TSDB_CODE_SUCCESS; } else { - int32_t code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); + code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -2112,7 +2112,7 @@ static int32_t doMergeFileBlockAndLastBlock(SLastBlockReader* pLastBlockReader, } } } else { // not merge block data - int32_t code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); + code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return 
code; } @@ -2352,7 +2352,7 @@ static int32_t doMergeMultiLevelRows(STsdbReader* pReader, STableBlockScanInfo* tsdbRowMergerAdd(&merge, piRow, piSchema); } else { init = true; - STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); code = tsdbRowMergerInit(&merge, pSchema, piRow, piSchema); if (code != TSDB_CODE_SUCCESS) { return code; @@ -2575,7 +2575,7 @@ int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBloc SRow* pTSRow = NULL; SRowMerger merge = {0}; - int32_t code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); + code = tsdbRowMergerInit(&merge, NULL, &fRow, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -3242,8 +3242,8 @@ static int32_t readRowsCountFromFiles(STsdbReader* pReader) { int32_t code = TSDB_CODE_SUCCESS; while (1) { - bool hasNext = false; - int32_t code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext); + bool hasNext = false; + code = filesetIteratorNext(&pReader->status.fileIter, pReader, &hasNext); if (code) { return code; } @@ -3515,8 +3515,8 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_ int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion; int64_t endVer = 0; - if (pCond->endVersion == - -1) { // user not specified end version, set current maximum version of vnode as the endVersion + if (pCond->endVersion == -1) { + // user not specified end version, set current maximum version of vnode as the endVersion endVer = pVnode->state.applied; } else { endVer = (pCond->endVersion > pVnode->state.applied) ? 
pVnode->state.applied : pCond->endVersion; diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 8dafafcc5f..98052ec6ba 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -288,9 +288,9 @@ int32_t streamExecForAll(SStreamTask* pTask) { int64_t ckId = 0; int64_t dataVer = 0; qGetCheckpointVersion(pTask->exec.pExecutor, &dataVer, &ckId); - if (dataVer > pTask->startVer) { // save it since the checkpoint is updated - qDebug("s-task:%s exec end, checkpoint ver from %"PRId64" to %"PRId64, pTask->id.idStr, pTask->startVer, dataVer); - pTask->startVer = dataVer; + if (dataVer > pTask->chkInfo.version) { // save it since the checkpoint is updated + qDebug("s-task:%s exec end, checkpoint ver from %"PRId64" to %"PRId64, pTask->id.idStr, pTask->chkInfo.version, dataVer); + pTask->chkInfo = (SCheckpointInfo) {.version = dataVer, .id = ckId}; streamMetaSaveTask(pTask->pMeta, pTask); if (streamMetaCommit(pTask->pMeta) < 0) { diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index c1f50178dd..b1f0a63c2e 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -70,8 +70,8 @@ int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI32(pEncoder, pTask->nodeId) < 0) return -1; if (tEncodeSEpSet(pEncoder, &pTask->epSet) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->recoverSnapVer) < 0) return -1; - if (tEncodeI64(pEncoder, pTask->startVer) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1; + if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1; if (tEncodeI8(pEncoder, pTask->fillHistory) < 0) return -1; int32_t epSz = taosArrayGetSize(pTask->childEpInfo); @@ -123,8 +123,8 @@ int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI32(pDecoder, &pTask->nodeId) < 0) return -1; if (tDecodeSEpSet(pDecoder, &pTask->epSet) < 0) return -1; - if 
(tDecodeI64(pDecoder, &pTask->recoverSnapVer) < 0) return -1; - if (tDecodeI64(pDecoder, &pTask->startVer) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1; + if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->fillHistory) < 0) return -1; int32_t epSz; From 18479d8115f26c52e726b56d75ddb76bcada6209 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 11 Apr 2023 15:42:24 +0800 Subject: [PATCH 06/25] refactor: do some internal refactor. --- include/libs/stream/tstream.h | 8 ++++---- source/dnode/mnode/impl/src/mndDef.c | 4 ++-- source/dnode/mnode/impl/src/mndScheduler.c | 10 +++++----- source/dnode/mnode/impl/src/mndStream.c | 8 ++++---- source/dnode/snode/src/snode.c | 2 +- source/dnode/vnode/src/tq/tq.c | 10 +++++----- source/dnode/vnode/src/tq/tqRestore.c | 5 +++-- source/libs/stream/src/streamExec.c | 14 +++++++++----- source/libs/stream/src/streamMeta.c | 8 ++++---- source/libs/stream/src/streamTask.c | 6 +++--- 10 files changed, 40 insertions(+), 35 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 7dcb2e1796..2368788824 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -50,7 +50,7 @@ enum { TASK_STATUS__RECOVER_PREPARE, TASK_STATUS__RECOVER1, TASK_STATUS__RECOVER2, - TASK_STATUS_RESTORE, // only available for source task to replay WAL from the checkpoint + TASK_STATUS__RESTORE, // only available for source task to replay WAL from the checkpoint }; enum { @@ -353,9 +353,9 @@ struct SStreamTask { int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo); -SStreamTask* tNewSStreamTask(int64_t streamId); -int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); -int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask); +SStreamTask* tNewStreamTask(int64_t streamId); +int32_t
tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); +int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask); void tFreeStreamTask(SStreamTask* pTask); int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem); bool tInputQueueIsFull(const SStreamTask* pTask); diff --git a/source/dnode/mnode/impl/src/mndDef.c b/source/dnode/mnode/impl/src/mndDef.c index e221a64619..c69f08eb6b 100644 --- a/source/dnode/mnode/impl/src/mndDef.c +++ b/source/dnode/mnode/impl/src/mndDef.c @@ -70,7 +70,7 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) { if (tEncodeI32(pEncoder, innerSz) < 0) return -1; for (int32_t j = 0; j < innerSz; j++) { SStreamTask *pTask = taosArrayGetP(pArray, j); - if (tEncodeSStreamTask(pEncoder, pTask) < 0) return -1; + if (tEncodeStreamTask(pEncoder, pTask) < 0) return -1; } } @@ -130,7 +130,7 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) { taosArrayDestroy(pArray); return -1; } - if (tDecodeSStreamTask(pDecoder, pTask) < 0) { + if (tDecodeStreamTask(pDecoder, pTask) < 0) { taosMemoryFree(pTask); taosArrayDestroy(pArray); return -1; diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 504749df49..36521fd778 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -224,7 +224,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) { continue; } - SStreamTask* pTask = tNewSStreamTask(pStream->uid); + SStreamTask* pTask = tNewStreamTask(pStream->uid); if (pTask == NULL) { sdbRelease(pSdb, pVgroup); terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -260,7 +260,7 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) { int32_t mndAddFixedSinkTaskToStream(SMnode* pMnode, SStreamObj* pStream) { SArray* tasks = taosArrayGetP(pStream->tasks, 0); - SStreamTask* pTask = tNewSStreamTask(pStream->uid); + SStreamTask* pTask = 
tNewStreamTask(pStream->uid); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return -1; @@ -350,7 +350,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { return -1; } - pInnerTask = tNewSStreamTask(pStream->uid); + pInnerTask = tNewStreamTask(pStream->uid); if (pInnerTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; qDestroyQueryPlan(pPlan); @@ -421,7 +421,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { continue; } - SStreamTask* pTask = tNewSStreamTask(pStream->uid); + SStreamTask* pTask = tNewStreamTask(pStream->uid); if (pTask == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; sdbRelease(pSdb, pVgroup); @@ -491,7 +491,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { continue; } - SStreamTask* pTask = tNewSStreamTask(pStream->uid); + SStreamTask* pTask = tNewStreamTask(pStream->uid); if (pTask == NULL) { sdbRelease(pSdb, pVgroup); qDestroyQueryPlan(pPlan); diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index a0118ee749..b8c540266f 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -39,8 +39,8 @@ static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStream static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq); static int32_t mndProcessDropStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq); -// static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); -/*static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq);*/ +static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq); +static int32_t mndProcessRecoverStreamReq(SRpcMsg *pReq); static int32_t mndProcessStreamMetaReq(SRpcMsg *pReq); static int32_t mndGetStreamMeta(SRpcMsg *pReq, SShowObj *pShow, STableMetaRsp *pMeta); static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows); @@ -418,7 +418,7 @@ FAIL: int32_t mndPersistTaskDeployReq(STrans *pTrans, const 
SStreamTask *pTask) { SEncoder encoder; tEncoderInit(&encoder, NULL, 0); - tEncodeSStreamTask(&encoder, pTask); + tEncodeStreamTask(&encoder, pTask); int32_t size = encoder.pos; int32_t tlen = sizeof(SMsgHead) + size; tEncoderClear(&encoder); @@ -430,7 +430,7 @@ int32_t mndPersistTaskDeployReq(STrans *pTrans, const SStreamTask *pTask) { ((SMsgHead *)buf)->vgId = htonl(pTask->nodeId); void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); tEncoderInit(&encoder, abuf, size); - tEncodeSStreamTask(&encoder, pTask); + tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); STransAction action = {0}; diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index ee6f649d52..f8e4268aad 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -139,7 +139,7 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { } SDecoder decoder; tDecoderInit(&decoder, (uint8_t *)msg, msgLen); - code = tDecodeSStreamTask(&decoder, pTask); + code = tDecodeStreamTask(&decoder, pTask); if (code < 0) { tDecoderClear(&decoder); taosMemoryFree(pTask); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index afdbf4e7c8..6fca9cc808 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -906,14 +906,13 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pTq->pVnode->msgCb; - pTask->chkInfo.version = ver; pTask->pMeta = pTq->pStreamMeta; // expand executor if (pTask->fillHistory) { pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; } else { - pTask->taskStatus = TASK_STATUS_RESTORE; + pTask->taskStatus = TASK_STATUS__RESTORE; } if (pTask->taskLevel == TASK_LEVEL__SOURCE) { @@ -1089,7 +1088,7 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - code 
= tDecodeSStreamTask(&decoder, pTask); + code = tDecodeStreamTask(&decoder, pTask); if (code < 0) { tDecoderClear(&decoder); taosMemoryFree(pTask); @@ -1485,8 +1484,9 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { if (pTask->taskStatus == TASK_STATUS__NORMAL) { tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr); streamProcessRunReq(pTask); - } else if (pTask->taskStatus == TASK_STATUS_RESTORE) { - tqDebug("vgId:%d s-task:%s start to restore from last ck", vgId, pTask->id.idStr); + } else if (pTask->taskStatus == TASK_STATUS__RESTORE) { + tqDebug("vgId:%d s-task:%s start to process in restore procedure from last chk point:%" PRId64, vgId, + pTask->id.idStr, pTask->chkInfo.version); streamProcessRunReq(pTask); } else { tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr); diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index 9377e3d58f..a123bdb1dc 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -49,9 +49,10 @@ int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList) { } // todo: add lock - for(int32_t i = 0; i < numOfTask; ++i){ + for (int32_t i = 0; i < numOfTask; ++i) { SStreamTask* pTask = taosArrayGetP(pTaskList, i); - tqDebug("vgId:%d transfer s-task:%s state restore -> ready", pStreamMeta->vgId, pTask->id.idStr); + tqDebug("vgId:%d transfer s-task:%s state restore -> ready, checkpoint:%" PRId64 " checkpoint id:%" PRId64, + pStreamMeta->vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->chkInfo.id); taosHashRemove(pStreamMeta->pRestoreTasks, &pTask->id.taskId, sizeof(pTask->id.taskId)); // NOTE: do not change the following order diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 98052ec6ba..075e477eb3 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -21,11 +21,15 @@ static int32_t 
streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* int32_t code = TSDB_CODE_SUCCESS; void* pExecutor = pTask->exec.pExecutor; - while (pTask->taskLevel == TASK_LEVEL__SOURCE && atomic_load_8(&pTask->taskStatus) != TASK_STATUS__NORMAL) { - qError("stream task wait for the end of fill history, s-task:%s, status:%d", pTask->id.idStr, - atomic_load_8(&pTask->taskStatus)); - taosMsleep(2); - continue; + while (pTask->taskLevel == TASK_LEVEL__SOURCE) { + int8_t status = atomic_load_8(&pTask->taskStatus); + if (status != TASK_STATUS__NORMAL && status != TASK_STATUS__RESTORE) { + qError("stream task wait for the end of fill history, s-task:%s, status:%d", pTask->id.idStr, + atomic_load_8(&pTask->taskStatus)); + taosMsleep(2); + } else { + break; + } } // set input diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index a22d768a89..ae65753bed 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -109,7 +109,7 @@ int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t ver, char* msg, } SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msg, msgLen); - if (tDecodeSStreamTask(&decoder, pTask) < 0) { + if (tDecodeStreamTask(&decoder, pTask) < 0) { tDecoderClear(&decoder); goto FAIL; } @@ -142,7 +142,7 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { void* buf = NULL; int32_t len; int32_t code; - tEncodeSize(tEncodeSStreamTask, pTask, len, code); + tEncodeSize(tEncodeStreamTask, pTask, len, code); if (code < 0) { return -1; } @@ -153,7 +153,7 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, len); - tEncodeSStreamTask(&encoder, pTask); + tEncodeStreamTask(&encoder, pTask); tEncoderClear(&encoder); if (tdbTbUpsert(pMeta->pTaskDb, &pTask->id.taskId, sizeof(int32_t), buf, len, pMeta->txn) < 0) { @@ -321,7 +321,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } 
tDecoderInit(&decoder, (uint8_t*)pVal, vLen); - tDecodeSStreamTask(&decoder, pTask); + tDecodeStreamTask(&decoder, pTask); tDecoderClear(&decoder); if (pMeta->expandFunc(pMeta->ahandle, pTask, -1) < 0) { diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index b1f0a63c2e..f45b6ad7b7 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -16,7 +16,7 @@ #include "executor.h" #include "tstream.h" -SStreamTask* tNewSStreamTask(int64_t streamId) { +SStreamTask* tNewStreamTask(int64_t streamId) { SStreamTask* pTask = (SStreamTask*)taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { return NULL; @@ -54,7 +54,7 @@ int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo) { return 0; } -int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { +int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pTask->id.streamId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->id.taskId) < 0) return -1; @@ -107,7 +107,7 @@ int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { return pEncoder->pos; } -int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { +int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pTask->id.streamId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->id.taskId) < 0) return -1; From aae275886352a704787940933b94b226dda4693c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 11 Apr 2023 16:58:51 +0800 Subject: [PATCH 07/25] fix(stream): disable the deploy msg when restart taosd. 
--- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index e353988d4c..76ff04b81a 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -400,7 +400,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } break; case TDMT_STREAM_TASK_DEPLOY: { - if (tqProcessTaskDeployReq(pVnode->pTq, version, pReq, len) < 0) { + if (pVnode->restored && tqProcessTaskDeployReq(pVnode->pTq, version, pReq, len) < 0) { goto _err; } } break; From f083697dba5091531c03874deefb395adcfe2e8e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 11 Apr 2023 19:04:33 +0800 Subject: [PATCH 08/25] refactor(tq): do some internal refactor. --- source/dnode/vnode/src/inc/tq.h | 1 + source/dnode/vnode/src/tq/tq.c | 346 +---------------------------- source/dnode/vnode/src/tq/tqUtil.c | 346 +++++++++++++++++++++++++++++ 3 files changed, 348 insertions(+), 345 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 50a09229fa..94ba399a0a 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -182,6 +182,7 @@ int32_t tqDoRestoreSourceStreamTasks(STQ* pTq); void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId); int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver); int32_t launchTaskForWalBlock(SStreamTask* pTask, SFetchRet* pRet, STqOffset* pOffset); +int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ver); void saveOffsetForAllTasks(STQ* pTq, int64_t ver); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 6fca9cc808..48c9a4f445 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ 
b/source/dnode/vnode/src/tq/tq.c @@ -15,7 +15,6 @@ #include "tq.h" -#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) #define ALL_STREAM_TASKS_ID (-1) int32_t tqInit() { @@ -143,44 +142,6 @@ void tqClose(STQ* pTq) { taosMemoryFree(pTq); } -int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp) { - int32_t len = 0; - int32_t code = 0; - tEncodeSize(tEncodeSMqMetaRsp, pRsp, len, code); - if (code < 0) { - return -1; - } - int32_t tlen = sizeof(SMqRspHead) + len; - void* buf = rpcMallocCont(tlen); - if (buf == NULL) { - return -1; - } - - ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__POLL_META_RSP; - ((SMqRspHead*)buf)->epoch = pReq->epoch; - ((SMqRspHead*)buf)->consumerId = pReq->consumerId; - - void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); - - SEncoder encoder = {0}; - tEncoderInit(&encoder, abuf, len); - tEncodeSMqMetaRsp(&encoder, pRsp); - tEncoderClear(&encoder); - - SRpcMsg resp = { - .info = pMsg->info, - .pCont = buf, - .contLen = tlen, - .code = 0, - }; - tmsgSendRsp(&resp); - - tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) send rsp, res msg type %d, offset type:%d", - TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->resMsgType, pRsp->rspOffset.type); - - return 0; -} - static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type) { int32_t len = 0; @@ -330,311 +291,6 @@ int32_t tqCheckColModifiable(STQ* pTq, int64_t tbUid, int32_t colId) { return 0; } -static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t subType) { - pRsp->reqOffset = pReq->reqOffset; - - pRsp->blockData = taosArrayInit(0, sizeof(void*)); - pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t)); - - if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL) { - return -1; - } - - pRsp->withTbName = 0; - pRsp->withSchema = false; - return 0; -} - -static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, const SMqPollReq* pReq) { - 
pRsp->reqOffset = pReq->reqOffset; - - pRsp->withTbName = 1; - pRsp->withSchema = 1; - pRsp->blockData = taosArrayInit(0, sizeof(void*)); - pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t)); - pRsp->blockTbName = taosArrayInit(0, sizeof(void*)); - pRsp->blockSchema = taosArrayInit(0, sizeof(void*)); - - if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL || pRsp->blockTbName == NULL || pRsp->blockSchema == NULL) { - return -1; - } - - return 0; -} - -static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, - SRpcMsg* pMsg, bool* pBlockReturned) { - uint64_t consumerId = pRequest->consumerId; - STqOffsetVal reqOffset = pRequest->reqOffset; - STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, pRequest->subKey); - int32_t vgId = TD_VID(pTq->pVnode); - - *pBlockReturned = false; - - // In this vnode, data has been polled by consumer for this topic, so let's continue from the last offset value. - if (pOffset != NULL) { - *pOffsetVal = pOffset->val; - - char formatBuf[80]; - tFormatOffset(formatBuf, 80, pOffsetVal); - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, existed offset found, offset reset to %s and continue. reqId:0x%"PRIx64, - consumerId, pHandle->subKey, vgId, formatBuf, pRequest->reqId); - return 0; - } else { - // no poll occurs in this vnode for this topic, let's seek to the right offset value. 
- if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) { - if (pRequest->useSnapshot) { - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey:%s, vgId:%d, (earliest) set offset to be snapshot", - consumerId, pHandle->subKey, vgId); - - if (pHandle->fetchMeta) { - tqOffsetResetToMeta(pOffsetVal, 0); - } else { - tqOffsetResetToData(pOffsetVal, 0, 0); - } - } else { - pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); - if (pHandle->pRef == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - // offset set to previous version when init - tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1); - } - } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); - - tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId, - pHandle->subKey, vgId, dataRsp.rspOffset.version); - int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); - tDeleteSMqDataRsp(&dataRsp); - - *pBlockReturned = true; - return code; - } else { - STaosxRsp taosxRsp = {0}; - tqInitTaosxRsp(&taosxRsp, pRequest); - tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); - int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - - *pBlockReturned = true; - return code; - } - } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) { - tqError("tmq poll: subkey:%s, no offset committed for consumer:0x%" PRIx64 " in vg %d, subkey %s, reset none failed", - pHandle->subKey, consumerId, vgId, pRequest->subKey); - terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; - return -1; - } - } - - return 0; -} - -static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const 
SMqPollReq* pRequest, - SRpcMsg* pMsg, STqOffsetVal* pOffset) { - uint64_t consumerId = pRequest->consumerId; - int32_t vgId = TD_VID(pTq->pVnode); - - SMqDataRsp dataRsp = {0}; - tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); - - // lock - taosWLockLatch(&pTq->lock); - - qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId); - int code = tqScanData(pTq, pHandle, &dataRsp, pOffset); - if(code != 0) { - goto end; - } - - // till now, all data has been transferred to consumer, new data needs to push client once arrived. - if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && - dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { - code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); - taosWUnLockLatch(&pTq->lock); - return code; - } - - - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP); - - // NOTE: this pHandle->consumerId may have been changed already. 
- -end: - { - char buf[80] = {0}; - tFormatOffset(buf, 80, &dataRsp.rspOffset); - tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, rsp offset type:%s, reqId:0x%" PRIx64 " code:%d", - consumerId, pHandle->subKey, vgId, dataRsp.blockNum, buf, pRequest->reqId, code); - taosWUnLockLatch(&pTq->lock); - tDeleteSMqDataRsp(&dataRsp); - } - return code; -} - - -static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg, STqOffsetVal *offset) { - int code = 0; - int32_t vgId = TD_VID(pTq->pVnode); - SWalCkHead* pCkHead = NULL; - SMqMetaRsp metaRsp = {0}; - STaosxRsp taosxRsp = {0}; - tqInitTaosxRsp(&taosxRsp, pRequest); - - if (offset->type != TMQ_OFFSET__LOG) { - if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, offset) < 0) { - return -1; - } - - if (metaRsp.metaRspLen > 0) { - code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); - tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64 ",ts:%" PRId64, - pRequest->consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.ts); - taosMemoryFree(metaRsp.metaRsp); - tDeleteSTaosxRsp(&taosxRsp); - return code; - } - - tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send data blockNum:%d, offset type:%d,uid:%" PRId64 - ",ts:%" PRId64,pRequest->consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid,taosxRsp.rspOffset.ts); - if (taosxRsp.blockNum > 0) { - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - return code; - }else { - *offset = taosxRsp.rspOffset; - } - } - - - if (offset->type == TMQ_OFFSET__LOG) { - int64_t fetchVer = offset->version + 1; - pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); - if (pCkHead == NULL) { - tDeleteSTaosxRsp(&taosxRsp); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - 
walSetReaderCapacity(pHandle->pWalReader, 2048); - int totalRows = 0; - while (1) { - int32_t savedEpoch = atomic_load_32(&pHandle->epoch); - if (savedEpoch > pRequest->epoch) { - tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey:%s vgId:%d offset %" PRId64 - ", found new consumer epoch %d, discard req epoch %d", pRequest->consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch); - break; - } - - if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead, pRequest->reqId) < 0) { - tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return code; - } - - SWalCont* pHead = &pCkHead->head; - tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", pRequest->consumerId, - pRequest->epoch, vgId, fetchVer, pHead->msgType); - - // process meta - if (pHead->msgType != TDMT_VND_SUBMIT) { - if(totalRows > 0) { - tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer - 1); - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return code; - } - - tqDebug("fetch meta msg, ver:%" PRId64 ", type:%s", pHead->version, TMSG_INFO(pHead->msgType)); - tqOffsetResetToLog(&metaRsp.rspOffset, fetchVer); - metaRsp.resMsgType = pHead->msgType; - metaRsp.metaRspLen = pHead->bodyLen; - metaRsp.metaRsp = pHead->body; - if (tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp) < 0) { - code = -1; - taosMemoryFreeClear(pCkHead); - tDeleteSTaosxRsp(&taosxRsp); - return code; - } - code = 0; - taosMemoryFreeClear(pCkHead); - tDeleteSTaosxRsp(&taosxRsp); - return code; - } - - // process data - SPackedData submit = { - .msgStr = POINTER_SHIFT(pHead->body, sizeof(SSubmitReq2Msg)), - .msgLen = pHead->bodyLen - sizeof(SSubmitReq2Msg), - .ver = pHead->version, - }; - - if 
(tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp, &totalRows) < 0) { - tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", pRequest->consumerId, vgId, - pRequest->subKey); - taosMemoryFreeClear(pCkHead); - tDeleteSTaosxRsp(&taosxRsp); - return -1; - } - - if (totalRows >= 4096 || taosxRsp.createTableNum > 0) { - tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); - code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return code; - } else { - fetchVer++; - } - } - } - - tDeleteSTaosxRsp(&taosxRsp); - taosMemoryFreeClear(pCkHead); - return 0; -} - -static int32_t doPollDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) { - int32_t code = -1; - STqOffsetVal offset = {0}; - STqOffsetVal reqOffset = pRequest->reqOffset; - - // 1. reset the offset if needed - if (IS_OFFSET_RESET_TYPE(reqOffset.type)) { - // handle the reset offset cases, according to the consumer's choice. - bool blockReturned = false; - code = extractResetOffsetVal(&offset, pTq, pHandle, pRequest, pMsg, &blockReturned); - if (code != 0) { - return code; - } - - // empty block returned, quit - if (blockReturned) { - return 0; - } - } else { // use the consumer specified offset - // the offset value can not be monotonious increase?? - offset = reqOffset; - } - - // this is a normal subscribe requirement - if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - return extractDataAndRspForNormalSubscribe(pTq, pHandle, pRequest, pMsg, &offset); - } - - // todo handle the case where re-balance occurs. 
- // for taosx - return extractDataAndRspForDbStbSubscribe(pTq, pHandle, pRequest, pMsg, &offset); -} - int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { SMqPollReq req = {0}; if (tDeserializeSMqPollReq(pMsg->pCont, pMsg->contLen, &req) < 0) { @@ -681,7 +337,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg) { tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey %s, recv poll req vgId:%d, req:%s, reqId:0x%" PRIx64, consumerId, req.epoch, pHandle->subKey, vgId, buf, req.reqId); - return doPollDataForMq(pTq, pHandle, &req, pMsg); + return tqExtractDataForMq(pTq, pHandle, &req, pMsg); } int32_t tqProcessDeleteSubReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) { diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 5f4e0ced11..8b86cb6716 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -15,6 +15,10 @@ #include "tq.h" +#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) + +static int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp); + // stream_task:stream_id:task_id void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId) { int32_t n = 12; @@ -142,3 +146,345 @@ void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, int64_t ve tqOffsetWrite(pOffsetStore, &offset); } +static int32_t tqInitDataRsp(SMqDataRsp* pRsp, const SMqPollReq* pReq, int8_t subType) { + pRsp->reqOffset = pReq->reqOffset; + + pRsp->blockData = taosArrayInit(0, sizeof(void*)); + pRsp->blockDataLen = taosArrayInit(0, sizeof(int32_t)); + + if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL) { + return -1; + } + + pRsp->withTbName = 0; + pRsp->withSchema = false; + return 0; +} + +static int32_t tqInitTaosxRsp(STaosxRsp* pRsp, const SMqPollReq* pReq) { + pRsp->reqOffset = pReq->reqOffset; + + pRsp->withTbName = 1; + pRsp->withSchema = 1; + pRsp->blockData = taosArrayInit(0, sizeof(void*)); + pRsp->blockDataLen = 
taosArrayInit(0, sizeof(int32_t)); + pRsp->blockTbName = taosArrayInit(0, sizeof(void*)); + pRsp->blockSchema = taosArrayInit(0, sizeof(void*)); + + if (pRsp->blockData == NULL || pRsp->blockDataLen == NULL || pRsp->blockTbName == NULL || pRsp->blockSchema == NULL) { + return -1; + } + + return 0; +} + +static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, + SRpcMsg* pMsg, bool* pBlockReturned) { + uint64_t consumerId = pRequest->consumerId; + STqOffsetVal reqOffset = pRequest->reqOffset; + STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, pRequest->subKey); + int32_t vgId = TD_VID(pTq->pVnode); + + *pBlockReturned = false; + + // In this vnode, data has been polled by consumer for this topic, so let's continue from the last offset value. + if (pOffset != NULL) { + *pOffsetVal = pOffset->val; + + char formatBuf[80]; + tFormatOffset(formatBuf, 80, pOffsetVal); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, existed offset found, offset reset to %s and continue. reqId:0x%"PRIx64, + consumerId, pHandle->subKey, vgId, formatBuf, pRequest->reqId); + return 0; + } else { + // no poll occurs in this vnode for this topic, let's seek to the right offset value. 
+ if (reqOffset.type == TMQ_OFFSET__RESET_EARLIEAST) { + if (pRequest->useSnapshot) { + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey:%s, vgId:%d, (earliest) set offset to be snapshot", + consumerId, pHandle->subKey, vgId); + + if (pHandle->fetchMeta) { + tqOffsetResetToMeta(pOffsetVal, 0); + } else { + tqOffsetResetToData(pOffsetVal, 0, 0); + } + } else { + pHandle->pRef = walRefFirstVer(pTq->pVnode->pWal, pHandle->pRef); + if (pHandle->pRef == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + // offset set to previous version when init + tqOffsetResetToLog(pOffsetVal, pHandle->pRef->refVer - 1); + } + } else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) { + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + SMqDataRsp dataRsp = {0}; + tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); + + tqOffsetResetToLog(&dataRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId, + pHandle->subKey, vgId, dataRsp.rspOffset.version); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); + tDeleteSMqDataRsp(&dataRsp); + + *pBlockReturned = true; + return code; + } else { + STaosxRsp taosxRsp = {0}; + tqInitTaosxRsp(&taosxRsp, pRequest); + tqOffsetResetToLog(&taosxRsp.rspOffset, walGetLastVer(pTq->pVnode->pWal)); + int32_t code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + + *pBlockReturned = true; + return code; + } + } else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) { + tqError("tmq poll: subkey:%s, no offset committed for consumer:0x%" PRIx64 " in vg %d, subkey %s, reset none failed", + pHandle->subKey, consumerId, vgId, pRequest->subKey); + terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; + return -1; + } + } + + return 0; +} + +static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle, const 
SMqPollReq* pRequest, + SRpcMsg* pMsg, STqOffsetVal* pOffset) { + uint64_t consumerId = pRequest->consumerId; + int32_t vgId = TD_VID(pTq->pVnode); + + SMqDataRsp dataRsp = {0}; + tqInitDataRsp(&dataRsp, pRequest, pHandle->execHandle.subType); + + // lock + taosWLockLatch(&pTq->lock); + + qSetTaskId(pHandle->execHandle.task, consumerId, pRequest->reqId); + int code = tqScanData(pTq, pHandle, &dataRsp, pOffset); + if(code != 0) { + goto end; + } + + // till now, all data has been transferred to consumer, new data needs to push client once arrived. + if (dataRsp.blockNum == 0 && dataRsp.reqOffset.type == TMQ_OFFSET__LOG && + dataRsp.reqOffset.version == dataRsp.rspOffset.version && pHandle->consumerId == pRequest->consumerId) { + code = tqRegisterPushHandle(pTq, pHandle, pRequest, pMsg, &dataRsp, TMQ_MSG_TYPE__POLL_RSP); + taosWUnLockLatch(&pTq->lock); + return code; + } + + + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP); + + // NOTE: this pHandle->consumerId may have been changed already. 
+ + end: + { + char buf[80] = {0}; + tFormatOffset(buf, 80, &dataRsp.rspOffset); + tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, rsp block:%d, rsp offset type:%s, reqId:0x%" PRIx64 " code:%d", + consumerId, pHandle->subKey, vgId, dataRsp.blockNum, buf, pRequest->reqId, code); + taosWUnLockLatch(&pTq->lock); + tDeleteSMqDataRsp(&dataRsp); + } + return code; +} + + +static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg, STqOffsetVal *offset) { + int code = 0; + int32_t vgId = TD_VID(pTq->pVnode); + SWalCkHead* pCkHead = NULL; + SMqMetaRsp metaRsp = {0}; + STaosxRsp taosxRsp = {0}; + tqInitTaosxRsp(&taosxRsp, pRequest); + + if (offset->type != TMQ_OFFSET__LOG) { + if (tqScanTaosx(pTq, pHandle, &taosxRsp, &metaRsp, offset) < 0) { + return -1; + } + + if (metaRsp.metaRspLen > 0) { + code = tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp); + tqDebug("tmq poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send meta offset type:%d,uid:%" PRId64 ",ts:%" PRId64, + pRequest->consumerId, pHandle->subKey, vgId, metaRsp.rspOffset.type, metaRsp.rspOffset.uid, metaRsp.rspOffset.ts); + taosMemoryFree(metaRsp.metaRsp); + tDeleteSTaosxRsp(&taosxRsp); + return code; + } + + tqDebug("taosx poll: consumer:0x%" PRIx64 " subkey:%s vgId:%d, send data blockNum:%d, offset type:%d,uid:%" PRId64 + ",ts:%" PRId64,pRequest->consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type, taosxRsp.rspOffset.uid,taosxRsp.rspOffset.ts); + if (taosxRsp.blockNum > 0) { + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + return code; + }else { + *offset = taosxRsp.rspOffset; + } + } + + + if (offset->type == TMQ_OFFSET__LOG) { + int64_t fetchVer = offset->version + 1; + pCkHead = taosMemoryMalloc(sizeof(SWalCkHead) + 2048); + if (pCkHead == NULL) { + tDeleteSTaosxRsp(&taosxRsp); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + 
walSetReaderCapacity(pHandle->pWalReader, 2048); + int totalRows = 0; + while (1) { + int32_t savedEpoch = atomic_load_32(&pHandle->epoch); + if (savedEpoch > pRequest->epoch) { + tqWarn("tmq poll: consumer:0x%" PRIx64 " (epoch %d), subkey:%s vgId:%d offset %" PRId64 + ", found new consumer epoch %d, discard req epoch %d", pRequest->consumerId, pRequest->epoch, pHandle->subKey, vgId, fetchVer, savedEpoch, pRequest->epoch); + break; + } + + if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead, pRequest->reqId) < 0) { + tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return code; + } + + SWalCont* pHead = &pCkHead->head; + tqDebug("tmq poll: consumer:0x%" PRIx64 " (epoch %d) iter log, vgId:%d offset %" PRId64 " msgType %d", pRequest->consumerId, + pRequest->epoch, vgId, fetchVer, pHead->msgType); + + // process meta + if (pHead->msgType != TDMT_VND_SUBMIT) { + if(totalRows > 0) { + tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer - 1); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return code; + } + + tqDebug("fetch meta msg, ver:%" PRId64 ", type:%s", pHead->version, TMSG_INFO(pHead->msgType)); + tqOffsetResetToLog(&metaRsp.rspOffset, fetchVer); + metaRsp.resMsgType = pHead->msgType; + metaRsp.metaRspLen = pHead->bodyLen; + metaRsp.metaRsp = pHead->body; + if (tqSendMetaPollRsp(pTq, pMsg, pRequest, &metaRsp) < 0) { + code = -1; + taosMemoryFreeClear(pCkHead); + tDeleteSTaosxRsp(&taosxRsp); + return code; + } + code = 0; + taosMemoryFreeClear(pCkHead); + tDeleteSTaosxRsp(&taosxRsp); + return code; + } + + // process data + SPackedData submit = { + .msgStr = POINTER_SHIFT(pHead->body, sizeof(SSubmitReq2Msg)), + .msgLen = pHead->bodyLen - sizeof(SSubmitReq2Msg), + .ver = pHead->version, + }; + + if 
(tqTaosxScanLog(pTq, pHandle, submit, &taosxRsp, &totalRows) < 0) { + tqError("tmq poll: tqTaosxScanLog error %" PRId64 ", in vgId:%d, subkey %s", pRequest->consumerId, vgId, + pRequest->subKey); + taosMemoryFreeClear(pCkHead); + tDeleteSTaosxRsp(&taosxRsp); + return -1; + } + + if (totalRows >= 4096 || taosxRsp.createTableNum > 0) { + tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer); + code = tqSendDataRsp(pTq, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP); + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return code; + } else { + fetchVer++; + } + } + } + + tDeleteSTaosxRsp(&taosxRsp); + taosMemoryFreeClear(pCkHead); + return 0; +} + +int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg) { + int32_t code = -1; + STqOffsetVal offset = {0}; + STqOffsetVal reqOffset = pRequest->reqOffset; + + // 1. reset the offset if needed + if (IS_OFFSET_RESET_TYPE(reqOffset.type)) { + // handle the reset offset cases, according to the consumer's choice. + bool blockReturned = false; + code = extractResetOffsetVal(&offset, pTq, pHandle, pRequest, pMsg, &blockReturned); + if (code != 0) { + return code; + } + + // empty block returned, quit + if (blockReturned) { + return 0; + } + } else { // use the consumer specified offset + // the offset value can not be monotonious increase?? + offset = reqOffset; + } + + // this is a normal subscribe requirement + if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { + return extractDataAndRspForNormalSubscribe(pTq, pHandle, pRequest, pMsg, &offset); + } + + // todo handle the case where re-balance occurs. 
+ // for taosx + return extractDataAndRspForDbStbSubscribe(pTq, pHandle, pRequest, pMsg, &offset); +} + +int32_t tqSendMetaPollRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, const SMqMetaRsp* pRsp) { + int32_t len = 0; + int32_t code = 0; + tEncodeSize(tEncodeSMqMetaRsp, pRsp, len, code); + if (code < 0) { + return -1; + } + int32_t tlen = sizeof(SMqRspHead) + len; + void* buf = rpcMallocCont(tlen); + if (buf == NULL) { + return -1; + } + + ((SMqRspHead*)buf)->mqMsgType = TMQ_MSG_TYPE__POLL_META_RSP; + ((SMqRspHead*)buf)->epoch = pReq->epoch; + ((SMqRspHead*)buf)->consumerId = pReq->consumerId; + + void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead)); + + SEncoder encoder = {0}; + tEncoderInit(&encoder, abuf, len); + tEncodeSMqMetaRsp(&encoder, pRsp); + tEncoderClear(&encoder); + + SRpcMsg resp = { + .info = pMsg->info, + .pCont = buf, + .contLen = tlen, + .code = 0, + }; + tmsgSendRsp(&resp); + + tqDebug("vgId:%d, from consumer:0x%" PRIx64 " (epoch %d) send rsp, res msg type %d, offset type:%d", + TD_VID(pTq->pVnode), pReq->consumerId, pReq->epoch, pRsp->resMsgType, pRsp->rspOffset.type); + + return 0; +} From 5ab54481f0d8b974f60d549a8bce9124a3c8223b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 11 Apr 2023 19:24:34 +0800 Subject: [PATCH 09/25] refactor: do some internal refactor. 
--- include/libs/stream/tstream.h | 96 ++++++++++---------------- source/dnode/snode/src/snode.c | 2 +- source/dnode/vnode/src/tq/tq.c | 22 +++--- source/dnode/vnode/src/tq/tqRestore.c | 6 +- source/dnode/vnode/src/tq/tqUtil.c | 8 +-- source/libs/stream/src/stream.c | 9 +-- source/libs/stream/src/streamExec.c | 16 ++--- source/libs/stream/src/streamMeta.c | 26 ++----- source/libs/stream/src/streamRecover.c | 10 +-- source/libs/stream/src/streamTask.c | 10 +-- 10 files changed, 86 insertions(+), 119 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 2368788824..9f7d366a46 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -13,8 +13,8 @@ * along with this program. If not, see . */ -#include "executor.h" #include "os.h" +#include "executor.h" #include "query.h" #include "streamState.h" #include "tdatablock.h" @@ -104,21 +104,8 @@ typedef struct { int8_t type; } SStreamQueueItem; -#if 0 -typedef struct { - int8_t type; - int64_t ver; - int32_t* dataRef; - SSubmitReq* data; -} SStreamDataSubmit; - -typedef struct { - int8_t type; - int64_t ver; - SArray* dataRefs; // SArray - SArray* reqs; // SArray -} SStreamMergedSubmit; -#endif +typedef void FTbSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data); +typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); typedef struct { int8_t type; @@ -220,7 +207,6 @@ static FORCE_INLINE void streamQueueProcessFail(SStreamQueue* queue) { } static FORCE_INLINE void* streamQueueCurItem(SStreamQueue* queue) { - // return queue->qItem; } @@ -249,16 +235,13 @@ typedef struct { SUseDbRsp dbInfo; } STaskDispatcherShuffle; -typedef void FTbSink(SStreamTask* pTask, void* vnode, int64_t ver, void* data); - typedef struct { int64_t stbUid; char stbFullName[TSDB_TABLE_FNAME_LEN]; SSchemaWrapper* pSchemaWrapper; - // not applicable to encoder and decoder - void* vnode; - FTbSink* tbSinkFunc; - STSchema* pTSchema; + void* vnode; // not 
available to encoder and decoder + FTbSink* tbSinkFunc; + STSchema* pTSchema; } STaskSinkTb; typedef void FSmaSink(void* vnode, int64_t smaId, const SArray* data); @@ -292,14 +275,18 @@ typedef struct SCheckpointInfo { int64_t version; // offset in WAL } SCheckpointInfo; +typedef struct SStreamStatus { + int8_t taskStatus; + int8_t schedStatus; +} SStreamStatus; + struct SStreamTask { SStreamId id; int32_t totalLevel; int8_t taskLevel; int8_t outputType; int16_t dispatchMsgType; - int8_t taskStatus; - int8_t schedStatus; + SStreamStatus status; int32_t selfChildId; int32_t nodeId; SEpSet epSet; @@ -329,15 +316,11 @@ struct SStreamTask { SStreamQueue* outputQueue; // trigger - int8_t triggerStatus; - int64_t triggerParam; - void* timer; - - // msg handle - SMsgCb* pMsgCb; - - // state backend - SStreamState* pState; + int8_t triggerStatus; + int64_t triggerParam; + void* timer; + SMsgCb* pMsgCb; // msg handle + SStreamState* pState; // state backend // the followings attributes don't be serialized int32_t recoverTryingDownstream; @@ -350,6 +333,21 @@ struct SStreamTask { struct SStreamMeta* pMeta; }; +// meta +typedef struct SStreamMeta { + char* path; + TDB* db; + TTB* pTaskDb; + TTB* pCheckpointDb; + SHashObj* pTasks; + SHashObj* pRestoreTasks; + void* ahandle; + TXN* txn; + FTaskExpand* expandFunc; + int32_t vgId; + SRWLatch lock; +} SStreamMeta; + int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo); @@ -566,42 +564,22 @@ int32_t streamAggRecoverPrepare(SStreamTask* pTask); // int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask); int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId); -// expand and deploy -typedef int32_t FTaskExpand(void* ahandle, SStreamTask* pTask, int64_t ver); - -// meta -typedef struct SStreamMeta { - char* path; - TDB* db; - TTB* pTaskDb; - TTB* pCheckpointDb; - SHashObj* pTasks; - SHashObj* pRestoreTasks; - 
void* ahandle; - TXN* txn; - FTaskExpand* expandFunc; - int32_t vgId; - SRWLatch lock; -} SStreamMeta; - SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId); void streamMetaClose(SStreamMeta* streamMeta); int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask); int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t checkpointVer, char* msg, int32_t msgLen); -// SStreamTask* streamMetaGetTask(SStreamMeta* pMeta, int32_t taskId); +SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); -SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId); - -int32_t streamMetaBegin(SStreamMeta* pMeta); -int32_t streamMetaCommit(SStreamMeta* pMeta); -int32_t streamMetaRollBack(SStreamMeta* pMeta); -int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver); +int32_t streamMetaBegin(SStreamMeta* pMeta); +int32_t streamMetaCommit(SStreamMeta* pMeta); +int32_t streamMetaRollBack(SStreamMeta* pMeta); +int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver); // checkpoint int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index f8e4268aad..7ccbb3b586 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -65,7 +65,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { ASSERT(taosArrayGetSize(pTask->childEpInfo) != 0); pTask->refCnt = 1; - pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE; + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->inputQueue = streamQueueOpen(); pTask->outputQueue = 
streamQueueOpen(); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 48c9a4f445..b029afc935 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -551,7 +551,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { int32_t vgId = TD_VID(pTq->pVnode); pTask->id.idStr = taosStrdup(buf); pTask->refCnt = 1; - pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE; + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->inputQueue = streamQueueOpen(); pTask->outputQueue = streamQueueOpen(); @@ -566,9 +566,9 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { // expand executor if (pTask->fillHistory) { - pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; + pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; } else { - pTask->taskStatus = TASK_STATUS__RESTORE; + pTask->status.taskStatus = TASK_STATUS__RESTORE; } if (pTask->taskLevel == TASK_LEVEL__SOURCE) { @@ -661,7 +661,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { }; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); - if (pTask && atomic_load_8(&pTask->taskStatus) == TASK_STATUS__NORMAL) { + if (pTask && atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL) { rsp.status = 1; } else { rsp.status = 0; @@ -788,7 +788,7 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { // do recovery step 1 streamSourceRecoverScanStep1(pTask); - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } @@ -803,7 +803,7 @@ int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { return 0; } @@ -845,7 +845,7 @@ int32_t 
tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t return -1; } - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } @@ -1061,9 +1061,9 @@ int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { continue; } - if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, - pTask->taskStatus); + pTask->status.taskStatus); continue; } @@ -1137,10 +1137,10 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { } else { SStreamTask* pTask = streamMetaAcquireTaskEx(pTq->pStreamMeta, taskId); if (pTask != NULL) { - if (pTask->taskStatus == TASK_STATUS__NORMAL) { + if (pTask->status.taskStatus == TASK_STATUS__NORMAL) { tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr); streamProcessRunReq(pTask); - } else if (pTask->taskStatus == TASK_STATUS__RESTORE) { + } else if (pTask->status.taskStatus == TASK_STATUS__RESTORE) { tqDebug("vgId:%d s-task:%s start to process in restore procedure from last chk point:%" PRId64, vgId, pTask->id.idStr, pTask->chkInfo.version); streamProcessRunReq(pTask); diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index a123bdb1dc..877c686d35 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -56,7 +56,7 @@ int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList) { taosHashRemove(pStreamMeta->pRestoreTasks, &pTask->id.taskId, sizeof(pTask->id.taskId)); // NOTE: do not change the following order - atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL); + 
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); taosHashPut(pStreamMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, POINTER_BYTES); } @@ -78,8 +78,8 @@ int32_t restoreStreamTaskImpl(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetS continue; } - if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { - tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr, pTask->taskStatus); + if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr, pTask->status.taskStatus); continue; } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 8b86cb6716..2e8c6a53bb 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -89,9 +89,9 @@ void initOffsetForAllRestoreTasks(STQ* pTq) { continue; } - if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, - pTask->taskStatus); + pTask->status.taskStatus); continue; } @@ -120,9 +120,9 @@ void saveOffsetForAllTasks(STQ* pTq, int64_t ver) { continue; } - if (pTask->taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, - pTask->taskStatus); + pTask->status.taskStatus); continue; } diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c 
index 59ac8a61d6..71d4e5efd8 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -52,7 +52,7 @@ void streamCleanUp() { void streamSchedByTimer(void* param, void* tmrId) { SStreamTask* pTask = (void*)param; - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { streamMetaReleaseTask(NULL, pTask); return; } @@ -66,8 +66,8 @@ void streamSchedByTimer(void* param, void* tmrId) { taosFreeQitem(trigger); return; } - trigger->pBlock->info.type = STREAM_GET_ALL; + trigger->pBlock->info.type = STREAM_GET_ALL; atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE); if (tAppendDataForStream(pTask, (SStreamQueueItem*)trigger) < 0) { @@ -75,6 +75,7 @@ void streamSchedByTimer(void* param, void* tmrId) { taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer); return; } + streamSchedExec(pTask); } @@ -93,13 +94,13 @@ int32_t streamSetupTrigger(SStreamTask* pTask) { int32_t streamSchedExec(SStreamTask* pTask) { int8_t schedStatus = - atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__WAITING); + atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE, TASK_SCHED_STATUS__WAITING); if (schedStatus == TASK_SCHED_STATUS__INACTIVE) { SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); if (pRunReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; - atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); return -1; } diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 075e477eb3..db9be593c0 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -22,10 +22,10 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* void* pExecutor = 
pTask->exec.pExecutor; while (pTask->taskLevel == TASK_LEVEL__SOURCE) { - int8_t status = atomic_load_8(&pTask->taskStatus); + int8_t status = atomic_load_8(&pTask->status.taskStatus); if (status != TASK_STATUS__NORMAL && status != TASK_STATUS__RESTORE) { qError("stream task wait for the end of fill history, s-task:%s, status:%d", pTask->id.idStr, - atomic_load_8(&pTask->taskStatus)); + atomic_load_8(&pTask->status.taskStatus)); taosMsleep(2); } else { break; @@ -66,7 +66,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* // pExecutor while (1) { - if (pTask->taskStatus == TASK_STATUS__DROPPING) { + if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { return 0; } @@ -134,7 +134,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) { int32_t batchCnt = 0; while (1) { - if (atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING) { + if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) { taosArrayDestroy(pRes); return 0; } @@ -267,7 +267,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { } } - if (pTask->taskStatus == TASK_STATUS__DROPPING) { + if (pTask->status.taskStatus == TASK_STATUS__DROPPING) { if (pInput) { streamFreeQitem(pInput); } @@ -343,17 +343,17 @@ int32_t streamExecForAll(SStreamTask* pTask) { int32_t streamTryExec(SStreamTask* pTask) { // this function may be executed by multi-threads, so status check is required. 
int8_t schedStatus = - atomic_val_compare_exchange_8(&pTask->schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE); + atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__WAITING, TASK_SCHED_STATUS__ACTIVE); if (schedStatus == TASK_SCHED_STATUS__WAITING) { int32_t code = streamExecForAll(pTask); if (code < 0) { - atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__FAILED); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__FAILED); return -1; } // todo the task should be commit here - atomic_store_8(&pTask->schedStatus, TASK_SCHED_STATUS__INACTIVE); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); if (!taosQueueEmpty(pTask->inputQueue->queue)) { streamSchedExec(pTask); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index ae65753bed..2e9bb4d762 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -179,23 +179,11 @@ int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { } #endif -#if 0 -SStreamTask* streamMetaGetTask(SStreamMeta* pMeta, int32_t taskId) { - SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); - if (ppTask) { - ASSERT((*ppTask)->taskId == taskId); - return *ppTask; - } else { - return NULL; - } -} -#endif - SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId) { taosRLockLatch(&pMeta->lock); SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); - if (ppTask != NULL && (atomic_load_8(&((*ppTask)->taskStatus)) != TASK_STATUS__DROPPING)) { + if (ppTask != NULL && (atomic_load_8(&((*ppTask)->status.taskStatus)) != TASK_STATUS__DROPPING)) { atomic_add_fetch_32(&(*ppTask)->refCnt, 1); taosRUnLockLatch(&pMeta->lock); return *ppTask; @@ -209,7 +197,7 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { int32_t left = atomic_sub_fetch_32(&pTask->refCnt, 1); 
ASSERT(left >= 0); if (left == 0) { - ASSERT(atomic_load_8(&pTask->taskStatus) == TASK_STATUS__DROPPING); + ASSERT(atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING); tFreeStreamTask(pTask); } } @@ -223,7 +211,7 @@ SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId) { SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pRestoreTasks, &taskId, sizeof(int32_t)); if (p != NULL) { pTask = *p; - if (pTask != NULL && (atomic_load_8(&(pTask->taskStatus)) != TASK_STATUS__DROPPING)) { + if (pTask != NULL && (atomic_load_8(&(pTask->status.taskStatus)) != TASK_STATUS__DROPPING)) { atomic_add_fetch_32(&pTask->refCnt, 1); taosRUnLockLatch(&pMeta->lock); return pTask; @@ -233,7 +221,7 @@ SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId) { SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); if (p != NULL) { pTask = *p; - if (pTask != NULL && atomic_load_8(&(pTask->taskStatus)) != TASK_STATUS__DROPPING) { + if (pTask != NULL && atomic_load_8(&(pTask->status.taskStatus)) != TASK_STATUS__DROPPING) { atomic_add_fetch_32(&pTask->refCnt, 1); taosRUnLockLatch(&pMeta->lock); return pTask; @@ -255,7 +243,7 @@ void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { * taosTmrStop(pTask->timer);*/ /*pTask->timer = NULL;*/ /*}*/ - atomic_store_8(&pTask->taskStatus, TASK_STATUS__DROPPING); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING); taosWLockLatch(&pMeta->lock); streamMetaReleaseTask(pMeta, pTask); @@ -338,9 +326,9 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } - /*pTask->taskStatus = TASK_STATUS__NORMAL;*/ + /*pTask->status.taskStatus = TASK_STATUS__NORMAL;*/ if (pTask->fillHistory) { - pTask->taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; + pTask->status.taskStatus = TASK_STATUS__WAIT_DOWNSTREAM; streamTaskCheckDownstream(pTask, ver); } } diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 
3e7a02b8d5..9962cdfcc0 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -18,7 +18,7 @@ int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { qDebug("s-task:%s at node %d launch recover", pTask->id.idStr, pTask->nodeId); if (pTask->taskLevel == TASK_LEVEL__SOURCE) { - atomic_store_8(&pTask->taskStatus, TASK_STATUS__RECOVER_PREPARE); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__RECOVER_PREPARE); streamSetParamForRecover(pTask); streamSourceRecoverPrepareStep1(pTask, version); @@ -44,11 +44,11 @@ int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { } } else if (pTask->taskLevel == TASK_LEVEL__AGG) { - atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); streamSetParamForRecover(pTask); streamAggRecoverPrepare(pTask); } else if (pTask->taskLevel == TASK_LEVEL__SINK) { - atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); } return 0; } @@ -122,7 +122,7 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp } int32_t streamProcessTaskCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* pReq) { - return atomic_load_8(&pTask->taskStatus) == TASK_STATUS__NORMAL; + return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL; } int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version) { @@ -168,7 +168,7 @@ int32_t streamRestoreParam(SStreamTask* pTask) { return qStreamRestoreParam(exec); } int32_t streamSetStatusNormal(SStreamTask* pTask) { - atomic_store_8(&pTask->taskStatus, TASK_STATUS__NORMAL); + atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); return 0; } diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index f45b6ad7b7..834c022a9a 100644 --- a/source/libs/stream/src/streamTask.c +++ 
b/source/libs/stream/src/streamTask.c @@ -29,7 +29,7 @@ SStreamTask* tNewStreamTask(int64_t streamId) { sprintf(buf, "0x%"PRIx64"-%d", pTask->id.streamId, pTask->id.taskId); pTask->id.idStr = taosStrdup(buf); - pTask->schedStatus = TASK_SCHED_STATUS__INACTIVE; + pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE; pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; @@ -63,8 +63,8 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI8(pEncoder, pTask->outputType) < 0) return -1; if (tEncodeI16(pEncoder, pTask->dispatchMsgType) < 0) return -1; - if (tEncodeI8(pEncoder, pTask->taskStatus) < 0) return -1; - if (tEncodeI8(pEncoder, pTask->schedStatus) < 0) return -1; + if (tEncodeI8(pEncoder, pTask->status.taskStatus) < 0) return -1; + if (tEncodeI8(pEncoder, pTask->status.schedStatus) < 0) return -1; if (tEncodeI32(pEncoder, pTask->selfChildId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->nodeId) < 0) return -1; @@ -116,8 +116,8 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI8(pDecoder, &pTask->outputType) < 0) return -1; if (tDecodeI16(pDecoder, &pTask->dispatchMsgType) < 0) return -1; - if (tDecodeI8(pDecoder, &pTask->taskStatus) < 0) return -1; - if (tDecodeI8(pDecoder, &pTask->schedStatus) < 0) return -1; + if (tDecodeI8(pDecoder, &pTask->status.taskStatus) < 0) return -1; + if (tDecodeI8(pDecoder, &pTask->status.schedStatus) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->selfChildId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->nodeId) < 0) return -1; From bb22d9ee5e228b92449d2eadf5c7bbc9286cb6af Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 12 Apr 2023 10:56:19 +0800 Subject: [PATCH 10/25] fix:open test cases for tmq & add log if rebalance error --- source/dnode/mnode/impl/src/mndSubscribe.c | 2 +- tests/parallel_test/cases.task | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/source/dnode/mnode/impl/src/mndSubscribe.c b/source/dnode/mnode/impl/src/mndSubscribe.c index 64a3170d47..2eb5d2a69c 100644 --- a/source/dnode/mnode/impl/src/mndSubscribe.c +++ b/source/dnode/mnode/impl/src/mndSubscribe.c @@ -670,7 +670,7 @@ static int32_t mndProcessRebalanceReq(SRpcMsg *pMsg) { // possibly no vg is changed // when each topic is re-balanced, issue an trans to save the results in sdb. if (mndPersistRebResult(pMnode, pMsg, &rebOutput) < 0) { - mError("mq re-balance persist output error, possibly vnode splitted or dropped"); + mError("mq re-balance persist output error, possibly vnode splitted or dropped,msg:%s", terrstr()); } taosArrayDestroy(rebOutput.newConsumers); diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index d1fbacdadf..c14f316ffc 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -89,12 +89,12 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqUpdateWithConsume.py -N 3 -n 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqUpdate-multiCtb-snapshot0.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqUpdate-multiCtb-snapshot1.py -# ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDelete-1ctb.py +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDelete-1ctb.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDelete-multiCtb.py -N 3 -n 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDropStbCtb.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDropNtb-snapshot0.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDropNtb-snapshot1.py -#,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqUdf.py +,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqUdf.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqUdf-multCtb-snapshot0.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqUdf-multCtb-snapshot1.py ,,y,system-test,./pytest.sh python3 ./test.py -f 
7-tmq/stbTagFilter-1ctb.py From 7648e03e3131cfd5023c56865c572e657b8f07ac Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 13 Apr 2023 10:03:30 +0800 Subject: [PATCH 11/25] fix:doBitmapMerge error if remaind bytes is not 0 --- source/common/src/tdatablock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 0dd8cb9b0c..986a46036b 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -221,6 +221,7 @@ static void doBitmapMerge(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, c } uint8_t* p = (uint8_t*)pSource->nullbitmap; + pColumnInfoData->nullbitmap[BitmapLen(numOfRow1) - 1] &= (0B11111111 << shiftBits); // clear remind bits pColumnInfoData->nullbitmap[BitmapLen(numOfRow1) - 1] |= (p[0] >> remindBits); // copy remind bits if (BitmapLen(numOfRow1) == BitmapLen(total)) { @@ -232,6 +233,7 @@ static void doBitmapMerge(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, c uint8_t* start = (uint8_t*)&pColumnInfoData->nullbitmap[BitmapLen(numOfRow1)]; int32_t overCount = BitmapLen(total) - BitmapLen(numOfRow1); + memset(start, 0, overCount); while (i < len) { // size limit of pSource->nullbitmap if (i >= 1) { start[i - 1] |= (p[i] >> remindBits); // copy remind bits @@ -309,8 +311,9 @@ int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, int pColumnInfoData->pData = tmp; if (BitmapLen(numOfRow1) < BitmapLen(finalNumOfRows)) { char* btmp = taosMemoryRealloc(pColumnInfoData->nullbitmap, BitmapLen(finalNumOfRows)); - uint32_t extend = BitmapLen(finalNumOfRows) - BitmapLen(numOfRow1); - memset(btmp + BitmapLen(numOfRow1), 0, extend); + if (btmp == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } pColumnInfoData->nullbitmap = btmp; } From 70b45a4cf035e18023002d3bc3157f26eea818d5 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 13 Apr 2023 23:22:09 +0800 Subject: [PATCH 12/25] fix(stream): all data should be extracted from 
wal. --- include/libs/stream/tstream.h | 18 ++- include/libs/wal/wal.h | 2 +- source/dnode/snode/src/snode.c | 2 +- source/dnode/vnode/inc/vnode.h | 3 +- source/dnode/vnode/src/inc/tq.h | 3 +- source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/tq/tq.c | 133 ++++++++++---------- source/dnode/vnode/src/tq/tqPush.c | 12 +- source/dnode/vnode/src/tq/tqRead.c | 27 +++- source/dnode/vnode/src/tq/tqRestore.c | 161 +++++++++++++++--------- source/dnode/vnode/src/tq/tqUtil.c | 5 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 3 - source/dnode/vnode/src/vnd/vnodeSync.c | 2 +- source/libs/stream/src/stream.c | 42 ++++--- source/libs/stream/src/streamData.c | 1 - source/libs/stream/src/streamDispatch.c | 19 +-- source/libs/stream/src/streamExec.c | 26 ++-- source/libs/stream/src/streamMeta.c | 63 ++++++---- source/libs/stream/src/streamTask.c | 7 +- source/util/src/tworker.c | 3 +- tests/script/tsim/stream/basic1.sim | 4 +- 21 files changed, 326 insertions(+), 212 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 9f7d366a46..eea8868b8c 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -31,6 +31,7 @@ extern "C" { #ifndef _STREAM_H_ #define _STREAM_H_ +typedef void (*_free_reader_fn_t)(void*); typedef struct SStreamTask SStreamTask; enum { @@ -218,9 +219,10 @@ void streamDataSubmitDestroy(SStreamDataSubmit2* pDataSubmit); SStreamDataSubmit2* streamSubmitBlockClone(SStreamDataSubmit2* pSubmit); typedef struct { - char* qmsg; - void* pExecutor; // not applicable to encoder and decoder - struct STqReader* pTqReader; // not applicable to encoder and decoder + char* qmsg; + void* pExecutor; // not applicable to encoder and decoder + struct STqReader* pTqReader; // not applicable to encoder and decoder + struct SWalReader* pWalReader; // not applicable to encoder and decoder } STaskExec; typedef struct { @@ -331,6 +333,7 @@ struct SStreamTask { int64_t checkpointingId; int32_t checkpointAlignCnt; 
struct SStreamMeta* pMeta; + _free_reader_fn_t freeFp; }; // meta @@ -340,12 +343,14 @@ typedef struct SStreamMeta { TTB* pTaskDb; TTB* pCheckpointDb; SHashObj* pTasks; - SHashObj* pRestoreTasks; + SHashObj* pWalReadTasks; void* ahandle; TXN* txn; FTaskExpand* expandFunc; int32_t vgId; SRWLatch lock; + int8_t walScan; + bool quit; } SStreamMeta; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); @@ -355,7 +360,7 @@ SStreamTask* tNewStreamTask(int64_t streamId); int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask); int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask); void tFreeStreamTask(SStreamTask* pTask); -int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem); +int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem); bool tInputQueueIsFull(const SStreamTask* pTask); static FORCE_INLINE void streamTaskInputFail(SStreamTask* pTask) { @@ -568,8 +573,9 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF void streamMetaClose(SStreamMeta* streamMeta); int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask); -int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask); +int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask); int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t checkpointVer, char* msg, int32_t msgLen); +int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta); SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId); diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index 0a359bfd42..b51289de5e 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -139,7 +139,7 @@ typedef struct { } SWalFilterCond; // todo hide this struct -typedef struct { +typedef struct SWalReader { SWal *pWal; int64_t readerId; TdFilePtr pLogFile; diff --git 
a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 7ccbb3b586..4235548e48 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -150,7 +150,7 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { ASSERT(pTask->taskLevel == TASK_LEVEL__AGG); // 2.save task - code = streamMetaAddTask(pSnode->pMeta, -1, pTask); + code = streamMetaAddDeployedTask(pSnode->pMeta, -1, pTask); if (code < 0) { return -1; } diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index d62eebd2e1..e6e21e1e4a 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -260,7 +260,8 @@ int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList); int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList); int32_t tqSeekVer(STqReader *pReader, int64_t ver, const char *id); -void tqNextBlock(STqReader *pReader, SFetchRet *ret); +void tqNextBlock(STqReader *pReader, SFetchRet *ret); +int32_t extractSubmitMsgFromWal(SWalReader *pReader, SPackedData *pPackedData); int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); // int32_t tqReaderSetDataMsg(STqReader *pReader, const SSubmitReq *pMsg, int64_t ver); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 94ba399a0a..c007f84790 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -176,7 +176,7 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname); // tqStream int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver); -int32_t tqDoRestoreSourceStreamTasks(STQ* pTq); +int32_t tqStreamTasksScanWal(STQ* pTq); // tq util void createStreamTaskOffsetKey(char* dst, uint64_t streamId, uint32_t taskId); @@ -187,6 +187,7 @@ int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequ void doSaveTaskOffset(STqOffsetStore* pOffsetStore, const char* pKey, 
int64_t ver); void saveOffsetForAllTasks(STQ* pTq, int64_t ver); void initOffsetForAllRestoreTasks(STQ* pTq); +int32_t transferToWalReadTask(SStreamMeta* pStreamMeta, SArray* pTaskList); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 412c2549b5..16dea8aebd 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -194,7 +194,7 @@ void tqClose(STQ*); int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver); int tqRegisterPushHandle(STQ* pTq, void* pHandle, const SMqPollReq* pRequest, SRpcMsg* pRpcMsg, SMqDataRsp* pDataRsp, int32_t type); int tqUnregisterPushHandle(STQ* pTq, const char* pKey, int32_t keyLen, uint64_t consumerId, bool rspConsumer); -int tqRestoreStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. +int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed. int tqCommit(STQ*); int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index b029afc935..2b911befcc 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -15,7 +15,7 @@ #include "tq.h" -#define ALL_STREAM_TASKS_ID (-1) +#define WAL_READ_TASKS_ID (-1) int32_t tqInit() { int8_t old; @@ -630,6 +630,9 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { return -1; } + pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); + + pTask->freeFp = (_free_reader_fn_t)tqCloseReader; SArray* pList = qGetQueriedTableListInfo(pTask->exec.pExecutor); tqReaderAddTbUidList(pTask->exec.pTqReader, pList); } @@ -640,6 +643,8 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { return 0; } +void tFreeStreamTask(SStreamTask* pTask); + int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { char* msgStr = pMsg->pCont; char* msgBody = 
POINTER_SHIFT(msgStr, sizeof(SMsgHead)); @@ -754,8 +759,10 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms tDecoderClear(&decoder); // 2.save task - code = streamMetaAddTask(pTq->pStreamMeta, sversion, pTask); + code = streamMetaAddDeployedTask(pTq->pStreamMeta, sversion, pTask); if (code < 0) { + tqError("vgId:%d failed to add s-task:%s, total:%d", TD_VID(pTq->pVnode), pTask->id.idStr, + streamMetaGetNumOfTasks(pTq->pStreamMeta)); return -1; } @@ -764,6 +771,8 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms streamTaskCheckDownstream(pTask, sversion); } + tqDebug("vgId:%d s-task:%s is deployed from mnd, status:%d, total:%d", TD_VID(pTq->pVnode), pTask->id.idStr, + pTask->status.taskStatus, streamMetaGetNumOfTasks(pTq->pStreamMeta)); return 0; } @@ -973,7 +982,7 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { pRefBlock->dataRef = pRef; atomic_add_fetch_32(pRefBlock->dataRef, 1); - if (tAppendDataForStream(pTask, (SStreamQueueItem*)pRefBlock) < 0) { + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pRefBlock) < 0) { qError("stream task input del failed, task id %d", pTask->id.taskId); atomic_sub_fetch_32(pRef, 1); @@ -1008,7 +1017,7 @@ int32_t tqProcessDelReq(STQ* pTq, void* pReq, int32_t len, int64_t ver) { taosArrayPush(pStreamBlock->blocks, &block); if (!failed) { - if (tAppendDataForStream(pTask, (SStreamQueueItem*)pStreamBlock) < 0) { + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pStreamBlock) < 0) { qError("stream task input del failed, task id %d", pTask->id.taskId); continue; } @@ -1036,12 +1045,13 @@ static int32_t addSubmitBlockNLaunchTask(STqOffsetStore* pOffsetStore, SStreamTa if (code == TSDB_CODE_SUCCESS) { tqOffsetDelete(pOffsetStore, key); } - return TSDB_CODE_SUCCESS; + + return code; } int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { +#if 0 void* pIter = NULL; - SStreamDataSubmit2* pSubmit = streamDataSubmitNew(submit, 
STREAM_INPUT__DATA_SUBMIT); if (pSubmit == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1050,6 +1060,8 @@ int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { return -1; } + SArray* pInputQueueFullTasks = taosArrayInit(4, POINTER_BYTES); + while (1) { pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); if (pIter == NULL) { @@ -1081,47 +1093,23 @@ int32_t tqProcessSubmitReq(STQ* pTq, SPackedData submit) { ver = pOffset->val.version; } - tqDebug("s-task:%s input queue is full, do nothing, start ver:%" PRId64, pTask->id.idStr, ver); + tqDebug("s-task:%s input queue is full, discard submit block, ver:%" PRId64, pTask->id.idStr, ver); + taosArrayPush(pInputQueueFullTasks, &pTask); continue; } // check if offset value exists STqOffset* pOffset = tqOffsetRead(pTq->pOffsetStore, key); - if (pOffset != NULL) { - // seek the stored version and extract data from WAL - int32_t code = tqSeekVer(pTask->exec.pTqReader, pOffset->val.version, ""); + ASSERT(pOffset == NULL); - // all data has been retrieved from WAL, let's try submit block directly. 
- if (code == TSDB_CODE_SUCCESS) { // all data retrieved, abort - // append the data for the stream - SFetchRet ret = {.data.info.type = STREAM_NORMAL}; - terrno = 0; - - tqNextBlock(pTask->exec.pTqReader, &ret); - if (ret.fetchType == FETCH_TYPE__DATA) { - code = launchTaskForWalBlock(pTask, &ret, pOffset); - if (code != TSDB_CODE_SUCCESS) { - continue; - } - } else { // FETCH_TYPE__NONE, let's try submit block directly - tqDebug("s-task:%s data in WAL are all consumed, try data in submit message", pTask->id.idStr); - addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); - } - - // do nothing if failed, since the offset value is kept already - } else { // failed to seek to the WAL version - // todo handle the case where offset has been deleted in WAL, due to stream computing too slow - tqDebug("s-task:%s data in WAL are all consumed, try data in submit msg", pTask->id.idStr); - addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); - } - } else { - addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); - } + addSubmitBlockNLaunchTask(pTq->pOffsetStore, pTask, pSubmit, key, submit.ver); } streamDataSubmitDestroy(pSubmit); taosFreeQitem(pSubmit); +#endif + tqStartStreamTasks(pTq); return 0; } @@ -1131,29 +1119,31 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { int32_t taskId = pReq->taskId; int32_t vgId = TD_VID(pTq->pVnode); - if (taskId == ALL_STREAM_TASKS_ID) { // all tasks are restored from the wal - tqDoRestoreSourceStreamTasks(pTq); + if (taskId == WAL_READ_TASKS_ID) { // all tasks are extracted submit data from the wal + tqStreamTasksScanWal(pTq); + return 0; + } + + SStreamTask* pTask = streamMetaAcquireTaskEx(pTq->pStreamMeta, taskId); + if (pTask != NULL) { + if (pTask->status.taskStatus == TASK_STATUS__NORMAL) { + tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr); + streamProcessRunReq(pTask); + } else if (pTask->status.taskStatus == 
TASK_STATUS__RESTORE) { + tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId, + pTask->id.idStr, pTask->chkInfo.version); + streamProcessRunReq(pTask); + } else { + tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr); + } + + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + + tqStartStreamTasks(pTq); return 0; } else { - SStreamTask* pTask = streamMetaAcquireTaskEx(pTq->pStreamMeta, taskId); - if (pTask != NULL) { - if (pTask->status.taskStatus == TASK_STATUS__NORMAL) { - tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr); - streamProcessRunReq(pTask); - } else if (pTask->status.taskStatus == TASK_STATUS__RESTORE) { - tqDebug("vgId:%d s-task:%s start to process in restore procedure from last chk point:%" PRId64, vgId, - pTask->id.idStr, pTask->chkInfo.version); - streamProcessRunReq(pTask); - } else { - tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr); - } - - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; - } else { - tqError("vgId:%d failed to found s-task, taskId:%d", vgId, taskId); - return -1; - } + tqError("vgId:%d failed to found s-task, taskId:%d", vgId, taskId); + return -1; } } @@ -1165,14 +1155,10 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen); tDecodeStreamDispatchReq(&decoder, &req); - int32_t taskId = req.taskId; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId); if (pTask) { - SRpcMsg rsp = { - .info = pMsg->info, - .code = 0, - }; + SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; streamProcessDispatchReq(pTask, &req, &rsp, exec); streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; @@ -1294,26 +1280,39 @@ FAIL: int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= 
pTq->walLogLastVer; } -int32_t tqRestoreStreamTasks(STQ* pTq) { +int32_t tqStartStreamTasks(STQ* pTq) { int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; + taosWLockLatch(&pMeta->lock); + pMeta->walScan += 1; + + if (pMeta->walScan > 1) { + tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScan); + taosWUnLockLatch(&pTq->pStreamMeta->lock); + return 0; + } + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); if (pRunReq == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; tqError("vgId:%d failed restore stream tasks, code:%s", vgId, terrstr(terrno)); + taosWUnLockLatch(&pTq->pStreamMeta->lock); return -1; } - int32_t numOfTasks = taosHashGetSize(pTq->pStreamMeta->pRestoreTasks); - tqInfo("vgId:%d start restoring stream tasks, total tasks:%d", vgId, numOfTasks); + int32_t numOfTasks = taosHashGetSize(pTq->pStreamMeta->pTasks); + + tqInfo("vgId:%d start wal scan stream tasks, tasks:%d", vgId, numOfTasks); initOffsetForAllRestoreTasks(pTq); pRunReq->head.vgId = vgId; pRunReq->streamId = 0; - pRunReq->taskId = ALL_STREAM_TASKS_ID; + pRunReq->taskId = WAL_READ_TASKS_ID; SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg); + taosWUnLockLatch(&pTq->pStreamMeta->lock); return 0; } diff --git a/source/dnode/vnode/src/tq/tqPush.c b/source/dnode/vnode/src/tq/tqPush.c index d651e945b5..df6648a6af 100644 --- a/source/dnode/vnode/src/tq/tqPush.c +++ b/source/dnode/vnode/src/tq/tqPush.c @@ -322,16 +322,19 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v taosWUnLockLatch(&pTq->lock); } + tqDebug("handle submit, restore:%d, size:%d", pTq->pVnode->restored, (int)taosHashGetSize(pTq->pStreamMeta->pTasks)); + // push data for stream processing: - // 1. the vnode isn't in the restore procedure. + // 1. the vnode has already been restored. // 2. 
the vnode should be the leader. // 3. the stream is not suspended yet. - if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode) && (!pTq->pVnode->restored)) { + if (!tsDisableStream && vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored) { if (taosHashGetSize(pTq->pStreamMeta->pTasks) == 0) { return 0; } if (msgType == TDMT_VND_SUBMIT) { +#if 0 void* data = taosMemoryMalloc(len); if (data == NULL) { // todo: for all stream in this vnode, keep this offset in the offset files, and wait for a moment, and then retry @@ -343,7 +346,10 @@ int32_t tqPushMsg(STQ* pTq, void* msg, int32_t msgLen, tmsg_t msgType, int64_t v memcpy(data, pReq, len); SPackedData submit = {.msgStr = data, .msgLen = len, .ver = ver}; - tqDebug("tq copy submit msg:%p len:%d ver:%" PRId64 " from %p for stream", data, len, ver, pReq); + tqDebug("vgId:%d tq copy submit msg:%p len:%d ver:%" PRId64 " from %p for stream", vgId, data, len, ver, pReq); + tqProcessSubmitReq(pTq, submit); +#endif + SPackedData submit = {0}; tqProcessSubmitReq(pTq, submit); } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 14d599551d..69624f4d10 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -300,6 +300,28 @@ int32_t tqSeekVer(STqReader* pReader, int64_t ver, const char* id) { return 0; } +int32_t extractSubmitMsgFromWal(SWalReader* pReader, SPackedData* pPackedData) { + if (walNextValidMsg(pReader) < 0) { + return -1; + } + + void* pBody = POINTER_SHIFT(pReader->pHead->head.body, sizeof(SSubmitReq2Msg)); + int32_t len = pReader->pHead->head.bodyLen - sizeof(SSubmitReq2Msg); + int64_t ver = pReader->pHead->head.version; + + void* data = taosMemoryMalloc(len); + if (data == NULL) { + // todo: for all stream in this vnode, keep this offset in the offset files, and wait for a moment, and then retry + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("vgId:%d, failed to copy submit data for stream processing, since out of memory", 0); + return 
-1; + } + + memcpy(data, pBody, len); + *pPackedData = (SPackedData){.ver = ver, .msgLen = len, .msgStr = data}; + return 0; +} + void tqNextBlock(STqReader* pReader, SFetchRet* ret) { while (1) { if (pReader->msg2.msgStr == NULL) { @@ -434,7 +456,10 @@ int32_t tqRetrieveDataBlock2(SSDataBlock* pBlock, STqReader* pReader, SSubmitTbD SSubmitTbData* pSubmitTbData = taosArrayGet(pReader->submit.aSubmitTbData, pReader->nextBlk); pReader->nextBlk++; - if (pSubmitTbDataRet) *pSubmitTbDataRet = pSubmitTbData; + if (pSubmitTbDataRet) { + *pSubmitTbDataRet = pSubmitTbData; + } + int32_t sversion = pSubmitTbData->sver; int64_t suid = pSubmitTbData->suid; int64_t uid = pSubmitTbData->uid; diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index 877c686d35..6ed74ddcc3 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -15,60 +15,81 @@ #include "tq.h" -static int32_t restoreStreamTaskImpl(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, SArray* pTaskList); +static int32_t streamTaskReplayWal(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, bool* pScanIdle); static int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList); // this function should be executed by stream threads. // there is a case that the WAL increases more fast than the restore procedure, and this restore procedure // will not stop eventually. 
-int tqDoRestoreSourceStreamTasks(STQ* pTq) { +int tqStreamTasksScanWal(STQ* pTq) { + int32_t vgId = TD_VID(pTq->pVnode); + SStreamMeta* pMeta = pTq->pStreamMeta; int64_t st = taosGetTimestampMs(); + while (1) { - SArray* pTaskList = taosArrayInit(4, POINTER_BYTES); + tqInfo("vgId:%d continue check if data in wal are available", vgId); // check all restore tasks - restoreStreamTaskImpl(pTq->pStreamMeta, pTq->pOffsetStore, pTaskList); - transferToNormalTask(pTq->pStreamMeta, pTaskList); - taosArrayDestroy(pTaskList); + bool allFull = true; + streamTaskReplayWal(pTq->pStreamMeta, pTq->pOffsetStore, &allFull); - int32_t numOfRestored = taosHashGetSize(pTq->pStreamMeta->pRestoreTasks); - if (numOfRestored <= 0) { - break; + int32_t times = 0; + + if (allFull) { + taosWLockLatch(&pMeta->lock); + pMeta->walScan -= 1; + times = pMeta->walScan; + + if (pMeta->walScan <= 0) { + taosWUnLockLatch(&pMeta->lock); + break; + } + + taosWUnLockLatch(&pMeta->lock); + tqInfo("vgId:%d scan wal for stream tasks for %d times", vgId, times); } } - int64_t et = taosGetTimestampMs(); - tqInfo("vgId:%d restoring task completed, elapsed time:%" PRId64 " sec.", TD_VID(pTq->pVnode), (et - st)); + double el = (taosGetTimestampMs() - st) / 1000.0; + tqInfo("vgId:%d scan wal for stream tasks completed, elapsed time:%.2f sec", vgId, el); + + // restore wal scan flag +// atomic_store_8(&pTq->pStreamMeta->walScan, 0); return 0; } -int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList) { - int32_t numOfTask = taosArrayGetSize(pTaskList); - if (numOfTask <= 0) { - return TSDB_CODE_SUCCESS; - } +//int32_t transferToNormalTask(SStreamMeta* pStreamMeta, SArray* pTaskList) { +// int32_t numOfTask = taosArrayGetSize(pTaskList); +// if (numOfTask <= 0) { +// return TSDB_CODE_SUCCESS; +// } +// +// // todo: add lock +// for (int32_t i = 0; i < numOfTask; ++i) { +// SStreamTask* pTask = taosArrayGetP(pTaskList, i); +// tqDebug("vgId:%d transfer s-task:%s state restore -> ready, 
checkpoint:%" PRId64 " checkpoint id:%" PRId64, +// pStreamMeta->vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->chkInfo.id); +// taosHashRemove(pStreamMeta->pWalReadTasks, &pTask->id.taskId, sizeof(pTask->id.taskId)); +// +// // NOTE: do not change the following order +// atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); +// taosHashPut(pStreamMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, POINTER_BYTES); +// } +// +// return TSDB_CODE_SUCCESS; +//} - // todo: add lock - for (int32_t i = 0; i < numOfTask; ++i) { - SStreamTask* pTask = taosArrayGetP(pTaskList, i); - tqDebug("vgId:%d transfer s-task:%s state restore -> ready, checkpoint:%" PRId64 " checkpoint id:%" PRId64, - pStreamMeta->vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->chkInfo.id); - taosHashRemove(pStreamMeta->pRestoreTasks, &pTask->id.taskId, sizeof(pTask->id.taskId)); +int32_t streamTaskReplayWal(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, bool* pScanIdle) { + void* pIter = NULL; + int32_t vgId = pStreamMeta->vgId; - // NOTE: do not change the following order - atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); - taosHashPut(pStreamMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, POINTER_BYTES); - } + *pScanIdle = true; - return TSDB_CODE_SUCCESS; -} - -int32_t restoreStreamTaskImpl(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetStore, SArray* pTaskList) { - // check all restore tasks - void* pIter = NULL; + bool allWalChecked = true; + tqDebug("vgId:%d start to check wal to extract new submit block", vgId); while (1) { - pIter = taosHashIterate(pStreamMeta->pRestoreTasks, pIter); + pIter = taosHashIterate(pStreamMeta->pTasks, pIter); if (pIter == NULL) { break; } @@ -78,8 +99,10 @@ int32_t restoreStreamTaskImpl(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetS continue; } - if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { - 
tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr, pTask->status.taskStatus); + if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || + pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { + tqDebug("s-task:%s skip push data, not ready for processing, status %d", pTask->id.idStr, + pTask->status.taskStatus); continue; } @@ -88,41 +111,57 @@ int32_t restoreStreamTaskImpl(SStreamMeta* pStreamMeta, STqOffsetStore* pOffsetS createStreamTaskOffsetKey(key, pTask->id.streamId, pTask->id.taskId); if (tInputQueueIsFull(pTask)) { - tqDebug("s-task:%s input queue is full, do nothing", pTask->id.idStr); - taosMsleep(10); + tqDebug("vgId:%d s-task:%s input queue is full, do nothing", vgId, pTask->id.idStr); continue; } + *pScanIdle = false; + // check if offset value exists STqOffset* pOffset = tqOffsetRead(pOffsetStore, key); - if (pOffset != NULL) { - // seek the stored version and extract data from WAL - int32_t code = tqSeekVer(pTask->exec.pTqReader, pOffset->val.version, ""); - if (code == TSDB_CODE_SUCCESS) { // all data retrieved, abort - // append the data for the stream - SFetchRet ret = {.data.info.type = STREAM_NORMAL}; - terrno = 0; + ASSERT(pOffset != NULL); - tqNextBlock(pTask->exec.pTqReader, &ret); - if (ret.fetchType == FETCH_TYPE__DATA) { - code = launchTaskForWalBlock(pTask, &ret, pOffset); - if (code != TSDB_CODE_SUCCESS) { - continue; - } - } else { - // FETCH_TYPE__NONE: all data has been retrieved from WAL, let's try submit block directly. 
- tqDebug("s-task:%s data in WAL are all consumed, transfer this task to be normal state", pTask->id.idStr); - taosArrayPush(pTaskList, &pTask); - } - } else { // failed to seek to the WAL version - tqDebug("s-task:%s data in WAL are all consumed, transfer this task to be normal state", pTask->id.idStr); - taosArrayPush(pTaskList, &pTask); - } - } else { - ASSERT(0); + // seek the stored version and extract data from WAL + int32_t code = walReadSeekVer(pTask->exec.pWalReader, pOffset->val.version); + if (code != TSDB_CODE_SUCCESS) { // no data in wal, quit + continue; } + + // append the data for the stream + tqDebug("vgId:%d wal reader seek to ver:%" PRId64 " %s", vgId, pOffset->val.version, pTask->id.idStr); + + SPackedData packData = {0}; + code = extractSubmitMsgFromWal(pTask->exec.pWalReader, &packData); + if (code != TSDB_CODE_SUCCESS) { // failed, continue + continue; + } + + SStreamDataSubmit2* p = streamDataSubmitNew(packData, STREAM_INPUT__DATA_SUBMIT); + if (p == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + tqError("%s failed to create data submit for stream since out of memory", pTask->id.idStr); + continue; + } + + allWalChecked = false; + + tqDebug("s-task:%s submit data extracted from WAL", pTask->id.idStr); + code = tqAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)p, packData.ver); + if (code == TSDB_CODE_SUCCESS) { + pOffset->val.version = walReaderGetCurrentVer(pTask->exec.pWalReader); + tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, + pOffset->val.version); + } else { + // do nothing + } + + streamDataSubmitDestroy(p); + taosFreeQitem(p); } + if (allWalChecked) { + *pScanIdle = true; + } return 0; } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 2e8c6a53bb..791bfbe6df 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -35,7 +35,7 @@ void createStreamTaskOffsetKey(char* dst, uint64_t streamId, 
uint32_t taskId) { } int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem, int64_t ver) { - int32_t code = tAppendDataForStream(pTask, pQueueItem); + int32_t code = tAppendDataToInputQueue(pTask, pQueueItem); if (code < 0) { tqError("s-task:%s failed to put into queue, too many, next start ver:%" PRId64, pTask->id.idStr, ver); return -1; @@ -79,7 +79,7 @@ void initOffsetForAllRestoreTasks(STQ* pTq) { void* pIter = NULL; while(1) { - pIter = taosHashIterate(pTq->pStreamMeta->pRestoreTasks, pIter); + pIter = taosHashIterate(pTq->pStreamMeta->pTasks, pIter); if (pIter == NULL) { break; } @@ -103,7 +103,6 @@ void initOffsetForAllRestoreTasks(STQ* pTq) { doSaveTaskOffset(pTq->pOffsetStore, key, pTask->chkInfo.version); } } - } void saveOffsetForAllTasks(STQ* pTq, int64_t ver) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 76ff04b81a..24ebbe23d2 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -539,13 +539,10 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { return vnodeGetBatchMeta(pVnode, pMsg); case TDMT_VND_TMQ_CONSUME: return tqProcessPollReq(pVnode->pTq, pMsg); - case TDMT_STREAM_TASK_RUN: return tqProcessTaskRunReq(pVnode->pTq, pMsg); -#if 1 case TDMT_STREAM_TASK_DISPATCH: return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true); -#endif case TDMT_STREAM_TASK_CHECK: return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg); case TDMT_STREAM_TASK_DISPATCH_RSP: diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index eb3c5d1f64..9f5d722583 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -551,7 +551,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) vInfo("vgId:%d, sync restore finished", pVnode->config.vgId); // start to restore all stream tasks - tqRestoreStreamTasks(pVnode->pTq); 
+ tqStartStreamTasks(pVnode->pTq); } static void vnodeBecomeFollower(const SSyncFSM *pFsm) { diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 71d4e5efd8..5ec5be169e 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -70,7 +70,7 @@ void streamSchedByTimer(void* param, void* tmrId) { trigger->pBlock->info.type = STREAM_GET_ALL; atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE); - if (tAppendDataForStream(pTask, (SStreamQueueItem*)trigger) < 0) { + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)trigger) < 0) { taosFreeQitem(trigger); taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer); return; @@ -110,16 +110,17 @@ int32_t streamSchedExec(SStreamTask* pTask) { SRpcMsg msg = { .msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq) }; tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg); + qDebug("trigger to run s-task:%s", pTask->id.idStr); } return 0; } -int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SRpcMsg* pRsp) { +int32_t streamTaskEnqueueBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq, SRpcMsg* pRsp) { SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); int8_t status; - // enqueue + // enqueue data block if (pData != NULL) { pData->type = STREAM_INPUT__DATA_BLOCK; pData->srcVgId = pReq->dataSrcVgId; @@ -127,10 +128,10 @@ int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SR /*pData->blocks = pReq->data;*/ /*pBlock->sourceVer = pReq->sourceVer;*/ streamDispatchReqToData(pReq, pData); - if (tAppendDataForStream(pTask, (SStreamQueueItem*)pData) == 0) { + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) { status = TASK_INPUT_STATUS__NORMAL; - } else { - status = TASK_INPUT_STATUS__FAILED; + } else { // input queue is full, upstream is blocked now + status = 
TASK_INPUT_STATUS__BLOCKED; } } else { streamTaskInputFail(pTask); @@ -148,8 +149,10 @@ int32_t streamTaskEnqueue(SStreamTask* pTask, const SStreamDispatchReq* pReq, SR pCont->downstreamNodeId = htonl(pTask->nodeId); pCont->downstreamTaskId = htonl(pTask->id.taskId); pRsp->pCont = buf; + pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); tmsgSendRsp(pRsp); + return status == TASK_INPUT_STATUS__NORMAL ? 0 : -1; } @@ -168,7 +171,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, /*pData->blocks = pReq->data;*/ /*pBlock->sourceVer = pReq->sourceVer;*/ streamRetrieveReqToData(pReq, pData); - if (tAppendDataForStream(pTask, (SStreamQueueItem*)pData) == 0) { + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) { status = TASK_INPUT_STATUS__NORMAL; } else { status = TASK_INPUT_STATUS__FAILED; @@ -209,10 +212,10 @@ int32_t streamTaskOutput(SStreamTask* pTask, SStreamDataBlock* pBlock) { } int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { - qDebug("task %d receive dispatch req from node %d task %d", pTask->id.taskId, pReq->upstreamNodeId, + qDebug("vgId:%d s-task:%s receive dispatch req from taskId:%d", pReq->upstreamNodeId, pTask->id.idStr, pReq->upstreamTaskId); - streamTaskEnqueue(pTask, pReq, pRsp); + streamTaskEnqueueBlocks(pTask, pReq, pRsp); tDeleteStreamDispatchReq(pReq); if (exec) { @@ -232,12 +235,14 @@ int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, S int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) { ASSERT(pRsp->inputStatus == TASK_OUTPUT_STATUS__NORMAL || pRsp->inputStatus == TASK_OUTPUT_STATUS__BLOCKED); - qDebug("task %d receive dispatch rsp, code: %x", pTask->id.taskId, code); + qDebug("s-task:%s receive dispatch rsp, code: %x", pTask->id.idStr, code); if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { int32_t leftRsp = 
atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); qDebug("task %d is shuffle, left waiting rsp %d", pTask->id.taskId, leftRsp); - if (leftRsp > 0) return 0; + if (leftRsp > 0) { + return 0; + } } int8_t old = atomic_exchange_8(&pTask->outputStatus, pRsp->inputStatus); @@ -282,7 +287,7 @@ bool tInputQueueIsFull(const SStreamTask* pTask) { return taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUEU_CAPACITY; } -int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem) { +int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { int8_t type = pItem->type; if (type == STREAM_INPUT__DATA_SUBMIT) { @@ -295,12 +300,12 @@ int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem) { } int32_t total = taosQueueItemSize(pTask->inputQueue->queue) + 1; - qDebug("s-task:%s submit enqueue %p %p %p msgLen:%d ver:%" PRId64 ", total in queue:%d", pTask->id.idStr, - pItem, pSubmitBlock, pSubmitBlock->submit.msgStr, pSubmitBlock->submit.msgLen, + qDebug("s-task:%s submit enqueue %p %p msgLen:%d ver:%" PRId64 ", total in queue:%d", pTask->id.idStr, + pItem, pSubmitBlock->submit.msgStr, pSubmitBlock->submit.msgLen, pSubmitBlock->submit.ver, total); - if (total > STREAM_TASK_INPUT_QUEUEU_CAPACITY) { - qDebug("s-task:%s input queue is full, capacity:%d, abort", pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY); + if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && total > STREAM_TASK_INPUT_QUEUEU_CAPACITY) { + qError("s-task:%s input queue is full, capacity:%d, abort", pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY); streamDataSubmitDestroy(pSubmitBlock); return -1; } @@ -309,8 +314,8 @@ int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem) { } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__REF_DATA_BLOCK) { int32_t total = taosQueueItemSize(pTask->inputQueue->queue) + 1; - if (total > 2) { - qDebug("stream task input 
queue is full, abort"); + if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && total > STREAM_TASK_INPUT_QUEUEU_CAPACITY) { + qError("s-task:%s input queue is full, capacity:%d, abort", pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY); return -1; } @@ -327,7 +332,6 @@ int32_t tAppendDataForStream(SStreamTask* pTask, SStreamQueueItem* pItem) { } #if 0 - // TODO: back pressure atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__NORMAL); #endif diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 63d15f134d..ae616260f3 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -69,7 +69,6 @@ int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock SStreamDataSubmit2* streamDataSubmitNew(SPackedData submit, int32_t type) { SStreamDataSubmit2* pDataSubmit = (SStreamDataSubmit2*)taosAllocateQitem(sizeof(SStreamDataSubmit2), DEF_QITEM, 0); - if (pDataSubmit == NULL) { return NULL; } diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 4e491f906a..a9f6d29bf5 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -238,7 +238,8 @@ int32_t streamDispatchOneCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* msg.pCont = buf; msg.msgType = TDMT_STREAM_TASK_CHECK; - qDebug("dispatch from task %d to task %d node %d: check msg", pTask->id.taskId, pReq->downstreamTaskId, nodeId); + qDebug("dispatch from s-task:%s to downstream s-task:%"PRIx64":%d node %d: check msg", pTask->id.idStr, + pReq->streamId, pReq->downstreamTaskId, nodeId); tmsgSendReq(pEpSet, &msg); @@ -319,8 +320,7 @@ int32_t streamDispatchOneDataReq(SStreamTask* pTask, const SStreamDispatchReq* p msg.pCont = buf; msg.msgType = pTask->dispatchMsgType; - qDebug("dispatch from task %d to task %d node %d: data msg", pTask->id.taskId, pReq->taskId, vgId); - + qDebug("dispatch from s-task:%s to taskId:%d vgId:%d data msg", 
pTask->id.idStr, pReq->taskId, vgId); tmsgSendReq(pEpSet, &msg); code = 0; @@ -402,14 +402,15 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat goto FAIL_FIXED_DISPATCH; } } + int32_t vgId = pTask->fixedEpDispatcher.nodeId; SEpSet* pEpSet = &pTask->fixedEpDispatcher.epSet; int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId; req.taskId = downstreamTaskId; - qDebug("dispatch from task %d (child id %d) to down stream task %d in vnode %d", pTask->id.taskId, pTask->selfChildId, - downstreamTaskId, vgId); + qDebug("s-task:%s (child taskId:%d) dispatch blocks:%d to down stream s-task:%d in vgId:%d", pTask->id.idStr, + pTask->selfChildId, blockNum, downstreamTaskId, vgId); if (streamDispatchOneDataReq(pTask, &req, vgId, pEpSet) < 0) { goto FAIL_FIXED_DISPATCH; @@ -494,6 +495,8 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat int32_t streamDispatch(SStreamTask* pTask) { ASSERT(pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH); + qDebug("s-task:%s try to dispatch intermediate result block to downstream, numofBlocks in outputQ:%d", pTask->id.idStr, + taosQueueItemSize(pTask->outputQueue->queue)); int8_t old = atomic_val_compare_exchange_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT); @@ -503,13 +506,12 @@ int32_t streamDispatch(SStreamTask* pTask) { SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputQueue); if (pBlock == NULL) { - qDebug("stream stop dispatching since no output: task %d", pTask->id.taskId); + qDebug("s-task:%s stream stop dispatching since no output in output queue", pTask->id.idStr); atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); return 0; } - ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK); - qDebug("stream dispatching: task %d", pTask->id.taskId); + ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK); int32_t code = 0; if (streamDispatchAllBlocks(pTask, pBlock) < 0) { @@ 
-518,6 +520,7 @@ int32_t streamDispatch(SStreamTask* pTask) { atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL); goto FREE; } + FREE: taosArrayDestroyEx(pBlock->blocks, (FDelete)blockDataFreeRes); taosFreeQitem(pBlock); diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index db9be593c0..3d896c08ac 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -40,9 +40,9 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, const void* data, SArray* } else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE); const SStreamDataSubmit2* pSubmit = (const SStreamDataSubmit2*)data; - qDebug("s-task:%s set submit blocks as input %p %p %d ver:%" PRId64, pTask->id.idStr, pSubmit, pSubmit->submit.msgStr, - pSubmit->submit.msgLen, pSubmit->submit.ver); qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT); + qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, pTask->id.idStr, pSubmit, pSubmit->submit.msgStr, + pSubmit->submit.msgLen, pSubmit->submit.ver); } else if (pItem->type == STREAM_INPUT__DATA_BLOCK || pItem->type == STREAM_INPUT__DATA_RETRIEVE) { const SStreamDataBlock* pBlock = (const SStreamDataBlock*)data; @@ -241,7 +241,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { while (1) { SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); if (qItem == NULL) { - qDebug("s-task:%s stream task exec over, queue empty", pTask->id.idStr); +// qDebug("s-task:%s extract data from input queue, queue is empty, abort", pTask->id.idStr); break; } @@ -280,12 +280,13 @@ int32_t streamExecForAll(SStreamTask* pTask) { if (pTask->taskLevel == TASK_LEVEL__SINK) { ASSERT(((SStreamQueueItem*)pInput)->type == STREAM_INPUT__DATA_BLOCK); + qDebug("s-task:%s sink node start to sink result. 
numOfBlocks:%d", pTask->id.idStr, batchSize); streamTaskOutput(pTask, pInput); continue; } SArray* pRes = taosArrayInit(0, sizeof(SSDataBlock)); - qDebug("s-task:%s exec begin, msg batch: %d", pTask->id.idStr, batchSize); + qDebug("s-task:%s exec begin, numOfBlocks:%d", pTask->id.idStr, batchSize); streamTaskExecImpl(pTask, pInput, pRes); @@ -293,13 +294,21 @@ int32_t streamExecForAll(SStreamTask* pTask) { int64_t dataVer = 0; qGetCheckpointVersion(pTask->exec.pExecutor, &dataVer, &ckId); if (dataVer > pTask->chkInfo.version) { // save it since the checkpoint is updated - qDebug("s-task:%s exec end, checkpoint ver from %"PRId64" to %"PRId64, pTask->id.idStr, pTask->chkInfo.version, dataVer); - pTask->chkInfo = (SCheckpointInfo) {.version = dataVer, .id = ckId}; - streamMetaSaveTask(pTask->pMeta, pTask); + qDebug("s-task:%s exec end, start to update check point, ver from %" PRId64 " to %" PRId64 + ", checkPoint id:%" PRId64 " -> %" PRId64, + pTask->id.idStr, pTask->chkInfo.version, dataVer, pTask->chkInfo.id, ckId); + pTask->chkInfo = (SCheckpointInfo) {.version = dataVer, .id = ckId}; + + taosWLockLatch(&pTask->pMeta->lock); + streamMetaSaveTask(pTask->pMeta, pTask); if (streamMetaCommit(pTask->pMeta) < 0) { - qError("failed to commit stream meta, since %s", terrstr()); + taosWUnLockLatch(&pTask->pMeta->lock); + qError("s-task:%s failed to commit stream meta, since %s", pTask->id.idStr, terrstr()); return -1; + } else { + taosWUnLockLatch(&pTask->pMeta->lock); + qDebug("s-task:%s update checkpoint ver succeed", pTask->id.idStr); } } else { qDebug("s-task:%s exec end", pTask->id.idStr); @@ -354,6 +363,7 @@ int32_t streamTryExec(SStreamTask* pTask) { // todo the task should be commit here atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + qDebug("s-task:%s exec completed", pTask->id.idStr); if (!taosQueueEmpty(pTask->inputQueue->queue)) { streamSchedExec(pTask); diff --git a/source/libs/stream/src/streamMeta.c 
b/source/libs/stream/src/streamMeta.c index 2e9bb4d762..4b423cc432 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -51,8 +51,8 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } - pMeta->pRestoreTasks = taosHashInit(64, fp, true, HASH_ENTRY_LOCK); - if (pMeta->pRestoreTasks == NULL) { + pMeta->pWalReadTasks = taosHashInit(64, fp, true, HASH_ENTRY_LOCK); + if (pMeta->pWalReadTasks == NULL) { goto _err; } @@ -60,15 +60,16 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } + pMeta->vgId = vgId; pMeta->ahandle = ahandle; pMeta->expandFunc = expandFunc; - + taosInitRWLatch(&pMeta->lock); return pMeta; _err: taosMemoryFree(pMeta->path); if (pMeta->pTasks) taosHashCleanup(pMeta->pTasks); - if (pMeta->pRestoreTasks) taosHashCleanup(pMeta->pRestoreTasks); + if (pMeta->pWalReadTasks) taosHashCleanup(pMeta->pWalReadTasks); if (pMeta->pTaskDb) tdbTbClose(pMeta->pTaskDb); if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb); if (pMeta->db) tdbClose(pMeta->db); @@ -83,20 +84,29 @@ void streamMetaClose(SStreamMeta* pMeta) { tdbClose(pMeta->db); void* pIter = NULL; + while(pMeta->walScan) { + qDebug("wait stream daemon quit"); + taosMsleep(100); + } + while (1) { pIter = taosHashIterate(pMeta->pTasks, pIter); - if (pIter == NULL) break; + if (pIter == NULL) { + break; + } + SStreamTask* pTask = *(SStreamTask**)pIter; if (pTask->timer) { taosTmrStop(pTask->timer); pTask->timer = NULL; } + tFreeStreamTask(pTask); /*streamMetaReleaseTask(pMeta, pTask);*/ } taosHashCleanup(pMeta->pTasks); - taosHashCleanup(pMeta->pRestoreTasks); + taosHashCleanup(pMeta->pWalReadTasks); taosMemoryFree(pMeta->path); taosMemoryFree(pMeta); } @@ -164,8 +174,8 @@ int32_t streamMetaSaveTask(SStreamMeta* pMeta, SStreamTask* pTask) { return 0; } -#if 1 -int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { +// add to the ready tasks hash 
map, not the restored tasks hash map +int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { if (pMeta->expandFunc(pMeta->ahandle, pTask, ver) < 0) { return -1; } @@ -174,10 +184,16 @@ int32_t streamMetaAddTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* pTask) { return -1; } - taosHashPut(pMeta->pRestoreTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, POINTER_BYTES); + pTask->status.taskStatus = STREAM_STATUS__NORMAL; + taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, POINTER_BYTES); return 0; } -#endif + +int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta) { + int32_t numOfReady = taosHashGetSize(pMeta->pTasks); + int32_t numOfRestoring = taosHashGetSize(pMeta->pWalReadTasks); + return numOfReady + numOfRestoring; +} SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId) { taosRLockLatch(&pMeta->lock); @@ -206,9 +222,9 @@ SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId) { taosRLockLatch(&pMeta->lock); SStreamTask* pTask = NULL; - int32_t numOfRestored = taosHashGetSize(pMeta->pRestoreTasks); + int32_t numOfRestored = taosHashGetSize(pMeta->pWalReadTasks); if (numOfRestored > 0) { - SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pRestoreTasks, &taskId, sizeof(int32_t)); + SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pWalReadTasks, &taskId, sizeof(taskId)); if (p != NULL) { pTask = *p; if (pTask != NULL && (atomic_load_8(&(pTask->status.taskStatus)) != TASK_STATUS__DROPPING)) { @@ -217,15 +233,15 @@ SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId) { return pTask; } } - } else { - SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); - if (p != NULL) { - pTask = *p; - if (pTask != NULL && atomic_load_8(&(pTask->status.taskStatus)) != TASK_STATUS__DROPPING) { - atomic_add_fetch_32(&pTask->refCnt, 1); - taosRUnLockLatch(&pMeta->lock); - return pTask; - } + } + + SStreamTask** p = 
(SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); + if (p != NULL) { + pTask = *p; + if (pTask != NULL && atomic_load_8(&(pTask->status.taskStatus)) != TASK_STATUS__DROPPING) { + atomic_add_fetch_32(&pTask->refCnt, 1); + taosRUnLockLatch(&pMeta->lock); + return pTask; } } @@ -261,9 +277,12 @@ int32_t streamMetaBegin(SStreamMeta* pMeta) { int32_t streamMetaCommit(SStreamMeta* pMeta) { if (tdbCommit(pMeta->db, pMeta->txn) < 0) { + ASSERT(0); return -1; } + if (tdbPostCommit(pMeta->db, pMeta->txn) < 0) { + ASSERT(0); return -1; } @@ -319,7 +338,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } - if (taosHashPut(pMeta->pRestoreTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { + if (taosHashPut(pMeta->pWalReadTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 834c022a9a..7d2d7a666f 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -186,7 +186,8 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->exec.pExecutor = NULL; } - if (pTask->exec.pTqReader != NULL) { + if (pTask->exec.pTqReader != NULL && pTask->freeFp != NULL) { + pTask->freeFp(pTask->exec.pTqReader); pTask->exec.pTqReader = NULL; } @@ -206,5 +207,9 @@ void tFreeStreamTask(SStreamTask* pTask) { streamStateClose(pTask->pState); } + if (pTask->id.idStr != NULL) { + taosMemoryFree((void*)pTask->id.idStr); + } + taosMemoryFree(pTask); } diff --git a/source/util/src/tworker.c b/source/util/src/tworker.c index d57104dd78..a49ff0cd5b 100644 --- a/source/util/src/tworker.c +++ b/source/util/src/tworker.c @@ -248,7 +248,8 @@ STaosQueue *tAutoQWorkerAllocQueue(SAutoQWorkerPool *pool, void *ahandle, FItem } taosThreadAttrDestroy(&thAttr); - uInfo("worker:%s:%d is launched, total:%d", pool->name, worker->id, (int32_t)taosArrayGetSize(pool->workers)); 
+ int32_t numOfThreads = taosArrayGetSize(pool->workers); + uInfo("worker:%s:%d is launched, total:%d, expect:%d", pool->name, worker->id, numOfThreads, dstWorkerNum); curWorkerNum++; } diff --git a/tests/script/tsim/stream/basic1.sim b/tests/script/tsim/stream/basic1.sim index e69875d69f..15ca6bf7c9 100644 --- a/tests/script/tsim/stream/basic1.sim +++ b/tests/script/tsim/stream/basic1.sim @@ -37,7 +37,7 @@ if $loop_count == 20 then endi if $rows != 4 then - print =====rows=$rows + print =====rows=$rows, expect 4 goto loop0 endi @@ -53,7 +53,7 @@ if $data02 != 2 then endi if $data03 != 5 then - print =====data03=$data03 + print =====data03=$data03, expect:5 goto loop0 endi From 4d83118ff756b588c4f5744195020c2612a09b4b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 13 Apr 2023 23:49:14 +0800 Subject: [PATCH 13/25] fix(stream): fix memory leak. --- source/dnode/vnode/src/tq/tq.c | 1 + source/libs/stream/CMakeLists.txt | 2 +- source/libs/stream/src/streamTask.c | 5 +++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 2b911befcc..59a78efe53 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -635,6 +635,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->freeFp = (_free_reader_fn_t)tqCloseReader; SArray* pList = qGetQueriedTableListInfo(pTask->exec.pExecutor); tqReaderAddTbUidList(pTask->exec.pTqReader, pList); + taosArrayDestroy(pList); } streamSetupTrigger(pTask); diff --git a/source/libs/stream/CMakeLists.txt b/source/libs/stream/CMakeLists.txt index ceddf4f215..790547bb61 100644 --- a/source/libs/stream/CMakeLists.txt +++ b/source/libs/stream/CMakeLists.txt @@ -9,7 +9,7 @@ target_include_directories( target_link_libraries( stream PUBLIC tdb - PRIVATE os util transport qcom executor + PRIVATE os util transport qcom executor wal ) if(${BUILD_TEST}) diff --git a/source/libs/stream/src/streamTask.c 
b/source/libs/stream/src/streamTask.c index 7d2d7a666f..4783997276 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -15,6 +15,7 @@ #include "executor.h" #include "tstream.h" +#include "wal.h" SStreamTask* tNewStreamTask(int64_t streamId) { SStreamTask* pTask = (SStreamTask*)taosMemoryCalloc(1, sizeof(SStreamTask)); @@ -191,6 +192,10 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->exec.pTqReader = NULL; } + if (pTask->exec.pWalReader != NULL) { + walCloseReader(pTask->exec.pWalReader); + } + taosArrayDestroyP(pTask->childEpInfo, taosMemoryFree); if (pTask->outputType == TASK_OUTPUT__TABLE) { tDeleteSSchemaWrapper(pTask->tbSink.pSchemaWrapper); From e985f15cf07cde40b3271bd1cc8e5f00cd295905 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 14 Apr 2023 09:36:36 +0800 Subject: [PATCH 14/25] fix:memset nullBitmap of SSDataBlock to 0 in udf --- include/libs/function/taosudf.h | 2 ++ source/common/src/tdatablock.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/libs/function/taosudf.h b/include/libs/function/taosudf.h index 1b1339340b..47f4956206 100644 --- a/include/libs/function/taosudf.h +++ b/include/libs/function/taosudf.h @@ -166,6 +166,8 @@ static FORCE_INLINE int32_t udfColEnsureCapacity(SUdfColumn *pColumn, int32_t ne if (tmp == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } + uint32_t extend = BitmapLen(allocCapacity) - BitmapLen(data->rowsAlloc); + memset(tmp + BitmapLen(data->rowsAlloc), 0, extend); data->fixLenCol.nullBitmap = tmp; data->fixLenCol.nullBitmapLen = BitmapLen(allocCapacity); if (meta->type == TSDB_DATA_TYPE_NULL) { diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 986a46036b..2cdcfecbdd 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -314,7 +314,8 @@ int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, int if (btmp == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } - + uint32_t extend 
= BitmapLen(finalNumOfRows) - BitmapLen(numOfRow1); + memset(btmp + BitmapLen(numOfRow1), 0, extend); pColumnInfoData->nullbitmap = btmp; } From eb0e1f848a2939fa8ea8e7bfb2263c5b1db409ab Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 14 Apr 2023 12:00:09 +0800 Subject: [PATCH 15/25] fix(stream): remove unused tqreader, do some internal refactor, set the meta value for streamtask. --- include/libs/executor/executor.h | 2 +- include/libs/stream/tstream.h | 3 -- source/dnode/mgmt/mgmt_snode/src/smInt.c | 1 + source/dnode/mnode/impl/src/mndScheduler.c | 1 + source/dnode/snode/src/snode.c | 3 ++ source/dnode/vnode/src/sma/smaRollup.c | 2 +- source/dnode/vnode/src/tq/tq.c | 13 +------- source/dnode/vnode/src/tq/tqRead.c | 11 ++----- source/dnode/vnode/src/tq/tqUtil.c | 31 ++----------------- source/libs/executor/src/executor.c | 7 +---- source/libs/stream/src/streamMeta.c | 36 ++++------------------ source/libs/stream/src/streamRecover.c | 9 ++---- source/libs/stream/src/streamTask.c | 5 --- 13 files changed, 22 insertions(+), 102 deletions(-) diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 6f2a6126b3..34372dc2ff 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -123,7 +123,7 @@ int32_t qSetSMAInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numOfBlocks, * @param isAdd * @return */ -int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd, SArray* pList); +int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd); /** * Create the exec task object according to task json diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index eea8868b8c..4b4ef55485 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -221,7 +221,6 @@ SStreamDataSubmit2* streamSubmitBlockClone(SStreamDataSubmit2* pSubmit); typedef struct { char* qmsg; void* pExecutor; // not applicable 
to encoder and decoder - struct STqReader* pTqReader; // not applicable to encoder and decoder struct SWalReader* pWalReader; // not applicable to encoder and decoder } STaskExec; @@ -333,7 +332,6 @@ struct SStreamTask { int64_t checkpointingId; int32_t checkpointAlignCnt; struct SStreamMeta* pMeta; - _free_reader_fn_t freeFp; }; // meta @@ -343,7 +341,6 @@ typedef struct SStreamMeta { TTB* pTaskDb; TTB* pCheckpointDb; SHashObj* pTasks; - SHashObj* pWalReadTasks; void* ahandle; TXN* txn; FTaskExpand* expandFunc; diff --git a/source/dnode/mgmt/mgmt_snode/src/smInt.c b/source/dnode/mgmt/mgmt_snode/src/smInt.c index 28097311ac..e222349767 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smInt.c +++ b/source/dnode/mgmt/mgmt_snode/src/smInt.c @@ -55,6 +55,7 @@ int32_t smOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { smClose(pMgmt); return -1; } + tmsgReportStartup("snode-impl", "initialized"); if (smStartWorker(pMgmt) != 0) { diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 36521fd778..734f624be0 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -356,6 +356,7 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) { qDestroyQueryPlan(pPlan); return -1; } + pInnerTask->fillHistory = pStream->fillHistory; mndAddTaskToTaskSet(taskInnerLevel, pInnerTask); diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 4235548e48..cefc4fa63e 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -32,6 +32,7 @@ void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) { tDecoderClear(&decoder); goto FAIL; } + tDecoderClear(&decoder); int32_t taskId = req.taskId; @@ -77,6 +78,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pSnode->msgCb; pTask->chkInfo.version = ver; + pTask->pMeta = pSnode->pMeta; 
pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1); if (pTask->pState == NULL) { @@ -137,6 +139,7 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { if (pTask == NULL) { return -1; } + SDecoder decoder; tDecoderInit(&decoder, (uint8_t *)msg, msgLen); code = tDecodeStreamTask(&decoder, pTask); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 8aeb705d90..ce987ca88e 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -168,7 +168,7 @@ static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pRSmaInfo->taskInfo[i]) { - if ((terrno = qUpdateTableListForStreamScanner(pRSmaInfo->taskInfo[i], tbUids, isAdd, NULL)) < 0) { + if ((terrno = qUpdateTableListForStreamScanner(pRSmaInfo->taskInfo[i], tbUids, isAdd)) < 0) { tdReleaseRSmaInfo(pSma, pRSmaInfo); smaError("vgId:%d, update tbUidList failed for uid:%" PRIi64 " level %d since %s", SMA_VID(pSma), *suid, i, terrstr()); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 5f3facf476..5d4fdbe5d8 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -567,6 +567,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL; pTask->pMsgCb = &pTq->pVnode->msgCb; pTask->pMeta = pTq->pStreamMeta; + pTask->chkInfo.version = ver; // expand executor if (pTask->fillHistory) { @@ -628,18 +629,7 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { } if (pTask->taskLevel == TASK_LEVEL__SOURCE) { - pTask->exec.pTqReader = tqOpenReader(pTq->pVnode); - if (pTask->exec.pTqReader == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); - - pTask->freeFp = (_free_reader_fn_t)tqCloseReader; - SArray* pList = 
qGetQueriedTableListInfo(pTask->exec.pExecutor); - tqReaderAddTbUidList(pTask->exec.pTqReader, pList); - taosArrayDestroy(pList); } streamSetupTrigger(pTask); @@ -1141,7 +1131,6 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { } streamMetaReleaseTask(pTq->pStreamMeta, pTask); - tqStartStreamTasks(pTq); return 0; } else { diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 69624f4d10..25ab7209d2 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -973,7 +973,7 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { STqHandle* pTqHandle = (STqHandle*)pIter; if (pTqHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { - int32_t code = qUpdateTableListForStreamScanner(pTqHandle->execHandle.task, tbUidList, isAdd, NULL); + int32_t code = qUpdateTableListForStreamScanner(pTqHandle->execHandle.task, tbUidList, isAdd); if (code != 0) { tqError("update qualified table error for %s", pTqHandle->subKey); continue; @@ -1031,18 +1031,11 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) { SStreamTask* pTask = *(SStreamTask**)pIter; if (pTask->taskLevel == TASK_LEVEL__SOURCE) { - SArray* pList = NULL; - int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd, pList); + int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd); if (code != 0) { tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr); continue; } - - if (isAdd) { // only add qualified tables - tqReaderAddTbUidList(pTask->exec.pTqReader, pList); - } else { - tqReaderRemoveTbUidList(pTask->exec.pTqReader, tbUidList); - } } } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index 791bfbe6df..4c37e1052f 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -49,32 +49,6 @@ int32_t 
tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueI return TSDB_CODE_SUCCESS; } -int32_t launchTaskForWalBlock(SStreamTask* pTask, SFetchRet* pRet, STqOffset* pOffset) { - SStreamDataBlock* pBlocks = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); - if (pBlocks == NULL) { // failed, do nothing - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - pRet->data.info.type = STREAM_NORMAL; - pBlocks->type = STREAM_INPUT__DATA_BLOCK; - pBlocks->sourceVer = pOffset->val.version; - pBlocks->blocks = taosArrayInit(0, sizeof(SSDataBlock)); - taosArrayPush(pBlocks->blocks, &pRet->data); - -// int64_t* ts = (int64_t*)(((SColumnInfoData*)ret.data.pDataBlock->pData)->pData); -// tqDebug("-----------%ld\n", ts[0]); - - int32_t code = tqAddInputBlockNLaunchTask(pTask, (SStreamQueueItem*)pBlocks, pBlocks->sourceVer); - if (code == TSDB_CODE_SUCCESS) { - pOffset->val.version = walReaderGetCurrentVer(pTask->exec.pTqReader->pWalReader); - tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr, - pOffset->val.version); - } - - return 0; -} - void initOffsetForAllRestoreTasks(STQ* pTq) { void* pIter = NULL; @@ -90,8 +64,7 @@ void initOffsetForAllRestoreTasks(STQ* pTq) { } if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { - tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, - pTask->status.taskStatus); + tqDebug("s-task:%s skip push data, since not ready, status %d", pTask->id.idStr, pTask->status.taskStatus); continue; } @@ -120,7 +93,7 @@ void saveOffsetForAllTasks(STQ* pTq, int64_t ver) { } if (pTask->status.taskStatus == TASK_STATUS__RECOVER_PREPARE || pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM) { - tqDebug("stream task:%d skip push data, not ready for processing, status %d", pTask->id.taskId, + tqDebug("s-task:%s skip push data, not ready for processing, status %d", 
pTask->id.idStr, pTask->status.taskStatus); continue; } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 255949a588..2bc91f0cb3 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -370,7 +370,7 @@ static SArray* filterUnqualifiedTables(const SStreamScanInfo* pScanInfo, const S return qa; } -int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd, SArray* pList) { +int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableIdList, bool isAdd) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; const char* id = GET_TASKID(pTaskInfo); int32_t code = 0; @@ -386,11 +386,6 @@ int32_t qUpdateTableListForStreamScanner(qTaskInfo_t tinfo, const SArray* tableI if (isAdd) { // add new table id SArray* qa = filterUnqualifiedTables(pScanInfo, tableIdList, GET_TASKID(pTaskInfo)); int32_t numOfQualifiedTables = taosArrayGetSize(qa); - - if (pList != NULL) { - taosArrayAddAll(pList, qa); - } - qDebug("%d qualified child tables added into stream scanner, %s", numOfQualifiedTables, id); code = tqReaderAddTbUidList(pScanInfo->tqReader, qa); if (code != TSDB_CODE_SUCCESS) { diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 0008c8dd8c..c45c700375 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -57,11 +57,6 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF goto _err; } - pMeta->pWalReadTasks = taosHashInit(64, fp, true, HASH_ENTRY_LOCK); - if (pMeta->pWalReadTasks == NULL) { - goto _err; - } - if (streamMetaBegin(pMeta) < 0) { goto _err; } @@ -75,7 +70,6 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF _err: taosMemoryFree(pMeta->path); if (pMeta->pTasks) taosHashCleanup(pMeta->pTasks); - if (pMeta->pWalReadTasks) taosHashCleanup(pMeta->pWalReadTasks); if (pMeta->pTaskDb) 
tdbTbClose(pMeta->pTaskDb); if (pMeta->pCheckpointDb) tdbTbClose(pMeta->pCheckpointDb); if (pMeta->db) tdbClose(pMeta->db); @@ -112,7 +106,6 @@ void streamMetaClose(SStreamMeta* pMeta) { } taosHashCleanup(pMeta->pTasks); - taosHashCleanup(pMeta->pWalReadTasks); taosMemoryFree(pMeta->path); taosMemoryFree(pMeta); } @@ -196,9 +189,7 @@ int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* } int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta) { - int32_t numOfReady = taosHashGetSize(pMeta->pTasks); - int32_t numOfRestoring = taosHashGetSize(pMeta->pWalReadTasks); - return numOfReady + numOfRestoring; + return (int32_t) taosHashGetSize(pMeta->pTasks); } SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId) { @@ -225,34 +216,19 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { } SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId) { - taosRLockLatch(&pMeta->lock); - SStreamTask* pTask = NULL; - int32_t numOfRestored = taosHashGetSize(pMeta->pWalReadTasks); - if (numOfRestored > 0) { - SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pWalReadTasks, &taskId, sizeof(taskId)); - if (p != NULL) { - pTask = *p; - if (pTask != NULL && (atomic_load_8(&(pTask->status.taskStatus)) != TASK_STATUS__DROPPING)) { - atomic_add_fetch_32(&pTask->refCnt, 1); - taosRUnLockLatch(&pMeta->lock); - return pTask; - } - } - } + taosRLockLatch(&pMeta->lock); SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); if (p != NULL) { - pTask = *p; - if (pTask != NULL && atomic_load_8(&(pTask->status.taskStatus)) != TASK_STATUS__DROPPING) { + if ((*p) != NULL && atomic_load_8(&((*p)->status.taskStatus)) != TASK_STATUS__DROPPING) { + pTask = *p; atomic_add_fetch_32(&pTask->refCnt, 1); - taosRUnLockLatch(&pMeta->lock); - return pTask; } } taosRUnLockLatch(&pMeta->lock); - return NULL; + return pTask; } void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { @@ -344,7 
+320,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { return -1; } - if (taosHashPut(pMeta->pWalReadTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { + if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, sizeof(void*)) < 0) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur); diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 9962cdfcc0..6c24e69832 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -17,6 +17,7 @@ int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { qDebug("s-task:%s at node %d launch recover", pTask->id.idStr, pTask->nodeId); + if (pTask->taskLevel == TASK_LEVEL__SOURCE) { atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__RECOVER_PREPARE); streamSetParamForRecover(pTask); @@ -33,12 +34,7 @@ int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { memcpy(serializedReq, &req, len); - SRpcMsg rpcMsg = { - .contLen = len, - .pCont = serializedReq, - .msgType = TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE, - }; - + SRpcMsg rpcMsg = { .contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE }; if (tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) { /*ASSERT(0);*/ } @@ -61,6 +57,7 @@ int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) { .upstreamNodeId = pTask->nodeId, .childId = pTask->selfChildId, }; + // serialize if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { req.reqId = tGenIdPI64(); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index 4783997276..67c60008fd 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -187,11 +187,6 @@ void tFreeStreamTask(SStreamTask* pTask) { pTask->exec.pExecutor = NULL; } - if (pTask->exec.pTqReader != NULL && pTask->freeFp != NULL) { - pTask->freeFp(pTask->exec.pTqReader); - 
pTask->exec.pTqReader = NULL; - } - if (pTask->exec.pWalReader != NULL) { walCloseReader(pTask->exec.pWalReader); } From e0cb8aa534f70a760df4f7272ab25b4ffa13af20 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 14 Apr 2023 17:43:37 +0800 Subject: [PATCH 16/25] fix(stream): don't the initial task status and do some internal refactor. --- include/libs/stream/tstream.h | 2 -- source/dnode/vnode/src/tq/tq.c | 35 +++++++++++++------------- source/libs/stream/src/streamMeta.c | 17 ------------- source/libs/stream/src/streamRecover.c | 17 ++++++++++--- 4 files changed, 31 insertions(+), 40 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 4b4ef55485..103f807191 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -31,7 +31,6 @@ extern "C" { #ifndef _STREAM_H_ #define _STREAM_H_ -typedef void (*_free_reader_fn_t)(void*); typedef struct SStreamTask SStreamTask; enum { @@ -574,7 +573,6 @@ int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamT int32_t streamMetaAddSerializedTask(SStreamMeta* pMeta, int64_t checkpointVer, char* msg, int32_t msgLen); int32_t streamMetaGetNumOfTasks(const SStreamMeta* pMeta); -SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId); SStreamTask* streamMetaAcquireTask(SStreamMeta* pMeta, int32_t taskId); void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask); void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 5d4fdbe5d8..1230a352d9 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -659,17 +659,22 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { }; SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); - if (pTask && atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL) { - rsp.status = 1; + if (pTask) { + rsp.status = 
(atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL) ? 1 : 0; + streamMetaReleaseTask(pTq->pStreamMeta, pTask); + + tqDebug("tq recv task check req(reqId:0x%" PRIx64 + ") %d at node %d task status:%d, check req from task %d at node %d, rsp status %d", + rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, pTask->status.taskStatus, rsp.upstreamTaskId, + rsp.upstreamNodeId, rsp.status); } else { rsp.status = 0; + tqDebug("tq recv task check(taskId:%d not built yet) req(reqId:0x%" PRIx64 + ") %d at node %d, check req from task %d at node %d, rsp status %d", + taskId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, + rsp.status); } - if (pTask) streamMetaReleaseTask(pTq->pStreamMeta, pTask); - - tqDebug("tq recv task check req(reqId:0x%" PRIx64 ") %d at node %d check req from task %d at node %d, status %d", - rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); - SEncoder encoder; int32_t code; int32_t len; @@ -687,13 +692,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { tEncodeSStreamTaskCheckRsp(&encoder, &rsp); tEncoderClear(&encoder); - SRpcMsg rspMsg = { - .code = 0, - .pCont = buf, - .contLen = sizeof(SMsgHead) + len, - .info = pMsg->info, - }; - + SRpcMsg rspMsg = { .code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info }; tmsgSendRsp(&rspMsg); return 0; } @@ -709,8 +708,8 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32 tDecoderClear(&decoder); return -1; } - tDecoderClear(&decoder); + tDecoderClear(&decoder); tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d at node %d check req from task %d at node %d, status %d", rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); @@ -764,8 +763,8 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms streamTaskCheckDownstream(pTask, sversion); 
} - tqDebug("vgId:%d s-task:%s is deployed from mnd, status:%d, total:%d", TD_VID(pTq->pVnode), pTask->id.idStr, - pTask->status.taskStatus, streamMetaGetNumOfTasks(pTq->pStreamMeta)); + tqDebug("vgId:%d s-task:%s is deployed and add meta from mnd, status:%d, total:%d", TD_VID(pTq->pVnode), + pTask->id.idStr, pTask->status.taskStatus, streamMetaGetNumOfTasks(pTq->pStreamMeta)); return 0; } @@ -1117,7 +1116,7 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { return 0; } - SStreamTask* pTask = streamMetaAcquireTaskEx(pTq->pStreamMeta, taskId); + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId); if (pTask != NULL) { if (pTask->status.taskStatus == TASK_STATUS__NORMAL) { tqDebug("vgId:%d s-task:%s start to process run req", vgId, pTask->id.idStr); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index c45c700375..fecc01f295 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -183,7 +183,6 @@ int32_t streamMetaAddDeployedTask(SStreamMeta* pMeta, int64_t ver, SStreamTask* return -1; } - pTask->status.taskStatus = STREAM_STATUS__NORMAL; taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(int32_t), &pTask, POINTER_BYTES); return 0; } @@ -215,22 +214,6 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) { } } -SStreamTask* streamMetaAcquireTaskEx(SStreamMeta* pMeta, int32_t taskId) { - SStreamTask* pTask = NULL; - taosRLockLatch(&pMeta->lock); - - SStreamTask** p = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); - if (p != NULL) { - if ((*p) != NULL && atomic_load_8(&((*p)->status.taskStatus)) != TASK_STATUS__DROPPING) { - pTask = *p; - atomic_add_fetch_32(&pTask->refCnt, 1); - } - } - - taosRUnLockLatch(&pMeta->lock); - return pTask; -} - void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) { SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t)); if (ppTask) { diff --git 
a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 6c24e69832..03afc0692d 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -46,6 +46,7 @@ int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) { } else if (pTask->taskLevel == TASK_LEVEL__SINK) { atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); } + return 0; } @@ -125,6 +126,7 @@ int32_t streamProcessTaskCheckReq(SStreamTask* pTask, const SStreamTaskCheckReq* int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version) { qDebug("task %d at node %d recv check rsp from task %d at node %d: status %d", pRsp->upstreamTaskId, pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status); + if (pRsp->status == 1) { if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) { bool found = false; @@ -135,7 +137,11 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* break; } } - if (!found) return -1; + + if (!found) { + return -1; + } + int32_t left = atomic_sub_fetch_32(&pTask->recoverTryingDownstream, 1); ASSERT(left >= 0); if (left == 0) { @@ -144,7 +150,10 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* streamTaskLaunchRecover(pTask, version); } } else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) { - if (pRsp->reqId != pTask->checkReqId) return -1; + if (pRsp->reqId != pTask->checkReqId) { + return -1; + } + streamTaskLaunchRecover(pTask, version); } else { ASSERT(0); @@ -164,6 +173,7 @@ int32_t streamRestoreParam(SStreamTask* pTask) { void* exec = pTask->exec.pExecutor; return qStreamRestoreParam(exec); } + int32_t streamSetStatusNormal(SStreamTask* pTask) { atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL); return 0; @@ -224,8 +234,8 @@ int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) { // agg int32_t streamAggRecoverPrepare(SStreamTask* pTask) 
{ - void* exec = pTask->exec.pExecutor; pTask->recoverWaitingUpstream = taosArrayGetSize(pTask->childEpInfo); + qDebug("s-task:%s wait for %d upstreams", pTask->id.idStr, pTask->recoverWaitingUpstream); return 0; } @@ -244,6 +254,7 @@ int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) { int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId) { if (pTask->taskLevel == TASK_LEVEL__AGG) { int32_t left = atomic_sub_fetch_32(&pTask->recoverWaitingUpstream, 1); + qDebug("s-task:%s remain unfinished child tasks:%d", pTask->id.idStr, left); ASSERT(left >= 0); if (left == 0) { streamAggChildrenRecoverFinish(pTask); From a3bf0468c373777ac944ac1b32bf148f8989153e Mon Sep 17 00:00:00 2001 From: xleili Date: Fri, 14 Apr 2023 18:01:56 +0800 Subject: [PATCH 17/25] release: upgrade default version --- cmake/cmake.version | 2 +- packaging/tools/makepkg.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/cmake.version b/cmake/cmake.version index 5150ee3b75..232e86d891 100644 --- a/cmake/cmake.version +++ b/cmake/cmake.version @@ -2,7 +2,7 @@ IF (DEFINED VERNUMBER) SET(TD_VER_NUMBER ${VERNUMBER}) ELSE () - SET(TD_VER_NUMBER "3.0.3.2") + SET(TD_VER_NUMBER "3.0.4.0") ENDIF () IF (DEFINED VERCOMPATIBLE) diff --git a/packaging/tools/makepkg.sh b/packaging/tools/makepkg.sh index 0dce526db6..e4df233d67 100755 --- a/packaging/tools/makepkg.sh +++ b/packaging/tools/makepkg.sh @@ -150,7 +150,7 @@ fi mkdir -p ${install_dir}/bin && cp ${bin_files} ${install_dir}/bin && chmod a+x ${install_dir}/bin/* || : mkdir -p ${install_dir}/init.d && cp ${init_file_deb} ${install_dir}/init.d/${serverName}.deb mkdir -p ${install_dir}/init.d && cp ${init_file_rpm} ${install_dir}/init.d/${serverName}.rpm -mkdir -p ${install_dir}/share && cp -rf ${build_dir}/share/{etc,srv} ${install_dir}/share ||: +# mkdir -p ${install_dir}/share && cp -rf ${build_dir}/share/{etc,srv} ${install_dir}/share ||: if [ $adapterName != "taosadapter" ]; then mv 
${install_dir}/cfg/${clientName2}adapter.toml ${install_dir}/cfg/$adapterName.toml @@ -322,6 +322,7 @@ if [[ $dbName == "taos" ]]; then mkdir -p ${install_dir}/share/ cp -Rfap ${web_dir}/admin ${install_dir}/share/ cp ${web_dir}/png/taos.png ${install_dir}/share/admin/images/taos.png + cp -rf ${build_dir}/share/{etc,srv} ${install_dir}/share ||: else echo "directory not found for enterprise release: ${web_dir}/admin" fi From 3ee3b16ebb86bd697ac72e4ffbb4a81696de8a10 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 14 Apr 2023 19:37:58 +0800 Subject: [PATCH 18/25] fix(query): fix the invalid read. --- source/libs/executor/inc/executorimpl.h | 2 +- source/libs/executor/src/executor.c | 4 +--- source/libs/executor/src/executorimpl.c | 9 +++++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 8615c382d3..2cb6626b03 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -826,7 +826,7 @@ void setTaskKilled(SExecTaskInfo* pTaskInfo, int32_t rspCode); void doDestroyTask(SExecTaskInfo* pTaskInfo); void setTaskStatus(SExecTaskInfo* pTaskInfo, int8_t status); -char* buildTaskId(uint64_t taskId, uint64_t queryId); +void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst); SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 2bc91f0cb3..6e3a7d8725 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -173,9 +173,7 @@ void doSetTaskId(SOperatorInfo* pOperator) { void qSetTaskId(qTaskInfo_t tinfo, uint64_t taskId, uint64_t queryId) { SExecTaskInfo* pTaskInfo = tinfo; pTaskInfo->id.queryId = queryId; - - taosMemoryFreeClear(pTaskInfo->id.str); - pTaskInfo->id.str = buildTaskId(taskId, queryId); + buildTaskId(taskId, queryId, pTaskInfo->id.str); // set the idstr for tsdbReader 
doSetTaskId(pTaskInfo->pRoot); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 6c1c3c21c3..7594079cfb 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1151,8 +1151,8 @@ void cleanupExprSupp(SExprSupp* pSupp) { void cleanupBasicInfo(SOptrBasicInfo* pInfo) { pInfo->pRes = blockDataDestroy(pInfo->pRes); } -char* buildTaskId(uint64_t taskId, uint64_t queryId) { - char* p = taosMemoryMalloc(64); +void buildTaskId(uint64_t taskId, uint64_t queryId, char* dst) { + char* p = dst; int32_t offset = 6; memcpy(p, "TID:0x", offset); @@ -1163,7 +1163,6 @@ char* buildTaskId(uint64_t taskId, uint64_t queryId) { offset += tintToHex(queryId, &p[offset]); p[offset] = 0; - return p; } SExecTaskInfo* doCreateExecTaskInfo(uint64_t queryId, uint64_t taskId, int32_t vgId, EOPTR_EXEC_MODEL model, @@ -1185,7 +1184,9 @@ SExecTaskInfo* doCreateExecTaskInfo(uint64_t queryId, uint64_t taskId, int32_t v taosInitRWLatch(&pTaskInfo->lock); pTaskInfo->id.vgId = vgId; pTaskInfo->id.queryId = queryId; - pTaskInfo->id.str = buildTaskId(taskId, queryId); + + pTaskInfo->id.str = taosMemoryMalloc(64); + buildTaskId(taskId, queryId, pTaskInfo->id.str); return pTaskInfo; } From 58d9f615fe1b5e245ef30bdc04535dc46278e4c2 Mon Sep 17 00:00:00 2001 From: Xiaoyu Wang Date: Fri, 14 Apr 2023 19:54:04 +0800 Subject: [PATCH 19/25] fix: udf plan error --- source/libs/nodes/src/nodesEqualFuncs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/nodes/src/nodesEqualFuncs.c b/source/libs/nodes/src/nodesEqualFuncs.c index 4e23999ec2..156744ef1d 100644 --- a/source/libs/nodes/src/nodesEqualFuncs.c +++ b/source/libs/nodes/src/nodesEqualFuncs.c @@ -136,6 +136,7 @@ static bool logicConditionNodeEqual(const SLogicConditionNode* a, const SLogicCo static bool functionNodeEqual(const SFunctionNode* a, const SFunctionNode* b) { COMPARE_SCALAR_FIELD(funcId); + COMPARE_STRING_FIELD(functionName); 
COMPARE_NODE_LIST_FIELD(pParameterList); return true; } From 01bacc10ef7800b4e9ffc0b00d0208b59e0073a2 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 14 Apr 2023 21:43:40 +0800 Subject: [PATCH 20/25] fix: an important fix --- source/dnode/vnode/src/tsdb/tsdbUtil.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index dd11134bd0..c6f0310599 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -1259,6 +1259,7 @@ void tBlockDataReset(SBlockData *pBlockData) { pBlockData->suid = 0; pBlockData->uid = 0; pBlockData->nRow = 0; + pBlockData->nColData = 0; } void tBlockDataClear(SBlockData *pBlockData) { From bd1c8e1902f7175bc5aa2021f96386b717794e2a Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 14 Apr 2023 21:59:08 +0800 Subject: [PATCH 21/25] fix more code --- source/dnode/vnode/src/tsdb/tsdbUtil.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index c6f0310599..7286dcc149 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -13,6 +13,7 @@ * along with this program. If not, see . */ +#include "osMemory.h" #include "tdataformat.h" #include "tsdb.h" @@ -1259,7 +1260,11 @@ void tBlockDataReset(SBlockData *pBlockData) { pBlockData->suid = 0; pBlockData->uid = 0; pBlockData->nRow = 0; + for (int32_t i = 0; i < pBlockData->nColData; i++) { + tColDataDestroy(&pBlockData->aColData[i]); + } pBlockData->nColData = 0; + taosMemoryFreeClear(pBlockData->aColData); } void tBlockDataClear(SBlockData *pBlockData) { From e05d61456a580c7e3c1efbdfe8542121d25feb79 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 14 Apr 2023 22:45:22 +0800 Subject: [PATCH 22/25] fix(stream): disable the status check. 
--- source/libs/stream/src/streamMeta.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index fecc01f295..d3972a19d1 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -84,10 +84,10 @@ void streamMetaClose(SStreamMeta* pMeta) { tdbClose(pMeta->db); void* pIter = NULL; - while(pMeta->walScan) { - qDebug("wait stream daemon quit"); - taosMsleep(100); - } +// while(pMeta->walScan) { +// qDebug("wait stream daemon quit"); +// taosMsleep(100); +// } while (1) { pIter = taosHashIterate(pMeta->pTasks, pIter); From 3aa6897738c7ad0fecfbb7429dbfeeb976f3af20 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 14 Apr 2023 22:56:11 +0800 Subject: [PATCH 23/25] refactor: increase the buffer size --- source/libs/stream/src/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 5ec5be169e..7171b52912 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -16,7 +16,7 @@ #include "streamInc.h" #include "ttimer.h" -#define STREAM_TASK_INPUT_QUEUEU_CAPACITY 2 +#define STREAM_TASK_INPUT_QUEUEU_CAPACITY 2000 int32_t streamInit() { int8_t old; From edf9fdbecc99d2f8aa5b1054769bdc15ad3e5709 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Fri, 14 Apr 2023 23:19:36 +0800 Subject: [PATCH 24/25] make it compile --- source/dnode/vnode/src/tsdb/tsdbUtil.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 7286dcc149..8e778da877 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -13,7 +13,6 @@ * along with this program. If not, see . 
*/ -#include "osMemory.h" #include "tdataformat.h" #include "tsdb.h" From a3b02a80c056820be35acd861e8cdaa54b4218b3 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 15 Apr 2023 00:10:08 +0800 Subject: [PATCH 25/25] fix(stream): update the version when open stream tasks. --- source/libs/stream/src/streamMeta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index d3972a19d1..51cc315780 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -296,7 +296,8 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) { tDecodeStreamTask(&decoder, pTask); tDecoderClear(&decoder); - if (pMeta->expandFunc(pMeta->ahandle, pTask, -1) < 0) { + // todo set correct initial version. + if (pMeta->expandFunc(pMeta->ahandle, pTask, 0) < 0) { tdbFree(pKey); tdbFree(pVal); tdbTbcClose(pCur);