From 3236ef7bae72d50aa526701784fd05d619447d77 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 2 Feb 2024 11:26:47 +0800 Subject: [PATCH 1/2] feat(stream): return the stream source task exec delay, and do some internal refactor. --- include/libs/executor/storageapi.h | 8 +-- include/libs/stream/tstream.h | 2 +- source/dnode/vnode/inc/vnode.h | 4 +- source/dnode/vnode/src/inc/tq.h | 7 +- source/dnode/vnode/src/inc/tsdb.h | 3 +- source/dnode/vnode/src/sma/smaRollup.c | 2 +- source/dnode/vnode/src/sma/smaTimeRange.c | 4 +- source/dnode/vnode/src/tq/tq.c | 3 +- source/dnode/vnode/src/tq/tqRead.c | 2 +- source/dnode/vnode/src/tq/tqSink.c | 8 +-- source/dnode/vnode/src/tq/tqUtil.c | 72 ++++++++++++++++++- source/dnode/vnode/src/tsdb/tsdbRead2.c | 6 +- source/dnode/vnode/src/vnd/vnodeInitApi.c | 6 +- source/dnode/vnode/src/vnd/vnodeQuery.c | 5 ++ .../executor/src/streameventwindowoperator.c | 1 - source/libs/parser/src/parTranslater.c | 58 +++++++++++---- source/libs/qworker/src/qworker.c | 7 ++ source/libs/stream/src/streamCheckpoint.c | 10 +-- source/libs/stream/src/streamQueue.c | 4 +- source/libs/wal/src/walRead.c | 11 ++- source/libs/wal/src/walWrite.c | 2 +- 21 files changed, 165 insertions(+), 60 deletions(-) diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index 669340f9e5..9987dab166 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -211,6 +211,7 @@ typedef struct SStoreTqReader { bool (*tqNextBlockImpl)(); // todo remove it SSDataBlock* (*tqGetResultBlock)(); int64_t (*tqGetResultBlockTime)(); + int32_t (*tqGetStreamExecProgress)(); void (*tqReaderSetColIdList)(); int32_t (*tqReaderSetQueryTableList)(); @@ -266,16 +267,11 @@ typedef struct SStoreMeta { // support filter and non-filter cases. 
[vnodeGetCtbIdList & vnodeGetCtbIdListByFilter] int32_t (*getChildTableList)(void* pVnode, int64_t suid, SArray* list); int32_t (*storeGetTableList)(void* pVnode, int8_t type, SArray* pList); - void* storeGetVersionRange; - void* storeGetLastTimestamp; - - int32_t (*getTableSchema)(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid); // tsdbGetTableSchema + int32_t (*getTableSchema)(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid); int32_t (*getNumOfChildTables)(void* pVnode, int64_t uid, int64_t* numOfTables, int32_t* numOfCols); void (*getBasicInfo)(void* pVnode, const char** dbname, int32_t* vgId, int64_t* numOfTables, int64_t* numOfNormalTables); - int64_t (*getNumOfRowsInMem)(void* pVnode); - SMCtbCursor* (*openCtbCursor)(void* pVnode, tb_uid_t uid, int lock); int32_t (*resumeCtbCursor)(SMCtbCursor* pCtbCur, int8_t first); void (*pauseCtbCursor)(SMCtbCursor* pCtbCur); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 9738be839d..dce8fffe11 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -313,7 +313,7 @@ typedef struct SCheckpointInfo { int64_t failedId; // record the latest failed checkpoint id int64_t checkpointingId; int32_t downstreamAlignNum; - int32_t checkpointNotReadyTasks; + int32_t numOfNotReady; bool dispatchCheckpointTrigger; int64_t msgVer; int32_t transId; diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 97cf0ffebc..3c334be2f2 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -90,6 +90,8 @@ int32_t vnodeGetStbColumnNum(SVnode *pVnode, tb_uid_t suid, int *num); int32_t vnodeGetTimeSeriesNum(SVnode *pVnode, int64_t *num); int32_t vnodeGetAllCtbNum(SVnode *pVnode, int64_t *num); +int32_t vnodeGetTableSchema(void *pVnode, int64_t uid, STSchema **pSchema, int64_t *suid); + void vnodeResetLoad(SVnode *pVnode, SVnodeLoad *pLoad); int32_t vnodeGetLoad(SVnode *pVnode, SVnodeLoad *pLoad); int32_t 
vnodeGetLoadLite(SVnode *pVnode, SVnodeLoadLite *pLoad); @@ -180,7 +182,6 @@ int32_t tsdbCacherowsReaderOpen(void *pVnode, int32_t type, void *pTableIdList, int32_t tsdbRetrieveCacheRows(void *pReader, SSDataBlock *pResBlock, const int32_t *slotIds, const int32_t *dstSlotIds, SArray *pTableUids); void *tsdbCacherowsReaderClose(void *pReader); -int32_t tsdbGetTableSchema(void *pVnode, int64_t uid, STSchema **pSchema, int64_t *suid); void tsdbCacheSetCapacity(SVnode *pVnode, size_t capacity); size_t tsdbCacheGetCapacity(SVnode *pVnode); @@ -233,6 +234,7 @@ int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, i bool tqNextDataBlockFilterOut(STqReader *pReader, SHashObj *filterOutUids); int32_t tqRetrieveDataBlock(STqReader *pReader, SSDataBlock **pRes, const char *idstr); int32_t tqRetrieveTaosxBlock(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet); +int32_t tqGetStreamExecInfo(SVnode* pVnode, int64_t streamId, int64_t* pDelay, bool* fhFinished); // sma int32_t smaGetTSmaDays(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index cded4ddd7c..475a26aff5 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -97,7 +97,6 @@ typedef struct { struct STQ { SVnode* pVnode; char* path; - int64_t walLogLastVer; SRWLatch lock; SHashObj* pPushMgr; // subKey -> STqHandle SHashObj* pHandle; // subKey -> STqHandle @@ -153,14 +152,14 @@ char* tqOffsetBuildFName(const char* path, int32_t fVer); int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname); // tq util -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type); +int32_t tqExtractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type); int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); int32_t 
tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type, int64_t sver, int64_t ever); int32_t tqInitDataRsp(SMqDataRsp* pRsp, STqOffsetVal pOffset); void tqUpdateNodeStage(STQ* pTq, bool isLeader); -int32_t setDstTableDataPayload(uint64_t suid, const STSchema* pTSchema, int32_t blockIndex, SSDataBlock* pDataBlock, - SSubmitTbData* pTableData, const char* id); +int32_t tqSetDstTableDataPayload(uint64_t suid, const STSchema* pTSchema, int32_t blockIndex, SSDataBlock* pDataBlock, + SSubmitTbData* pTableData, const char* id); int32_t doMergeExistedRows(SSubmitTbData* pExisted, const SSubmitTbData* pNew, const char* id); SVCreateTbReq* buildAutoCreateTableReq(const char* stbFullName, int64_t suid, int32_t numOfCols, diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 9d8d5013fa..cac3be9ee3 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -279,6 +279,7 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx); // tsdbRead.c ============================================================================================== int32_t tsdbTakeReadSnap2(STsdbReader *pReader, _query_reseek_func_t reseek, STsdbReadSnap **ppSnap); void tsdbUntakeReadSnap2(STsdbReader *pReader, STsdbReadSnap *pSnap, bool proactive); +int32_t tsdbGetTableSchema(SMeta* pMeta, int64_t uid, STSchema** pSchema, int64_t* suid); // tsdbMerge.c ============================================================================================== typedef struct { @@ -970,8 +971,6 @@ static FORCE_INLINE TSDBROW *tsdbTbDataIterGet(STbDataIter *pIter) { return pIter->pRow; } -int32_t tRowInfoCmprFn(const void *p1, const void *p2); - typedef struct { int64_t suid; int64_t uid; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 138bcbb133..621651507e 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ 
b/source/dnode/vnode/src/sma/smaRollup.c @@ -1554,7 +1554,7 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA } _resume_delete: version = RSMA_EXEC_MSG_VER(msg); - if ((terrno = extractDelDataBlock(RSMA_EXEC_MSG_BODY(msg), RSMA_EXEC_MSG_LEN(msg), version, + if ((terrno = tqExtractDelDataBlock(RSMA_EXEC_MSG_BODY(msg), RSMA_EXEC_MSG_LEN(msg), version, &packData.pDataBlock, 1))) { taosFreeQitem(msg); goto _err; diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index f537ede8c1..767ea47e21 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -203,7 +203,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * int32_t *index = taosHashGet(pTableIndexMap, &groupId, sizeof(groupId)); if (index == NULL) { // no data yet, append it - code = setDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, ""); + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, ""); if (code != TSDB_CODE_SUCCESS) { continue; } @@ -213,7 +213,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * int32_t size = (int32_t)taosArrayGetSize(pReq->aSubmitTbData) - 1; taosHashPut(pTableIndexMap, &groupId, sizeof(groupId), &size, sizeof(size)); } else { - code = setDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, ""); + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, ""); if (code != TSDB_CODE_SUCCESS) { continue; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 8689c30a55..bde6889ecd 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -66,7 +66,6 @@ STQ* tqOpen(const char* path, SVnode* pVnode) { pTq->path = taosStrdup(path); pTq->pVnode = pVnode; - pTq->walLogLastVer = pVnode->pWal->vers.lastVer; pTq->pHandle = taosHashInit(64, MurmurHash3_32, true, HASH_ENTRY_LOCK); 
taosHashSetFreeFp(pTq->pHandle, tqDestroyTqHandle); @@ -1055,7 +1054,7 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { int32_t code = tqStreamTaskProcessRunReq(pTq->pStreamMeta, pMsg, vnodeIsRoleLeader(pTq->pVnode)); // let's continue scan data in the wal files - if(code == 0 && pReq->reqType >= 0){ + if (code == 0 && (pReq->reqType >= 0 || pReq->reqType == STREAM_EXEC_T_RESUME_TASK)) { tqScanWalAsync(pTq, false); } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index 383a636f71..8392f4c479 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -344,7 +344,7 @@ int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, int64_t maxVer, con void* pBody = POINTER_SHIFT(pCont->body, sizeof(SMsgHead)); int32_t len = pCont->bodyLen - sizeof(SMsgHead); - code = extractDelDataBlock(pBody, len, ver, (void**)pItem, 0); + code = tqExtractDelDataBlock(pBody, len, ver, (void**)pItem, 0); if (code == TSDB_CODE_SUCCESS) { if (*pItem == NULL) { tqDebug("s-task:%s empty delete msg, discard it, len:%d, ver:%" PRId64, id, len, ver); diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 7fcb86d84a..7050870c57 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -746,7 +746,7 @@ int32_t setDstTableDataUid(SVnode* pVnode, SStreamTask* pTask, SSDataBlock* pDat return TDB_CODE_SUCCESS; } -int32_t setDstTableDataPayload(uint64_t suid, const STSchema *pTSchema, int32_t blockIndex, SSDataBlock* pDataBlock, +int32_t tqSetDstTableDataPayload(uint64_t suid, const STSchema *pTSchema, int32_t blockIndex, SSDataBlock* pDataBlock, SSubmitTbData* pTableData, const char* id) { int32_t numOfRows = pDataBlock->info.rows; @@ -821,7 +821,7 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { continue; } - code = setDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); + code = tqSetDstTableDataPayload(suid, 
pTSchema, i, pDataBlock, &tbData, id); if (code != TSDB_CODE_SUCCESS) { continue; } @@ -868,7 +868,7 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { continue; } - code = setDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); if (code != TSDB_CODE_SUCCESS) { continue; } @@ -878,7 +878,7 @@ void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data) { int32_t size = (int32_t)taosArrayGetSize(submitReq.aSubmitTbData) - 1; taosHashPut(pTableIndexMap, &groupId, sizeof(groupId), &size, sizeof(size)); } else { - code = setDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); + code = tqSetDstTableDataPayload(suid, pTSchema, i, pDataBlock, &tbData, id); if (code != TSDB_CODE_SUCCESS) { continue; } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index d18455d221..b9f578a74b 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -388,7 +388,7 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* return 0; } -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type) { +int32_t tqExtractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type) { SDecoder* pCoder = &(SDecoder){0}; SDeleteRes* pRes = &(SDeleteRes){0}; @@ -449,3 +449,73 @@ int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** return TSDB_CODE_SUCCESS; } + +int32_t tqGetStreamExecInfo(SVnode* pVnode, int64_t streamId, int64_t* pDelay, bool* fhFinished) { + SStreamMeta* pMeta = pVnode->pTq->pStreamMeta; + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + int32_t code = TSDB_CODE_SUCCESS; + + if (pDelay != NULL) { + *pDelay = 0; + } + + *fhFinished = false; + + if (numOfTasks <= 0) { + return code; + } + + // extract the required source task for a given 
stream, identified by streamId + for (int32_t i = 0; i < numOfTasks; ++i) { + STaskId* pId = taosArrayGet(pMeta->pTaskList, i); + if (pId->streamId != streamId) { + continue; + } + + SStreamTask** ppTask = taosHashGet(pMeta->pTasksMap, pId, sizeof(*pId)); + if (ppTask == NULL) { + tqError("vgId:%d failed to acquire task:0x%" PRIx64 " in retrieving progress", pMeta->vgId, pId->taskId); + continue; + } + + if ((*ppTask)->info.taskLevel != TASK_LEVEL__SOURCE) { + continue; + } + + // here we get the required stream source task + SStreamTask* pTask = *ppTask; + *fhFinished = !HAS_RELATED_FILLHISTORY_TASK(pTask); + + int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader); + + SVersionRange verRange = {0}; + walReaderValidVersionRange(pTask->exec.pWalReader, &verRange.minVer, &verRange.maxVer); + + SWalReader* pReader = walOpenReader(pTask->exec.pWalReader->pWal, NULL, 0); + if (pReader == NULL) { + tqError("failed to open wal reader to extract exec progress, vgId:%d", pMeta->vgId); + continue; + } + + int64_t cur = 0; + int64_t latest = 0; + + code = walFetchHead(pReader, ver); + if (code == TSDB_CODE_SUCCESS) { /* read ingestTs only when the head at the task's current version was fetched */ + cur = pReader->pHead->head.ingestTs; + } + + code = walFetchHead(pReader, verRange.maxVer); + if (code == TSDB_CODE_SUCCESS) { /* read ingestTs only when the head at the max valid version was fetched */ + latest = pReader->pHead->head.ingestTs; + } + + if (pDelay != NULL) { // delay in ms + *pDelay = (latest - cur) / 1000; + } + + walCloseReader(pReader); + } + + return TSDB_CODE_SUCCESS; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 9d158668d2..d9b932a367 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -4995,9 +4995,9 @@ int64_t tsdbGetNumOfRowsInMemTable2(STsdbReader* pReader) { return rows; } -int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) { +int32_t tsdbGetTableSchema(SMeta* pMeta, int64_t uid, STSchema** pSchema, int64_t* suid) { SMetaReader mr = {0}; - 
metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0); + metaReaderDoInit(&mr, pMeta, 0); int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid); if (code != TSDB_CODE_SUCCESS) { terrno = TSDB_CODE_TDB_INVALID_TABLE_ID; @@ -5027,7 +5027,7 @@ int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_ metaReaderClear(&mr); // get the newest table schema version - code = metaGetTbTSchemaEx(((SVnode*)pVnode)->pMeta, *suid, uid, -1, pSchema); + code = metaGetTbTSchemaEx(pMeta, *suid, uid, -1, pSchema); return code; } diff --git a/source/dnode/vnode/src/vnd/vnodeInitApi.c b/source/dnode/vnode/src/vnd/vnodeInitApi.c index c323a81093..2392716bbf 100644 --- a/source/dnode/vnode/src/vnd/vnodeInitApi.c +++ b/source/dnode/vnode/src/vnd/vnodeInitApi.c @@ -91,7 +91,7 @@ void initMetadataAPI(SStoreMeta* pMeta) { pMeta->getTableTypeByName = metaGetTableTypeByName; pMeta->getTableNameByUid = metaGetTableNameByUid; - pMeta->getTableSchema = tsdbGetTableSchema; // todo refactor + pMeta->getTableSchema = vnodeGetTableSchema; pMeta->storeGetTableList = vnodeGetTableList; pMeta->getCachedTableList = metaGetCachedTableUidList; @@ -135,7 +135,9 @@ void initTqAPI(SStoreTqReader* pTq) { pTq->tqReaderNextBlockFilterOut = tqNextDataBlockFilterOut; pTq->tqGetResultBlockTime = tqGetResultBlockTime; -} + + pTq->tqGetStreamExecProgress = tqGetStreamExecInfo; + } void initStateStoreAPI(SStateStore* pStore) { pStore->streamFileStateInit = streamFileStateInit; diff --git a/source/dnode/vnode/src/vnd/vnodeQuery.c b/source/dnode/vnode/src/vnd/vnodeQuery.c index b6a9360afd..4fc7a88494 100644 --- a/source/dnode/vnode/src/vnd/vnodeQuery.c +++ b/source/dnode/vnode/src/vnd/vnodeQuery.c @@ -14,6 +14,7 @@ */ #include "vnd.h" +#include "tsdb.h" #define VNODE_GET_LOAD_RESET_VALS(pVar, oVal, vType, tags) \ do { \ @@ -703,3 +704,7 @@ void *vnodeGetIvtIdx(void *pVnode) { } return metaGetIvtIdx(((SVnode *)pVnode)->pMeta); } + +int32_t vnodeGetTableSchema(void *pVnode, int64_t uid, STSchema 
**pSchema, int64_t *suid) { + return tsdbGetTableSchema(((SVnode*)pVnode)->pMeta, uid, pSchema, suid); +} diff --git a/source/libs/executor/src/streameventwindowoperator.c b/source/libs/executor/src/streameventwindowoperator.c index 8aca76597b..0602016268 100644 --- a/source/libs/executor/src/streameventwindowoperator.c +++ b/source/libs/executor/src/streameventwindowoperator.c @@ -725,7 +725,6 @@ SOperatorInfo* createStreamEventAggOperatorInfo(SOperatorInfo* downstream, SPhys } if (pInfo->isHistoryOp) { - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); pInfo->pAllUpdated = tSimpleHashInit(64, hashFn); } else { pInfo->pAllUpdated = NULL; diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index e221d2158c..0047fdb514 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -8090,27 +8090,27 @@ static int32_t createLastTsSelectStmt(char* pDb, char* pTable, STableMeta* pMeta tstrncpy(col->tableAlias, pTable, tListLen(col->tableAlias)); tstrncpy(col->colName, pMeta->schema[0].name, tListLen(col->colName)); - SNodeList* pParamterList = nodesMakeList(); - if (NULL == pParamterList) { + SNodeList* pParameterList = nodesMakeList(); + if (NULL == pParameterList) { nodesDestroyNode((SNode*)col); return TSDB_CODE_OUT_OF_MEMORY; } - int32_t code = nodesListStrictAppend(pParamterList, (SNode*)col); + int32_t code = nodesListStrictAppend(pParameterList, (SNode*)col); if (code) { - nodesDestroyList(pParamterList); + nodesDestroyList(pParameterList); return code; } - SNode* pFunc = (SNode*)createFunction("last", pParamterList); + SNode* pFunc = (SNode*)createFunction("last", pParameterList); if (NULL == pFunc) { - nodesDestroyList(pParamterList); + nodesDestroyList(pParameterList); return TSDB_CODE_OUT_OF_MEMORY; } SNodeList* pProjectionList = nodesMakeList(); if (NULL == pProjectionList) { - nodesDestroyList(pParamterList); + nodesDestroyNode(pFunc); return 
TSDB_CODE_OUT_OF_MEMORY; } @@ -8122,7 +8122,7 @@ static int32_t createLastTsSelectStmt(char* pDb, char* pTable, STableMeta* pMeta SFunctionNode* pFunc1 = createFunction("_vgid", NULL); if (NULL == pFunc1) { - nodesDestroyList(pParamterList); + nodesDestroyList(pProjectionList); return TSDB_CODE_OUT_OF_MEMORY; } @@ -8135,7 +8135,7 @@ static int32_t createLastTsSelectStmt(char* pDb, char* pTable, STableMeta* pMeta SFunctionNode* pFunc2 = createFunction("_vgver", NULL); if (NULL == pFunc2) { - nodesDestroyList(pParamterList); + nodesDestroyList(pProjectionList); return TSDB_CODE_OUT_OF_MEMORY; } @@ -8152,24 +8152,54 @@ static int32_t createLastTsSelectStmt(char* pDb, char* pTable, STableMeta* pMeta return code; } - // todo add the group by statement SSelectStmt** pSelect1 = (SSelectStmt**)pQuery; (*pSelect1)->pGroupByList = nodesMakeList(); + if (NULL == (*pSelect1)->pGroupByList) { + return TSDB_CODE_OUT_OF_MEMORY; + } SGroupingSetNode* pNode1 = (SGroupingSetNode*)nodesMakeNode(QUERY_NODE_GROUPING_SET); + if (NULL == pNode1) { + return TSDB_CODE_OUT_OF_MEMORY; + } + pNode1->groupingSetType = GP_TYPE_NORMAL; pNode1->pParameterList = nodesMakeList(); - nodesListAppend(pNode1->pParameterList, (SNode*)pFunc1); + if (NULL == pNode1->pParameterList) { + nodesDestroyNode((SNode*)pNode1); + return TSDB_CODE_OUT_OF_MEMORY; + } - nodesListAppend((*pSelect1)->pGroupByList, (SNode*)pNode1); + code = nodesListAppend(pNode1->pParameterList, (SNode*)pFunc1); + if (code) { + nodesDestroyNode((SNode*)pNode1); + return code; + } + + code = nodesListAppend((*pSelect1)->pGroupByList, (SNode*)pNode1); + if (code) { + return code; + } SGroupingSetNode* pNode2 = (SGroupingSetNode*)nodesMakeNode(QUERY_NODE_GROUPING_SET); + if (NULL == pNode2) { + return TSDB_CODE_OUT_OF_MEMORY; + } + pNode2->groupingSetType = GP_TYPE_NORMAL; pNode2->pParameterList = nodesMakeList(); - nodesListAppend(pNode2->pParameterList, (SNode*)pFunc2); + if (NULL == pNode2->pParameterList) { + 
nodesDestroyNode((SNode*)pNode2); /* pNode1 was already appended to pGroupByList; release the node that failed to initialize */ + return TSDB_CODE_OUT_OF_MEMORY; + } - nodesListAppend((*pSelect1)->pGroupByList, (SNode*)pNode2); + code = nodesListAppend(pNode2->pParameterList, (SNode*)pFunc2); + if (code) { + nodesDestroyNode((SNode*)pNode2); + return code; + } + code = nodesListAppend((*pSelect1)->pGroupByList, (SNode*)pNode2); return code; } diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 7376aa3a9c..93559745be 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -737,6 +737,13 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, char *sql) { QW_ERR_JRET(code); } +#if 0 + SReadHandle* pReadHandle = qwMsg->node; + int64_t delay = 0; + bool fhFinish = false; + pReadHandle->api.tqReaderFn.tqGetStreamExecProgress(pReadHandle->vnode, 0, &delay, &fhFinish); +#endif + code = qCreateExecTask(qwMsg->node, mgmt->nodeId, tId, plan, &pTaskInfo, &sinkHandle, sql, OPTR_EXEC_MODEL_BATCH); sql = NULL; if (code) { diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index f45904f036..b1783fb640 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -158,7 +158,7 @@ int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSo pTask->chkInfo.transId = pReq->transId; pTask->chkInfo.checkpointingId = pReq->checkpointId; - pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + pTask->chkInfo.numOfNotReady = streamTaskGetNumOfDownstream(pTask); pTask->chkInfo.startTs = taosGetTimestampMs(); pTask->execInfo.checkpoint += 1; @@ -214,7 +214,7 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc stDebug("s-task:%s set childIdx:%d, and add checkpoint-trigger block into outputQ", id, pTask->info.selfChildId); continueDispatchCheckpointBlock(pBlock, pTask); } else { // only one task exists, no need to dispatch downstream info - 
atomic_add_fetch_32(&pTask->chkInfo.checkpointNotReadyTasks, 1); + atomic_add_fetch_32(&pTask->chkInfo.numOfNotReady, 1); streamProcessCheckpointReadyMsg(pTask); streamFreeQitem((SStreamQueueItem*)pBlock); } @@ -249,7 +249,7 @@ int32_t streamProcessCheckpointBlock(SStreamTask* pTask, SStreamDataBlock* pBloc // set the needed checked downstream tasks, only when all downstream tasks do checkpoint complete, this task // can start local checkpoint procedure - pTask->chkInfo.checkpointNotReadyTasks = streamTaskGetNumOfDownstream(pTask); + pTask->chkInfo.numOfNotReady = streamTaskGetNumOfDownstream(pTask); // Put the checkpoint block into inputQ, to make sure all blocks with less version have been handled by this task // already. And then, dispatch check point msg to all downstream tasks @@ -268,7 +268,7 @@ int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask) { ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE || pTask->info.taskLevel == TASK_LEVEL__AGG); // only when all downstream tasks are send checkpoint rsp, we can start the checkpoint procedure for the agg task - int32_t notReady = atomic_sub_fetch_32(&pTask->chkInfo.checkpointNotReadyTasks, 1); + int32_t notReady = atomic_sub_fetch_32(&pTask->chkInfo.numOfNotReady, 1); ASSERT(notReady >= 0); if (notReady == 0) { @@ -287,7 +287,7 @@ void streamTaskClearCheckInfo(SStreamTask* pTask, bool clearChkpReadyMsg) { pTask->chkInfo.checkpointingId = 0; // clear the checkpoint id pTask->chkInfo.failedId = 0; pTask->chkInfo.startTs = 0; // clear the recorded start time - pTask->chkInfo.checkpointNotReadyTasks = 0; + pTask->chkInfo.numOfNotReady = 0; pTask->chkInfo.transId = 0; pTask->chkInfo.dispatchCheckpointTrigger = false; diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 78929c365e..0936d410bf 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -155,14 +155,14 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, 
SStreamQueueItem** pInpu *blockSize = 0; // no available token in bucket for sink task, let's wait for a little bit - if (taskLevel == TASK_LEVEL__SINK && (!streamTaskExtractAvailableToken(pTask->outputInfo.pTokenBucket, pTask->id.idStr))) { + if (taskLevel == TASK_LEVEL__SINK && (!streamTaskExtractAvailableToken(pTask->outputInfo.pTokenBucket, id))) { stDebug("s-task:%s no available token in bucket for sink data, wait for 10ms", id); return TSDB_CODE_SUCCESS; } while (1) { if (streamTaskShouldPause(pTask) || streamTaskShouldStop(pTask)) { - stDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); + stDebug("s-task:%s task should pause, extract input blocks:%d", id, *numOfBlocks); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 3854e90901..d491b00e73 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -70,10 +70,9 @@ int32_t walNextValidMsg(SWalReader *pReader) { int64_t committedVer = walGetCommittedVer(pReader->pWal); int64_t appliedVer = walGetAppliedVer(pReader->pWal); - wDebug("vgId:%d, wal start to fetch, index:%" PRId64 ", last index:%" PRId64 " commit index:%" PRId64 - ", applied index:%" PRId64, + wDebug("vgId:%d, wal start to fetch, index:%" PRId64 ", last:%" PRId64 " commit:%" PRId64 ", applied:%" PRId64, pReader->pWal->cfg.vgId, fetchVer, lastVer, committedVer, appliedVer); - if (fetchVer > appliedVer){ + if (fetchVer > appliedVer) { terrno = TSDB_CODE_WAL_LOG_NOT_EXIST; return -1; } @@ -86,10 +85,8 @@ int32_t walNextValidMsg(SWalReader *pReader) { int32_t type = pReader->pHead->head.msgType; if (type == TDMT_VND_SUBMIT || ((type == TDMT_VND_DELETE) && (pReader->cond.deleteMsg == 1)) || (IS_META_MSG(type) && pReader->cond.scanMeta)) { - if (walFetchBody(pReader) < 0) { - return -1; - } - return 0; + int32_t code = walFetchBody(pReader); + return (code == TSDB_CODE_SUCCESS)? 
0:-1; } else { if (walSkipFetchBody(pReader) < 0) { return -1; diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index 341d989f8f..9783705bad 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -498,7 +498,7 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy pWal->writeHead.head.version = index; pWal->writeHead.head.bodyLen = bodyLen; pWal->writeHead.head.msgType = msgType; - pWal->writeHead.head.ingestTs = 0; + pWal->writeHead.head.ingestTs = taosGetTimestampUs(); // sync info for sync module pWal->writeHead.head.syncMeta = syncMeta; From 6736fd1615b946d0a50cad8a6278dc008d4c3c87 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sun, 4 Feb 2024 14:11:17 +0800 Subject: [PATCH 2/2] fix(tsdb): check and return if the rows in stt are before the data rows in data files. --- source/dnode/vnode/src/tsdb/tsdbRead2.c | 126 +++++++++++++++--------- 1 file changed, 82 insertions(+), 44 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index d9b932a367..86f58717e2 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -2628,6 +2628,58 @@ static bool moveToNextTableForPreFileSetMem(SReaderStatus* pStatus) { return (pStatus->pProcMemTableIter != NULL); } +static void buildCleanBlockFromSttFiles(STsdbReader* pReader, STableBlockScanInfo* pScanInfo) { + SReaderStatus* pStatus = &pReader->status; + SSttBlockReader* pSttBlockReader = pStatus->fileIter.pSttBlockReader; + SSDataBlock* pResBlock = pReader->resBlockInfo.pResBlock; + + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + SDataBlockInfo* pInfo = &pResBlock->info; + blockDataEnsureCapacity(pResBlock, pScanInfo->numOfRowsInStt); + + pInfo->rows = pScanInfo->numOfRowsInStt; + pInfo->id.uid = pScanInfo->uid; + pInfo->dataLoad = 1; + pInfo->window = pScanInfo->sttWindow; + + setComposedBlockFlag(pReader, true); + + 
pScanInfo->sttKeyInfo.nextProcKey = asc ? pScanInfo->sttWindow.ekey + 1 : pScanInfo->sttWindow.skey - 1; + pScanInfo->sttKeyInfo.status = STT_FILE_NO_DATA; + pScanInfo->lastProcKey = asc ? pScanInfo->sttWindow.ekey : pScanInfo->sttWindow.skey; + pScanInfo->sttBlockReturned = true; + + pSttBlockReader->mergeTree.pIter = NULL; + + tsdbDebug("%p uid:%" PRId64 " return clean stt block as one, brange:%" PRId64 "-%" PRId64 " rows:%" PRId64 " %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, pReader->idStr); +} + +static void buildCleanBlockFromDataFiles(STsdbReader* pReader, STableBlockScanInfo* pScanInfo, + SFileDataBlockInfo* pBlockInfo, int32_t blockIndex) { + // whole block is required, return it directly + SReaderStatus* pStatus = &pReader->status; + SDataBlockInfo* pInfo = &pReader->resBlockInfo.pResBlock->info; + bool asc = ASCENDING_TRAVERSE(pReader->info.order); + + pInfo->rows = pBlockInfo->numRow; + pInfo->id.uid = pScanInfo->uid; + pInfo->dataLoad = 0; + pInfo->version = pReader->info.verRange.maxVer; + pInfo->window = (STimeWindow){.skey = pBlockInfo->firstKey, .ekey = pBlockInfo->lastKey}; + setComposedBlockFlag(pReader, false); + setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->lastKey, pReader->info.order); + + // update the last key for the corresponding table + pScanInfo->lastProcKey = asc ? 
pInfo->window.ekey : pInfo->window.skey; + tsdbDebug("%p uid:%" PRIu64 " clean file block retrieved from file, global index:%d, " + "table index:%d, rows:%d, brange:%" PRId64 "-%" PRId64 ", %s", + pReader, pScanInfo->uid, blockIndex, pBlockInfo->tbBlockIdx, pBlockInfo->numRow, pBlockInfo->firstKey, + pBlockInfo->lastKey, pReader->idStr); +} + static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { SReaderStatus* pStatus = &pReader->status; SSttBlockReader* pSttBlockReader = pStatus->fileIter.pSttBlockReader; @@ -2680,28 +2732,7 @@ static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { // if only require the total rows, no need to load data from stt file if it is clean stt blocks if (pReader->info.execMode == READER_EXEC_ROWS && pScanInfo->cleanSttBlocks) { - bool asc = ASCENDING_TRAVERSE(pReader->info.order); - - SDataBlockInfo* pInfo = &pResBlock->info; - blockDataEnsureCapacity(pResBlock, pScanInfo->numOfRowsInStt); - - pInfo->rows = pScanInfo->numOfRowsInStt; - pInfo->id.uid = pScanInfo->uid; - pInfo->dataLoad = 1; - pInfo->window = pScanInfo->sttWindow; - - setComposedBlockFlag(pReader, true); - - pScanInfo->sttKeyInfo.nextProcKey = asc ? pScanInfo->sttWindow.ekey + 1 : pScanInfo->sttWindow.skey - 1; - pScanInfo->sttKeyInfo.status = STT_FILE_NO_DATA; - pScanInfo->lastProcKey = asc ? 
pScanInfo->sttWindow.ekey : pScanInfo->sttWindow.skey; - pScanInfo->sttBlockReturned = true; - - pSttBlockReader->mergeTree.pIter = NULL; - - tsdbDebug("%p uid:%" PRId64 " return clean stt block as one, brange:%" PRId64 "-%" PRId64 " rows:%" PRId64 " %s", - pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, - pResBlock->info.rows, pReader->idStr); + buildCleanBlockFromSttFiles(pReader, pScanInfo); return TSDB_CODE_SUCCESS; } @@ -2741,10 +2772,11 @@ static int32_t doLoadSttBlockSequentially(STsdbReader* pReader) { } } -static bool notOverlapWithSttFiles(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pScanInfo, bool asc) { +// the current active data block does not overlap with the stt-files/stt-blocks +static bool notOverlapWithFiles(SFileDataBlockInfo* pBlockInfo, STableBlockScanInfo* pScanInfo, bool asc) { ASSERT(pScanInfo->sttKeyInfo.status != STT_FILE_READER_UNINIT); - if (pScanInfo->sttKeyInfo.status == STT_FILE_NO_DATA) { + if ((!hasDataInSttBlock(pScanInfo)) || (pScanInfo->cleanSttBlocks == true)) { return true; } else { int64_t keyInStt = pScanInfo->sttKeyInfo.nextProcKey; @@ -2794,24 +2826,32 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { int64_t endKey = getBoarderKeyInFiles(pBlockInfo, pScanInfo, pReader->info.order); code = buildDataBlockFromBuf(pReader, pScanInfo, endKey); } else { - if (notOverlapWithSttFiles(pBlockInfo, pScanInfo, asc)) { - // whole block is required, return it directly - SDataBlockInfo* pInfo = &pReader->resBlockInfo.pResBlock->info; - pInfo->rows = pBlockInfo->numRow; - pInfo->id.uid = pScanInfo->uid; - pInfo->dataLoad = 0; - pInfo->version = pReader->info.verRange.maxVer; - pInfo->window = (STimeWindow){.skey = pBlockInfo->firstKey, .ekey = pBlockInfo->lastKey}; - setComposedBlockFlag(pReader, false); - setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlockInfo->lastKey, pReader->info.order); + if (notOverlapWithFiles(pBlockInfo, pScanInfo, asc)) { + int64_t keyInStt = 
pScanInfo->sttKeyInfo.nextProcKey; - // update the last key for the corresponding table - pScanInfo->lastProcKey = asc ? pInfo->window.ekey : pInfo->window.skey; - tsdbDebug("%p uid:%" PRIu64 - " clean file block retrieved from file, global index:%d, " - "table index:%d, rows:%d, brange:%" PRId64 "-%" PRId64 ", %s", - pReader, pScanInfo->uid, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlockInfo->numRow, - pBlockInfo->firstKey, pBlockInfo->lastKey, pReader->idStr); + if ((!hasDataInSttBlock(pScanInfo)) || (asc && pBlockInfo->lastKey < keyInStt) || + (!asc && pBlockInfo->firstKey > keyInStt)) { + if (pScanInfo->cleanSttBlocks && hasDataInSttBlock(pScanInfo)) { + if (asc) { // file block is located before the stt block + ASSERT(pScanInfo->sttWindow.skey > pBlockInfo->lastKey); + } else { // stt block is before the file block + ASSERT(pScanInfo->sttWindow.ekey < pBlockInfo->firstKey); + } + } + + buildCleanBlockFromDataFiles(pReader, pScanInfo, pBlockInfo, pBlockIter->index); + } else { // clean stt block + if (asc) { + ASSERT(pScanInfo->sttWindow.ekey < pBlockInfo->firstKey); + } else { + ASSERT(pScanInfo->sttWindow.skey > pBlockInfo->lastKey); + } + + // return the stt file block + ASSERT(pReader->info.execMode == READER_EXEC_ROWS && pSttBlockReader->mergeTree.pIter == NULL); + buildCleanBlockFromSttFiles(pReader, pScanInfo); + return TSDB_CODE_SUCCESS; + } } else { SBlockData* pBData = &pReader->status.fileBlockData; tBlockDataReset(pBData); @@ -2822,7 +2862,6 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { int64_t st = taosGetTimestampUs(); // let's load data from stt files, make sure clear the cleanStt block flag before load the data from stt files - pScanInfo->cleanSttBlocks = false; initSttBlockReader(pSttBlockReader, pScanInfo, pReader); // no data in stt block, no need to proceed. 
@@ -2840,8 +2879,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { // data in stt now overlaps with current active file data block, need to composed with file data block. int64_t lastKeyInStt = getCurrentKeyInSttBlock(pSttBlockReader); - if ((lastKeyInStt >= pBlockInfo->firstKey && asc) || - (lastKeyInStt <= pBlockInfo->lastKey && (!asc))) { + if ((lastKeyInStt >= pBlockInfo->firstKey && asc) || (lastKeyInStt <= pBlockInfo->lastKey && (!asc))) { tsdbDebug("%p lastKeyInStt:%" PRId64 ", overlap with file block, brange:%" PRId64 "-%" PRId64 " %s", pReader, lastKeyInStt, pBlockInfo->firstKey, pBlockInfo->lastKey, pReader->idStr); break;