From 8c5dbde918299250fc6649c0903cf80bc4dca19c Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Wed, 6 Jul 2022 14:20:07 +0800 Subject: [PATCH 1/7] refactor(stream): stream reader created in scanner --- examples/c/tmq.c | 6 +- include/common/tmsg.h | 8 +- include/libs/executor/executor.h | 6 +- include/libs/stream/tstream.h | 13 +- include/libs/wal/wal.h | 4 +- include/util/tutil.h | 1 + source/client/src/tmq.c | 27 ++- source/dnode/vnode/src/sma/smaRollup.c | 2 +- source/dnode/vnode/src/tq/tq.c | 25 ++- source/dnode/vnode/src/tq/tqMeta.c | 2 +- source/libs/executor/inc/executorimpl.h | 8 +- source/libs/executor/src/executor.c | 14 +- source/libs/executor/src/executorimpl.c | 16 +- source/libs/executor/src/scanoperator.c | 164 +++++++++--------- source/libs/executor/src/timewindowoperator.c | 14 +- source/libs/stream/inc/streamInc.h | 6 +- source/libs/stream/src/stream.c | 3 - source/libs/wal/src/walWrite.c | 2 +- source/util/src/tutil.c | 14 ++ tests/system-test/7-tmq/stbTagFilter.py | 4 +- 20 files changed, 189 insertions(+), 150 deletions(-) diff --git a/examples/c/tmq.c b/examples/c/tmq.c index 697a53e570..378f9ffb24 100644 --- a/examples/c/tmq.c +++ b/examples/c/tmq.c @@ -137,8 +137,8 @@ int32_t create_topic() { } taos_free_result(pRes); - /*pRes = taos_query(pConn, "create topic topic_ctb_column with meta as database abc1");*/ - pRes = taos_query(pConn, "create topic topic_ctb_column as select ts, c1, c2, c3 from st1"); + pRes = taos_query(pConn, "create topic topic_ctb_column with meta as database abc1"); + /*pRes = taos_query(pConn, "create topic topic_ctb_column as select ts, c1, c2, c3 from st1");*/ if (taos_errno(pRes) != 0) { printf("failed to create topic topic_ctb_column, reason:%s\n", taos_errstr(pRes)); return -1; @@ -199,7 +199,7 @@ tmq_t* build_consumer() { tmq_conf_set(conf, "msg.with.table.name", "true"); tmq_conf_set(conf, "enable.auto.commit", "true"); - tmq_conf_set(conf, "experimental.snapshot.enable", "true"); + /*tmq_conf_set(conf, 
"experimental.snapshot.enable", "true");*/ tmq_conf_set_auto_commit_cb(conf, tmq_commit_cb_print, NULL); tmq_t* tmq = tmq_consumer_new(conf, NULL, 0); diff --git a/include/common/tmsg.h b/include/common/tmsg.h index dedc06a2b9..7e6231d7dd 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2826,8 +2826,8 @@ typedef struct { static FORCE_INLINE int32_t tEncodeSMqMetaRsp(void** buf, const SMqMetaRsp* pRsp) { int32_t tlen = 0; - tlen += taosEncodeFixedI64(buf, pRsp->reqOffset); - tlen += taosEncodeFixedI64(buf, pRsp->rspOffset); + // tlen += taosEncodeFixedI64(buf, pRsp->reqOffset); + // tlen += taosEncodeFixedI64(buf, pRsp->rspOffset); tlen += taosEncodeFixedI16(buf, pRsp->resMsgType); tlen += taosEncodeFixedI32(buf, pRsp->metaRspLen); tlen += taosEncodeBinary(buf, pRsp->metaRsp, pRsp->metaRspLen); @@ -2835,8 +2835,8 @@ static FORCE_INLINE int32_t tEncodeSMqMetaRsp(void** buf, const SMqMetaRsp* pRsp } static FORCE_INLINE void* tDecodeSMqMetaRsp(const void* buf, SMqMetaRsp* pRsp) { - buf = taosDecodeFixedI64(buf, &pRsp->reqOffset); - buf = taosDecodeFixedI64(buf, &pRsp->rspOffset); + // buf = taosDecodeFixedI64(buf, &pRsp->reqOffset); + // buf = taosDecodeFixedI64(buf, &pRsp->rspOffset); buf = taosDecodeFixedI16(buf, &pRsp->resMsgType); buf = taosDecodeFixedI32(buf, &pRsp->metaRspLen); buf = taosDecodeBinary(buf, &pRsp->metaRsp, pRsp->metaRspLen); diff --git a/include/libs/executor/executor.h b/include/libs/executor/executor.h index 45fa94b3bf..7b4565a99f 100644 --- a/include/libs/executor/executor.h +++ b/include/libs/executor/executor.h @@ -30,13 +30,15 @@ struct SRpcMsg; struct SSubplan; typedef struct SReadHandle { - void* reader; + void* streamReader; void* meta; void* config; void* vnode; void* mnd; SMsgCb* pMsgCb; - bool tqReader; + bool initMetaReader; + bool initTableReader; + bool initStreamReader; } SReadHandle; typedef enum { diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 67074c789e..52f671e176 100644 --- 
a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -223,7 +223,7 @@ typedef struct { SEpSet epSet; } SStreamChildEpInfo; -struct SStreamTask { +typedef struct SStreamTask { int64_t streamId; int32_t taskId; int8_t isDataScan; @@ -235,6 +235,11 @@ struct SStreamTask { int8_t taskStatus; int8_t execStatus; + // exec info + int64_t enqueueVer; + int64_t processedVer; + int64_t checkpointVer; + // node info int32_t selfChildId; int32_t nodeId; @@ -277,7 +282,7 @@ struct SStreamTask { // msg handle SMsgCb* pMsgCb; -}; +} SStreamTask; int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo); int32_t tDecodeStreamEpInfo(SDecoder* pDecoder, SStreamChildEpInfo* pInfo); @@ -288,6 +293,7 @@ int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask); void tFreeSStreamTask(SStreamTask* pTask); static FORCE_INLINE int32_t streamTaskInput(SStreamTask* pTask, SStreamQueueItem* pItem) { +#if 0 while (1) { int8_t inputStatus = atomic_val_compare_exchange_8(&pTask->inputStatus, TASK_INPUT_STATUS__NORMAL, TASK_INPUT_STATUS__PROCESSING); @@ -296,6 +302,7 @@ static FORCE_INLINE int32_t streamTaskInput(SStreamTask* pTask, SStreamQueueItem } ASSERT(0); } +#endif if (pItem->type == STREAM_INPUT__DATA_SUBMIT) { SStreamDataSubmit* pSubmitClone = streamSubmitRefClone((SStreamDataSubmit*)pItem); @@ -316,8 +323,10 @@ static FORCE_INLINE int32_t streamTaskInput(SStreamTask* pTask, SStreamQueueItem atomic_val_compare_exchange_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__IN_ACTIVE, TASK_TRIGGER_STATUS__ACTIVE); } +#if 0 // TODO: back pressure atomic_store_8(&pTask->inputStatus, TASK_INPUT_STATUS__NORMAL); +#endif return 0; } diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index c11651970c..e32a8d1055 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -178,7 +178,6 @@ void walFsync(SWal *, bool force); // apis for lifecycle management int32_t walCommit(SWal *, int64_t ver); -// truncate after int32_t walRollback(SWal 
*, int64_t ver); // notify that previous logs can be pruned safely int32_t walBeginSnapshot(SWal *, int64_t ver); @@ -207,10 +206,11 @@ void walCloseRef(SWalRef *); int32_t walRefVer(SWalRef *, int64_t ver); int32_t walUnrefVer(SWal *); +// help function for raft bool walLogExist(SWal *, int64_t ver); +bool walIsEmpty(SWal *); // lifecycle check -bool walIsEmpty(SWal *); int64_t walGetFirstVer(SWal *); int64_t walGetSnapshotVer(SWal *); int64_t walGetLastVer(SWal *); diff --git a/include/util/tutil.h b/include/util/tutil.h index 6a1a40f14c..2e96c5b88e 100644 --- a/include/util/tutil.h +++ b/include/util/tutil.h @@ -45,6 +45,7 @@ void taosIp2String(uint32_t ip, char *str); void taosIpPort2String(uint32_t ip, uint16_t port, char *str); void *tmemmem(const char *haystack, int hlen, const char *needle, int nlen); +char *strDupUnquo(const char *src); static FORCE_INLINE void taosEncryptPass(uint8_t *inBuf, size_t inLen, char *target) { T_MD5_CTX context; diff --git a/source/client/src/tmq.c b/source/client/src/tmq.c index 667f5b1dbc..331f149e13 100644 --- a/source/client/src/tmq.c +++ b/source/client/src/tmq.c @@ -49,19 +49,18 @@ struct tmq_list_t { }; struct tmq_conf_t { - char clientId[256]; - char groupId[TSDB_CGROUP_LEN]; - int8_t autoCommit; - int8_t resetOffset; - int8_t withTbName; - int8_t spEnable; - int32_t spBatchSize; - uint16_t port; - int32_t autoCommitInterval; - char* ip; - char* user; - char* pass; - /*char* db;*/ + char clientId[256]; + char groupId[TSDB_CGROUP_LEN]; + int8_t autoCommit; + int8_t resetOffset; + int8_t withTbName; + int8_t spEnable; + int32_t spBatchSize; + uint16_t port; + int32_t autoCommitInterval; + char* ip; + char* user; + char* pass; tmq_commit_cb* commitCb; void* commitCbUserParam; }; @@ -337,7 +336,7 @@ tmq_list_t* tmq_list_new() { int32_t tmq_list_append(tmq_list_t* list, const char* src) { SArray* container = &list->container; - char* topic = strdup(src); + char* topic = strDupUnquo(src); if (taosArrayPush(container, &topic) 
== NULL) return -1; return 0; } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 4e1b2db44a..7a147f5ffa 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -332,7 +332,7 @@ int32_t tdProcessRSmaCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, con } SReadHandle handle = { - .reader = pReadHandle, + .streamReader = pReadHandle, .meta = pMeta, .pMsgCb = pMsgCb, .vnode = pVnode, diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 5ce3cfab45..95ec99c9e5 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -28,8 +28,12 @@ int32_t tqInit() { atomic_store_8(&tqMgmt.inited, 0); return -1; } + if (streamInit() < 0) { + return -1; + } atomic_store_8(&tqMgmt.inited, 1); } + return 0; } @@ -361,8 +365,11 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId) { ASSERT(IS_META_MSG(pHead->msgType)); tqInfo("fetch meta msg, ver: %ld, type: %d", pHead->version, pHead->msgType); SMqMetaRsp metaRsp = {0}; - metaRsp.reqOffset = pReq->reqOffset.version; - metaRsp.rspOffset = fetchVer; + /*metaRsp.reqOffset = pReq->reqOffset.version;*/ + /*metaRsp.rspOffset = fetchVer;*/ + /*metaRsp.rspOffsetNew.version = fetchVer;*/ + tqOffsetResetToLog(&metaRsp.reqOffsetNew, pReq->reqOffset.version); + tqOffsetResetToLog(&metaRsp.rspOffsetNew, fetchVer); metaRsp.resMsgType = pHead->msgType; metaRsp.metaRspLen = pHead->bodyLen; metaRsp.metaRsp = pHead->body; @@ -448,10 +455,10 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { req.qmsg = NULL; for (int32_t i = 0; i < 5; i++) { SReadHandle handle = { - .reader = pHandle->execHandle.pExecReader[i], + .streamReader = pHandle->execHandle.pExecReader[i], .meta = pTq->pVnode->pMeta, .vnode = pTq->pVnode, - .tqReader = true, + .initTableReader = true, }; pHandle->execHandle.execCol.task[i] = qCreateStreamExecTaskInfo(pHandle->execHandle.execCol.qmsg, &handle); 
ASSERT(pHandle->execHandle.execCol.task[i]); @@ -522,11 +529,11 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, char* msg, int32_t msgLen) { if (pTask->execType != TASK_EXEC__NONE) { // expand runners if (pTask->isDataScan) { - SStreamReader* pStreamReader = tqInitSubmitMsgScanner(pTq->pVnode->pMeta); - SReadHandle handle = { - .reader = pStreamReader, - .meta = pTq->pVnode->pMeta, - .vnode = pTq->pVnode, + /*SStreamReader* pStreamReader = tqInitSubmitMsgScanner(pTq->pVnode->pMeta);*/ + SReadHandle handle = { + .meta = pTq->pVnode->pMeta, + .vnode = pTq->pVnode, + .initStreamReader = 1, }; /*pTask->exec.inputHandle = pStreamReader;*/ pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle); diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index 398a09ecbc..17f4dc5426 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -84,7 +84,7 @@ int32_t tqMetaOpen(STQ* pTq) { if (handle.execHandle.subType == TOPIC_SUB_TYPE__COLUMN) { for (int32_t i = 0; i < 5; i++) { SReadHandle reader = { - .reader = handle.execHandle.pExecReader[i], + .streamReader = handle.execHandle.pExecReader[i], .meta = pTq->pVnode->pMeta, .pMsgCb = &pTq->pVnode->msgCb, }; diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 00f2e09e0c..9c7dc234ec 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -338,7 +338,7 @@ typedef struct SessionWindowSupporter { uint8_t parentType; } SessionWindowSupporter; -typedef struct SStreamBlockScanInfo { +typedef struct SStreamScanInfo { uint64_t tableUid; // queried super table uid SExprInfo* pPseudoExpr; int32_t numOfPseudoExpr; @@ -355,7 +355,7 @@ typedef struct SStreamBlockScanInfo { int32_t blockType; // current block type int32_t validBlockIndex; // Is current data has returned? 
uint64_t numOfExec; // execution times - void* streamBlockReader;// stream block reader handle + void* streamReader;// stream block reader handle int32_t tsArrayIndex; SArray* tsArray; @@ -364,7 +364,7 @@ typedef struct SStreamBlockScanInfo { EStreamScanMode scanMode; SOperatorInfo* pStreamScanOp; - SOperatorInfo* pSnapshotReadOp; + SOperatorInfo* pTableScanOp; SArray* childIds; SessionWindowSupporter sessionSup; bool assignBlockUid; // assign block uid to groupId, temporarily used for generating rollup SMA. @@ -373,7 +373,7 @@ typedef struct SStreamBlockScanInfo { SSDataBlock* pPullDataRes; // pull data SSDataBlock SSDataBlock* pDeleteDataRes; // delete data SSDataBlock int32_t deleteDataIndex; -} SStreamBlockScanInfo; +} SStreamScanInfo; typedef struct SSysTableScanInfo { SRetrieveMetaTableRsp* pRsp; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 0e76607c8f..3591aaf975 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -37,7 +37,7 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu } else { pOperator->status = OP_NOT_OPENED; - SStreamBlockScanInfo* pInfo = pOperator->info; + SStreamScanInfo* pInfo = pOperator->info; pInfo->assignBlockUid = assignUid; // TODO: if a block was set but not consumed, @@ -45,7 +45,7 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu pInfo->blockType = type; if (type == STREAM_INPUT__DATA_SUBMIT) { - if (tqReadHandleSetMsg(pInfo->streamBlockReader, input, 0) < 0) { + if (tqReadHandleSetMsg(pInfo->streamReader, input, 0) < 0) { qError("submit msg messed up when initing stream block, %s" PRIx64, id); return TSDB_CODE_QRY_APP_ERROR; } @@ -130,7 +130,7 @@ qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, void* streamReadHandle) { return pTaskInfo; } -static SArray* filterQualifiedChildTables(const SStreamBlockScanInfo* pScanInfo, const SArray* tableIdList) { +static SArray* 
filterQualifiedChildTables(const SStreamScanInfo* pScanInfo, const SArray* tableIdList) { SArray* qa = taosArrayInit(4, sizeof(tb_uid_t)); // let's discard the tables those are not created according to the queried super table. @@ -168,17 +168,17 @@ int32_t qUpdateQualifiedTableId(qTaskInfo_t tinfo, const SArray* tableIdList, bo pInfo = pInfo->pDownstream[0]; } - int32_t code = 0; - SStreamBlockScanInfo* pScanInfo = pInfo->info; + int32_t code = 0; + SStreamScanInfo* pScanInfo = pInfo->info; if (isAdd) { // add new table id SArray* qa = filterQualifiedChildTables(pScanInfo, tableIdList); qDebug(" %d qualified child tables added into stream scanner", (int32_t)taosArrayGetSize(qa)); - code = tqReadHandleAddTbUidList(pScanInfo->streamBlockReader, qa); + code = tqReadHandleAddTbUidList(pScanInfo->streamReader, qa); taosArrayDestroy(qa); } else { // remove the table id in current list qDebug(" %d remove child tables from the stream scanner", (int32_t)taosArrayGetSize(tableIdList)); - code = tqReadHandleRemoveTbUidList(pScanInfo->streamBlockReader, tableIdList); + code = tqReadHandleRemoveTbUidList(pScanInfo->streamReader, tableIdList); } return code; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 25b61e15c3..c92fc1b1fb 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2847,12 +2847,12 @@ int32_t doPrepareScan(SOperatorInfo* pOperator, uint64_t uid, int64_t ts) { pOperator->status = OP_OPENED; if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { - SStreamBlockScanInfo* pScanInfo = pOperator->info; + SStreamScanInfo* pScanInfo = pOperator->info; pScanInfo->blockType = STREAM_INPUT__DATA_SCAN; - pScanInfo->pSnapshotReadOp->status = OP_OPENED; + pScanInfo->pTableScanOp->status = OP_OPENED; - STableScanInfo* pInfo = pScanInfo->pSnapshotReadOp->info; + STableScanInfo* pInfo = pScanInfo->pTableScanOp->info; ASSERT(pInfo->scanMode == TABLE_SCAN__TABLE_ORDER); if (uid == 0) { 
@@ -2912,8 +2912,8 @@ int32_t doPrepareScan(SOperatorInfo* pOperator, uint64_t uid, int64_t ts) { int32_t doGetScanStatus(SOperatorInfo* pOperator, uint64_t* uid, int64_t* ts) { int32_t type = pOperator->operatorType; if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { - SStreamBlockScanInfo* pScanInfo = pOperator->info; - STableScanInfo* pSnapShotScanInfo = pScanInfo->pSnapshotReadOp->info; + SStreamScanInfo* pScanInfo = pOperator->info; + STableScanInfo* pSnapShotScanInfo = pScanInfo->pTableScanOp->info; *uid = pSnapShotScanInfo->lastStatus.uid; *ts = pSnapShotScanInfo->lastStatus.ts; } else { @@ -4537,9 +4537,9 @@ static int32_t extractTbscanInStreamOpTree(SOperatorInfo* pOperator, STableScanI } return extractTbscanInStreamOpTree(pOperator->pDownstream[0], ppInfo); } else { - SStreamBlockScanInfo* pInfo = pOperator->info; - ASSERT(pInfo->pSnapshotReadOp->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN); - *ppInfo = pInfo->pSnapshotReadOp->info; + SStreamScanInfo* pInfo = pOperator->info; + ASSERT(pInfo->pTableScanOp->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN); + *ppInfo = pInfo->pTableScanOp->info; return 0; } } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 515efb86f3..2ef0b7470e 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -788,7 +788,7 @@ _error: return NULL; } -static void doClearBufferedBlocks(SStreamBlockScanInfo* pInfo) { +static void doClearBufferedBlocks(SStreamScanInfo* pInfo) { size_t total = taosArrayGetSize(pInfo->pBlockLists); pInfo->validBlockIndex = 0; @@ -799,11 +799,11 @@ static void doClearBufferedBlocks(SStreamBlockScanInfo* pInfo) { taosArrayClear(pInfo->pBlockLists); } -static bool isSessionWindow(SStreamBlockScanInfo* pInfo) { +static bool isSessionWindow(SStreamScanInfo* pInfo) { return pInfo->sessionSup.parentType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; } -static bool isStateWindow(SStreamBlockScanInfo* pInfo) 
{ +static bool isStateWindow(SStreamScanInfo* pInfo) { return pInfo->sessionSup.parentType == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE; } @@ -828,23 +828,21 @@ static uint64_t getGroupId(SOperatorInfo* pOperator, uint64_t uid) { */ } -static void setGroupId(SStreamBlockScanInfo* pInfo, SSDataBlock* pBlock, int32_t groupColIndex, int32_t rowIndex) { +static void setGroupId(SStreamScanInfo* pInfo, SSDataBlock* pBlock, int32_t groupColIndex, int32_t rowIndex) { ASSERT(rowIndex < pBlock->info.rows); - switch (pBlock->info.type) - { - case STREAM_DELETE_DATA: - case STREAM_RETRIEVE: { - SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, groupColIndex); - uint64_t* groupCol = (uint64_t*)pColInfo->pData; - pInfo->groupId = groupCol[rowIndex]; - } - break; - default: - break; + switch (pBlock->info.type) { + case STREAM_DELETE_DATA: + case STREAM_RETRIEVE: { + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, groupColIndex); + uint64_t* groupCol = (uint64_t*)pColInfo->pData; + pInfo->groupId = groupCol[rowIndex]; + } break; + default: + break; } } -static bool prepareDataScan(SStreamBlockScanInfo* pInfo, SSDataBlock* pSDB, int32_t tsColIndex, int32_t* pRowIndex) { +static bool prepareDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_t tsColIndex, int32_t* pRowIndex) { STimeWindow win = { .skey = INT64_MIN, .ekey = INT64_MAX, @@ -864,11 +862,10 @@ static bool prepareDataScan(SStreamBlockScanInfo* pInfo, SSDataBlock* pSDB, int3 win = pCurWin->win; (*pRowIndex) += updateSessionWindowInfo(pCurWin, tsCols, NULL, pSDB->info.rows, *pRowIndex, gap, NULL); } else { - win = - getActiveTimeWindow(NULL, &dumyInfo, tsCols[*pRowIndex], &pInfo->interval, pInfo->interval.precision, NULL); + win = getActiveTimeWindow(NULL, &dumyInfo, tsCols[*pRowIndex], &pInfo->interval, pInfo->interval.precision, NULL); setGroupId(pInfo, pSDB, GROUPID_COLUMN_INDEX, *pRowIndex); - (*pRowIndex) += getNumOfRowsInTimeWindow(&pSDB->info, tsCols, *pRowIndex, win.ekey, 
binarySearchForKey, NULL, - TSDB_ORDER_ASC); + (*pRowIndex) += + getNumOfRowsInTimeWindow(&pSDB->info, tsCols, *pRowIndex, win.ekey, binarySearchForKey, NULL, TSDB_ORDER_ASC); } needRead = true; } else if (isStateWindow(pInfo)) { @@ -886,7 +883,7 @@ static bool prepareDataScan(SStreamBlockScanInfo* pInfo, SSDataBlock* pSDB, int3 if (!needRead) { return false; } - STableScanInfo* pTableScanInfo = pInfo->pSnapshotReadOp->info; + STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; pTableScanInfo->cond.twindows[0] = win; pTableScanInfo->curTWinIdx = 0; // tsdbResetReadHandle(pTableScanInfo->dataReader, &pTableScanInfo->cond, 0); @@ -911,14 +908,14 @@ static void copyOneRow(SSDataBlock* dest, SSDataBlock* source, int32_t sourceRow dest->info.rows++; } -static SSDataBlock* doDataScan(SStreamBlockScanInfo* pInfo, SSDataBlock* pSDB, int32_t tsColIndex, int32_t* pRowIndex) { +static SSDataBlock* doDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_t tsColIndex, int32_t* pRowIndex) { while (1) { SSDataBlock* pResult = NULL; - pResult = doTableScan(pInfo->pSnapshotReadOp); + pResult = doTableScan(pInfo->pTableScanOp); if (pResult == NULL) { if (prepareDataScan(pInfo, pSDB, tsColIndex, pRowIndex)) { // scan next window data - pResult = doTableScan(pInfo->pSnapshotReadOp); + pResult = doTableScan(pInfo->pTableScanOp); } } if (!pResult) { @@ -943,7 +940,8 @@ static SSDataBlock* doDataScan(SStreamBlockScanInfo* pInfo, SSDataBlock* pSDB, i */ } -static void copyDeleteDataBlock(SStreamBlockScanInfo* pInfo, SSDataBlock* pDelBlock, SOperatorInfo* pOperator, SSDataBlock* pUpdateRes) { +static void copyDeleteDataBlock(SStreamScanInfo* pInfo, SSDataBlock* pDelBlock, SOperatorInfo* pOperator, + SSDataBlock* pUpdateRes) { if (pDelBlock->info.rows == 0) { return; } @@ -951,18 +949,20 @@ static void copyDeleteDataBlock(SStreamBlockScanInfo* pInfo, SSDataBlock* pDelBl blockDataEnsureCapacity(pUpdateRes, 64); ASSERT(taosArrayGetSize(pDelBlock->pDataBlock) >= 3); 
SColumnInfoData* pStartTsCol = taosArrayGet(pDelBlock->pDataBlock, START_TS_COLUMN_INDEX); - TSKEY* startData = (TSKEY*)pStartTsCol->pData; + TSKEY* startData = (TSKEY*)pStartTsCol->pData; SColumnInfoData* pEndTsCol = taosArrayGet(pDelBlock->pDataBlock, END_TS_COLUMN_INDEX); - TSKEY* endData = (TSKEY*)pEndTsCol->pData; + TSKEY* endData = (TSKEY*)pEndTsCol->pData; SColumnInfoData* pGpCol = taosArrayGet(pDelBlock->pDataBlock, UID_COLUMN_INDEX); - uint64_t* uidCol = (uint64_t*)pGpCol->pData; + uint64_t* uidCol = (uint64_t*)pGpCol->pData; SColumnInfoData* pDestTsCol = taosArrayGet(pUpdateRes->pDataBlock, START_TS_COLUMN_INDEX); SColumnInfoData* pDestGpCol = taosArrayGet(pUpdateRes->pDataBlock, DELETE_GROUPID_COLUMN_INDEX); - for (int32_t i = pInfo->deleteDataIndex ; i < pDelBlock->info.rows && - i < pDelBlock->info.capacity - (endData[i] - startData[i])/pInfo->interval.interval - 1; i++) { + for (int32_t i = pInfo->deleteDataIndex; + i < pDelBlock->info.rows && + i < pDelBlock->info.capacity - (endData[i] - startData[i]) / pInfo->interval.interval - 1; + i++) { uint64_t groupId = getGroupId(pOperator, uidCol[i]); - for (TSKEY startTs = startData[i]; startTs <= endData[i]; ) { + for (TSKEY startTs = startData[i]; startTs <= endData[i];) { colDataAppend(pDestTsCol, pUpdateRes->info.rows, (const char*)&startTs, false); colDataAppend(pDestGpCol, pUpdateRes->info.rows, (const char*)&groupId, false); pUpdateRes->info.rows++; @@ -977,7 +977,7 @@ static void copyDeleteDataBlock(SStreamBlockScanInfo* pInfo, SSDataBlock* pDelBl } } -static void setUpdateData(SStreamBlockScanInfo* pInfo, SSDataBlock* pBlock, SSDataBlock* pUpdateBlock) { +static void setUpdateData(SStreamScanInfo* pInfo, SSDataBlock* pBlock, SSDataBlock* pUpdateBlock) { blockDataCleanup(pUpdateBlock); int32_t size = taosArrayGetSize(pInfo->tsArray); if (pInfo->tsArrayIndex < size) { @@ -986,11 +986,11 @@ static void setUpdateData(SStreamBlockScanInfo* pInfo, SSDataBlock* pBlock, SSDa 
blockDataEnsureCapacity(pUpdateBlock, size); int32_t rowId = *(int32_t*)taosArrayGet(pInfo->tsArray, pInfo->tsArrayIndex); - pInfo->groupId = getGroupId(pInfo->pSnapshotReadOp, pBlock->info.uid); + pInfo->groupId = getGroupId(pInfo->pTableScanOp, pBlock->info.uid); int32_t i = 0; for (; i < size; i++) { rowId = *(int32_t*)taosArrayGet(pInfo->tsArray, i + pInfo->tsArrayIndex); - uint64_t id = getGroupId(pInfo->pSnapshotReadOp, pBlock->info.uid); + uint64_t id = getGroupId(pInfo->pTableScanOp, pBlock->info.uid); if (pInfo->groupId != id) { break; } @@ -1009,12 +1009,11 @@ static void setUpdateData(SStreamBlockScanInfo* pInfo, SSDataBlock* pBlock, SSDa } if (size == 0) { - copyDeleteDataBlock(pInfo, pInfo->pDeleteDataRes, pInfo->pSnapshotReadOp, pUpdateBlock); + copyDeleteDataBlock(pInfo, pInfo->pDeleteDataRes, pInfo->pTableScanOp, pUpdateBlock); } } -static void checkUpdateData(SStreamBlockScanInfo* pInfo, bool invertible, SSDataBlock* pBlock, - bool out) { +static void checkUpdateData(SStreamScanInfo* pInfo, bool invertible, SSDataBlock* pBlock, bool out) { SColumnInfoData* pColDataInfo = taosArrayGet(pBlock->pDataBlock, pInfo->primaryTsIndex); ASSERT(pColDataInfo->info.type == TSDB_DATA_TYPE_TIMESTAMP); TSKEY* ts = (TSKEY*)pColDataInfo->pData; @@ -1030,15 +1029,15 @@ static void setBlockGroupId(SOperatorInfo* pOperator, SSDataBlock* pBlock, int32 SColumnInfoData* pColDataInfo = taosArrayGet(pBlock->pDataBlock, uidColIndex); uint64_t* uidCol = (uint64_t*)pColDataInfo->pData; ASSERT(pBlock->info.rows > 0); - for (int32_t i = 0 ; i < pBlock->info.rows; i++) { + for (int32_t i = 0; i < pBlock->info.rows; i++) { uidCol[i] = getGroupId(pOperator, uidCol[i]); } } -static SSDataBlock* doStreamBlockScan(SOperatorInfo* pOperator) { +static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { // NOTE: this operator does never check if current status is done or not - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStreamBlockScanInfo* pInfo = pOperator->info; + 
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStreamScanInfo* pInfo = pOperator->info; pTaskInfo->code = pOperator->fpSet._openFn(pOperator); if (pTaskInfo->code != TSDB_CODE_SUCCESS || pOperator->status == OP_EXEC_DONE) { @@ -1056,30 +1055,29 @@ static SSDataBlock* doStreamBlockScan(SOperatorInfo* pOperator) { int32_t current = pInfo->validBlockIndex++; SSDataBlock* pBlock = taosArrayGetP(pInfo->pBlockLists, current); + // TODO move into scan blockDataUpdateTsWindow(pBlock, 0); switch (pBlock->info.type) { - case STREAM_RETRIEVE:{ - pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; - pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER_RETRIEVE; - copyDataBlock(pInfo->pPullDataRes, pBlock); - pInfo->pullDataResIndex = 0; - prepareDataScan(pInfo, pInfo->pPullDataRes, START_TS_COLUMN_INDEX, &pInfo->pullDataResIndex); - updateInfoAddCloseWindowSBF(pInfo->pUpdateInfo); - } - break; - case STREAM_DELETE_DATA: { - pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; - pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER; - copyDataBlock(pInfo->pDeleteDataRes, pBlock); - copyDeleteDataBlock(pInfo, pInfo->pDeleteDataRes, pInfo->pSnapshotReadOp, pInfo->pUpdateRes); - pInfo->updateResIndex = 0; - prepareDataScan(pInfo, pInfo->pUpdateRes, START_TS_COLUMN_INDEX, &pInfo->updateResIndex); - pInfo->pUpdateRes->info.type = STREAM_DELETE_DATA; - return pInfo->pUpdateRes; - } - break; - default: - break; + case STREAM_RETRIEVE: { + pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; + pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER_RETRIEVE; + copyDataBlock(pInfo->pPullDataRes, pBlock); + pInfo->pullDataResIndex = 0; + prepareDataScan(pInfo, pInfo->pPullDataRes, START_TS_COLUMN_INDEX, &pInfo->pullDataResIndex); + updateInfoAddCloseWindowSBF(pInfo->pUpdateInfo); + } break; + case STREAM_DELETE_DATA: { + pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; + pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER; + copyDataBlock(pInfo->pDeleteDataRes, pBlock); + copyDeleteDataBlock(pInfo, pInfo->pDeleteDataRes, 
pInfo->pTableScanOp, pInfo->pUpdateRes); + pInfo->updateResIndex = 0; + prepareDataScan(pInfo, pInfo->pUpdateRes, START_TS_COLUMN_INDEX, &pInfo->updateResIndex); + pInfo->pUpdateRes->info.type = STREAM_DELETE_DATA; + return pInfo->pUpdateRes; + } break; + default: + break; } return pBlock; } else if (pInfo->blockType == STREAM_INPUT__DATA_SUBMIT) { @@ -1128,11 +1126,11 @@ static SSDataBlock* doStreamBlockScan(SOperatorInfo* pOperator) { SDataBlockInfo* pBlockInfo = &pInfo->pRes->info; blockDataCleanup(pInfo->pRes); - while (tqNextDataBlock(pInfo->streamBlockReader)) { + while (tqNextDataBlock(pInfo->streamReader)) { SSDataBlock block = {0}; // todo refactor - int32_t code = tqRetrieveDataBlock(&block, pInfo->streamBlockReader); + int32_t code = tqRetrieveDataBlock(&block, pInfo->streamReader); uint64_t groupId = block.info.groupId; uint64_t uid = block.info.uid; @@ -1228,11 +1226,13 @@ static SSDataBlock* doStreamBlockScan(SOperatorInfo* pOperator) { } } } + return (pBlockInfo->rows == 0) ? NULL : pInfo->pRes; + } else if (pInfo->blockType == STREAM_INPUT__DATA_SCAN) { // check reader last status // if not match, reset status - SSDataBlock* pResult = doTableScan(pInfo->pSnapshotReadOp); + SSDataBlock* pResult = doTableScan(pInfo->pTableScanOp); return pResult && pResult->info.rows > 0 ? 
pResult : NULL; } else { @@ -1256,8 +1256,8 @@ static SArray* extractTableIdList(const STableListInfo* pTableGroupInfo) { SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SExecTaskInfo* pTaskInfo, STimeWindowAggSupp* pTwSup, uint64_t queryId, uint64_t taskId) { - SStreamBlockScanInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamBlockScanInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + SStreamScanInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamScanInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); if (pInfo == NULL || pOperator == NULL) { terrno = TSDB_CODE_QRY_OUT_OF_MEMORY; @@ -1295,32 +1295,40 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys } if (pHandle) { - SOperatorInfo* pTableScanDummy = createTableScanOperatorInfo(pTableScanNode, pHandle, pTaskInfo, queryId, taskId); - STableScanInfo* pSTInfo = (STableScanInfo*)pTableScanDummy->info; + SOperatorInfo* pTableScanOp = createTableScanOperatorInfo(pTableScanNode, pHandle, pTaskInfo, queryId, taskId); + STableScanInfo* pSTInfo = (STableScanInfo*)pTableScanOp->info; SArray* tableList = taosArrayGetP(pTaskInfo->tableqinfoList.pGroupList, 0); - if (pHandle->tqReader) { + if (pHandle->initTableReader) { pSTInfo->scanMode = TABLE_SCAN__TABLE_ORDER; pSTInfo->dataReader = tsdbReaderOpen(pHandle->vnode, &pSTInfo->cond, tableList, 0, 0); } + if (pHandle->initStreamReader) { + ASSERT(pHandle->streamReader == NULL); + pInfo->streamReader = tqInitSubmitMsgScanner(pHandle->meta); + ASSERT(pInfo->streamReader); + } else { + ASSERT(pHandle->streamReader); + pInfo->streamReader = pHandle->streamReader; + } + if (pSTInfo->interval.interval > 0) { pInfo->pUpdateInfo = updateInfoInitP(&pSTInfo->interval, pTwSup->waterMark); } else { pInfo->pUpdateInfo = NULL; } - pInfo->pSnapshotReadOp = pTableScanDummy; + + pInfo->pTableScanOp = pTableScanOp; pInfo->interval = pSTInfo->interval; 
pInfo->readHandle = *pHandle; - ASSERT(pHandle->reader); - pInfo->streamBlockReader = pHandle->reader; pInfo->tableUid = pScanPhyNode->uid; // set the extract column id to streamHandle - tqReadHandleSetColIdList((SStreamReader*)pHandle->reader, pColIds); + tqReadHandleSetColIdList((SStreamReader*)pHandle->streamReader, pColIds); SArray* tableIdList = extractTableIdList(&pTaskInfo->tableqinfoList); - int32_t code = tqReadHandleSetTbUidList(pHandle->reader, tableIdList); + int32_t code = tqReadHandleSetTbUidList(pHandle->streamReader, tableIdList); if (code != 0) { taosArrayDestroy(tableIdList); goto _error; @@ -1344,7 +1352,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys pInfo->deleteDataIndex = 0; pInfo->pDeleteDataRes = createPullDataBlock(); - pOperator->name = "StreamBlockScanOperator"; + pOperator->name = "StreamScanOperator"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN; pOperator->blocking = false; pOperator->status = OP_NOT_OPENED; @@ -1353,7 +1361,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys pOperator->pTaskInfo = pTaskInfo; pOperator->fpSet = - createOperatorFpSet(operatorDummyOpenFn, doStreamBlockScan, NULL, NULL, operatorDummyCloseFn, NULL, NULL, NULL); + createOperatorFpSet(operatorDummyOpenFn, doStreamScan, NULL, NULL, operatorDummyCloseFn, NULL, NULL, NULL); return pOperator; diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index fdef432d95..24749fde2c 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -2980,7 +2980,7 @@ void initDummyFunction(SqlFunctionCtx* pDummy, SqlFunctionCtx* pCtx, int32_t num void initDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup, int64_t gap, int64_t waterMark, uint8_t type) { ASSERT(downstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN); - SStreamBlockScanInfo* pScanInfo = 
downstream->info; + SStreamScanInfo* pScanInfo = downstream->info; pScanInfo->sessionSup = (SessionWindowSupporter){.pStreamAggSup = pAggSup, .gap = gap, .parentType = type}; pScanInfo->pUpdateInfo = updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, waterMark); } @@ -3464,8 +3464,8 @@ typedef SResultWindowInfo* (*__get_win_info_)(void*); SResultWindowInfo* getResWinForSession(void* pData) { return (SResultWindowInfo*)pData; } SResultWindowInfo* getResWinForState(void* pData) { return &((SStateWindowInfo*)pData)->winInfo; } -int32_t closeSessionWindow(SHashObj* pHashMap, STimeWindowAggSupp* pTwSup, - SArray* pClosed, __get_win_info_ fn, bool delete) { +int32_t closeSessionWindow(SHashObj* pHashMap, STimeWindowAggSupp* pTwSup, SArray* pClosed, __get_win_info_ fn, + bool delete) { // Todo(liuyao) save window to tdb void** pIte = NULL; size_t keyLen = 0; @@ -3604,8 +3604,8 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { // restore the value pOperator->status = OP_RES_TO_RETURN; - closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pUpdated, - getResWinForSession, pInfo->ignoreExpiredData); + closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pUpdated, getResWinForSession, + pInfo->ignoreExpiredData); closeChildSessionWindow(pInfo->pChildren, pInfo->twAggSup.maxTs, pInfo->ignoreExpiredData); copyUpdateResult(pStUpdated, pUpdated); taosHashCleanup(pStUpdated); @@ -4097,8 +4097,8 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { // restore the value pOperator->status = OP_RES_TO_RETURN; - closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pUpdated, - getResWinForState, pInfo->ignoreExpiredData); + closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pUpdated, getResWinForState, + pInfo->ignoreExpiredData); closeChildSessionWindow(pInfo->pChildren, pInfo->twAggSup.maxTs, pInfo->ignoreExpiredData); copyUpdateResult(pSeUpdated, pUpdated); 
/*
 * Duplicate `src`, stripping one pair of enclosing backticks if present.
 *
 *   "`abc`" -> "abc"      "abc" -> "abc"      "``" -> ""
 *
 * Returns NULL when `src` is NULL, when an opening backtick has no matching
 * closing backtick (including the single-character string "`"), or when the
 * allocation fails.
 *
 * The result is always obtained from taosMemoryMalloc(), for both the quoted
 * and the unquoted case, so the caller releases it the same way either way.
 */
char *strDupUnquo(const char *src) {
  if (src == NULL) return NULL;

  size_t      len = strlen(src);
  const char *start = src;
  size_t      outLen = len;

  if (src[0] == '`') {
    // A quoted name needs both delimiters; anything shorter is malformed.
    if (len < 2 || src[len - 1] != '`') return NULL;
    start = src + 1;   // skip the opening backtick
    outLen = len - 2;  // drop both backticks (the closing one used to be copied)
  }

  char *ret = taosMemoryMalloc(outLen + 1);
  if (ret == NULL) return NULL;

  memcpy(ret, start, outLen);
  ret[outLen] = 0;
  return ret;
}
tqReadHandleSetColIdList((SStreamReader*)pHandle->streamReader, pColIds); + tqReadHandleSetColIdList(pInfo->streamReader, pColIds); SArray* tableIdList = extractTableIdList(&pTaskInfo->tableqinfoList); - int32_t code = tqReadHandleSetTbUidList(pHandle->streamReader, tableIdList); + int32_t code = tqReadHandleSetTbUidList(pInfo->streamReader, tableIdList); if (code != 0) { taosArrayDestroy(tableIdList); goto _error; From baa48489b7f901db5ba89c7e0b220eb32dfd4a57 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Wed, 6 Jul 2022 21:25:34 +0800 Subject: [PATCH 3/7] merge from 3.0 --- include/common/tmsgdef.h | 1 + include/libs/nodes/plannodes.h | 1 + source/client/src/clientEnv.c | 2 +- source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 1 + source/dnode/mgmt/mgmt_qnode/src/qmHandle.c | 1 + source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 1 + source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 4 +- source/dnode/mgmt/node_mgmt/src/dmTransport.c | 5 +- source/dnode/mnode/impl/src/mndMain.c | 3 +- source/dnode/mnode/impl/src/mndQuery.c | 2 + source/dnode/qnode/src/qnode.c | 1 + source/dnode/vnode/inc/vnode.h | 25 +- source/dnode/vnode/src/inc/vnd.h | 2 + source/dnode/vnode/src/vnd/vnodeSvr.c | 16 +- source/dnode/vnode/src/vnd/vnodeSync.c | 52 +++-- source/libs/executor/inc/executorimpl.h | 7 +- source/libs/executor/src/executorimpl.c | 2 +- source/libs/executor/src/scanoperator.c | 158 +++++++++++-- source/libs/executor/src/timewindowoperator.c | 217 +++++++++++++----- source/libs/function/src/udfd.c | 2 +- source/libs/nodes/src/nodesCloneFuncs.c | 1 + source/libs/nodes/src/nodesCodeFuncs.c | 7 + source/libs/qworker/inc/qwInt.h | 1 + source/libs/qworker/inc/qwMsg.h | 3 +- source/libs/qworker/src/qwMsg.c | 6 +- source/libs/qworker/src/qworker.c | 8 +- source/libs/qworker/test/qworkerTests.cpp | 4 +- source/libs/scheduler/inc/schInt.h | 5 +- source/libs/scheduler/src/schRemote.c | 8 +- source/libs/scheduler/src/schTask.c | 6 +- source/libs/sync/src/syncMain.c | 7 +- 
tests/script/tsim/stream/state0.sim | 49 ++++ 32 files changed, 473 insertions(+), 135 deletions(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 806c0b5122..8b39530e84 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -209,6 +209,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_SCH_QUERY_CONTINUE, "query-continue", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_QUERY_HEARTBEAT, "query-heartbeat", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_FETCH, "fetch", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SCH_MERGE_FETCH, "merge-fetch", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_CANCEL_TASK, "cancel-task", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_DROP_TASK, "drop-task", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SCH_EXPLAIN, "explain", NULL, NULL) diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index d3e9840987..6a865b4e2a 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -353,6 +353,7 @@ typedef struct SDownstreamSourceNode { uint64_t taskId; uint64_t schedId; int32_t execId; + int32_t fetchMsgType; } SDownstreamSourceNode; typedef struct SExchangePhysiNode { diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 797d58e6ef..89ecf16b40 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -102,7 +102,7 @@ void closeTransporter(SAppInstInfo *pAppInfo) { static bool clientRpcRfp(int32_t code, tmsg_t msgType) { if (NEED_REDIRECT_ERROR(code)) { - if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH) { + if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || msgType == TDMT_SCH_MERGE_FETCH) { return false; } return true; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 59d68b2110..d60b69daba 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -214,6 +214,7 @@ SArray 
*mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, mmPutMsgToQueryQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_HEARTBEAT, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_FETCH, mmPutMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_CREATE_STB_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_STB_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_DROP_STB_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c index 1f22eefddf..14cb1bd533 100644 --- a/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c +++ b/source/dnode/mgmt/mgmt_qnode/src/qmHandle.c @@ -111,6 +111,7 @@ SArray *qmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_QUERY, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, qmPutNodeMsgToQueryQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_FETCH, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH_RSP, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_HEARTBEAT, qmPutNodeMsgToFetchQueue, 1) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 1f7347203d..a3df32a08c 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -328,6 +328,7 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_QUERY, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if 
(dmSetMgmtHandle(pArray, TDMT_SCH_QUERY_CONTINUE, vmPutMsgToQueryQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SCH_FETCH, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SCH_MERGE_FETCH, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_ALTER_TABLE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_UPDATE_TAG_VAL, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_TABLE_META, vmPutMsgToFetchQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 3913e3fda8..1d795c74f2 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -238,9 +238,9 @@ int32_t vmGetQueueSize(SVnodeMgmt *pMgmt, int32_t vgId, EQueueType qtype) { } int32_t vmAllocQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { - pVnode->pWriteQ = tWWorkerAllocQueue(&pMgmt->writePool, pVnode->pImpl, (FItems)vnodeProposeMsg); + pVnode->pWriteQ = tWWorkerAllocQueue(&pMgmt->writePool, pVnode->pImpl, (FItems)vnodeProposeWriteMsg); pVnode->pSyncQ = tWWorkerAllocQueue(&pMgmt->syncPool, pVnode, (FItems)vmProcessSyncQueue); - pVnode->pApplyQ = tWWorkerAllocQueue(&pMgmt->applyPool, pVnode->pImpl, (FItems)vnodeApplyMsg); + pVnode->pApplyQ = tWWorkerAllocQueue(&pMgmt->applyPool, pVnode->pImpl, (FItems)vnodeApplyWriteMsg); pVnode->pQueryQ = tQWorkerAllocQueue(&pMgmt->queryPool, pVnode, (FItem)vmProcessQueryQueue); pVnode->pFetchQ = tQWorkerAllocQueue(&pMgmt->fetchPool, pVnode, (FItem)vmProcessFetchQueue); diff --git a/source/dnode/mgmt/node_mgmt/src/dmTransport.c b/source/dnode/mgmt/node_mgmt/src/dmTransport.c index 7043991525..ad7fbf4344 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmTransport.c +++ b/source/dnode/mgmt/node_mgmt/src/dmTransport.c @@ -42,7 +42,7 @@ static inline void dmBuildMnodeRedirectRsp(SDnode *pDnode, SRpcMsg *pMsg) { static 
inline void dmSendRedirectRsp(SRpcMsg *pMsg, const SEpSet *pNewEpSet) { pMsg->info.hasEpSet = 1; - SRpcMsg rsp = {.code = TSDB_CODE_RPC_REDIRECT, .info = pMsg->info}; + SRpcMsg rsp = {.code = TSDB_CODE_RPC_REDIRECT, .info = pMsg->info, .msgType = pMsg->msgType}; int32_t contLen = tSerializeSEpSet(NULL, 0, pNewEpSet); rsp.pCont = rpcMallocCont(contLen); @@ -88,6 +88,7 @@ static void dmProcessRpcMsg(SDnode *pDnode, SRpcMsg *pRpc, SEpSet *pEpSet) { case TDMT_MND_SYSTABLE_RETRIEVE_RSP: case TDMT_DND_SYSTABLE_RETRIEVE_RSP: case TDMT_SCH_FETCH_RSP: + case TDMT_SCH_MERGE_FETCH_RSP: qWorkerProcessFetchRsp(NULL, NULL, pRpc, 0); return; case TDMT_MND_STATUS_RSP: @@ -253,7 +254,7 @@ static inline void dmReleaseHandle(SRpcHandleInfo *pHandle, int8_t type) { static bool rpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY || code == TSDB_CODE_RPC_BROKEN_LINK) { - if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH) { + if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || msgType == TDMT_SCH_MERGE_FETCH) { return false; } return true; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 5e708616fd..f18f3c983e 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -531,7 +531,8 @@ static int32_t mndCheckMnodeState(SRpcMsg *pMsg) { if (!IsReq(pMsg)) return 0; if (pMsg->msgType == TDMT_SCH_QUERY || pMsg->msgType == TDMT_SCH_MERGE_QUERY || pMsg->msgType == TDMT_SCH_QUERY_CONTINUE || pMsg->msgType == TDMT_SCH_QUERY_HEARTBEAT || - pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_DROP_TASK) { + pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_SCH_MERGE_FETCH || + pMsg->msgType == TDMT_SCH_DROP_TASK) { return 0; } if 
(mndAcquireRpcRef(pMsg->info.node) == 0) return 0; diff --git a/source/dnode/mnode/impl/src/mndQuery.c b/source/dnode/mnode/impl/src/mndQuery.c index aec99fa3b7..5a527b994e 100644 --- a/source/dnode/mnode/impl/src/mndQuery.c +++ b/source/dnode/mnode/impl/src/mndQuery.c @@ -45,6 +45,7 @@ int32_t mndProcessQueryMsg(SRpcMsg *pMsg) { code = qWorkerProcessCQueryMsg(&handle, pMnode->pQuery, pMsg, 0); break; case TDMT_SCH_FETCH: + case TDMT_SCH_MERGE_FETCH: code = qWorkerProcessFetchMsg(pMnode, pMnode->pQuery, pMsg, 0); break; case TDMT_SCH_DROP_TASK: @@ -72,6 +73,7 @@ int32_t mndInitQuery(SMnode *pMnode) { mndSetMsgHandle(pMnode, TDMT_SCH_MERGE_QUERY, mndProcessQueryMsg); mndSetMsgHandle(pMnode, TDMT_SCH_QUERY_CONTINUE, mndProcessQueryMsg); mndSetMsgHandle(pMnode, TDMT_SCH_FETCH, mndProcessQueryMsg); + mndSetMsgHandle(pMnode, TDMT_SCH_MERGE_FETCH, mndProcessQueryMsg); mndSetMsgHandle(pMnode, TDMT_SCH_DROP_TASK, mndProcessQueryMsg); mndSetMsgHandle(pMnode, TDMT_SCH_QUERY_HEARTBEAT, mndProcessQueryMsg); diff --git a/source/dnode/qnode/src/qnode.c b/source/dnode/qnode/src/qnode.c index cfc63b083d..723402e639 100644 --- a/source/dnode/qnode/src/qnode.c +++ b/source/dnode/qnode/src/qnode.c @@ -86,6 +86,7 @@ int32_t qndProcessQueryMsg(SQnode *pQnode, int64_t ts, SRpcMsg *pMsg) { code = qWorkerProcessCQueryMsg(&handle, pQnode->pQuery, pMsg, ts); break; case TDMT_SCH_FETCH: + case TDMT_SCH_MERGE_FETCH: code = qWorkerProcessFetchMsg(pQnode, pQnode->pQuery, pMsg, ts); break; case TDMT_SCH_FETCH_RSP: diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 0498d889d6..6320f4719d 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -51,15 +51,7 @@ int32_t vnodeCreate(const char *path, SVnodeCfg *pCfg, STfs *pTfs); void vnodeDestroy(const char *path, STfs *pTfs); SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb); void vnodeClose(SVnode *pVnode); -int32_t vnodePreProcessReq(SVnode *pVnode, SRpcMsg *pMsg); -int32_t 
vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRpcMsg *pRsp); -int32_t vnodeProcessCMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp); -int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp); -int32_t vnodePreprocessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg); -int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg); -int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); -int32_t vnodeGetLoad(SVnode *pVnode, SVnodeLoad *pLoad); -int32_t vnodeValidateTableHash(SVnode *pVnode, char *tableFName); + int32_t vnodeStart(SVnode *pVnode); void vnodeStop(SVnode *pVnode); int64_t vnodeGetSyncHandle(SVnode *pVnode); @@ -68,14 +60,25 @@ void vnodeGetInfo(SVnode *pVnode, const char **dbname, int32_t *vgId); int32_t vnodeSnapshotReaderOpen(SVnode *pVnode, SVSnapshotReader **ppReader, int64_t sver, int64_t ever); int32_t vnodeSnapshotReaderClose(SVSnapshotReader *pReader); int32_t vnodeSnapshotRead(SVSnapshotReader *pReader, const void **ppData, uint32_t *nData); + int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen); int32_t vnodeGetAllTableList(SVnode *pVnode, uint64_t uid, SArray *list); int32_t vnodeGetCtbIdList(SVnode *pVnode, int64_t suid, SArray *list); void *vnodeGetIdx(SVnode *pVnode); void *vnodeGetIvtIdx(SVnode *pVnode); -void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); -void vnodeApplyMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); +int32_t vnodeGetLoad(SVnode *pVnode, SVnodeLoad *pLoad); +int32_t vnodeValidateTableHash(SVnode *pVnode, char *tableFName); + +int32_t vnodePreProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg); +int32_t vnodePreprocessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg); + +int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRpcMsg *pRsp); +int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp); +int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg); +int32_t 
vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); +void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); +void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); // meta typedef struct SMeta SMeta; // todo: remove diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index 32be479116..cb25e93cde 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -94,6 +94,8 @@ int32_t vnodeAsyncCommit(SVnode* pVnode); int32_t vnodeSyncOpen(SVnode* pVnode, char* path); void vnodeSyncStart(SVnode* pVnode); void vnodeSyncClose(SVnode* pVnode); +void vnodeRedirectRpcMsg(SVnode* pVnode, SRpcMsg* pMsg); +bool vnodeIsLeader(SVnode* pVnode); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 8ad92d0478..b3e9f53a5a 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -28,7 +28,7 @@ static int32_t vnodeProcessAlterHasnRangeReq(SVnode *pVnode, int64_t version, vo static int32_t vnodeProcessDropTtlTbReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp); -int32_t vnodePreProcessReq(SVnode *pVnode, SRpcMsg *pMsg) { +int32_t vnodePreProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg) { int32_t code = 0; SDecoder dc = {0}; @@ -133,7 +133,7 @@ _err: return code; } -int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRpcMsg *pRsp) { +int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRpcMsg *pRsp) { void *ptr = NULL; void *pReq; int32_t len; @@ -261,6 +261,11 @@ int32_t vnodePreprocessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { vTrace("message in vnode query queue is processing"); + if ((pMsg->msgType == 
TDMT_SCH_QUERY) && !vnodeIsLeader(pVnode)) { + vnodeRedirectRpcMsg(pVnode, pMsg); + return 0; + } + SReadHandle handle = {.meta = pVnode->pMeta, .config = &pVnode->config, .vnode = pVnode, .pMsgCb = &pVnode->msgCb}; switch (pMsg->msgType) { case TDMT_SCH_QUERY: @@ -276,11 +281,18 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg) { int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) { vTrace("message in fetch queue is processing"); + if ((pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_VND_TABLE_META || pMsg->msgType == TDMT_VND_TABLE_CFG) + && !vnodeIsLeader(pVnode)) { + vnodeRedirectRpcMsg(pVnode, pMsg); + return 0; + } + char *msgstr = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); switch (pMsg->msgType) { case TDMT_SCH_FETCH: + case TDMT_SCH_MERGE_FETCH: return qWorkerProcessFetchMsg(pVnode, pVnode->pQuery, pMsg, 0); case TDMT_SCH_FETCH_RSP: return qWorkerProcessFetchRsp(pVnode, pVnode->pQuery, pMsg, 0); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 6f6102ea14..f75ccba4bb 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -120,7 +120,24 @@ static int32_t vnodeProcessAlterReplicaReq(SVnode *pVnode, SRpcMsg *pMsg) { return code; } -void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { +void vnodeRedirectRpcMsg(SVnode *pVnode, SRpcMsg *pMsg) { + SEpSet newEpSet = {0}; + syncGetRetryEpSet(pVnode->sync, &newEpSet); + + const STraceId *trace = &pMsg->info.traceId; + vGTrace("vgId:%d, msg:%p is redirect since not leader, numOfEps:%d inUse:%d", pVnode->config.vgId, pMsg, + newEpSet.numOfEps, newEpSet.inUse); + for (int32_t i = 0; i < newEpSet.numOfEps; ++i) { + vGTrace("vgId:%d, msg:%p redirect:%d ep:%s:%u", pVnode->config.vgId, pMsg, i, newEpSet.eps[i].fqdn, + newEpSet.eps[i].port); + } + pMsg->info.hasEpSet = 1; + + SRpcMsg rsp = {.code = 
TSDB_CODE_RPC_REDIRECT, .info = pMsg->info, .msgType = pMsg->msgType + 1}; + tmsgSendRedirectRsp(&rsp, &newEpSet); +} + +void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { SVnode *pVnode = pInfo->ahandle; int32_t vgId = pVnode->config.vgId; int32_t code = 0; @@ -131,7 +148,7 @@ void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { const STraceId *trace = &pMsg->info.traceId; vGTrace("vgId:%d, msg:%p get from vnode-write queue handle:%p", vgId, pMsg, pMsg->info.handle); - code = vnodePreProcessReq(pVnode, pMsg); + code = vnodePreProcessWriteMsg(pVnode, pMsg); if (code != 0) { vError("vgId:%d, msg:%p failed to pre-process since %s", vgId, pMsg, terrstr()); } else { @@ -141,7 +158,7 @@ void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { code = syncPropose(pVnode->sync, pMsg, vnodeIsMsgWeak(pMsg->msgType)); if (code > 0) { SRpcMsg rsp = {.code = pMsg->code, .info = pMsg->info}; - if (vnodeProcessWriteReq(pVnode, pMsg, pMsg->info.conn.applyIndex, &rsp) < 0) { + if (vnodeProcessWriteMsg(pVnode, pMsg, pMsg->info.conn.applyIndex, &rsp) < 0) { rsp.code = terrno; vError("vgId:%d, msg:%p failed to apply right now since %s", vgId, pMsg, terrstr()); } @@ -156,16 +173,7 @@ void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { vnodeAccumBlockMsg(pVnode, pMsg->msgType); } else if (code < 0) { if (terrno == TSDB_CODE_SYN_NOT_LEADER) { - SEpSet newEpSet = {0}; - syncGetRetryEpSet(pVnode->sync, &newEpSet); - vGTrace("vgId:%d, msg:%p is redirect since not leader, numOfEps:%d inUse:%d", vgId, pMsg, newEpSet.numOfEps, - newEpSet.inUse); - for (int32_t i = 0; i < newEpSet.numOfEps; ++i) { - vGTrace("vgId:%d, msg:%p redirect:%d ep:%s:%u", vgId, pMsg, i, newEpSet.eps[i].fqdn, newEpSet.eps[i].port); - } - pMsg->info.hasEpSet = 1; - SRpcMsg rsp = {.code = TSDB_CODE_RPC_REDIRECT, .info = pMsg->info}; - tmsgSendRedirectRsp(&rsp, &newEpSet); + vnodeRedirectRpcMsg(pVnode, pMsg); } else { if 
(terrno != 0) code = terrno; vError("vgId:%d, msg:%p failed to propose since %s, code:0x%x", vgId, pMsg, tstrerror(code), code); @@ -185,7 +193,7 @@ void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { vnodeWaitBlockMsg(pVnode); } -void vnodeApplyMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { +void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { SVnode *pVnode = pInfo->ahandle; int32_t vgId = pVnode->config.vgId; int32_t code = 0; @@ -199,7 +207,7 @@ void vnodeApplyMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { SRpcMsg rsp = {.code = pMsg->code, .info = pMsg->info}; if (rsp.code == 0) { - if (vnodeProcessWriteReq(pVnode, pMsg, pMsg->info.conn.applyIndex, &rsp) < 0) { + if (vnodeProcessWriteMsg(pVnode, pMsg, pMsg->info.conn.applyIndex, &rsp) < 0) { rsp.code = terrno; vError("vgId:%d, msg:%p failed to apply since %s", vgId, pMsg, terrstr()); } @@ -513,3 +521,17 @@ void vnodeSyncStart(SVnode *pVnode) { } void vnodeSyncClose(SVnode *pVnode) { syncStop(pVnode->sync); } + +bool vnodeIsLeader(SVnode *pVnode) { + if (!syncIsReady(pVnode->sync)) { + return false; + } + + // todo + // if (!pVnode->restored) { + // terrno = TSDB_CODE_APP_NOT_READY; + // return false; + // } + + return true; +} \ No newline at end of file diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index 1d93903019..9b32560041 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -319,8 +319,9 @@ typedef enum EStreamScanMode { STREAM_SCAN_FROM_READERHANDLE = 1, STREAM_SCAN_FROM_RES, STREAM_SCAN_FROM_UPDATERES, - STREAM_SCAN_FROM_DATAREADER, + STREAM_SCAN_FROM_DATAREADER, // todo(liuyao) delete it STREAM_SCAN_FROM_DATAREADER_RETRIEVE, + STREAM_SCAN_FROM_DATAREADER_RANGE, } EStreamScanMode; typedef struct SCatchSupporter { @@ -612,6 +613,7 @@ typedef struct SStreamSessionAggOperatorInfo { SSDataBlock* pWinBlock; // window result 
SqlFunctionCtx* pDummyCtx; // for combine SSDataBlock* pDelRes; // delete result + bool returnDelete; SSDataBlock* pUpdateRes; // update window SHashObj* pStDeleted; void* pDelIterator; @@ -889,6 +891,9 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, const char* pKey, SqlF SResultRow* getNewResultRow(SDiskbasedBuf* pResultBuf, int64_t tableGroupId, int32_t interBufSize); SResultWindowInfo* getSessionTimeWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, int64_t gap, int32_t* pIndex); +SResultWindowInfo* getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, + TSKEY endTs, uint64_t groupId, int64_t gap, int32_t* pIndex); +bool isInTimeWindow(STimeWindow* pWin, TSKEY ts, int64_t gap); int32_t updateSessionWindowInfo(SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TSKEY* pEndTs, int32_t rows, int32_t start, int64_t gap, SHashObj* pStDeleted); bool functionNeedToExecute(SqlFunctionCtx* pCtx); diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 63f8c9769b..fa84d79f56 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2053,7 +2053,7 @@ static int32_t doSendFetchDataRequest(SExchangeInfo* pExchangeInfo, SExecTaskInf pMsgSendInfo->param = pWrapper; pMsgSendInfo->msgInfo.pData = pMsg; pMsgSendInfo->msgInfo.len = sizeof(SResFetchReq); - pMsgSendInfo->msgType = TDMT_SCH_FETCH; + pMsgSendInfo->msgType = pSource->fetchMsgType; pMsgSendInfo->fp = loadRemoteDataCallback; int64_t transporterId = 0; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 49e79639a8..aae85bbbc1 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -792,13 +792,19 @@ static void doClearBufferedBlocks(SStreamScanInfo* pInfo) { } static bool isSessionWindow(SStreamScanInfo* pInfo) { - return pInfo->sessionSup.parentType == 
QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; + return pInfo->sessionSup.parentType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION || + pInfo->sessionSup.parentType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; } static bool isStateWindow(SStreamScanInfo* pInfo) { return pInfo->sessionSup.parentType == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE; } +static bool isIntervalWindow(SStreamScanInfo* pInfo) { + return pInfo->sessionSup.parentType == QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL || + pInfo->sessionSup.parentType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL; +} + static uint64_t getGroupId(SOperatorInfo* pOperator, uint64_t uid) { uint64_t* groupId = taosHashGet(pOperator->pTaskInfo->tableqinfoList.map, &uid, sizeof(int64_t)); if (groupId) { @@ -834,6 +840,49 @@ static void setGroupId(SStreamScanInfo* pInfo, SSDataBlock* pBlock, int32_t grou } } +void resetTableScanInfo(STableScanInfo* pTableScanInfo, STimeWindow* pWin) { + pTableScanInfo->cond.twindows[0] = *pWin; + pTableScanInfo->curTWinIdx = 0; + // tsdbResetReadHandle(pTableScanInfo->dataReader, &pTableScanInfo->cond, 0); + // if (!pTableScanInfo->dataReader) { + // return false; + // } + pTableScanInfo->scanTimes = 0; + pTableScanInfo->currentGroupId = -1; +} + +static bool prepareRangeScan(SStreamScanInfo* pInfo, SSDataBlock* pBlock, int32_t* pRowIndex) { + if ((*pRowIndex) == pBlock->info.rows) { + return false; + } + + ASSERT(taosArrayGetSize(pBlock->pDataBlock) >= 3); + SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); + TSKEY* startData = (TSKEY*)pStartTsCol->pData; + SColumnInfoData* pEndTsCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); + TSKEY* endData = (TSKEY*)pEndTsCol->pData; + STimeWindow win = {.skey = startData[*pRowIndex], .ekey = endData[*pRowIndex]}; + setGroupId(pInfo, pBlock, GROUPID_COLUMN_INDEX, *pRowIndex); + (*pRowIndex)++; + + for (; *pRowIndex < pBlock->info.rows; (*pRowIndex)++) { + if (win.skey == startData[*pRowIndex]) { + win.ekey = 
TMAX(win.ekey, endData[*pRowIndex]); + continue; + } + if (win.skey == endData[*pRowIndex]) { + win.skey = TMIN(win.skey, startData[*pRowIndex]); + continue; + } + ASSERT((win.skey > startData[*pRowIndex] && win.ekey < endData[*pRowIndex]) || + (isInTimeWindow(&win, startData[*pRowIndex], 0) || isInTimeWindow(&win, endData[*pRowIndex], 0))); + break; + } + + resetTableScanInfo(pInfo->pTableScanOp->info, &win); + return true; +} + static bool prepareDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_t tsColIndex, int32_t* pRowIndex) { STimeWindow win = { .skey = INT64_MIN, @@ -852,6 +901,7 @@ static bool prepareDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_t t SResultWindowInfo* pCurWin = getSessionTimeWindow(pAggSup, tsCols[*pRowIndex], INT64_MIN, pSDB->info.groupId, gap, &winIndex); win = pCurWin->win; + setGroupId(pInfo, pSDB, GROUPID_COLUMN_INDEX, *pRowIndex); (*pRowIndex) += updateSessionWindowInfo(pCurWin, tsCols, NULL, pSDB->info.rows, *pRowIndex, gap, NULL); } else { win = getActiveTimeWindow(NULL, &dumyInfo, tsCols[*pRowIndex], &pInfo->interval, pInfo->interval.precision, NULL); @@ -875,15 +925,7 @@ static bool prepareDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_t t if (!needRead) { return false; } - STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; - pTableScanInfo->cond.twindows[0] = win; - pTableScanInfo->curTWinIdx = 0; - // tsdbReaderReset(pTableScanInfo->dataReader, &pTableScanInfo->cond, 0); - // if (!pTableScanInfo->dataReader) { - // return false; - // } - pTableScanInfo->scanTimes = 0; - pTableScanInfo->currentGroupId = -1; + resetTableScanInfo(pInfo->pTableScanOp->info, &win); return true; } @@ -900,6 +942,26 @@ static void copyOneRow(SSDataBlock* dest, SSDataBlock* source, int32_t sourceRow dest->info.rows++; } +static SSDataBlock* doRangeScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_t tsColIndex, int32_t* pRowIndex) { + while (1) { + SSDataBlock* pResult = NULL; + pResult = 
doTableScan(pInfo->pTableScanOp); + if (!pResult && prepareRangeScan(pInfo, pSDB, pRowIndex)) { + // scan next window data + pResult = doTableScan(pInfo->pTableScanOp); + } + if (!pResult) { + blockDataCleanup(pSDB); + *pRowIndex = 0; + return NULL; + } + + if (pResult->info.groupId == pInfo->groupId) { + return pResult; + } + } +} + static SSDataBlock* doDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_t tsColIndex, int32_t* pRowIndex) { while (1) { SSDataBlock* pResult = NULL; @@ -931,9 +993,8 @@ static SSDataBlock* doDataScan(SStreamScanInfo* pInfo, SSDataBlock* pSDB, int32_ return pResult; */ } - -static void copyDeleteDataBlock(SStreamScanInfo* pInfo, SSDataBlock* pDelBlock, SOperatorInfo* pOperator, - SSDataBlock* pUpdateRes) { +static void generateIntervalTs(SStreamScanInfo* pInfo, SSDataBlock* pDelBlock, SOperatorInfo* pOperator, + SSDataBlock* pUpdateRes) { if (pDelBlock->info.rows == 0) { return; } @@ -948,7 +1009,7 @@ static void copyDeleteDataBlock(SStreamScanInfo* pInfo, SSDataBlock* pDelBlock, uint64_t* uidCol = (uint64_t*)pGpCol->pData; SColumnInfoData* pDestTsCol = taosArrayGet(pUpdateRes->pDataBlock, START_TS_COLUMN_INDEX); - SColumnInfoData* pDestGpCol = taosArrayGet(pUpdateRes->pDataBlock, DELETE_GROUPID_COLUMN_INDEX); + SColumnInfoData* pDestGpCol = taosArrayGet(pUpdateRes->pDataBlock, GROUPID_COLUMN_INDEX); for (int32_t i = pInfo->deleteDataIndex; i < pDelBlock->info.rows && i < pDelBlock->info.capacity - (endData[i] - startData[i]) / pInfo->interval.interval - 1; @@ -969,6 +1030,43 @@ static void copyDeleteDataBlock(SStreamScanInfo* pInfo, SSDataBlock* pDelBlock, } } +static void generateScanRange(SStreamScanInfo* pInfo, SSDataBlock* pBlock, SOperatorInfo* pOperator, + SSDataBlock* pUpdateRes) { + if (pBlock->info.rows == 0) { + return; + } + blockDataCleanup(pUpdateRes); + blockDataEnsureCapacity(pUpdateRes, pBlock->info.rows); + ASSERT(taosArrayGetSize(pBlock->pDataBlock) >= 3); + SColumnInfoData* pStartTsCol = 
taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); + TSKEY* startData = (TSKEY*)pStartTsCol->pData; + SColumnInfoData* pEndTsCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); + TSKEY* endData = (TSKEY*)pEndTsCol->pData; + SColumnInfoData* pGpCol = taosArrayGet(pBlock->pDataBlock, UID_COLUMN_INDEX); + uint64_t* uidCol = (uint64_t*)pGpCol->pData; + + SColumnInfoData* pDestStartCol = taosArrayGet(pUpdateRes->pDataBlock, START_TS_COLUMN_INDEX); + SColumnInfoData* pDestEndCol = taosArrayGet(pUpdateRes->pDataBlock, END_TS_COLUMN_INDEX); + SColumnInfoData* pDestGpCol = taosArrayGet(pUpdateRes->pDataBlock, GROUPID_COLUMN_INDEX); + int32_t dummy = 0; + for (int32_t i = 0; i < pBlock->info.rows; i++) { + uint64_t groupId = getGroupId(pOperator, uidCol[i]); + // gap must be 0. + SResultWindowInfo* pStartWin = + getCurSessionWindow(pInfo->sessionSup.pStreamAggSup, startData[i], endData[i], groupId, 0, &dummy); + if (!pStartWin) { + // window has been closed. + continue; + } + SResultWindowInfo* pEndWin = + getCurSessionWindow(pInfo->sessionSup.pStreamAggSup, endData[i], endData[i], groupId, 0, &dummy); + ASSERT(pEndWin); + colDataAppend(pDestStartCol, i, (const char*)&pStartWin->win.skey, false); + colDataAppend(pDestEndCol, i, (const char*)&pEndWin->win.ekey, false); + colDataAppend(pDestGpCol, i, (const char*)&groupId, false); + pUpdateRes->info.rows++; + } +} static void setUpdateData(SStreamScanInfo* pInfo, SSDataBlock* pBlock, SSDataBlock* pUpdateBlock) { blockDataCleanup(pUpdateBlock); int32_t size = taosArrayGetSize(pInfo->tsArray); @@ -1001,7 +1099,7 @@ static void setUpdateData(SStreamScanInfo* pInfo, SSDataBlock* pBlock, SSDataBlo } if (size == 0) { - copyDeleteDataBlock(pInfo, pInfo->pDeleteDataRes, pInfo->pTableScanOp, pUpdateBlock); + generateIntervalTs(pInfo, pInfo->pDeleteDataRes, pInfo->pTableScanOp, pUpdateBlock); } } @@ -1060,11 +1158,17 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { } break; case STREAM_DELETE_DATA: { 
pInfo->blockType = STREAM_INPUT__DATA_SUBMIT; - pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER; - copyDataBlock(pInfo->pDeleteDataRes, pBlock); - copyDeleteDataBlock(pInfo, pInfo->pDeleteDataRes, pInfo->pTableScanOp, pInfo->pUpdateRes); pInfo->updateResIndex = 0; - prepareDataScan(pInfo, pInfo->pUpdateRes, START_TS_COLUMN_INDEX, &pInfo->updateResIndex); + if (isIntervalWindow(pInfo)) { + copyDataBlock(pInfo->pDeleteDataRes, pBlock); + generateIntervalTs(pInfo, pInfo->pDeleteDataRes, pInfo->pTableScanOp, pInfo->pUpdateRes); + prepareDataScan(pInfo, pInfo->pUpdateRes, START_TS_COLUMN_INDEX, &pInfo->updateResIndex); + pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER; + } else { + generateScanRange(pInfo, pBlock, pInfo->pTableScanOp, pInfo->pUpdateRes); + prepareRangeScan(pInfo, pInfo->pUpdateRes, &pInfo->updateResIndex); + pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER_RANGE; + } pInfo->pUpdateRes->info.type = STREAM_DELETE_DATA; return pInfo->pUpdateRes; } break; @@ -1078,8 +1182,10 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; return pInfo->pRes; } else if (pInfo->scanMode == STREAM_SCAN_FROM_UPDATERES) { - pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER; - if (!isStateWindow(pInfo)) { + if (isStateWindow(pInfo)) { + pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; + } else { + pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER; prepareDataScan(pInfo, pInfo->pUpdateRes, pInfo->primaryTsIndex, &pInfo->updateResIndex); } return pInfo->pUpdateRes; @@ -1104,11 +1210,19 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { return pInfo->pUpdateRes; } pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; + } else if (pInfo->scanMode == STREAM_SCAN_FROM_DATAREADER_RANGE) { + SSDataBlock* pSDB = doRangeScan(pInfo, pInfo->pUpdateRes, pInfo->primaryTsIndex, &pInfo->updateResIndex); + if (pSDB) { + pSDB->info.type = STREAM_NORMAL; + checkUpdateData(pInfo, true, pSDB, false); + return pSDB; + } + pInfo->scanMode = 
STREAM_SCAN_FROM_READERHANDLE; } else if (isStateWindow(pInfo)) { pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER; pInfo->updateResIndex = pInfo->pUpdateRes->info.rows; if (prepareDataScan(pInfo, pInfo->pUpdateRes, pInfo->primaryTsIndex, &pInfo->updateResIndex)) { - ASSERT(pInfo->pUpdateRes->info.rows == 0); + blockDataCleanup(pInfo->pUpdateRes); // return empty data blcok return pInfo->pUpdateRes; } diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index de11b06c17..37e98693cc 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -1323,13 +1323,13 @@ void doDeleteSpecifyIntervalWindow(SAggSupporter* pAggSup, SSDataBlock* pBlock, } } -static void doClearWindows(SAggSupporter* pAggSup, SExprSupp* pSup1, SInterval* pInterval, int32_t tsIndex, +static void doClearWindows(SAggSupporter* pAggSup, SExprSupp* pSup1, SInterval* pInterval, int32_t numOfOutput, SSDataBlock* pBlock, SArray* pUpWins) { - SColumnInfoData* pTsCol = taosArrayGet(pBlock->pDataBlock, tsIndex); + SColumnInfoData* pTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); TSKEY* tsCols = (TSKEY*)pTsCol->pData; uint64_t* pGpDatas = NULL; if (pBlock->info.type == STREAM_RETRIEVE) { - SColumnInfoData* pGpCol = taosArrayGet(pBlock->pDataBlock, 2); + SColumnInfoData* pGpCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); pGpDatas = (uint64_t*)pGpCol->pData; } int32_t step = 0; @@ -1492,7 +1492,7 @@ static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { printDataBlock(pBlock, "single interval recv"); if (pBlock->info.type == STREAM_CLEAR) { - doClearWindows(&pInfo->aggSup, &pOperator->exprSupp, &pInfo->interval, 0, + doClearWindows(&pInfo->aggSup, &pOperator->exprSupp, &pInfo->interval, pOperator->exprSupp.numOfExprs, pBlock, NULL); qDebug("%s clear existed time window results for updates checked", GET_TASKID(pTaskInfo)); continue; @@ -1710,7 +1710,7 @@ 
SOperatorInfo* createIntervalOperatorInfo(SOperatorInfo* downstream, SExprInfo* pInfo->pRecycledPages = taosArrayInit(4, sizeof(int32_t)); pInfo->pDelWins = taosArrayInit(4, sizeof(SWinRes)); pInfo->delIndex = 0; - // pInfo->pDelRes = createDeleteBlock(); todo(liuyao) for delete + // pInfo->pDelRes = createPullDataBlock(); todo(liuyao) for delete pInfo->pDelRes = createOneDataBlock(pInfo->binfo.pRes, false);// todo(liuyao) for delete pInfo->pDelRes->info.type = STREAM_DELETE_RESULT;// todo(liuyao) for delete @@ -2571,13 +2571,13 @@ static void doBuildPullDataBlock(SArray* array, int32_t* pIndex, SSDataBlock* pB ASSERT(3 <= taosArrayGetSize(pBlock->pDataBlock)); for (; (*pIndex) < size; (*pIndex)++) { SPullWindowInfo* pWin = taosArrayGet(array, (*pIndex)); - SColumnInfoData* pStartTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, 0); + SColumnInfoData* pStartTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); colDataAppend(pStartTs, pBlock->info.rows, (const char*)&pWin->window.skey, false); - SColumnInfoData* pEndTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, 1); + SColumnInfoData* pEndTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); colDataAppend(pEndTs, pBlock->info.rows, (const char*)&pWin->window.ekey, false); - SColumnInfoData* pGroupId = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, 2); + SColumnInfoData* pGroupId = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); colDataAppend(pGroupId, pBlock->info.rows, (const char*)&pWin->groupId, false); pBlock->info.rows++; } @@ -2589,9 +2589,9 @@ static void doBuildPullDataBlock(SArray* array, int32_t* pIndex, SSDataBlock* pB } void processPullOver(SSDataBlock* pBlock, SHashObj* pMap) { - SColumnInfoData* pStartCol = taosArrayGet(pBlock->pDataBlock, 0); + SColumnInfoData* pStartCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); TSKEY* tsData = (TSKEY*)pStartCol->pData; - SColumnInfoData* pGroupCol = 
taosArrayGet(pBlock->pDataBlock, 2); + SColumnInfoData* pGroupCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); uint64_t* groupIdData = (uint64_t*)pGroupCol->pData; int32_t chId = getChildIndex(pBlock); for (int32_t i = 0; i < pBlock->info.rows; i++) { @@ -2680,7 +2680,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { pInfo->binfo.pRes->info.type = pBlock->info.type; } else if (pBlock->info.type == STREAM_CLEAR) { SArray* pUpWins = taosArrayInit(8, sizeof(SWinRes)); - doClearWindows(&pInfo->aggSup, pSup, &pInfo->interval, pInfo->primaryTsIndex, pOperator->exprSupp.numOfExprs, + doClearWindows(&pInfo->aggSup, pSup, &pInfo->interval, pOperator->exprSupp.numOfExprs, pBlock, pUpWins); if (IS_FINAL_OP(pInfo)) { int32_t childIndex = getChildIndex(pBlock); @@ -2688,7 +2688,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { SStreamFinalIntervalOperatorInfo* pChildInfo = pChildOp->info; SExprSupp* pChildSup = &pChildOp->exprSupp; - doClearWindows(&pChildInfo->aggSup, pChildSup, &pChildInfo->interval, pChildInfo->primaryTsIndex, + doClearWindows(&pChildInfo->aggSup, pChildSup, &pChildInfo->interval, pChildSup->numOfExprs, pBlock, NULL); rebuildIntervalWindow(pInfo, pSup, pUpWins, pInfo->binfo.pRes->info.groupId, pOperator->exprSupp.numOfExprs, pOperator->pTaskInfo, NULL); @@ -2719,7 +2719,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { continue; } else if (pBlock->info.type == STREAM_RETRIEVE && !IS_FINAL_OP(pInfo)) { SArray* pUpWins = taosArrayInit(8, sizeof(SWinRes)); - doClearWindows(&pInfo->aggSup, pSup, &pInfo->interval, 0, pOperator->exprSupp.numOfExprs, pBlock, pUpWins); + doClearWindows(&pInfo->aggSup, pSup, &pInfo->interval, pOperator->exprSupp.numOfExprs, pBlock, pUpWins); removeResults(pUpWins, pUpdated); taosArrayDestroy(pUpWins); if (taosArrayGetSize(pUpdated) > 0) { @@ -2901,7 +2901,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, 
pInfo->pPullDataMap = taosHashInit(64, hashFn, false, HASH_NO_LOCK); pInfo->pPullDataRes = createPullDataBlock(); pInfo->ignoreExpiredData = pIntervalPhyNode->window.igExpired; - // pInfo->pDelRes = createDeleteBlock(); // todo(liuyao) for delete + // pInfo->pDelRes = createPullDataBlock(); // todo(liuyao) for delete pInfo->pDelRes = createOneDataBlock(pInfo->binfo.pRes, false);// todo(liuyao) for delete pInfo->pDelRes->info.type = STREAM_DELETE_RESULT;// todo(liuyao) for delete pInfo->delIndex = 0; @@ -3046,13 +3046,14 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); pInfo->pStDeleted = taosHashInit(64, hashFn, true, HASH_NO_LOCK); pInfo->pDelIterator = NULL; - // pInfo->pDelRes = createDeleteBlock(); // todo(liuyao) for delete + // pInfo->pDelRes = createPullDataBlock(); pInfo->pDelRes = createOneDataBlock(pInfo->binfo.pRes, false);// todo(liuyao) for delete pInfo->pDelRes->info.type = STREAM_DELETE_RESULT;// todo(liuyao) for delete pInfo->pChildren = NULL; pInfo->isFinal = false; pInfo->pPhyNode = pPhyNode; pInfo->ignoreExpiredData = pSessionNode->window.igExpired; + pInfo->returnDelete = false; pOperator->name = "StreamSessionWindowAggOperator"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; @@ -3087,15 +3088,23 @@ int64_t getSessionWindowEndkey(void* data, int32_t index) { SResultWindowInfo* pWin = taosArrayGet(pWinInfos, index); return pWin->win.ekey; } -static bool isInWindow(SResultWindowInfo* pWin, TSKEY ts, int64_t gap) { - int64_t sGap = ts - pWin->win.skey; - int64_t eGap = pWin->win.ekey - ts; - if ((sGap < 0 && sGap >= -gap) || (eGap < 0 && eGap >= -gap) || (sGap >= 0 && eGap >= 0)) { + +bool isInTimeWindow(STimeWindow* pWin, TSKEY ts, int64_t gap) { + int64_t sGap = ts - pWin->skey + gap; + int64_t eGap = pWin->ekey - ts + gap; + // if ((sGap < 0 && sGap >= -gap) || (eGap < 0 && eGap >= -gap) || (sGap >= 0 && eGap >= 0)) { + 
// return true; + // } + if (sGap >= 0 && eGap >= 0) { return true; } return false; } +bool isInWindow(SResultWindowInfo* pWinInfo, TSKEY ts, int64_t gap) { + return isInTimeWindow(&pWinInfo->win, ts, gap); +} + static SResultWindowInfo* insertNewSessionWindow(SArray* pWinInfos, TSKEY ts, int32_t index) { SResultWindowInfo win = {.pos.offset = -1, .pos.pageId = -1, .win.skey = ts, .win.ekey = ts, .isOutput = false}; return taosArrayInsert(pWinInfos, index, &win); @@ -3118,6 +3127,41 @@ SArray* getWinInfos(SStreamAggSupporter* pAggSup, uint64_t groupId) { return pWinInfos; } +// don't add new window +SResultWindowInfo* getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, + int64_t gap, int32_t* pIndex) { + SArray* pWinInfos = getWinInfos(pAggSup, groupId); + pAggSup->pCurWins = pWinInfos; + + int32_t size = taosArrayGetSize(pWinInfos); + if (size == 0) { + return NULL; + } + // find the first position which is smaller than the key + int32_t index = binarySearch(pWinInfos, size, startTs, TSDB_ORDER_DESC, getSessionWindowEndkey); + SResultWindowInfo* pWin = NULL; + if (index >= 0) { + pWin = taosArrayGet(pWinInfos, index); + if (isInWindow(pWin, startTs, gap)) { + *pIndex = index; + return pWin; + } + } + + if (index + 1 < size) { + pWin = taosArrayGet(pWinInfos, index + 1); + if (isInWindow(pWin, startTs, gap)) { + *pIndex = index + 1; + return pWin; + } else if (endTs != INT64_MIN && isInWindow(pWin, endTs, gap)) { + *pIndex = index; + return pWin; + } + } + + return NULL; +} + SResultWindowInfo* getSessionTimeWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, int64_t gap, int32_t* pIndex) { SArray* pWinInfos = getWinInfos(pAggSup, groupId); @@ -3358,6 +3402,34 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData } } +void deleteWindow(SArray* pWinInfos, int32_t index) { + ASSERT(index >= 0 && index < taosArrayGetSize(pWinInfos)); + taosArrayRemove(pWinInfos, 
index); +} + +static void doDeleteSessionWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, int64_t gap, SArray* result) { + SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); + TSKEY* startDatas = (TSKEY*)pStartTsCol->pData; + SColumnInfoData* pEndTsCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); + TSKEY* endDatas = (TSKEY*)pEndTsCol->pData; + SColumnInfoData* pGroupCol = taosArrayGet(pBlock->pDataBlock, UID_COLUMN_INDEX); + uint64_t* gpDatas = (uint64_t*)pGroupCol->pData; + for (int32_t i = 0; i < pBlock->info.rows; i++) { + int32_t winIndex = 0; + while(1) { + SResultWindowInfo* pCurWin = + getCurSessionWindow(pAggSup, startDatas[i], endDatas[i], gpDatas[i], gap, &winIndex); + if (!pCurWin) { + break; + } + deleteWindow(pAggSup->pCurWins, winIndex); + if (result) { + taosArrayPush(result, pCurWin); + } + } + } +} + static void doClearSessionWindows(SStreamAggSupporter* pAggSup, SExprSupp* pSup, SSDataBlock* pBlock, int32_t tsIndex, int32_t numOfOutput, int64_t gap, SArray* result) { SColumnInfoData* pColDataInfo = taosArrayGet(pBlock->pDataBlock, tsIndex); @@ -3366,13 +3438,14 @@ static void doClearSessionWindows(SStreamAggSupporter* pAggSup, SExprSupp* pSup, for (int32_t i = 0; i < pBlock->info.rows; i += step) { int32_t winIndex = 0; SResultWindowInfo* pCurWin = - getSessionTimeWindow(pAggSup, tsCols[i], INT64_MIN, pBlock->info.groupId, gap, &winIndex); - step = updateSessionWindowInfo(pCurWin, tsCols, NULL, pBlock->info.rows, i, gap, NULL); - ASSERT(isInWindow(pCurWin, tsCols[i], gap)); - if (pCurWin->pos.pageId == -1) { + getCurSessionWindow(pAggSup, tsCols[i], INT64_MIN, pBlock->info.groupId, gap, &winIndex); + if (!pCurWin || pCurWin->pos.pageId == -1) { // window has been closed. 
+ step = 1; continue; } + step = updateSessionWindowInfo(pCurWin, tsCols, NULL, pBlock->info.rows, i, gap, NULL); + ASSERT(isInWindow(pCurWin, tsCols[i], gap)); doClearWindowImpl(&pCurWin->pos, pAggSup->pResultBuf, pSup, numOfOutput); if (result) { taosArrayPush(result, pCurWin); @@ -3407,7 +3480,7 @@ void doBuildDeleteDataBlock(SHashObj* pStDeleted, SSDataBlock* pBlock, void** It blockDataEnsureCapacity(pBlock, size); size_t keyLen = 0; while (((*Ite) = taosHashIterate(pStDeleted, *Ite)) != NULL) { - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, 0); + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); colDataAppend(pColInfoData, pBlock->info.rows, *Ite, false); for (int32_t i = 1; i < taosArrayGetSize(pBlock->pDataBlock); i++) { pColInfoData = taosArrayGet(pBlock->pDataBlock, i); @@ -3495,7 +3568,7 @@ int32_t closeSessionWindow(SHashObj* pHashMap, STimeWindowAggSupp* pTwSup, SArra pSeWin->isOutput = true; } if (delete) { - taosArrayRemove(pWins, i); + deleteWindow(pWins, i); i--; size = taosArrayGetSize(pWins); } @@ -3535,6 +3608,14 @@ int32_t getAllSessionWindow(SHashObj* pHashMap, SArray* pClosed, __get_win_info_ return TSDB_CODE_SUCCESS; } +static void copyDeleteWindowInfo(SArray* pResWins, SHashObj* pStDeleted) { + int32_t size = taosArrayGetSize(pResWins); + for (int32_t i = 0; i < size; i++) { + SResultWindowInfo* pWinInfo = taosArrayGet(pResWins, i); + taosHashPut(pStDeleted, &pWinInfo->pos, sizeof(SResultRowPosition), &pWinInfo->win.skey, sizeof(TSKEY)); + } +} + static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { SExprSupp* pSup = &pOperator->exprSupp; SStreamSessionAggOperatorInfo* pInfo = pOperator->info; @@ -3570,17 +3651,32 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { if (pBlock->info.type == STREAM_CLEAR) { SArray* pWins = taosArrayInit(16, sizeof(SResultWindowInfo)); doClearSessionWindows(&pInfo->streamAggSup, &pOperator->exprSupp, pBlock, 0, 
pOperator->exprSupp.numOfExprs, - pInfo->gap, pWins); + 0, pWins); if (IS_FINAL_OP(pInfo)) { int32_t childIndex = getChildIndex(pBlock); SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, childIndex); SStreamSessionAggOperatorInfo* pChildInfo = pChildOp->info; doClearSessionWindows(&pChildInfo->streamAggSup, &pChildOp->exprSupp, pBlock, 0, pChildOp->exprSupp.numOfExprs, - pChildInfo->gap, NULL); + 0, NULL); rebuildTimeWindow(pInfo, pWins, pBlock->info.groupId, pOperator->exprSupp.numOfExprs, pOperator); } taosArrayDestroy(pWins); continue; + } else if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT) { + SArray* pWins = taosArrayInit(16, sizeof(SResultWindowInfo)); + // gap must be 0 + doDeleteSessionWindows(&pInfo->streamAggSup, pBlock, 0, pWins); + if (IS_FINAL_OP(pInfo)) { + int32_t childIndex = getChildIndex(pBlock); + SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, childIndex); + SStreamSessionAggOperatorInfo* pChildInfo = pChildOp->info; + // gap must be 0 + doDeleteSessionWindows(&pChildInfo->streamAggSup, pBlock, 0, NULL); + rebuildTimeWindow(pInfo, pWins, pBlock->info.groupId, pOperator->exprSupp.numOfExprs, pOperator); + } + copyDeleteWindowInfo(pWins, pInfo->pStDeleted); + taosArrayDestroy(pWins); + continue; } else if (pBlock->info.type == STREAM_GET_ALL) { getAllSessionWindow(pInfo->streamAggSup.pResultRows, pUpdated, getResWinForSession); continue; @@ -3664,26 +3760,29 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { if (pOperator->status == OP_EXEC_DONE) { return NULL; } else if (pOperator->status == OP_RES_TO_RETURN) { - doBuildDeleteDataBlock(pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); - if (pInfo->pDelRes->info.rows > 0) { + doBuildResultDatablock(pOperator, pBInfo, &pInfo->groupResInfo, pInfo->streamAggSup.pResultBuf); + if (pBInfo->pRes->info.rows > 0) { + printDataBlock(pBInfo->pRes, "Semi Session"); + return pBInfo->pRes; + } + + // 
doBuildDeleteDataBlock(pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); + if (pInfo->pDelRes->info.rows > 0 && !pInfo->returnDelete) { + pInfo->returnDelete = true; printDataBlock(pInfo->pDelRes, "Semi Session"); return pInfo->pDelRes; } - doBuildResultDatablock(pOperator, pBInfo, &pInfo->groupResInfo, pInfo->streamAggSup.pResultBuf); - if (pBInfo->pRes->info.rows == 0) { - pOperator->status = OP_EXEC_DONE; - if (pInfo->pUpdateRes->info.rows == 0) { - // semi interval operator clear disk buffer - clearStreamSessionOperator(pInfo); - return NULL; - } + + if (pInfo->pUpdateRes->info.rows > 0) { // process the rest of the data pOperator->status = OP_OPENED; printDataBlock(pInfo->pUpdateRes, "Semi Session"); return pInfo->pUpdateRes; } - printDataBlock(pBInfo->pRes, "Semi Session"); - return pBInfo->pRes; + // semi interval operator clear disk buffer + clearStreamSessionOperator(pInfo); + pOperator->status = OP_EXEC_DONE; + return NULL; } _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); @@ -3699,11 +3798,17 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { if (pBlock->info.type == STREAM_CLEAR) { SArray* pWins = taosArrayInit(16, sizeof(SResultWindowInfo)); - doClearSessionWindows(&pInfo->streamAggSup, pSup, pBlock, 0, pSup->numOfExprs, pInfo->gap, pWins); + doClearSessionWindows(&pInfo->streamAggSup, pSup, pBlock, 0, pSup->numOfExprs, 0, pWins); removeSessionResults(pStUpdated, pWins); taosArrayDestroy(pWins); copyUpdateDataBlock(pInfo->pUpdateRes, pBlock, pInfo->primaryTsIndex); break; + } else if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT) { + // gap must be 0 + doDeleteSessionWindows(&pInfo->streamAggSup, pBlock, 0, NULL); + copyDataBlock(pInfo->pDelRes, pBlock); + pInfo->pDelRes->info.type = STREAM_DELETE_RESULT; + break; } else if (pBlock->info.type == STREAM_GET_ALL) { getAllSessionWindow(pInfo->streamAggSup.pResultRows, pUpdated, getResWinForSession); continue; @@ 
-3728,24 +3833,29 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { pSup->rowEntryInfoOffset); initMultiResInfoFromArrayList(&pInfo->groupResInfo, pUpdated); blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); - doBuildDeleteDataBlock(pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); - if (pInfo->pDelRes->info.rows > 0) { + + doBuildResultDatablock(pOperator, pBInfo, &pInfo->groupResInfo, pInfo->streamAggSup.pResultBuf); + if (pBInfo->pRes->info.rows > 0) { + printDataBlock(pBInfo->pRes, "Semi Session"); + return pBInfo->pRes; + } + + // doBuildDeleteDataBlock(pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); + if (pInfo->pDelRes->info.rows > 0 && !pInfo->returnDelete) { + pInfo->returnDelete = true; printDataBlock(pInfo->pDelRes, "Semi Session"); return pInfo->pDelRes; } - doBuildResultDatablock(pOperator, pBInfo, &pInfo->groupResInfo, pInfo->streamAggSup.pResultBuf); - if (pBInfo->pRes->info.rows == 0) { - pOperator->status = OP_EXEC_DONE; - if (pInfo->pUpdateRes->info.rows == 0) { - return NULL; - } + + if (pInfo->pUpdateRes->info.rows > 0) { // process the rest of the data pOperator->status = OP_OPENED; printDataBlock(pInfo->pUpdateRes, "Semi Session"); return pInfo->pUpdateRes; } - printDataBlock(pBInfo->pRes, "Semi Session"); - return pBInfo->pRes->info.rows == 0 ? 
NULL : pBInfo->pRes; + + pOperator->status = OP_EXEC_DONE; + return NULL; } SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, @@ -3971,11 +4081,6 @@ int32_t updateStateWindowInfo(SArray* pWinInfos, int32_t winIndex, TSKEY* pTs, S return rows - start; } -void deleteWindow(SArray* pWinInfos, int32_t index) { - ASSERT(index >= 0 && index < taosArrayGetSize(pWinInfos)); - taosArrayRemove(pWinInfos, index); -} - static void doClearStateWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, int32_t tsIndex, SColumn* pCol, int32_t keyIndex, SHashObj* pSeUpdated, SHashObj* pSeDeleted) { SColumnInfoData* pTsColInfo = taosArrayGet(pBlock->pDataBlock, tsIndex); @@ -4179,7 +4284,7 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); pInfo->pSeDeleted = taosHashInit(64, hashFn, true, HASH_NO_LOCK); pInfo->pDelIterator = NULL; - // pInfo->pDelRes = createDeleteBlock(); // todo(liuyao) for delete + // pInfo->pDelRes = createPullDataBlock(); // todo(liuyao) for delete pInfo->pDelRes = createOneDataBlock(pInfo->binfo.pRes, false);// todo(liuyao) for delete pInfo->pDelRes->info.type = STREAM_DELETE_RESULT;// todo(liuyao) for delete pInfo->pChildren = NULL; diff --git a/source/libs/function/src/udfd.c b/source/libs/function/src/udfd.c index 708ea4bd38..a412b589a9 100644 --- a/source/libs/function/src/udfd.c +++ b/source/libs/function/src/udfd.c @@ -549,7 +549,7 @@ int32_t udfdLoadUdf(char *udfName, SUdf *udf) { static bool udfdRpcRfp(int32_t code, tmsg_t msgType) { if (code == TSDB_CODE_RPC_REDIRECT || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_NODE_NOT_DEPLOYED || code == TSDB_CODE_SYN_NOT_LEADER || code == TSDB_CODE_APP_NOT_READY || code == TSDB_CODE_RPC_BROKEN_LINK) { - if (msgType == TDMT_SCH_QUERY || msgType == TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH) { + if (msgType == TDMT_SCH_QUERY || msgType 
== TDMT_SCH_MERGE_QUERY || msgType == TDMT_SCH_FETCH || msgType == TDMT_SCH_MERGE_FETCH) { return false; } return true; diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index a1b0cc5947..6c0717e845 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -595,6 +595,7 @@ static int32_t downstreamSourceCopy(const SDownstreamSourceNode* pSrc, SDownstre COPY_SCALAR_FIELD(taskId); COPY_SCALAR_FIELD(schedId); COPY_SCALAR_FIELD(execId); + COPY_SCALAR_FIELD(fetchMsgType); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 0bff063ea1..2a94ee43e3 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -3538,6 +3538,7 @@ static const char* jkDownstreamSourceAddr = "Addr"; static const char* jkDownstreamSourceTaskId = "TaskId"; static const char* jkDownstreamSourceSchedId = "SchedId"; static const char* jkDownstreamSourceExecId = "ExecId"; +static const char* jkDownstreamSourceFetchMsgType = "FetchMsgType"; static int32_t downstreamSourceNodeToJson(const void* pObj, SJson* pJson) { const SDownstreamSourceNode* pNode = (const SDownstreamSourceNode*)pObj; @@ -3552,6 +3553,9 @@ static int32_t downstreamSourceNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddIntegerToObject(pJson, jkDownstreamSourceExecId, pNode->execId); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddIntegerToObject(pJson, jkDownstreamSourceFetchMsgType, pNode->fetchMsgType); + } return code; } @@ -3569,6 +3573,9 @@ static int32_t jsonToDownstreamSourceNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = tjsonGetIntValue(pJson, jkDownstreamSourceExecId, &pNode->execId); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetIntValue(pJson, jkDownstreamSourceFetchMsgType, &pNode->fetchMsgType); + } return code; } diff --git 
a/source/libs/qworker/inc/qwInt.h b/source/libs/qworker/inc/qwInt.h index eb10a2fdd6..2b1e535e8c 100644 --- a/source/libs/qworker/inc/qwInt.h +++ b/source/libs/qworker/inc/qwInt.h @@ -123,6 +123,7 @@ typedef struct SQWTaskCtx { int8_t taskType; int8_t explain; int32_t queryType; + int32_t fetchType; int32_t execId; bool queryFetched; diff --git a/source/libs/qworker/inc/qwMsg.h b/source/libs/qworker/inc/qwMsg.h index 75b11c1b0b..7becaf06eb 100644 --- a/source/libs/qworker/inc/qwMsg.h +++ b/source/libs/qworker/inc/qwMsg.h @@ -35,8 +35,7 @@ int32_t qwProcessDelete(QW_FPARAMS_DEF, SQWMsg *qwMsg, SDeleteRes *pRes); int32_t qwBuildAndSendDropRsp(SRpcHandleInfo *pConn, int32_t code); int32_t qwBuildAndSendCancelRsp(SRpcHandleInfo *pConn, int32_t code); -int32_t qwBuildAndSendFetchRsp(SRpcHandleInfo *pConn, SRetrieveTableRsp *pRsp, int32_t dataLength, - int32_t code); +int32_t qwBuildAndSendFetchRsp(int32_t rspType, SRpcHandleInfo *pConn, SRetrieveTableRsp *pRsp, int32_t dataLength, int32_t code); void qwBuildFetchRsp(void *msg, SOutputData *input, int32_t len, bool qComplete); int32_t qwBuildAndSendCQueryMsg(QW_FPARAMS_DEF, SRpcHandleInfo *pConn); int32_t qwBuildAndSendQueryRsp(int32_t rspType, SRpcHandleInfo *pConn, int32_t code, STbVerInfo* tbInfo); diff --git a/source/libs/qworker/src/qwMsg.c b/source/libs/qworker/src/qwMsg.c index 8df3ac90fa..ea5aa3c563 100644 --- a/source/libs/qworker/src/qwMsg.c +++ b/source/libs/qworker/src/qwMsg.c @@ -104,7 +104,7 @@ int32_t qwBuildAndSendHbRsp(SRpcHandleInfo *pConn, SSchedulerHbRsp *pStatus, int return TSDB_CODE_SUCCESS; } -int32_t qwBuildAndSendFetchRsp(SRpcHandleInfo *pConn, SRetrieveTableRsp *pRsp, int32_t dataLength, int32_t code) { +int32_t qwBuildAndSendFetchRsp(int32_t rspType, SRpcHandleInfo *pConn, SRetrieveTableRsp *pRsp, int32_t dataLength, int32_t code) { if (NULL == pRsp) { pRsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); memset(pRsp, 0, sizeof(SRetrieveTableRsp)); @@ -112,7 +112,7 @@ int32_t 
qwBuildAndSendFetchRsp(SRpcHandleInfo *pConn, SRetrieveTableRsp *pRsp, i } SRpcMsg rpcRsp = { - .msgType = TDMT_SCH_FETCH_RSP, + .msgType = rspType, .pCont = pRsp, .contLen = sizeof(*pRsp) + dataLength, .code = code, @@ -436,7 +436,7 @@ int32_t qWorkerProcessFetchMsg(void *node, void *qWorkerMgmt, SRpcMsg *pMsg, int int64_t rId = 0; int32_t eId = msg->execId; - SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connInfo = pMsg->info}; + SQWMsg qwMsg = {.node = node, .msg = NULL, .msgLen = 0, .connInfo = pMsg->info, .msgType = pMsg->msgType}; QW_SCH_TASK_DLOG("processFetch start, node:%p, handle:%p", node, pMsg->info.handle); diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 5ba525329f..3f8d62b1aa 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -606,7 +606,7 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { qwMsg->connInfo = ctx->dataConnInfo; QW_SET_EVENT_PROCESSED(ctx, QW_EVENT_FETCH); - qwBuildAndSendFetchRsp(&qwMsg->connInfo, rsp, dataLen, code); + qwBuildAndSendFetchRsp(ctx->fetchType, &qwMsg->connInfo, rsp, dataLen, code); rsp = NULL; QW_TASK_DLOG("fetch rsp send, handle:%p, code:%x - %s, dataLen:%d", qwMsg->connInfo.handle, code, @@ -628,7 +628,7 @@ int32_t qwProcessCQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg) { rsp = NULL; qwMsg->connInfo = ctx->dataConnInfo; - qwBuildAndSendFetchRsp(&qwMsg->connInfo, NULL, 0, code); + qwBuildAndSendFetchRsp(ctx->fetchType, &qwMsg->connInfo, NULL, 0, code); QW_TASK_DLOG("fetch rsp send, handle:%p, code:%x - %s, dataLen:%d", qwMsg->connInfo.handle, code, tstrerror(code), 0); } @@ -661,6 +661,8 @@ int32_t qwProcessFetch(QW_FPARAMS_DEF, SQWMsg *qwMsg) { QW_ERR_JRET(qwGetTaskCtx(QW_FPARAMS(), &ctx)); + ctx->queryType = qwMsg->msgType; + SOutputData sOutput = {0}; QW_ERR_JRET(qwGetQueryResFromSink(QW_FPARAMS(), ctx, &dataLen, &rsp, &sOutput)); @@ -711,7 +713,7 @@ _return: } if (code || rsp) { - qwBuildAndSendFetchRsp(&qwMsg->connInfo, rsp, 
dataLen, code); + qwBuildAndSendFetchRsp(qwMsg->msgType + 1, &qwMsg->connInfo, rsp, dataLen, code); QW_TASK_DLOG("fetch rsp send, handle:%p, code:%x - %s, dataLen:%d", qwMsg->connInfo.handle, code, tstrerror(code), dataLen); } diff --git a/source/libs/qworker/test/qworkerTests.cpp b/source/libs/qworker/test/qworkerTests.cpp index bc37400249..be54db51de 100644 --- a/source/libs/qworker/test/qworkerTests.cpp +++ b/source/libs/qworker/test/qworkerTests.cpp @@ -214,7 +214,8 @@ void qwtRpcSendResponse(const SRpcMsg *pRsp) { rpcFreeCont(rsp); break; } - case TDMT_SCH_FETCH_RSP: { + case TDMT_SCH_FETCH_RSP: + case TDMT_SCH_MERGE_FETCH_RSP: { SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)pRsp->pCont; if (0 == pRsp->code && 0 == rsp->completed) { @@ -815,6 +816,7 @@ void *fetchQueueThread(void *param) { switch (fetchRpc->msgType) { case TDMT_SCH_FETCH: + case TDMT_SCH_MERGE_FETCH: qWorkerProcessFetchMsg(mockPointer, mgmt, fetchRpc, 0); break; case TDMT_SCH_CANCEL_TASK: diff --git a/source/libs/scheduler/inc/schInt.h b/source/libs/scheduler/inc/schInt.h index 8e8652aab5..7289e4b6be 100644 --- a/source/libs/scheduler/inc/schInt.h +++ b/source/libs/scheduler/inc/schInt.h @@ -35,7 +35,7 @@ extern "C" { #define SCH_DEFAULT_TASK_TIMEOUT_USEC 10000000 #define SCH_MAX_TASK_TIMEOUT_USEC 60000000 -#define SCH_TASK_MAX_EXEC_TIMES 5 +#define SCH_TASK_MAX_EXEC_TIMES 8 #define SCH_MAX_CANDIDATE_EP_NUM TSDB_MAX_REPLICA enum { @@ -318,6 +318,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_SET_JOB_NEED_FLOW_CTRL(_job) (_job)->attr.needFlowCtrl = true #define SCH_JOB_NEED_FLOW_CTRL(_job) ((_job)->attr.needFlowCtrl) #define SCH_TASK_NEED_FLOW_CTRL(_job, _task) (SCH_IS_DATA_SRC_QRY_TASK(_task) && SCH_JOB_NEED_FLOW_CTRL(_job) && SCH_IS_LEVEL_UNFINISHED((_task)->level)) +#define SCH_FETCH_TYPE(_pSrcTask) (SCH_IS_DATA_SRC_QRY_TASK(_pSrcTask) ? 
TDMT_SCH_FETCH : TDMT_SCH_MERGE_FETCH) #define SCH_SET_JOB_TYPE(_job, type) do { if ((type) != SUBPLAN_TYPE_MODIFY) { (_job)->attr.queryJob = true; } } while (0) #define SCH_IS_QUERY_JOB(_job) ((_job)->attr.queryJob) @@ -327,7 +328,7 @@ extern SSchedulerMgmt schMgmt; #define SCH_IS_EXPLAIN_JOB(_job) (EXPLAIN_MODE_ANALYZE == (_job)->attr.explainMode) #define SCH_NETWORK_ERR(_code) ((_code) == TSDB_CODE_RPC_BROKEN_LINK || (_code) == TSDB_CODE_RPC_NETWORK_UNAVAIL) #define SCH_SUB_TASK_NETWORK_ERR(_code, _len) (SCH_NETWORK_ERR(_code) && ((_len) > 0)) -#define SCH_NEED_REDIRECT_MSGTYPE(_msgType) ((_msgType) == TDMT_SCH_QUERY || (_msgType) == TDMT_SCH_MERGE_QUERY || (_msgType) == TDMT_SCH_FETCH) +#define SCH_NEED_REDIRECT_MSGTYPE(_msgType) ((_msgType) == TDMT_SCH_QUERY || (_msgType) == TDMT_SCH_MERGE_QUERY || (_msgType) == TDMT_SCH_FETCH || (_msgType) == TDMT_SCH_MERGE_FETCH) #define SCH_NEED_REDIRECT(_msgType, _code, _rspLen) (SCH_NEED_REDIRECT_MSGTYPE(_msgType) && (NEED_SCHEDULER_REDIRECT_ERROR(_code) || SCH_SUB_TASK_NETWORK_ERR(_code, _rspLen))) #define SCH_NEED_RETRY(_msgType, _code) ((SCH_NETWORK_ERR(_code) && SCH_NEED_REDIRECT_MSGTYPE(_msgType)) || (_code) == TSDB_CODE_SCH_TIMEOUT_ERROR) diff --git a/source/libs/scheduler/src/schRemote.c b/source/libs/scheduler/src/schRemote.c index bf85d09e00..e1035c4fca 100644 --- a/source/libs/scheduler/src/schRemote.c +++ b/source/libs/scheduler/src/schRemote.c @@ -44,6 +44,7 @@ int32_t schValidateReceivedMsgType(SSchJob *pJob, SSchTask *pTask, int32_t msgTy // SCH_SET_TASK_LASTMSG_TYPE(pTask, -1); return TSDB_CODE_SUCCESS; case TDMT_SCH_FETCH_RSP: + case TDMT_SCH_MERGE_FETCH_RSP: if (lastMsgType != reqMsgType && -1 != lastMsgType) { SCH_TASK_ELOG("rsp msg type mis-match, last sent msgType:%s, rspType:%s", TMSG_INFO(lastMsgType), TMSG_INFO(msgType)); @@ -304,7 +305,8 @@ int32_t schHandleResponseMsg(SSchJob *pJob, SSchTask *pTask, int32_t execId, SDa } break; } - case TDMT_SCH_FETCH_RSP: { + case TDMT_SCH_FETCH_RSP: + case 
TDMT_SCH_MERGE_FETCH_RSP: { SRetrieveTableRsp *rsp = (SRetrieveTableRsp *)msg; SCH_ERR_JRET(rspCode); @@ -558,6 +560,7 @@ int32_t schGetCallbackFp(int32_t msgType, __async_send_cb_fn_t *fp) { case TDMT_VND_DELETE: case TDMT_SCH_EXPLAIN: case TDMT_SCH_FETCH: + case TDMT_SCH_MERGE_FETCH: *fp = schHandleCallback; break; case TDMT_SCH_DROP_TASK: @@ -1016,7 +1019,8 @@ int32_t schBuildAndSendMsg(SSchJob *pJob, SSchTask *pTask, SQueryNodeAddr *addr, persistHandle = true; break; } - case TDMT_SCH_FETCH: { + case TDMT_SCH_FETCH: + case TDMT_SCH_MERGE_FETCH: { msgSize = sizeof(SResFetchReq); msg = taosMemoryCalloc(1, msgSize); if (NULL == msg) { diff --git a/source/libs/scheduler/src/schTask.c b/source/libs/scheduler/src/schTask.c index e60006d75c..45cb0ab935 100644 --- a/source/libs/scheduler/src/schTask.c +++ b/source/libs/scheduler/src/schTask.c @@ -258,7 +258,9 @@ int32_t schProcessOnTaskSuccess(SSchJob *pJob, SSchTask *pTask) { .taskId = pTask->taskId, .schedId = schMgmt.sId, .execId = pTask->execId, - .addr = pTask->succeedAddr}; + .addr = pTask->succeedAddr, + .fetchMsgType = SCH_FETCH_TYPE(pTask), + }; qSetSubplanExecutionNode(parent->plan, pTask->plan->id.groupId, &source); SCH_UNLOCK(SCH_WRITE, &parent->lock); @@ -818,7 +820,7 @@ int32_t schLaunchFetchTask(SSchJob *pJob) { return TSDB_CODE_SUCCESS; } - SCH_ERR_JRET(schBuildAndSendMsg(pJob, pJob->fetchTask, &pJob->resNode, TDMT_SCH_FETCH)); + SCH_ERR_JRET(schBuildAndSendMsg(pJob, pJob->fetchTask, &pJob->resNode, SCH_FETCH_TYPE(pJob->fetchTask))); return TSDB_CODE_SUCCESS; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index e207d89212..cc6057b031 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -648,8 +648,6 @@ void setHeartbeatTimerMS(int64_t rid, int32_t hbTimerMS) { } int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak) { - int32_t ret = 0; - SSyncNode* pSyncNode = taosAcquireRef(tsNodeRefId, rid); if (pSyncNode == NULL) { 
taosReleaseRef(tsNodeRefId, rid); @@ -657,8 +655,8 @@ int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak) { return -1; } ASSERT(rid == pSyncNode->rid); - ret = syncNodePropose(pSyncNode, pMsg, isWeak); + int32_t ret = syncNodePropose(pSyncNode, pMsg, isWeak); taosReleaseRef(tsNodeRefId, pSyncNode->rid); return ret; } @@ -669,15 +667,14 @@ int32_t syncProposeBatch(int64_t rid, SRpcMsg* pMsgArr, bool* pIsWeakArr, int32_ return -1; } - int32_t ret = 0; SSyncNode* pSyncNode = taosAcquireRef(tsNodeRefId, rid); if (pSyncNode == NULL) { terrno = TSDB_CODE_SYN_INTERNAL_ERROR; return -1; } ASSERT(rid == pSyncNode->rid); - ret = syncNodeProposeBatch(pSyncNode, pMsgArr, pIsWeakArr, arrSize); + int32_t ret = syncNodeProposeBatch(pSyncNode, pMsgArr, pIsWeakArr, arrSize); taosReleaseRef(tsNodeRefId, pSyncNode->rid); return ret; } diff --git a/tests/script/tsim/stream/state0.sim b/tests/script/tsim/stream/state0.sim index 2f2038b914..f98e356540 100644 --- a/tests/script/tsim/stream/state0.sim +++ b/tests/script/tsim/stream/state0.sim @@ -449,4 +449,53 @@ if $data26 != 14 then return -1 endi +sql create database test1 vgroups 1 +sql show databases + +print $data00 $data01 $data02 + +sql use test1 + +sql create table t1(ts timestamp, a int, b int , c int, d double, id int); +sql create stream streams2 trigger at_once into streamt1 as select _wstartts, count(*) c1, count(d) c2 , sum(a) c3 , max(a) c4, min(c) c5, max(id) c from t1 state_window(a); + +sql insert into t1 values(1648791212000,2,2,3,1.0,1); +sql insert into t1 values(1648791213000,1,2,3,1.0,1); +sql insert into t1 values(1648791213000,1,2,4,1.0,2); +$loop_count = 0 +loop5: + +sleep 300 +sql select * from streamt1 order by c desc; + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +if $rows != 2 then + print =====rows=$rows + goto loop5 +endi + +if $data01 != 1 then + print =====data01=$data01 + goto loop5 +endi + +if $data05 != 4 then + print =====data05=$data05 + goto loop5 +endi + 
+if $data11 != 1 then + print =====data11=$data11 + goto loop5 +endi + +if $data15 != 3 then + print =====data15=$data15 + goto loop5 +endi + system sh/exec.sh -n dnode1 -s stop -x SIGINT From ca3d918004b0aac5a5cdc75311c32585c8afcc3f Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 7 Jul 2022 09:58:23 +0800 Subject: [PATCH 4/7] fix(wal): mutex --- source/libs/wal/src/walWrite.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index d7fa6695d0..445cdea45b 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -84,6 +84,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { char fnameStr[WAL_FILE_LEN]; if (ver > pWal->vers.lastVer || ver < pWal->vers.commitVer) { terrno = TSDB_CODE_WAL_INVALID_VER; + taosThreadMutexUnlock(&pWal->mutex); return -1; } @@ -92,6 +93,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { // change current files code = walChangeWrite(pWal, ver); if (code < 0) { + taosThreadMutexUnlock(&pWal->mutex); return -1; } @@ -146,6 +148,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { ASSERT(taosValidFile(pLogTFile)); int64_t size = taosReadFile(pLogTFile, &head, sizeof(SWalCkHead)); if (size != sizeof(SWalCkHead)) { + taosThreadMutexUnlock(&pWal->mutex); return -1; } code = walValidHeadCksum(&head); @@ -154,11 +157,13 @@ int32_t walRollback(SWal *pWal, int64_t ver) { if (code != 0) { terrno = TSDB_CODE_WAL_FILE_CORRUPTED; ASSERT(0); + taosThreadMutexUnlock(&pWal->mutex); return -1; } if (head.head.version != ver) { ASSERT(0); terrno = TSDB_CODE_WAL_FILE_CORRUPTED; + taosThreadMutexUnlock(&pWal->mutex); return -1; } @@ -167,12 +172,14 @@ int32_t walRollback(SWal *pWal, int64_t ver) { if (code < 0) { ASSERT(0); terrno = TAOS_SYSTEM_ERROR(errno); + taosThreadMutexUnlock(&pWal->mutex); return -1; } code = taosFtruncateFile(pIdxTFile, idxOff); if (code < 0) { ASSERT(0); terrno = TAOS_SYSTEM_ERROR(errno); + taosThreadMutexUnlock(&pWal->mutex); return -1; 
} pWal->vers.lastVer = ver - 1; From 1e7d9ada190d2aa35b990aec9580b823bfd8c1d3 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 7 Jul 2022 10:55:22 +0800 Subject: [PATCH 5/7] fix(stream): clean up timer --- source/dnode/vnode/src/tq/tq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 64ef188870..7b90719e14 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -46,6 +46,7 @@ void tqCleanUp() { if (old == 1) { taosTmrCleanUp(tqMgmt.timer); + streamCleanUp(); atomic_store_8(&tqMgmt.inited, 0); } } @@ -529,19 +530,16 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, char* msg, int32_t msgLen) { if (pTask->execType != TASK_EXEC__NONE) { // expand runners if (pTask->isDataScan) { - /*SStreamReader* pStreamReader = tqInitSubmitMsgScanner(pTq->pVnode->pMeta);*/ SReadHandle handle = { .meta = pTq->pVnode->pMeta, .vnode = pTq->pVnode, .initStreamReader = 1, }; - /*pTask->exec.inputHandle = pStreamReader;*/ pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle); - ASSERT(pTask->exec.executor); } else { pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, NULL); - ASSERT(pTask->exec.executor); } + ASSERT(pTask->exec.executor); } // sink From 1ec57798d2b17e16743ea904c256e237816384b5 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 7 Jul 2022 11:42:45 +0800 Subject: [PATCH 6/7] ci: remove unstable case --- include/libs/wal/wal.h | 44 ++++++----- source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/tq/tq.c | 3 +- source/dnode/vnode/src/tq/tqMeta.c | 2 +- source/libs/sync/inc/syncRaftLog.h | 4 +- source/libs/sync/src/syncRaftLog.c | 12 +-- source/libs/wal/src/walRead.c | 110 +++++++++------------------ source/libs/wal/test/walMetaTest.cpp | 14 ++-- tests/system-test/failed.txt | 1 + tests/system-test/fulltest.sh | 2 +- 10 files changed, 79 insertions(+), 115 deletions(-) create mode 100644 
tests/system-test/failed.txt diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index e32a8d1055..9a3998ed57 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -88,7 +88,7 @@ typedef struct { EWalType level; // wal level } SWalCfg; -typedef struct SWalVer { +typedef struct { int64_t firstVer; int64_t verInSnapshotting; int64_t snapshotVer; @@ -149,17 +149,22 @@ typedef struct SWal { SWalCkHead writeHead; } SWal; // WAL HANDLE -typedef struct SWalReadHandle { - SWal *pWal; - TdFilePtr pReadLogTFile; - TdFilePtr pReadIdxTFile; - int64_t curFileFirstVer; - int64_t curVersion; - int64_t capacity; - int64_t status; // if cursor valid - TdThreadMutex mutex; - SWalCkHead *pHead; -} SWalReadHandle; +typedef struct { + int8_t scanUncommited; + int8_t scanMeta; +} SWalFilterCond; + +typedef struct { + SWal *pWal; + TdFilePtr pLogFile; + TdFilePtr pIdxFile; + int64_t curFileFirstVer; + int64_t curVersion; + int64_t capacity; + TdThreadMutex mutex; + SWalFilterCond cond; + SWalCkHead *pHead; +} SWalReader; // module initialization int32_t walInit(); @@ -186,15 +191,16 @@ int32_t walRestoreFromSnapshot(SWal *, int64_t ver); // int32_t walDataCorrupted(SWal*); // read -SWalReadHandle *walOpenReadHandle(SWal *); -void walCloseReadHandle(SWalReadHandle *); -int32_t walReadWithHandle(SWalReadHandle *pRead, int64_t ver); +SWalReader *walOpenReader(SWal *, SWalFilterCond *pCond); +void walCloseReader(SWalReader *pRead); +int32_t walReadVer(SWalReader *pRead, int64_t ver); +int32_t walNextValidMsg(SWalReader *pRead, SWalCkHead **ppHead); // only for tq usage -void walSetReaderCapacity(SWalReadHandle *pRead, int32_t capacity); -int32_t walFetchHead(SWalReadHandle *pRead, int64_t ver, SWalCkHead *pHead); -int32_t walFetchBody(SWalReadHandle *pRead, SWalCkHead **ppHead); -int32_t walSkipFetchBody(SWalReadHandle *pRead, const SWalCkHead *pHead); +void walSetReaderCapacity(SWalReader *pRead, int32_t capacity); +int32_t walFetchHead(SWalReader *pRead, int64_t ver, 
SWalCkHead *pHead); +int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead); +int32_t walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead); typedef struct { int64_t refId; diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index d10935c022..6179dd94c8 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -128,7 +128,7 @@ typedef struct { int8_t fetchMeta; // reader - SWalReadHandle* pWalReader; + SWalReader* pWalReader; // push STqPushHandle pushHandle; diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 7b90719e14..2dd80c5d52 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -149,7 +149,6 @@ int32_t tqSendDataRsp(STQ* pTq, const SRpcMsg* pMsg, const SMqPollReq* pReq, con SEncoder encoder; tEncoderInit(&encoder, abuf, len); tEncodeSMqDataRsp(&encoder, pRsp); - /*tEncodeSMqDataBlkRsp(&abuf, pRsp);*/ SRpcMsg rsp = { .info = pMsg->info, @@ -447,7 +446,7 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { pHandle->execHandle.subType = req.subType; pHandle->fetchMeta = req.withMeta; - pHandle->pWalReader = walOpenReadHandle(pTq->pVnode->pWal); + pHandle->pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); for (int32_t i = 0; i < 5; i++) { pHandle->execHandle.pExecReader[i] = tqInitSubmitMsgScanner(pTq->pVnode->pMeta); } diff --git a/source/dnode/vnode/src/tq/tqMeta.c b/source/dnode/vnode/src/tq/tqMeta.c index d6ec955f1e..c7e9c8eed6 100644 --- a/source/dnode/vnode/src/tq/tqMeta.c +++ b/source/dnode/vnode/src/tq/tqMeta.c @@ -77,7 +77,7 @@ int32_t tqMetaOpen(STQ* pTq) { STqHandle handle; tDecoderInit(&decoder, (uint8_t*)pVal, vLen); tDecodeSTqHandle(&decoder, &handle); - handle.pWalReader = walOpenReadHandle(pTq->pVnode->pWal); + handle.pWalReader = walOpenReader(pTq->pVnode->pWal, NULL); for (int32_t i = 0; i < 5; i++) { handle.execHandle.pExecReader[i] = tqInitSubmitMsgScanner(pTq->pVnode->pMeta); } diff --git 
a/source/libs/sync/inc/syncRaftLog.h b/source/libs/sync/inc/syncRaftLog.h index f3ed9e302b..65ec77e38f 100644 --- a/source/libs/sync/inc/syncRaftLog.h +++ b/source/libs/sync/inc/syncRaftLog.h @@ -32,8 +32,8 @@ typedef struct SSyncLogStoreData { SSyncNode* pSyncNode; SWal* pWal; - TdThreadMutex mutex; - SWalReadHandle* pWalHandle; + TdThreadMutex mutex; + SWalReader* pWalHandle; // SyncIndex beginIndex; // valid begin index, default 0, may be set beginIndex > 0 } SSyncLogStoreData; diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 83495e7486..c5d339b08f 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -62,7 +62,7 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) { ASSERT(pData->pWal != NULL); taosThreadMutexInit(&(pData->mutex), NULL); - pData->pWalHandle = walOpenReadHandle(pData->pWal); + pData->pWalHandle = walOpenReader(pData->pWal, NULL); ASSERT(pData->pWalHandle != NULL); pLogStore->appendEntry = logStoreAppendEntry; @@ -95,7 +95,7 @@ void logStoreDestory(SSyncLogStore* pLogStore) { taosThreadMutexLock(&(pData->mutex)); if (pData->pWalHandle != NULL) { - walCloseReadHandle(pData->pWalHandle); + walCloseReader(pData->pWalHandle); pData->pWalHandle = NULL; } taosThreadMutexUnlock(&(pData->mutex)); @@ -255,7 +255,7 @@ static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, *ppEntry = NULL; // SWalReadHandle* pWalHandle = walOpenReadHandle(pWal); - SWalReadHandle* pWalHandle = pData->pWalHandle; + SWalReader* pWalHandle = pData->pWalHandle; if (pWalHandle == NULL) { terrno = TSDB_CODE_SYN_INTERNAL_ERROR; return -1; @@ -263,7 +263,7 @@ static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, taosThreadMutexLock(&(pData->mutex)); - code = walReadWithHandle(pWalHandle, index); + code = walReadVer(pWalHandle, index); if (code != 0) { int32_t err = terrno; const char* errStr = tstrerror(err); @@ -398,10 +398,10 @@ SSyncRaftEntry* 
logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index) { taosThreadMutexLock(&(pData->mutex)); // SWalReadHandle* pWalHandle = walOpenReadHandle(pWal); - SWalReadHandle* pWalHandle = pData->pWalHandle; + SWalReader* pWalHandle = pData->pWalHandle; ASSERT(pWalHandle != NULL); - int32_t code = walReadWithHandle(pWalHandle, index); + int32_t code = walReadVer(pWalHandle, index); if (code != 0) { int32_t err = terrno; const char* errStr = tstrerror(err); diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index e7f0b31ccc..1967a75ce6 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -16,20 +16,20 @@ #include "taoserror.h" #include "walInt.h" -SWalReadHandle *walOpenReadHandle(SWal *pWal) { - SWalReadHandle *pRead = taosMemoryMalloc(sizeof(SWalReadHandle)); +SWalReader *walOpenReader(SWal *pWal, SWalFilterCond *cond) { + SWalReader *pRead = taosMemoryMalloc(sizeof(SWalReader)); if (pRead == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } pRead->pWal = pWal; - pRead->pReadIdxTFile = NULL; - pRead->pReadLogTFile = NULL; + pRead->pIdxFile = NULL; + pRead->pLogFile = NULL; pRead->curVersion = -1; pRead->curFileFirstVer = -1; pRead->capacity = 0; - pRead->status = 0; + pRead->cond = *cond; taosThreadMutexInit(&pRead->mutex, NULL); @@ -42,23 +42,24 @@ SWalReadHandle *walOpenReadHandle(SWal *pWal) { return pRead; } -void walCloseReadHandle(SWalReadHandle *pRead) { - taosCloseFile(&pRead->pReadIdxTFile); - taosCloseFile(&pRead->pReadLogTFile); +void walCloseReader(SWalReader *pRead) { + taosCloseFile(&pRead->pIdxFile); + taosCloseFile(&pRead->pLogFile); taosMemoryFreeClear(pRead->pHead); taosMemoryFree(pRead); } -int32_t walRegisterRead(SWalReadHandle *pRead, int64_t ver) { - // TODO +int32_t walNextValidMsg(SWalReader *pRead, SWalCkHead **ppHead) { + // + return 0; } -static int64_t walReadSeekFilePos(SWalReadHandle *pRead, int64_t fileFirstVer, int64_t ver) { +static int64_t walReadSeekFilePos(SWalReader 
*pRead, int64_t fileFirstVer, int64_t ver) { int64_t ret = 0; - TdFilePtr pIdxTFile = pRead->pReadIdxTFile; - TdFilePtr pLogTFile = pRead->pReadLogTFile; + TdFilePtr pIdxTFile = pRead->pIdxFile; + TdFilePtr pLogTFile = pRead->pLogFile; // seek position int64_t offset = (ver - fileFirstVer) * sizeof(SWalIdxEntry); @@ -90,11 +91,11 @@ static int64_t walReadSeekFilePos(SWalReadHandle *pRead, int64_t fileFirstVer, i return ret; } -static int32_t walReadChangeFile(SWalReadHandle *pRead, int64_t fileFirstVer) { +static int32_t walReadChangeFile(SWalReader *pRead, int64_t fileFirstVer) { char fnameStr[WAL_FILE_LEN]; - taosCloseFile(&pRead->pReadIdxTFile); - taosCloseFile(&pRead->pReadLogTFile); + taosCloseFile(&pRead->pIdxFile); + taosCloseFile(&pRead->pLogFile); walBuildLogName(pRead->pWal, fileFirstVer, fnameStr); TdFilePtr pLogTFile = taosOpenFile(fnameStr, TD_FILE_READ); @@ -104,7 +105,7 @@ static int32_t walReadChangeFile(SWalReadHandle *pRead, int64_t fileFirstVer) { return -1; } - pRead->pReadLogTFile = pLogTFile; + pRead->pLogFile = pLogTFile; walBuildIdxName(pRead->pWal, fileFirstVer, fnameStr); TdFilePtr pIdxTFile = taosOpenFile(fnameStr, TD_FILE_READ); @@ -114,11 +115,11 @@ static int32_t walReadChangeFile(SWalReadHandle *pRead, int64_t fileFirstVer) { return -1; } - pRead->pReadIdxTFile = pIdxTFile; + pRead->pIdxFile = pIdxTFile; return 0; } -static int32_t walReadSeekVer(SWalReadHandle *pRead, int64_t ver) { +static int32_t walReadSeekVer(SWalReader *pRead, int64_t ver) { SWal *pWal = pRead->pWal; if (ver == pRead->curVersion) { return 0; @@ -153,9 +154,9 @@ static int32_t walReadSeekVer(SWalReadHandle *pRead, int64_t ver) { return 0; } -void walSetReaderCapacity(SWalReadHandle *pRead, int32_t capacity) { pRead->capacity = capacity; } +void walSetReaderCapacity(SWalReader *pRead, int32_t capacity) { pRead->capacity = capacity; } -int32_t walFetchHead(SWalReadHandle *pRead, int64_t ver, SWalCkHead *pHead) { +int32_t walFetchHead(SWalReader *pRead, int64_t ver, 
SWalCkHead *pHead) { int64_t code; // TODO: valid ver @@ -168,9 +169,9 @@ int32_t walFetchHead(SWalReadHandle *pRead, int64_t ver, SWalCkHead *pHead) { if (code < 0) return -1; } - ASSERT(taosValidFile(pRead->pReadLogTFile) == true); + ASSERT(taosValidFile(pRead->pLogFile) == true); - code = taosReadFile(pRead->pReadLogTFile, pHead, sizeof(SWalCkHead)); + code = taosReadFile(pRead->pLogFile, pHead, sizeof(SWalCkHead)); if (code != sizeof(SWalCkHead)) { return -1; } @@ -186,12 +187,12 @@ int32_t walFetchHead(SWalReadHandle *pRead, int64_t ver, SWalCkHead *pHead) { return 0; } -int32_t walSkipFetchBody(SWalReadHandle *pRead, const SWalCkHead *pHead) { +int32_t walSkipFetchBody(SWalReader *pRead, const SWalCkHead *pHead) { int64_t code; ASSERT(pRead->curVersion == pHead->head.version); - code = taosLSeekFile(pRead->pReadLogTFile, pHead->head.bodyLen, SEEK_CUR); + code = taosLSeekFile(pRead->pLogFile, pHead->head.bodyLen, SEEK_CUR); if (code < 0) { terrno = TAOS_SYSTEM_ERROR(errno); pRead->curVersion = -1; @@ -203,7 +204,7 @@ int32_t walSkipFetchBody(SWalReadHandle *pRead, const SWalCkHead *pHead) { return 0; } -int32_t walFetchBody(SWalReadHandle *pRead, SWalCkHead **ppHead) { +int32_t walFetchBody(SWalReader *pRead, SWalCkHead **ppHead) { SWalCont *pReadHead = &((*ppHead)->head); int64_t ver = pReadHead->version; @@ -218,7 +219,7 @@ int32_t walFetchBody(SWalReadHandle *pRead, SWalCkHead **ppHead) { pRead->capacity = pReadHead->bodyLen; } - if (pReadHead->bodyLen != taosReadFile(pRead->pReadLogTFile, pReadHead->body, pReadHead->bodyLen)) { + if (pReadHead->bodyLen != taosReadFile(pRead->pLogFile, pReadHead->body, pReadHead->bodyLen)) { ASSERT(0); return -1; } @@ -241,9 +242,9 @@ int32_t walFetchBody(SWalReadHandle *pRead, SWalCkHead **ppHead) { return 0; } -int32_t walReadWithHandle_s(SWalReadHandle *pRead, int64_t ver, SWalCont **ppHead) { +int32_t walReadWithHandle_s(SWalReader *pRead, int64_t ver, SWalCont **ppHead) { taosThreadMutexLock(&pRead->mutex); - if 
(walReadWithHandle(pRead, ver) < 0) { + if (walReadVer(pRead, ver) < 0) { taosThreadMutexUnlock(&pRead->mutex); return -1; } @@ -257,7 +258,7 @@ int32_t walReadWithHandle_s(SWalReadHandle *pRead, int64_t ver, SWalCont **ppHea return 0; } -int32_t walReadWithHandle(SWalReadHandle *pRead, int64_t ver) { +int32_t walReadVer(SWalReader *pRead, int64_t ver) { int64_t code; if (pRead->pWal->vers.firstVer == -1) { @@ -280,9 +281,9 @@ int32_t walReadWithHandle(SWalReadHandle *pRead, int64_t ver) { return -1; } - ASSERT(taosValidFile(pRead->pReadLogTFile) == true); + ASSERT(taosValidFile(pRead->pLogFile) == true); - code = taosReadFile(pRead->pReadLogTFile, pRead->pHead, sizeof(SWalCkHead)); + code = taosReadFile(pRead->pLogFile, pRead->pHead, sizeof(SWalCkHead)); if (code != sizeof(SWalCkHead)) { if (code < 0) terrno = TAOS_SYSTEM_ERROR(errno); @@ -310,7 +311,7 @@ int32_t walReadWithHandle(SWalReadHandle *pRead, int64_t ver) { pRead->capacity = pRead->pHead->head.bodyLen; } - if ((code = taosReadFile(pRead->pReadLogTFile, pRead->pHead->head.body, pRead->pHead->head.bodyLen)) != + if ((code = taosReadFile(pRead->pLogFile, pRead->pHead->head.body, pRead->pHead->head.bodyLen)) != pRead->pHead->head.bodyLen) { if (code < 0) terrno = TAOS_SYSTEM_ERROR(errno); @@ -340,46 +341,3 @@ int32_t walReadWithHandle(SWalReadHandle *pRead, int64_t ver) { return 0; } - -#if 0 -int32_t walRead(SWal *pWal, SWalHead **ppHead, int64_t ver) { - int code; - code = walSeekVer(pWal, ver); - if (code != 0) { - return code; - } - if (*ppHead == NULL) { - void *ptr = taosMemoryRealloc(*ppHead, sizeof(SWalHead)); - if (ptr == NULL) { - return -1; - } - *ppHead = ptr; - } - if (tfRead(pWal->pWriteLogTFile, *ppHead, sizeof(SWalHead)) != sizeof(SWalHead)) { - return -1; - } - // TODO: endian compatibility processing after read - if (walValidHeadCksum(*ppHead) != 0) { - return -1; - } - void *ptr = taosMemoryRealloc(*ppHead, sizeof(SWalHead) + (*ppHead)->head.len); - if (ptr == NULL) { - 
taosMemoryFree(*ppHead); - *ppHead = NULL; - return -1; - } - if (tfRead(pWal->pWriteLogTFile, (*ppHead)->head.body, (*ppHead)->head.len) != (*ppHead)->head.len) { - return -1; - } - // TODO: endian compatibility processing after read - if (walValidBodyCksum(*ppHead) != 0) { - return -1; - } - - return 0; -} - -int32_t walReadWithFp(SWal *pWal, FWalWrite writeFp, int64_t verStart, int32_t readNum) { -return 0; -} -#endif diff --git a/source/libs/wal/test/walMetaTest.cpp b/source/libs/wal/test/walMetaTest.cpp index 89c4fd9ef2..97b9852016 100644 --- a/source/libs/wal/test/walMetaTest.cpp +++ b/source/libs/wal/test/walMetaTest.cpp @@ -292,8 +292,8 @@ TEST_F(WalCleanDeleteEnv, roll) { TEST_F(WalKeepEnv, readHandleRead) { walResetEnv(); - int code; - SWalReadHandle* pRead = walOpenReadHandle(pWal); + int code; + SWalReader* pRead = walOpenReader(pWal, NULL); ASSERT(pRead != NULL); int i; @@ -306,7 +306,7 @@ TEST_F(WalKeepEnv, readHandleRead) { } for (int i = 0; i < 1000; i++) { int ver = taosRand() % 100; - code = walReadWithHandle(pRead, ver); + code = walReadVer(pRead, ver); ASSERT_EQ(code, 0); // printf("rrbody: \n"); @@ -325,7 +325,7 @@ TEST_F(WalKeepEnv, readHandleRead) { EXPECT_EQ(newStr[j], pRead->pHead->head.body[j]); } } - walCloseReadHandle(pRead); + walCloseReader(pRead); } TEST_F(WalRetentionEnv, repairMeta1) { @@ -354,12 +354,12 @@ TEST_F(WalRetentionEnv, repairMeta1) { ASSERT_EQ(pWal->vers.lastVer, 99); - SWalReadHandle* pRead = walOpenReadHandle(pWal); + SWalReader* pRead = walOpenReader(pWal, NULL); ASSERT(pRead != NULL); for (int i = 0; i < 1000; i++) { int ver = taosRand() % 100; - code = walReadWithHandle(pRead, ver); + code = walReadVer(pRead, ver); ASSERT_EQ(code, 0); // printf("rrbody: \n"); @@ -389,7 +389,7 @@ TEST_F(WalRetentionEnv, repairMeta1) { for (int i = 0; i < 1000; i++) { int ver = taosRand() % 200; - code = walReadWithHandle(pRead, ver); + code = walReadVer(pRead, ver); ASSERT_EQ(code, 0); // printf("rrbody: \n"); diff --git 
a/tests/system-test/failed.txt b/tests/system-test/failed.txt new file mode 100644 index 0000000000..d0b66b1769 --- /dev/null +++ b/tests/system-test/failed.txt @@ -0,0 +1 @@ +#python3 ./test.py -f 2-query/last.py -Q 3 diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh index 0b91b556cc..1e305f2518 100755 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -295,7 +295,7 @@ python3 ./test.py -f 2-query/Today.py -Q 3 python3 ./test.py -f 2-query/max.py -Q 3 python3 ./test.py -f 2-query/min.py -Q 3 python3 ./test.py -f 2-query/count.py -Q 3 -python3 ./test.py -f 2-query/last.py -Q 3 +#python3 ./test.py -f 2-query/last.py -Q 3 python3 ./test.py -f 2-query/first.py -Q 3 python3 ./test.py -f 2-query/To_iso8601.py -Q 3 python3 ./test.py -f 2-query/To_unixtimestamp.py -Q 3 From 4a648d711fdf59958e62a489eaacad37467ddade Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Thu, 7 Jul 2022 13:56:47 +0800 Subject: [PATCH 7/7] feat(wal): support filter --- include/common/tcommon.h | 12 ++++ include/libs/wal/wal.h | 2 +- source/dnode/vnode/src/inc/tq.h | 9 --- source/libs/wal/inc/walInt.h | 1 + source/libs/wal/src/walRead.c | 123 ++++++++++++++++++++++++++++++-- 5 files changed, 132 insertions(+), 15 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 083ded8887..fd4ed6b180 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -32,6 +32,18 @@ enum { TMQ_CONF__RESET_OFFSET__LATEST = -1, }; +// clang-format off +#define IS_META_MSG(x) ( \ + x == TDMT_VND_CREATE_STB \ + || x == TDMT_VND_ALTER_STB \ + || x == TDMT_VND_DROP_STB \ + || x == TDMT_VND_CREATE_TABLE \ + || x == TDMT_VND_ALTER_TABLE \ + || x == TDMT_VND_DROP_TABLE \ + || x == TDMT_VND_DROP_TTL_TABLE \ +) +// clang-format on + enum { TMQ_MSG_TYPE__DUMMY = 0, TMQ_MSG_TYPE__POLL_RSP, diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index 9a3998ed57..43792b5415 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h 
@@ -194,7 +194,7 @@ int32_t walRestoreFromSnapshot(SWal *, int64_t ver); SWalReader *walOpenReader(SWal *, SWalFilterCond *pCond); void walCloseReader(SWalReader *pRead); int32_t walReadVer(SWalReader *pRead, int64_t ver); -int32_t walNextValidMsg(SWalReader *pRead, SWalCkHead **ppHead); +int32_t walNextValidMsg(SWalReader *pRead); // only for tq usage void walSetReaderCapacity(SWalReader *pRead, int32_t capacity); diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 6179dd94c8..12fb500ba3 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -40,15 +40,6 @@ extern "C" { #define tqDebug(...) do { if (tqDebugFlag & DEBUG_DEBUG) { taosPrintLog("TQ ", DEBUG_DEBUG, tqDebugFlag, __VA_ARGS__); }} while(0) #define tqTrace(...) do { if (tqDebugFlag & DEBUG_TRACE) { taosPrintLog("TQ ", DEBUG_TRACE, tqDebugFlag, __VA_ARGS__); }} while(0) -#define IS_META_MSG(x) ( \ - x == TDMT_VND_CREATE_STB \ - || x == TDMT_VND_ALTER_STB \ - || x == TDMT_VND_DROP_STB \ - || x == TDMT_VND_CREATE_TABLE \ - || x == TDMT_VND_ALTER_TABLE \ - || x == TDMT_VND_DROP_TABLE \ - || x == TDMT_VND_DROP_TTL_TABLE \ -) // clang-format on typedef struct STqOffsetStore STqOffsetStore; diff --git a/source/libs/wal/inc/walInt.h b/source/libs/wal/inc/walInt.h index c23d0802c1..2767780ff3 100644 --- a/source/libs/wal/inc/walInt.h +++ b/source/libs/wal/inc/walInt.h @@ -19,6 +19,7 @@ #include "taoserror.h" #include "tchecksum.h" #include "tcoding.h" +#include "tcommon.h" #include "tcompare.h" #include "wal.h" diff --git a/source/libs/wal/src/walRead.c b/source/libs/wal/src/walRead.c index 1967a75ce6..b5c75ce3c4 100644 --- a/source/libs/wal/src/walRead.c +++ b/source/libs/wal/src/walRead.c @@ -16,6 +16,10 @@ #include "taoserror.h" #include "walInt.h" +static int32_t walFetchHeadNew(SWalReader *pRead, int64_t fetchVer); +static int32_t walFetchBodyNew(SWalReader *pRead); +static int32_t walSkipFetchBodyNew(SWalReader *pRead); + SWalReader 
*walOpenReader(SWal *pWal, SWalFilterCond *cond) { SWalReader *pRead = taosMemoryMalloc(sizeof(SWalReader)); if (pRead == NULL) { @@ -29,7 +33,12 @@ SWalReader *walOpenReader(SWal *pWal, SWalFilterCond *cond) { pRead->curVersion = -1; pRead->curFileFirstVer = -1; pRead->capacity = 0; - pRead->cond = *cond; + if (cond) + pRead->cond = *cond; + else { + pRead->cond.scanMeta = 0; + pRead->cond.scanUncommited = 0; + } taosThreadMutexInit(&pRead->mutex, NULL); @@ -39,6 +48,7 @@ SWalReader *walOpenReader(SWal *pWal, SWalFilterCond *cond) { taosMemoryFree(pRead); return NULL; } + return pRead; } @@ -49,10 +59,28 @@ void walCloseReader(SWalReader *pRead) { taosMemoryFree(pRead); } -int32_t walNextValidMsg(SWalReader *pRead, SWalCkHead **ppHead) { - // - - return 0; +int32_t walNextValidMsg(SWalReader *pRead) { + int64_t fetchVer = pRead->curVersion; + int64_t endVer = pRead->cond.scanUncommited ? walGetLastVer(pRead->pWal) : walGetCommittedVer(pRead->pWal); + while (fetchVer <= endVer) { + if (walFetchHeadNew(pRead, fetchVer) < 0) { + return -1; + } + if (pRead->pHead->head.msgType == TDMT_VND_SUBMIT || + (IS_META_MSG(pRead->pHead->head.msgType) && pRead->cond.scanMeta)) { + if (walFetchBodyNew(pRead) < 0) { + return -1; + } + return 0; + } else { + if (walSkipFetchBodyNew(pRead) < 0) { + return -1; + } + fetchVer++; + ASSERT(fetchVer == pRead->curVersion); + } + } + return -1; } static int64_t walReadSeekFilePos(SWalReader *pRead, int64_t fileFirstVer, int64_t ver) { @@ -156,6 +184,91 @@ static int32_t walReadSeekVer(SWalReader *pRead, int64_t ver) { void walSetReaderCapacity(SWalReader *pRead, int32_t capacity) { pRead->capacity = capacity; } +static int32_t walFetchHeadNew(SWalReader *pRead, int64_t fetchVer) { + int64_t contLen; + if (pRead->curVersion != fetchVer) { + if (walReadSeekVer(pRead, fetchVer) < 0) return -1; + } + contLen = taosReadFile(pRead->pLogFile, pRead->pHead, sizeof(SWalCkHead)); + if (contLen != sizeof(SWalCkHead)) { + if (contLen < 0) { + terrno = 
TAOS_SYSTEM_ERROR(errno); + } else { + terrno = TSDB_CODE_WAL_FILE_CORRUPTED; + } + pRead->curVersion = -1; + return -1; + } + return 0; +} + +static int32_t walFetchBodyNew(SWalReader *pRead) { + SWalCont *pReadHead = &pRead->pHead->head; + int64_t ver = pReadHead->version; + + if (pRead->capacity < pReadHead->bodyLen) { + void *ptr = taosMemoryRealloc(pRead->pHead, sizeof(SWalCkHead) + pReadHead->bodyLen); + if (ptr == NULL) { + terrno = TSDB_CODE_WAL_OUT_OF_MEMORY; + return -1; + } + pRead->pHead = ptr; + pReadHead = &pRead->pHead->head; + pRead->capacity = pReadHead->bodyLen; + } + + if (pReadHead->bodyLen != taosReadFile(pRead->pLogFile, pReadHead->body, pReadHead->bodyLen)) { + if (pReadHead->bodyLen < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + wError("wal fetch body error: %" PRId64 ", read request version:%" PRId64 ", since %s", + pRead->pHead->head.version, ver, tstrerror(terrno)); + } else { + wError("wal fetch body error: %" PRId64 ", read request version:%" PRId64 ", since file corrupted", + pRead->pHead->head.version, ver); + terrno = TSDB_CODE_WAL_FILE_CORRUPTED; + } + pRead->curVersion = -1; + ASSERT(0); + return -1; + } + + if (pReadHead->version != ver) { + wError("wal fetch body error: %" PRId64 ", read request version:%" PRId64 "", pRead->pHead->head.version, ver); + pRead->curVersion = -1; + terrno = TSDB_CODE_WAL_FILE_CORRUPTED; + ASSERT(0); + return -1; + } + + if (walValidBodyCksum(pRead->pHead) != 0) { + wError("wal fetch body error: % " PRId64 ", since body checksum not passed", ver); + pRead->curVersion = -1; + terrno = TSDB_CODE_WAL_FILE_CORRUPTED; + ASSERT(0); + return -1; + } + + pRead->curVersion = ver + 1; + return 0; +} + +static int32_t walSkipFetchBodyNew(SWalReader *pRead) { + int64_t code; + + ASSERT(pRead->curVersion == pRead->pHead->head.version); + + code = taosLSeekFile(pRead->pLogFile, pRead->pHead->head.bodyLen, SEEK_CUR); + if (code < 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + pRead->curVersion = -1; + return -1; + } + + 
pRead->curVersion++; + + return 0; +} + int32_t walFetchHead(SWalReader *pRead, int64_t ver, SWalCkHead *pHead) { int64_t code;